nushell/crates/nu-command/tests/commands/parse.rs
panicbit 56ed1eb807
parse: collect external stream chunks before matching (#9950)
<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx
-->

# Description
This PR implements the workaround discussed in #9795, i.e. having
`parse` collect an external stream before operating on it with a regex.

- Should close #9795 

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
- `parse` will give the correct output for external streams
- increased memory and time overhead due to collecting the entire stream
(no short-circuiting)

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->
- formatting is checked
- clippy is happy
- no tests fail other than those that were already broken
- added test case

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
2023-08-08 06:48:13 -05:00

230 lines
6.5 KiB
Rust

use nu_test_support::fs::Stub;
use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline};
/// Tests for `parse` when it is given a brace-delimited pattern such as
/// `"{Name}={Value}"` (i.e. without `--regex`).
///
/// The multi-line raw strings below are kept flush with the left margin so
/// that their contents stay exactly as they were.
mod simple {
    use super::*;

    /// Splits each line of a `KEY=VALUE` file with the pattern
    /// `{Name}={Value}` and checks the `Value` column of the row at index 1.
    #[test]
    fn extracts_fields_from_the_given_the_pattern() {
        Playground::setup("parse_test_1", |paths, playground| {
            // Fixture: three `VARn=...` lines.
            let ingredients = Stub::FileWithContentToBeTrimmed(
                "key_value_separated_arepa_ingredients.txt",
                r#"
VAR1=Cheese
VAR2=JTParsed
VAR3=NushellSecretIngredient
"#,
            );
            playground.with_files(vec![ingredients]);

            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
open key_value_separated_arepa_ingredients.txt
| lines
| each { |it| echo $it | parse "{Name}={Value}" }
| flatten
| get 1
| get Value
"#
            ));

            assert_eq!(outcome.out, "JTParsed");
        });
    }

    /// A doubled `{{` in the pattern is expected to match one literal `{`
    /// in the input, so `{name}` captures what follows `{abc}`.
    ///
    /// NOTE(review): the topic "parse_test_regex_2" is also used by
    /// `regex::extracts_fields_with_all_unnamed_groups`; confirm that
    /// `Playground::setup` isolates same-named sandboxes.
    #[test]
    fn double_open_curly_evaluates_to_a_single_curly() {
        Playground::setup("parse_test_regex_2", |paths, _| {
            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
echo "{abc}123"
| parse "{{abc}{name}"
| get name.0
"#
            ));

            assert_eq!(outcome.out, "123");
        });
    }

    /// Parentheses in the pattern are expected to match literally, so
    /// `(abc){name}` applied to `(abc)123` yields `123` for `name`.
    ///
    /// NOTE(review): the topic "parse_test_regex_3" is also used by
    /// `regex::extracts_fields_with_named_and_unnamed_groups`; confirm that
    /// `Playground::setup` isolates same-named sandboxes.
    #[test]
    fn properly_escapes_text() {
        Playground::setup("parse_test_regex_3", |paths, _| {
            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
echo "(abc)123"
| parse "(abc){name}"
| get name.0
"#
            ));

            assert_eq!(outcome.out, "123");
        });
    }

    /// The second input line has nothing between two of its colons (an empty
    /// `tag`); the test checks that its `entry` column is still captured.
    #[test]
    fn properly_captures_empty_column() {
        Playground::setup("parse_test_regex_4", |paths, _| {
            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"]
| parse "{timestamp}:{level}:{tag}:{entry}"
| get entry
| get 1
"#
            ));

            assert_eq!(outcome.out, "something bad happened");
        });
    }

    /// A pattern whose `{name` is never closed must produce an error message
    /// mentioning the unmatched opening brace.
    #[test]
    fn errors_when_missing_closing_brace() {
        Playground::setup("parse_test_regex_5", |paths, _| {
            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
echo "(abc)123"
| parse "(abc){name"
| get name
"#
            ));

            assert!(outcome
                .err
                .contains("Found opening `{` without an associated closing `}`"));
        });
    }
}
/// Tests for `parse --regex`, plus checks of how `parse` behaves on streamed
/// input.
///
/// The multi-line raw strings below are kept flush with the left margin so
/// that their contents stay exactly as they were.
mod regex {
    use super::*;

    /// Builds the fixture used by several tests in this module: a single file,
    /// `nushell_git_log_oneline.txt`, holding two one-line git-log entries.
    fn nushell_git_log_oneline<'a>() -> Vec<Stub<'a>> {
        let git_log = Stub::FileWithContentToBeTrimmed(
            "nushell_git_log_oneline.txt",
            r#"
ae87582c Fix missing invocation errors (#1846)
b89976da let format access variables also (#1842)
"#,
        );

        vec![git_log]
    }

    /// With every group named, the named column `PR` of the row at index 1
    /// must hold that line's pull-request number.
    ///
    /// NOTE(review): the topic "parse_test_regex_1" is also used by
    /// `errors_with_invalid_regex` below; confirm that `Playground::setup`
    /// isolates same-named sandboxes.
    #[test]
    fn extracts_fields_with_all_named_groups() {
        Playground::setup("parse_test_regex_1", |paths, playground| {
            playground.with_files(nushell_git_log_oneline());

            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\\w+) (?P<Message>.+) \\(#(?P<PR>\\d+)\\)"
| get 1
| get PR
"#
            ));

            assert_eq!(outcome.out, "1842");
        });
    }

    /// With no group named, the first group is read back as `capture0`; for
    /// the row at index 1 that is the commit hash.
    #[test]
    fn extracts_fields_with_all_unnamed_groups() {
        Playground::setup("parse_test_regex_2", |paths, playground| {
            playground.with_files(nushell_git_log_oneline());

            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
open nushell_git_log_oneline.txt
| parse --regex "(\\w+) (.+) \\(#(\\d+)\\)"
| get 1
| get capture0
"#
            ));

            assert_eq!(outcome.out, "b89976da");
        });
    }

    /// With named and unnamed groups mixed, the unnamed second group is read
    /// back as `capture1`; for the row at index 1 that is the commit message.
    #[test]
    fn extracts_fields_with_named_and_unnamed_groups() {
        Playground::setup("parse_test_regex_3", |paths, playground| {
            playground.with_files(nushell_git_log_oneline());

            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\\w+) (.+) \\(#(?P<PR>\\d+)\\)"
| get 1
| get capture1
"#
            ));

            assert_eq!(outcome.out, "let format access variables also");
        });
    }

    /// A regex with an unclosed capture group must produce an error message
    /// about the missing closing parenthesis.
    #[test]
    fn errors_with_invalid_regex() {
        Playground::setup("parse_test_regex_1", |paths, playground| {
            playground.with_files(nushell_git_log_oneline());

            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\\w+ unfinished capture group"
"#
            ));

            assert!(outcome
                .err
                .contains("Opening parenthesis without closing parenthesis"));
        });
    }

    /// `describe` on the output of `parse` fed by `each` must report a
    /// streamed table with the two captured string columns.
    #[test]
    fn parse_works_with_streaming() {
        let outcome =
            nu!(r#"seq char a z | each {|c| $c + " a"} | parse '{letter} {a}' | describe"#);

        assert_eq!(outcome.out, "table<letter: string, a: string> (stream)");
    }

    /// Of the three values `a`, `b`, `c` passed through `each`, the two that
    /// match `[ac]` must both be counted by `length`.
    #[test]
    fn parse_does_not_truncate_list_streams() {
        let outcome = nu!(pipeline(
            r#"
[a b c]
| each {|x| $x}
| parse --regex "[ac]"
| length
"#
        ));

        assert_eq!(outcome.out, "2");
    }

    /// Saves the alphabet repeated 1000 times to `data.txt`, reads it back with
    /// `open`, and expects the regex to match exactly 1000 times. Per the test
    /// name, this guards against matches being lost at stream chunk boundaries.
    #[test]
    fn parse_handles_external_stream_chunking() {
        Playground::setup("parse_test_streaming_1", |paths, _| {
            let outcome = nu!(
                cwd: paths.test(), pipeline(
                r#"
"abcdefghijklmnopqrstuvwxyz" * 1000 | save --force data.txt;
open data.txt | parse --regex "(abcdefghijklmnopqrstuvwxyz)" | length
"#
            ));

            assert_eq!(outcome.out, "1000");
        });
    }
}