From eb9eb09ac50b426835c83e46ce3cab66f7d9bace Mon Sep 17 00:00:00 2001 From: 132ikl <132@ikl.sh> Date: Mon, 2 Jun 2025 20:11:05 -0400 Subject: [PATCH] Make `parse` simple patterns ignore fields with placeholder (`_`) (#15873) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Simple `parse` patterns let you quickly put together simple parsers, but sometimes you aren't actually interested in some of the output (such as variable whitespace). This PR lets you use `{_}` to discard part of the input. Example: ```nushell "hello world" | parse "{foo} {_}" # => ╭───┬───────╮ # => │ # │ foo │ # => ├───┼───────┤ # => │ 0 │ hello │ # => ╰───┴───────╯ ``` Here's a simple parser for the output of `apropos`, using the `_` placeholder to discard the variable whitespace without needing to resort to a full regex pattern: ```nushell apropos linux | parse "{name} ({section}) {_}- {topic}" # => ╭───┬───────────────────────────────────────┬─────────┬─────────────────────────────────────────────────────────────────────╮ # => │ # │ name │ section │ topic │ # => ├───┼───────────────────────────────────────┼─────────┼─────────────────────────────────────────────────────────────────────┤ # => │ 0 │ PAM │ 8 │ Pluggable Authentication Modules for Linux │ # => │ 1 │ aarch64-linux-gnu-addr2line │ 1 │ convert addresses or symbol+offset into file names and line numbers │ # => │ 2 │ ... │ ... │ ... 
│ # => │ 3 │ xcb_selinux_set_window_create_context │ 3 │ (unknown subject) │ # => │ 4 │ xorriso-dd-target │ 1 │ Device evaluator and disk image copier for GNU/Linux │ # => ╰───┴───────────────────────────────────────┴─────────┴─────────────────────────────────────────────────────────────────────╯ ``` # User-Facing Changes * `parse` simple patterns can now discard input using `{_}` # Tests + Formatting N/A # After Submitting N/A --- crates/nu-command/src/strings/parse.rs | 21 ++++- crates/nu-command/tests/commands/parse.rs | 105 +++++++++++----------- 2 files changed, 72 insertions(+), 54 deletions(-) diff --git a/crates/nu-command/src/strings/parse.rs b/crates/nu-command/src/strings/parse.rs index c8e52c8e87..80e7e3f6e9 100644 --- a/crates/nu-command/src/strings/parse.rs +++ b/crates/nu-command/src/strings/parse.rs @@ -45,6 +45,13 @@ impl Command for Parse { "bar" => Value::test_string("there"), })])), }, + Example { + description: "Parse a string, ignoring a column with _", + example: "\"hello world\" | parse \"{foo} {_}\"", + result: Some(Value::test_list(vec![Value::test_record(record! { + "foo" => Value::test_string("hello"), + })])), + }, Example { description: "This is how the first example is interpreted in the source code", example: "\"hi there\" | parse --regex '(?s)\\A(?P.*?) 
(?P.*?)\\z'", @@ -277,9 +284,17 @@ fn build_regex(input: &str, span: Span) -> Result { } if !column.is_empty() { - output.push_str("(?P<"); - output.push_str(&column); - output.push_str(">.*?)"); + output.push_str("(?"); + if column == "_" { + // discard placeholder column(s) + output.push(':'); + } else { + // create capture group for column + output.push_str("P<"); + output.push_str(&column); + output.push('>'); + } + output.push_str(".*?)"); } if before.is_empty() && column.is_empty() { diff --git a/crates/nu-command/tests/commands/parse.rs b/crates/nu-command/tests/commands/parse.rs index a5dafa4a38..3ee15922ce 100644 --- a/crates/nu-command/tests/commands/parse.rs +++ b/crates/nu-command/tests/commands/parse.rs @@ -7,7 +7,7 @@ mod simple { #[test] fn extracts_fields_from_the_given_the_pattern() { - Playground::setup("parse_test_1", |dirs, sandbox| { + Playground::setup("parse_test_simple_1", |dirs, sandbox| { sandbox.with_files(&[Stub::FileWithContentToBeTrimmed( "key_value_separated_arepa_ingredients.txt", r#" @@ -35,71 +35,74 @@ mod simple { #[test] fn double_open_curly_evaluates_to_a_single_curly() { - Playground::setup("parse_test_regex_2", |dirs, _sandbox| { - let actual = nu!( - cwd: dirs.test(), pipeline( - r#" - echo "{abc}123" - | parse "{{abc}{name}" - | get name.0 - "# - )); - - assert_eq!(actual.out, "123"); - }) + let actual = nu!(pipeline( + r#" + echo "{abc}123" + | parse "{{abc}{name}" + | get name.0 + "# + )); + assert_eq!(actual.out, "123"); } #[test] fn properly_escapes_text() { - Playground::setup("parse_test_regex_3", |dirs, _sandbox| { - let actual = nu!( - cwd: dirs.test(), pipeline( - r#" - echo "(abc)123" - | parse "(abc){name}" - | get name.0 - "# - )); + let actual = nu!(pipeline( + r#" + echo "(abc)123" + | parse "(abc){name}" + | get name.0 + "# + )); - assert_eq!(actual.out, "123"); - }) + assert_eq!(actual.out, "123"); } #[test] fn properly_captures_empty_column() { - Playground::setup("parse_test_regex_4", |dirs, _sandbox| { - 
let actual = nu!( - cwd: dirs.test(), pipeline( - r#" - echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"] - | parse "{timestamp}:{level}:{tag}:{entry}" - | get entry - | get 1 - "# - )); + let actual = nu!(pipeline( + r#" + echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"] + | parse "{timestamp}:{level}:{tag}:{entry}" + | get entry + | get 1 + "# + )); - assert_eq!(actual.out, "something bad happened"); - }) + assert_eq!(actual.out, "something bad happened"); } #[test] fn errors_when_missing_closing_brace() { - Playground::setup("parse_test_regex_5", |dirs, _sandbox| { - let actual = nu!( - cwd: dirs.test(), pipeline( - r#" - echo "(abc)123" - | parse "(abc){name" - | get name - "# - )); + let actual = nu!(pipeline( + r#" + echo "(abc)123" + | parse "(abc){name" + | get name + "# + )); - assert!( - actual - .err - .contains("Found opening `{` without an associated closing `}`") - ); - }) + assert!( + actual + .err + .contains("Found opening `{` without an associated closing `}`") + ); + } + + #[test] + fn ignore_multiple_placeholder() { + let actual = nu!(pipeline( + r#" + echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"] + | parse "{_}:{level}:{_}:{entry}" + | to json -r + "# + )); + + assert_eq!( + actual.out, + r#"[{"level":"INFO","entry":"all is well"},{"level":"ERROR","entry":"something bad happened"}]"# + ); } }