Make parse simple patterns ignore fields with placeholder (_) (#15873)

# Description
Simple `parse` patterns let you quickly put together simple parsers, but
sometimes you aren't actually interested in some of the output (such as
variable whitespace). This PR lets you use `{_}` to discard part of the
input.

Example:
```nushell
"hello world" | parse "{foo} {_}"
# => ╭───┬───────╮
# => │ # │  foo  │
# => ├───┼───────┤
# => │ 0 │ hello │
# => ╰───┴───────╯
```

Here's a simple parser for the output of the `apropos` command, using the
`_` placeholder to discard the variable whitespace without needing to
resort to a full regex pattern:

```nushell
apropos linux | parse "{name} ({section}) {_}- {topic}"
# => ╭───┬───────────────────────────────────────┬─────────┬─────────────────────────────────────────────────────────────────────╮
# => │ # │                 name                  │ section │                                topic                                │
# => ├───┼───────────────────────────────────────┼─────────┼─────────────────────────────────────────────────────────────────────┤
# => │ 0 │ PAM                                   │ 8       │ Pluggable Authentication Modules for Linux                          │
# => │ 1 │ aarch64-linux-gnu-addr2line           │ 1       │ convert addresses or symbol+offset into file names and line numbers │
# => │ 2 │ ...                                   │ ...     │ ...                                                                 │
# => │ 3 │ xcb_selinux_set_window_create_context │ 3       │ (unknown subject)                                                   │
# => │ 4 │ xorriso-dd-target                     │ 1       │ Device evaluator and disk image copier for GNU/Linux                │
# => ╰───┴───────────────────────────────────────┴─────────┴─────────────────────────────────────────────────────────────────────╯
```

# User-Facing Changes
* `parse` simple patterns can now discard input using `{_}`

# Tests + Formatting
N/A

# After Submitting
N/A
This commit is contained in:
132ikl 2025-06-02 20:11:05 -04:00 committed by GitHub
parent 6eacbabe17
commit eb9eb09ac5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 54 deletions

View File

@ -45,6 +45,13 @@ impl Command for Parse {
"bar" => Value::test_string("there"), "bar" => Value::test_string("there"),
})])), })])),
}, },
Example {
description: "Parse a string, ignoring a column with _",
example: "\"hello world\" | parse \"{foo} {_}\"",
result: Some(Value::test_list(vec![Value::test_record(record! {
"foo" => Value::test_string("hello"),
})])),
},
Example { Example {
description: "This is how the first example is interpreted in the source code", description: "This is how the first example is interpreted in the source code",
example: "\"hi there\" | parse --regex '(?s)\\A(?P<foo>.*?) (?P<bar>.*?)\\z'", example: "\"hi there\" | parse --regex '(?s)\\A(?P<foo>.*?) (?P<bar>.*?)\\z'",
@ -277,9 +284,17 @@ fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
} }
if !column.is_empty() { if !column.is_empty() {
output.push_str("(?P<"); output.push_str("(?");
output.push_str(&column); if column == "_" {
output.push_str(">.*?)"); // discard placeholder column(s)
output.push(':');
} else {
// create capture group for column
output.push_str("P<");
output.push_str(&column);
output.push('>');
}
output.push_str(".*?)");
} }
if before.is_empty() && column.is_empty() { if before.is_empty() && column.is_empty() {

View File

@ -7,7 +7,7 @@ mod simple {
#[test] #[test]
fn extracts_fields_from_the_given_the_pattern() { fn extracts_fields_from_the_given_the_pattern() {
Playground::setup("parse_test_1", |dirs, sandbox| { Playground::setup("parse_test_simple_1", |dirs, sandbox| {
sandbox.with_files(&[Stub::FileWithContentToBeTrimmed( sandbox.with_files(&[Stub::FileWithContentToBeTrimmed(
"key_value_separated_arepa_ingredients.txt", "key_value_separated_arepa_ingredients.txt",
r#" r#"
@ -35,71 +35,74 @@ mod simple {
#[test] #[test]
fn double_open_curly_evaluates_to_a_single_curly() { fn double_open_curly_evaluates_to_a_single_curly() {
Playground::setup("parse_test_regex_2", |dirs, _sandbox| { let actual = nu!(pipeline(
let actual = nu!( r#"
cwd: dirs.test(), pipeline( echo "{abc}123"
r#" | parse "{{abc}{name}"
echo "{abc}123" | get name.0
| parse "{{abc}{name}" "#
| get name.0 ));
"# assert_eq!(actual.out, "123");
));
assert_eq!(actual.out, "123");
})
} }
#[test] #[test]
fn properly_escapes_text() { fn properly_escapes_text() {
Playground::setup("parse_test_regex_3", |dirs, _sandbox| { let actual = nu!(pipeline(
let actual = nu!( r#"
cwd: dirs.test(), pipeline( echo "(abc)123"
r#" | parse "(abc){name}"
echo "(abc)123" | get name.0
| parse "(abc){name}" "#
| get name.0 ));
"#
));
assert_eq!(actual.out, "123"); assert_eq!(actual.out, "123");
})
} }
#[test] #[test]
fn properly_captures_empty_column() { fn properly_captures_empty_column() {
Playground::setup("parse_test_regex_4", |dirs, _sandbox| { let actual = nu!(pipeline(
let actual = nu!( r#"
cwd: dirs.test(), pipeline( echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"]
r#" | parse "{timestamp}:{level}:{tag}:{entry}"
echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"] | get entry
| parse "{timestamp}:{level}:{tag}:{entry}" | get 1
| get entry "#
| get 1 ));
"#
));
assert_eq!(actual.out, "something bad happened"); assert_eq!(actual.out, "something bad happened");
})
} }
#[test] #[test]
fn errors_when_missing_closing_brace() { fn errors_when_missing_closing_brace() {
Playground::setup("parse_test_regex_5", |dirs, _sandbox| { let actual = nu!(pipeline(
let actual = nu!( r#"
cwd: dirs.test(), pipeline( echo "(abc)123"
r#" | parse "(abc){name"
echo "(abc)123" | get name
| parse "(abc){name" "#
| get name ));
"#
));
assert!( assert!(
actual actual
.err .err
.contains("Found opening `{` without an associated closing `}`") .contains("Found opening `{` without an associated closing `}`")
); );
}) }
#[test]
fn ignore_multiple_placeholder() {
let actual = nu!(pipeline(
r#"
echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"]
| parse "{_}:{level}:{_}:{entry}"
| to json -r
"#
));
assert_eq!(
actual.out,
r#"[{"level":"INFO","entry":"all is well"},{"level":"ERROR","entry":"something bad happened"}]"#
);
} }
} }