diff --git a/crates/nu-cli/tests/commands/parse.rs b/crates/nu-cli/tests/commands/parse.rs index 07d8949ef..aa14faf62 100644 --- a/crates/nu-cli/tests/commands/parse.rs +++ b/crates/nu-cli/tests/commands/parse.rs @@ -1,11 +1,11 @@ -use nu_test_support::fs::Stub::FileWithContentToBeTrimmed; +use nu_test_support::fs::Stub; use nu_test_support::playground::Playground; use nu_test_support::{nu, pipeline}; #[test] fn extracts_fields_from_the_given_the_pattern() { Playground::setup("parse_test_1", |dirs, sandbox| { - sandbox.with_files(vec![FileWithContentToBeTrimmed( + sandbox.with_files(vec![Stub::FileWithContentToBeTrimmed( "key_value_separated_arepa_ingredients.txt", r#" VAR1=Cheese @@ -28,3 +28,77 @@ fn extracts_fields_from_the_given_the_pattern() { assert_eq!(actual.out, "JonathanParsed"); }) } + +mod regex { + use super::*; + + fn nushell_git_log_oneline<'a>() -> Vec> { + vec![Stub::FileWithContentToBeTrimmed( + "nushell_git_log_oneline.txt", + r#" + ae87582c Fix missing invocation errors (#1846) + b89976da let format access variables also (#1842) + "#, + )] + } + + #[test] + fn extracts_fields_with_all_named_groups() { + Playground::setup("parse_test_regex_1", |dirs, sandbox| { + sandbox.with_files(nushell_git_log_oneline()); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open nushell_git_log_oneline.txt + | parse --regex "(?P\w+) (?P.+) \(#(?P\d+)\)" + | nth 1 + | get PR + | echo $it + "# + )); + + assert_eq!(actual.out, "1842"); + }) + } + + #[test] + fn extracts_fields_with_all_unnamed_groups() { + Playground::setup("parse_test_regex_2", |dirs, sandbox| { + sandbox.with_files(nushell_git_log_oneline()); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open nushell_git_log_oneline.txt + | parse --regex "(\w+) (.+) \(#(\d+)\)" + | nth 1 + | get Capture1 + | echo $it + "# + )); + + assert_eq!(actual.out, "b89976da"); + }) + } + + #[test] + fn extracts_fields_with_named_and_unnamed_groups() { + Playground::setup("parse_test_regex_3", |dirs, sandbox| { + sandbox.with_files(nushell_git_log_oneline()); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open nushell_git_log_oneline.txt + | parse --regex "(?P\w+) (.+) \(#(?P\d+)\)" + | nth 1 + | get Capture2 + | echo $it + "# + )); + + assert_eq!(actual.out, "let format access variables also"); + }) + } +} diff --git a/crates/nu_plugin_parse/src/lib.rs b/crates/nu_plugin_parse/src/lib.rs index b5bd5842d..d3c29cb0d 100644 --- a/crates/nu_plugin_parse/src/lib.rs +++ b/crates/nu_plugin_parse/src/lib.rs @@ -1,4 +1,4 @@ mod nu; mod parse; -pub use parse::Parse; +pub use parse::{ColumnNames, Parse}; diff --git a/crates/nu_plugin_parse/src/nu/mod.rs b/crates/nu_plugin_parse/src/nu/mod.rs index 7e0aae0b2..90ba204e1 100644 --- a/crates/nu_plugin_parse/src/nu/mod.rs +++ b/crates/nu_plugin_parse/src/nu/mod.rs @@ -5,12 +5,13 @@ use nu_protocol::{ UntaggedValue, Value, }; -use crate::Parse; +use crate::{ColumnNames, Parse}; use regex::Regex; impl Plugin for Parse { fn config(&mut self) -> Result { Ok(Signature::build("parse") + .switch("regex", "use full regex syntax for patterns", Some('r')) .required( "pattern", SyntaxShape::String, @@ -20,24 +21,34 @@ impl Plugin for Parse { } fn begin_filter(&mut self, call_info: CallInfo) -> Result, ShellError> { - if let Some(args) = call_info.args.positional { + if let Some(ref args) = &call_info.args.positional { match &args[0] { Value { value: UntaggedValue::Primitive(Primitive::String(s)), tag, } => { self.pattern_tag = tag.clone(); - let parse_pattern = parse(&s); - let parse_regex = build_regex(&parse_pattern); - self.column_names = column_names(&parse_pattern); - - self.regex = Regex::new(&parse_regex).map_err(|_| { - ShellError::labeled_error( - "Could not parse regex", - "could not parse regex", - tag.span, - ) - })?; + if call_info.args.has("regex") { + self.regex = Regex::new(&s).map_err(|_| { + ShellError::labeled_error( + "Could not parse regex", + "could not parse regex", + tag.span, + ) + })?; + self.column_names = ColumnNames::from(&self.regex); + } else { + let parse_pattern = parse(&s); + let parse_regex = build_regex(&parse_pattern); + self.column_names = ColumnNames::from(parse_pattern.as_slice()); + self.regex = Regex::new(&parse_regex).map_err(|_| { + ShellError::labeled_error( + "Could not parse regex", + "could not parse regex", + tag.span, + ) + })?; + }; } Value { tag, .. } => { return Err(ShellError::labeled_error( @@ -58,11 +69,11 @@ impl Plugin for Parse { for caps in self.regex.captures_iter(&s) { let group_count = caps.len() - 1; - if self.column_names.len() != group_count { + if self.column_names.0.len() != group_count { return Err(ShellError::labeled_error( format!( "There are {} column(s) specified in the pattern, but could only match the first {}: [{}]", - self.column_names.len(), + self.column_names.0.len(), group_count, caps.iter() .skip(1) @@ -89,7 +100,7 @@ impl Plugin for Parse { } let mut dict = TaggedDictBuilder::new(&input.tag); - for (idx, column_name) in self.column_names.iter().enumerate() { + for (idx, column_name) in self.column_names.0.iter().enumerate() { dict.insert_untagged( column_name, UntaggedValue::string(caps[idx + 1].to_string()), @@ -150,16 +161,30 @@ fn parse(input: &str) -> Vec { output } -fn column_names(commands: &[ParseCommand]) -> Vec { - let mut output = vec![]; +impl From<&[ParseCommand]> for ColumnNames { + fn from(commands: &[ParseCommand]) -> ColumnNames { + let mut output = vec![]; - for command in commands { - if let ParseCommand::Column(c) = command { - output.push(c.clone()); + for command in commands { + if let ParseCommand::Column(c) = command { + output.push(c.clone()); + } } - } - output + ColumnNames(output) + } +} + +impl From<&Regex> for ColumnNames { + fn from(regex: &Regex) -> ColumnNames { + let output = regex + .capture_names() + .enumerate() + .skip(1) + .map(|(i, name)| name.map(String::from).unwrap_or(format!("Capture{}", i))) + .collect::>(); + ColumnNames(output) + } } fn build_regex(commands: &[ParseCommand]) -> String { diff --git a/crates/nu_plugin_parse/src/parse.rs b/crates/nu_plugin_parse/src/parse.rs index ba8263244..9eb6f7d65 100644 --- a/crates/nu_plugin_parse/src/parse.rs +++ b/crates/nu_plugin_parse/src/parse.rs @@ -5,9 +5,11 @@ pub struct Parse { pub regex: Regex, pub name: Tag, pub pattern_tag: Tag, - pub column_names: Vec, + pub column_names: ColumnNames, } +pub struct ColumnNames(pub Vec); + impl Parse { #[allow(clippy::trivial_regex)] pub fn new() -> Result> { @@ -15,7 +17,7 @@ impl Parse { regex: Regex::new("")?, name: Tag::unknown(), pattern_tag: Tag::unknown(), - column_names: vec![], + column_names: ColumnNames(vec![]), }) } }