Add --regex flag to parse (#1863)

This commit is contained in:
Oleksii Filonenko 2020-05-22 17:13:58 +03:00 committed by GitHub
parent 5de30d0ae5
commit 9e6ab33fd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 28 deletions

View File

@ -1,11 +1,11 @@
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed; use nu_test_support::fs::Stub;
use nu_test_support::playground::Playground; use nu_test_support::playground::Playground;
use nu_test_support::{nu, pipeline}; use nu_test_support::{nu, pipeline};
#[test] #[test]
fn extracts_fields_from_the_given_the_pattern() { fn extracts_fields_from_the_given_the_pattern() {
Playground::setup("parse_test_1", |dirs, sandbox| { Playground::setup("parse_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed( sandbox.with_files(vec![Stub::FileWithContentToBeTrimmed(
"key_value_separated_arepa_ingredients.txt", "key_value_separated_arepa_ingredients.txt",
r#" r#"
VAR1=Cheese VAR1=Cheese
@ -28,3 +28,77 @@ fn extracts_fields_from_the_given_the_pattern() {
assert_eq!(actual.out, "JonathanParsed"); assert_eq!(actual.out, "JonathanParsed");
}) })
} }
/// Tests that exercise `parse` when it is invoked with the `--regex` flag.
///
/// All three tests share one fixture file and differ only in how the capture
/// groups of the pattern are written: all named, all unnamed, or a mix.
mod regex {
use super::*;
/// Builds the shared fixture: a single file named `nushell_git_log_oneline.txt`
/// holding two one-line git log entries (hash, message, PR number).
fn nushell_git_log_oneline<'a>() -> Vec<Stub<'a>> {
vec![Stub::FileWithContentToBeTrimmed(
"nushell_git_log_oneline.txt",
r#"
ae87582c Fix missing invocation errors (#1846)
b89976da let format access variables also (#1842)
"#,
)]
}
/// Every group is named (`Hash`, `Message`, `PR`); the `PR` column is read back.
#[test]
fn extracts_fields_with_all_named_groups() {
Playground::setup("parse_test_regex_1", |dirs, sandbox| {
sandbox.with_files(nushell_git_log_oneline());
// `nth 1` is expected to pick the row for the second fixture line;
// the assertion below matches that line's PR number.
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
| nth 1
| get PR
| echo $it
"#
));
assert_eq!(actual.out, "1842");
})
}
/// No group is named; the first group is expected under the column `Capture1`.
#[test]
fn extracts_fields_with_all_unnamed_groups() {
Playground::setup("parse_test_regex_2", |dirs, sandbox| {
sandbox.with_files(nushell_git_log_oneline());
// Same fixture and row as above, but the hash is fetched through the
// positional column name because the pattern has no named groups.
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open nushell_git_log_oneline.txt
| parse --regex "(\w+) (.+) \(#(\d+)\)"
| nth 1
| get Capture1
| echo $it
"#
));
assert_eq!(actual.out, "b89976da");
})
}
/// Groups 1 and 3 are named, group 2 is not; the unnamed group is expected
/// under `Capture2`, i.e. it keeps its position in the pattern as its number.
#[test]
fn extracts_fields_with_named_and_unnamed_groups() {
Playground::setup("parse_test_regex_3", |dirs, sandbox| {
sandbox.with_files(nushell_git_log_oneline());
// The asserted value is the message text of the second fixture line.
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
| nth 1
| get Capture2
| echo $it
"#
));
assert_eq!(actual.out, "let format access variables also");
})
}
}

View File

@ -1,4 +1,4 @@
mod nu; mod nu;
mod parse; mod parse;
pub use parse::Parse; pub use parse::{ColumnNames, Parse};

View File

@ -5,12 +5,13 @@ use nu_protocol::{
UntaggedValue, Value, UntaggedValue, Value,
}; };
use crate::Parse; use crate::{ColumnNames, Parse};
use regex::Regex; use regex::Regex;
impl Plugin for Parse { impl Plugin for Parse {
fn config(&mut self) -> Result<Signature, ShellError> { fn config(&mut self) -> Result<Signature, ShellError> {
Ok(Signature::build("parse") Ok(Signature::build("parse")
.switch("regex", "use full regex syntax for patterns", Some('r'))
.required( .required(
"pattern", "pattern",
SyntaxShape::String, SyntaxShape::String,
@ -20,24 +21,34 @@ impl Plugin for Parse {
} }
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> { fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
if let Some(args) = call_info.args.positional { if let Some(ref args) = &call_info.args.positional {
match &args[0] { match &args[0] {
Value { Value {
value: UntaggedValue::Primitive(Primitive::String(s)), value: UntaggedValue::Primitive(Primitive::String(s)),
tag, tag,
} => { } => {
self.pattern_tag = tag.clone(); self.pattern_tag = tag.clone();
let parse_pattern = parse(&s); if call_info.args.has("regex") {
let parse_regex = build_regex(&parse_pattern); self.regex = Regex::new(&s).map_err(|_| {
self.column_names = column_names(&parse_pattern); ShellError::labeled_error(
"Could not parse regex",
self.regex = Regex::new(&parse_regex).map_err(|_| { "could not parse regex",
ShellError::labeled_error( tag.span,
"Could not parse regex", )
"could not parse regex", })?;
tag.span, self.column_names = ColumnNames::from(&self.regex);
) } else {
})?; let parse_pattern = parse(&s);
let parse_regex = build_regex(&parse_pattern);
self.column_names = ColumnNames::from(parse_pattern.as_slice());
self.regex = Regex::new(&parse_regex).map_err(|_| {
ShellError::labeled_error(
"Could not parse regex",
"could not parse regex",
tag.span,
)
})?;
};
} }
Value { tag, .. } => { Value { tag, .. } => {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
@ -58,11 +69,11 @@ impl Plugin for Parse {
for caps in self.regex.captures_iter(&s) { for caps in self.regex.captures_iter(&s) {
let group_count = caps.len() - 1; let group_count = caps.len() - 1;
if self.column_names.len() != group_count { if self.column_names.0.len() != group_count {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
format!( format!(
"There are {} column(s) specified in the pattern, but could only match the first {}: [{}]", "There are {} column(s) specified in the pattern, but could only match the first {}: [{}]",
self.column_names.len(), self.column_names.0.len(),
group_count, group_count,
caps.iter() caps.iter()
.skip(1) .skip(1)
@ -89,7 +100,7 @@ impl Plugin for Parse {
} }
let mut dict = TaggedDictBuilder::new(&input.tag); let mut dict = TaggedDictBuilder::new(&input.tag);
for (idx, column_name) in self.column_names.iter().enumerate() { for (idx, column_name) in self.column_names.0.iter().enumerate() {
dict.insert_untagged( dict.insert_untagged(
column_name, column_name,
UntaggedValue::string(caps[idx + 1].to_string()), UntaggedValue::string(caps[idx + 1].to_string()),
@ -150,16 +161,30 @@ fn parse(input: &str) -> Vec<ParseCommand> {
output output
} }
fn column_names(commands: &[ParseCommand]) -> Vec<String> { impl From<&[ParseCommand]> for ColumnNames {
let mut output = vec![]; fn from(commands: &[ParseCommand]) -> ColumnNames {
let mut output = vec![];
for command in commands { for command in commands {
if let ParseCommand::Column(c) = command { if let ParseCommand::Column(c) = command {
output.push(c.clone()); output.push(c.clone());
}
} }
}
output ColumnNames(output)
}
}
impl From<&Regex> for ColumnNames {
/// Derives one column name per capture group of `regex`.
///
/// A named group contributes its own name. An unnamed group contributes
/// `Capture<N>`, where `N` is the group's index as enumerated by
/// `Regex::capture_names` (so numbering follows the position in the pattern,
/// counting named groups too).
fn from(regex: &Regex) -> ColumnNames {
    let output = regex
        .capture_names()
        .enumerate()
        // The first entry is skipped so that numbering starts at 1; in the
        // regex crate that entry is the implicit whole-match group.
        .skip(1)
        // Build the fallback lazily: with `unwrap_or(format!(..))` the string
        // was allocated for every group, including named ones that discard it.
        .map(|(i, name)| {
            name.map(String::from)
                .unwrap_or_else(|| format!("Capture{}", i))
        })
        .collect::<Vec<_>>();
    ColumnNames(output)
}
} }
fn build_regex(commands: &[ParseCommand]) -> String { fn build_regex(commands: &[ParseCommand]) -> String {

View File

@ -5,9 +5,11 @@ pub struct Parse {
pub regex: Regex, pub regex: Regex,
pub name: Tag, pub name: Tag,
pub pattern_tag: Tag, pub pattern_tag: Tag,
pub column_names: Vec<String>, pub column_names: ColumnNames,
} }
/// Ordered list of the column names produced by the active parse pattern.
///
/// The wrapped vector is public; position `i` names the value taken from
/// capture group `i + 1` of a match.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ColumnNames(pub Vec<String>);
impl Parse { impl Parse {
#[allow(clippy::trivial_regex)] #[allow(clippy::trivial_regex)]
pub fn new() -> Result<Self, Box<dyn std::error::Error>> { pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
@ -15,7 +17,7 @@ impl Parse {
regex: Regex::new("")?, regex: Regex::new("")?,
name: Tag::unknown(), name: Tag::unknown(),
pattern_tag: Tag::unknown(), pattern_tag: Tag::unknown(),
column_names: vec![], column_names: ColumnNames(vec![]),
}) })
} }
} }