Simplify parse plugin code. (#1904)

Primarily, instead of building a parse pattern enum, we just build the regex
directly, with the appropriate capture group names so that the column name
codepaths can be shared between simple and `--regex` patterns.

Also removed capture group count compared to column name count. I don't think
this codepath can possibly be reached with the regex we now use for the
simplified capture form.
This commit is contained in:
Jason Gedge 2020-05-28 09:58:06 -04:00 committed by GitHub
parent 98a3d9fff6
commit 0a6692ac44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 147 additions and 157 deletions

View File

@ -38,10 +38,10 @@ mod simple {
let actual = nu!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
echo "{abc}123" echo "{abc}123"
| parse "{{abc}{name}" | parse "{{abc}{name}"
| echo $it.name | echo $it.name
"# "#
)); ));
assert_eq!(actual.out, "123"); assert_eq!(actual.out, "123");
@ -54,15 +54,48 @@ mod simple {
let actual = nu!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
echo "(abc)123" echo "(abc)123"
| parse "(abc){name}" | parse "(abc){name}"
| echo $it.name | echo $it.name
"# "#
)); ));
assert_eq!(actual.out, "123"); assert_eq!(actual.out, "123");
}) })
} }
#[test]
fn properly_captures_empty_column() {
Playground::setup("parse_test_regex_4", |dirs, _sandbox| {
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"]
| parse "{timestamp}:{level}:{tag}:{entry}"
| get entry
| nth 1
"#
));
assert_eq!(actual.out, "something bad happened");
})
}
#[test]
fn errors_when_missing_closing_brace() {
Playground::setup("parse_test_regex_5", |dirs, _sandbox| {
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
echo "(abc)123"
| parse "(abc){name"
| echo $it.name
"#
));
assert!(actual.err.contains("invalid parse pattern"));
})
}
} }
mod regex { mod regex {
@ -86,12 +119,12 @@ mod regex {
let actual = nu!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
open nushell_git_log_oneline.txt open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)" | parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
| nth 1 | nth 1
| get PR | get PR
| echo $it | echo $it
"# "#
)); ));
assert_eq!(actual.out, "1842"); assert_eq!(actual.out, "1842");
@ -106,12 +139,12 @@ mod regex {
let actual = nu!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
open nushell_git_log_oneline.txt open nushell_git_log_oneline.txt
| parse --regex "(\w+) (.+) \(#(\d+)\)" | parse --regex "(\w+) (.+) \(#(\d+)\)"
| nth 1 | nth 1
| get Capture1 | get Capture1
| echo $it | echo $it
"# "#
)); ));
assert_eq!(actual.out, "b89976da"); assert_eq!(actual.out, "b89976da");
@ -126,15 +159,33 @@ mod regex {
let actual = nu!( let actual = nu!(
cwd: dirs.test(), pipeline( cwd: dirs.test(), pipeline(
r#" r#"
open nushell_git_log_oneline.txt open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)" | parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
| nth 1 | nth 1
| get Capture2 | get Capture2
| echo $it | echo $it
"# "#
)); ));
assert_eq!(actual.out, "let format access variables also"); assert_eq!(actual.out, "let format access variables also");
}) })
} }
#[test]
fn errors_with_invalid_regex() {
Playground::setup("parse_test_regex_1", |dirs, sandbox| {
sandbox.with_files(nushell_git_log_oneline());
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open nushell_git_log_oneline.txt
| parse --regex "(?P<Hash>\w+ unfinished capture group"
| echo $it
"#
));
assert!(actual.err.contains("invalid regex"));
})
}
} }

View File

@ -1,4 +1,4 @@
mod nu; mod nu;
mod parse; mod parse;
pub use parse::{ColumnNames, Parse}; pub use parse::Parse;

View File

@ -1,12 +1,14 @@
use regex::{self, Regex};
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_plugin::Plugin; use nu_plugin::Plugin;
use nu_protocol::{ use nu_protocol::{
CallInfo, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, TaggedDictBuilder, CallInfo, Primitive, ReturnSuccess, ReturnValue, ShellTypeName, Signature, SyntaxShape,
UntaggedValue, Value, TaggedDictBuilder, UntaggedValue, Value,
}; };
use nu_source::Tag;
use crate::{ColumnNames, Parse}; use crate::Parse;
use regex::{self, Regex};
impl Plugin for Parse { impl Plugin for Parse {
fn config(&mut self) -> Result<Signature, ShellError> { fn config(&mut self) -> Result<Signature, ShellError> {
@ -21,114 +23,81 @@ impl Plugin for Parse {
} }
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> { fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
if let Some(ref args) = &call_info.args.positional { if let Some(ref args) = call_info.args.positional {
match &args[0] { let value = &args[0];
match value {
Value { Value {
value: UntaggedValue::Primitive(Primitive::String(s)), value: UntaggedValue::Primitive(Primitive::String(s)),
tag, tag,
} => { } => {
self.pattern_tag = tag.clone(); self.pattern_tag = tag.clone();
if call_info.args.has("regex") { self.regex = if call_info.args.has("regex") {
self.regex = Regex::new(&s).map_err(|_| { Regex::new(&s).map_err(|_| {
ShellError::labeled_error( ShellError::labeled_error("Invalid regex", "invalid regex", tag.span)
"Could not parse regex", })?
"could not parse regex",
tag.span,
)
})?;
self.column_names = ColumnNames::from(&self.regex);
} else { } else {
let parse_pattern = parse(&s); let parse_regex = build_regex(&s, tag.clone())?;
let parse_regex = build_regex(&parse_pattern); Regex::new(&parse_regex).map_err(|_| {
self.column_names = ColumnNames::from(parse_pattern.as_slice());
self.regex = Regex::new(&parse_regex).map_err(|_| {
ShellError::labeled_error( ShellError::labeled_error(
"Could not parse regex", "Invalid pattern",
"could not parse regex", "invalid pattern",
tag.span, tag.span,
) )
})?; })?
}; };
self.column_names = column_names(&self.regex);
} }
Value { tag, .. } => { Value { tag, .. } => {
return Err(ShellError::labeled_error( return Err(ShellError::labeled_error(
"Unrecognized type in params", format!(
"unexpected value", "Unexpected type in params (found `{}`, expected `String`)",
value.type_name()
),
"unexpected type",
tag, tag,
)); ));
} }
} }
} }
Ok(vec![]) Ok(vec![])
} }
fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> { fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> {
match &input.as_string() { if let Ok(s) = input.as_string() {
Ok(s) => { Ok(self
let mut output = vec![]; .regex
for caps in self.regex.captures_iter(&s) { .captures_iter(&s)
let group_count = caps.len() - 1; .map(|caps| {
if self.column_names.0.len() != group_count {
return Err(ShellError::labeled_error(
format!(
"There are {} column(s) specified in the pattern, but could only match the first {}: [{}]",
self.column_names.0.len(),
group_count,
caps.iter()
.skip(1)
.map(|m| {
if let Some(m) = m {
let m = m.as_str();
let mut m = m.replace(",","\\,");
if m.len() > 20 {
m.truncate(17);
format!("{}...", m)
} else {
m
}
} else {
"<none>".to_string()
}
})
.collect::<Vec<String>>()
.join(", ")
),
"could not match all columns in pattern",
&self.pattern_tag,
));
}
let mut dict = TaggedDictBuilder::new(&input.tag); let mut dict = TaggedDictBuilder::new(&input.tag);
for (idx, column_name) in self.column_names.0.iter().enumerate() { for (column_name, cap) in self.column_names.iter().zip(caps.iter().skip(1)) {
dict.insert_untagged( let cap_string = cap.map(|v| v.as_str()).unwrap_or("").to_string();
column_name, dict.insert_untagged(column_name, UntaggedValue::string(cap_string));
UntaggedValue::string(caps[idx + 1].to_string()),
);
} }
output.push(Ok(ReturnSuccess::Value(dict.into_value())));
} Ok(ReturnSuccess::Value(dict.into_value()))
Ok(output) })
} .collect())
_ => Err(ShellError::labeled_error_with_secondary( } else {
Err(ShellError::labeled_error_with_secondary(
"Expected string input", "Expected string input",
"expected string input", "expected string input",
&self.name, &self.name,
"value originated here", "value originated here",
input.tag, input.tag,
)), ))
} }
} }
} }
fn parse(input: &str) -> Vec<ParseCommand> { fn build_regex(input: &str, tag: Tag) -> Result<String, ShellError> {
let mut output = vec![]; let mut output = "(?s)\\A".to_string();
//let mut loop_input = input; //let mut loop_input = input;
let mut loop_input = input.chars().peekable(); let mut loop_input = input.chars().peekable();
loop { loop {
let mut before = String::new(); let mut before = String::new();
while let Some(c) = loop_input.next() { while let Some(c) = loop_input.next() {
if c == '{' { if c == '{' {
// If '{{', still creating a plaintext parse command, but just for a single '{' char // If '{{', still creating a plaintext parse command, but just for a single '{' char
@ -142,20 +111,30 @@ fn parse(input: &str) -> Vec<ParseCommand> {
} }
if !before.is_empty() { if !before.is_empty() {
output.push(ParseCommand::Text(before.to_string())); output.push_str(&regex::escape(&before));
} }
// Look for column as we're now at one // Look for column as we're now at one
let mut column = String::new(); let mut column = String::new();
while let Some(c) = loop_input.next() { while let Some(c) = loop_input.next() {
if c == '}' { if c == '}' {
break; break;
} }
column.push(c); column.push(c);
if loop_input.peek().is_none() {
return Err(ShellError::labeled_error(
"Found opening `{` without an associated closing `}`",
"invalid parse pattern",
tag,
));
}
} }
if !column.is_empty() { if !column.is_empty() {
output.push(ParseCommand::Column(column.to_string())); output.push_str("(?P<");
output.push_str(&column);
output.push_str(">.*?)");
} }
if before.is_empty() && column.is_empty() { if before.is_empty() && column.is_empty() {
@ -163,56 +142,18 @@ fn parse(input: &str) -> Vec<ParseCommand> {
} }
} }
output
}
impl From<&[ParseCommand]> for ColumnNames {
fn from(commands: &[ParseCommand]) -> ColumnNames {
let mut output = vec![];
for command in commands {
if let ParseCommand::Column(c) = command {
output.push(c.clone());
}
}
ColumnNames(output)
}
}
impl From<&Regex> for ColumnNames {
fn from(regex: &Regex) -> ColumnNames {
let output = regex
.capture_names()
.enumerate()
.skip(1)
.map(|(i, name)| name.map(String::from).unwrap_or(format!("Capture{}", i)))
.collect::<Vec<_>>();
ColumnNames(output)
}
}
fn build_regex(commands: &[ParseCommand]) -> String {
let mut output = "(?s)\\A".to_string();
for command in commands {
match command {
ParseCommand::Text(s) => {
output.push_str(&regex::escape(&s));
}
ParseCommand::Column(_) => {
output.push_str("(.*?)");
}
}
}
output.push_str("\\z"); output.push_str("\\z");
Ok(output)
output
} }
#[derive(Debug)] fn column_names(regex: &Regex) -> Vec<String> {
enum ParseCommand { regex
Text(String), .capture_names()
Column(String), .enumerate()
.skip(1)
.map(|(i, name)| {
name.map(String::from)
.unwrap_or_else(|| format!("Capture{}", i))
})
.collect()
} }

View File

@ -5,11 +5,9 @@ pub struct Parse {
pub regex: Regex, pub regex: Regex,
pub name: Tag, pub name: Tag,
pub pattern_tag: Tag, pub pattern_tag: Tag,
pub column_names: ColumnNames, pub column_names: Vec<String>,
} }
pub struct ColumnNames(pub Vec<String>);
impl Parse { impl Parse {
#[allow(clippy::trivial_regex)] #[allow(clippy::trivial_regex)]
pub fn new() -> Result<Self, Box<dyn std::error::Error>> { pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
@ -17,7 +15,7 @@ impl Parse {
regex: Regex::new("")?, regex: Regex::new("")?,
name: Tag::unknown(), name: Tag::unknown(),
pattern_tag: Tag::unknown(), pattern_tag: Tag::unknown(),
column_names: ColumnNames(vec![]), column_names: vec![],
}) })
} }
} }