forked from extern/nushell
Simplify parse plugin code. (#1904)
Primarily, instead of building a parse pattern enum, we just build the regex directly, with the appropriate capture group names so that the column name codepaths can be shared between simple and `--regex` patterns. Also removed capture group count compared to column name count. I don't think this codepath can possibly be reached with the regex we now use for the simplified capture form.
This commit is contained in:
parent
98a3d9fff6
commit
0a6692ac44
@ -38,10 +38,10 @@ mod simple {
|
|||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
echo "{abc}123"
|
echo "{abc}123"
|
||||||
| parse "{{abc}{name}"
|
| parse "{{abc}{name}"
|
||||||
| echo $it.name
|
| echo $it.name
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual.out, "123");
|
assert_eq!(actual.out, "123");
|
||||||
@ -54,15 +54,48 @@ mod simple {
|
|||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
echo "(abc)123"
|
echo "(abc)123"
|
||||||
| parse "(abc){name}"
|
| parse "(abc){name}"
|
||||||
| echo $it.name
|
| echo $it.name
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual.out, "123");
|
assert_eq!(actual.out, "123");
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn properly_captures_empty_column() {
|
||||||
|
Playground::setup("parse_test_regex_4", |dirs, _sandbox| {
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
echo ["1:INFO:component:all is well" "2:ERROR::something bad happened"]
|
||||||
|
| parse "{timestamp}:{level}:{tag}:{entry}"
|
||||||
|
| get entry
|
||||||
|
| nth 1
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "something bad happened");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_when_missing_closing_brace() {
|
||||||
|
Playground::setup("parse_test_regex_5", |dirs, _sandbox| {
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
echo "(abc)123"
|
||||||
|
| parse "(abc){name"
|
||||||
|
| echo $it.name
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert!(actual.err.contains("invalid parse pattern"));
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mod regex {
|
mod regex {
|
||||||
@ -86,12 +119,12 @@ mod regex {
|
|||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
|
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
|
||||||
| nth 1
|
| nth 1
|
||||||
| get PR
|
| get PR
|
||||||
| echo $it
|
| echo $it
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual.out, "1842");
|
assert_eq!(actual.out, "1842");
|
||||||
@ -106,12 +139,12 @@ mod regex {
|
|||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(\w+) (.+) \(#(\d+)\)"
|
| parse --regex "(\w+) (.+) \(#(\d+)\)"
|
||||||
| nth 1
|
| nth 1
|
||||||
| get Capture1
|
| get Capture1
|
||||||
| echo $it
|
| echo $it
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual.out, "b89976da");
|
assert_eq!(actual.out, "b89976da");
|
||||||
@ -126,15 +159,33 @@ mod regex {
|
|||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: dirs.test(), pipeline(
|
cwd: dirs.test(), pipeline(
|
||||||
r#"
|
r#"
|
||||||
open nushell_git_log_oneline.txt
|
open nushell_git_log_oneline.txt
|
||||||
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
|
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
|
||||||
| nth 1
|
| nth 1
|
||||||
| get Capture2
|
| get Capture2
|
||||||
| echo $it
|
| echo $it
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual.out, "let format access variables also");
|
assert_eq!(actual.out, "let format access variables also");
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_with_invalid_regex() {
|
||||||
|
Playground::setup("parse_test_regex_1", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(nushell_git_log_oneline());
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open nushell_git_log_oneline.txt
|
||||||
|
| parse --regex "(?P<Hash>\w+ unfinished capture group"
|
||||||
|
| echo $it
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert!(actual.err.contains("invalid regex"));
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
mod nu;
|
mod nu;
|
||||||
mod parse;
|
mod parse;
|
||||||
|
|
||||||
pub use parse::{ColumnNames, Parse};
|
pub use parse::Parse;
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
|
use regex::{self, Regex};
|
||||||
|
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_plugin::Plugin;
|
use nu_plugin::Plugin;
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
CallInfo, Primitive, ReturnSuccess, ReturnValue, Signature, SyntaxShape, TaggedDictBuilder,
|
CallInfo, Primitive, ReturnSuccess, ReturnValue, ShellTypeName, Signature, SyntaxShape,
|
||||||
UntaggedValue, Value,
|
TaggedDictBuilder, UntaggedValue, Value,
|
||||||
};
|
};
|
||||||
|
use nu_source::Tag;
|
||||||
|
|
||||||
use crate::{ColumnNames, Parse};
|
use crate::Parse;
|
||||||
use regex::{self, Regex};
|
|
||||||
|
|
||||||
impl Plugin for Parse {
|
impl Plugin for Parse {
|
||||||
fn config(&mut self) -> Result<Signature, ShellError> {
|
fn config(&mut self) -> Result<Signature, ShellError> {
|
||||||
@ -21,114 +23,81 @@ impl Plugin for Parse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
|
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
|
||||||
if let Some(ref args) = &call_info.args.positional {
|
if let Some(ref args) = call_info.args.positional {
|
||||||
match &args[0] {
|
let value = &args[0];
|
||||||
|
match value {
|
||||||
Value {
|
Value {
|
||||||
value: UntaggedValue::Primitive(Primitive::String(s)),
|
value: UntaggedValue::Primitive(Primitive::String(s)),
|
||||||
tag,
|
tag,
|
||||||
} => {
|
} => {
|
||||||
self.pattern_tag = tag.clone();
|
self.pattern_tag = tag.clone();
|
||||||
if call_info.args.has("regex") {
|
self.regex = if call_info.args.has("regex") {
|
||||||
self.regex = Regex::new(&s).map_err(|_| {
|
Regex::new(&s).map_err(|_| {
|
||||||
ShellError::labeled_error(
|
ShellError::labeled_error("Invalid regex", "invalid regex", tag.span)
|
||||||
"Could not parse regex",
|
})?
|
||||||
"could not parse regex",
|
|
||||||
tag.span,
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
self.column_names = ColumnNames::from(&self.regex);
|
|
||||||
} else {
|
} else {
|
||||||
let parse_pattern = parse(&s);
|
let parse_regex = build_regex(&s, tag.clone())?;
|
||||||
let parse_regex = build_regex(&parse_pattern);
|
Regex::new(&parse_regex).map_err(|_| {
|
||||||
self.column_names = ColumnNames::from(parse_pattern.as_slice());
|
|
||||||
self.regex = Regex::new(&parse_regex).map_err(|_| {
|
|
||||||
ShellError::labeled_error(
|
ShellError::labeled_error(
|
||||||
"Could not parse regex",
|
"Invalid pattern",
|
||||||
"could not parse regex",
|
"invalid pattern",
|
||||||
tag.span,
|
tag.span,
|
||||||
)
|
)
|
||||||
})?;
|
})?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.column_names = column_names(&self.regex);
|
||||||
}
|
}
|
||||||
Value { tag, .. } => {
|
Value { tag, .. } => {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
"Unrecognized type in params",
|
format!(
|
||||||
"unexpected value",
|
"Unexpected type in params (found `{}`, expected `String`)",
|
||||||
|
value.type_name()
|
||||||
|
),
|
||||||
|
"unexpected type",
|
||||||
tag,
|
tag,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> {
|
fn filter(&mut self, input: Value) -> Result<Vec<ReturnValue>, ShellError> {
|
||||||
match &input.as_string() {
|
if let Ok(s) = input.as_string() {
|
||||||
Ok(s) => {
|
Ok(self
|
||||||
let mut output = vec![];
|
.regex
|
||||||
for caps in self.regex.captures_iter(&s) {
|
.captures_iter(&s)
|
||||||
let group_count = caps.len() - 1;
|
.map(|caps| {
|
||||||
|
|
||||||
if self.column_names.0.len() != group_count {
|
|
||||||
return Err(ShellError::labeled_error(
|
|
||||||
format!(
|
|
||||||
"There are {} column(s) specified in the pattern, but could only match the first {}: [{}]",
|
|
||||||
self.column_names.0.len(),
|
|
||||||
group_count,
|
|
||||||
caps.iter()
|
|
||||||
.skip(1)
|
|
||||||
.map(|m| {
|
|
||||||
if let Some(m) = m {
|
|
||||||
let m = m.as_str();
|
|
||||||
let mut m = m.replace(",","\\,");
|
|
||||||
if m.len() > 20 {
|
|
||||||
m.truncate(17);
|
|
||||||
format!("{}...", m)
|
|
||||||
} else {
|
|
||||||
m
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
"<none>".to_string()
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join(", ")
|
|
||||||
),
|
|
||||||
"could not match all columns in pattern",
|
|
||||||
&self.pattern_tag,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut dict = TaggedDictBuilder::new(&input.tag);
|
let mut dict = TaggedDictBuilder::new(&input.tag);
|
||||||
for (idx, column_name) in self.column_names.0.iter().enumerate() {
|
for (column_name, cap) in self.column_names.iter().zip(caps.iter().skip(1)) {
|
||||||
dict.insert_untagged(
|
let cap_string = cap.map(|v| v.as_str()).unwrap_or("").to_string();
|
||||||
column_name,
|
dict.insert_untagged(column_name, UntaggedValue::string(cap_string));
|
||||||
UntaggedValue::string(caps[idx + 1].to_string()),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
output.push(Ok(ReturnSuccess::Value(dict.into_value())));
|
|
||||||
}
|
Ok(ReturnSuccess::Value(dict.into_value()))
|
||||||
Ok(output)
|
})
|
||||||
}
|
.collect())
|
||||||
_ => Err(ShellError::labeled_error_with_secondary(
|
} else {
|
||||||
|
Err(ShellError::labeled_error_with_secondary(
|
||||||
"Expected string input",
|
"Expected string input",
|
||||||
"expected string input",
|
"expected string input",
|
||||||
&self.name,
|
&self.name,
|
||||||
"value originated here",
|
"value originated here",
|
||||||
input.tag,
|
input.tag,
|
||||||
)),
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse(input: &str) -> Vec<ParseCommand> {
|
fn build_regex(input: &str, tag: Tag) -> Result<String, ShellError> {
|
||||||
let mut output = vec![];
|
let mut output = "(?s)\\A".to_string();
|
||||||
|
|
||||||
//let mut loop_input = input;
|
//let mut loop_input = input;
|
||||||
let mut loop_input = input.chars().peekable();
|
let mut loop_input = input.chars().peekable();
|
||||||
loop {
|
loop {
|
||||||
let mut before = String::new();
|
let mut before = String::new();
|
||||||
|
|
||||||
while let Some(c) = loop_input.next() {
|
while let Some(c) = loop_input.next() {
|
||||||
if c == '{' {
|
if c == '{' {
|
||||||
// If '{{', still creating a plaintext parse command, but just for a single '{' char
|
// If '{{', still creating a plaintext parse command, but just for a single '{' char
|
||||||
@ -142,20 +111,30 @@ fn parse(input: &str) -> Vec<ParseCommand> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !before.is_empty() {
|
if !before.is_empty() {
|
||||||
output.push(ParseCommand::Text(before.to_string()));
|
output.push_str(®ex::escape(&before));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for column as we're now at one
|
// Look for column as we're now at one
|
||||||
let mut column = String::new();
|
let mut column = String::new();
|
||||||
|
|
||||||
while let Some(c) = loop_input.next() {
|
while let Some(c) = loop_input.next() {
|
||||||
if c == '}' {
|
if c == '}' {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
column.push(c);
|
column.push(c);
|
||||||
|
|
||||||
|
if loop_input.peek().is_none() {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"Found opening `{` without an associated closing `}`",
|
||||||
|
"invalid parse pattern",
|
||||||
|
tag,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !column.is_empty() {
|
if !column.is_empty() {
|
||||||
output.push(ParseCommand::Column(column.to_string()));
|
output.push_str("(?P<");
|
||||||
|
output.push_str(&column);
|
||||||
|
output.push_str(">.*?)");
|
||||||
}
|
}
|
||||||
|
|
||||||
if before.is_empty() && column.is_empty() {
|
if before.is_empty() && column.is_empty() {
|
||||||
@ -163,56 +142,18 @@ fn parse(input: &str) -> Vec<ParseCommand> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<&[ParseCommand]> for ColumnNames {
|
|
||||||
fn from(commands: &[ParseCommand]) -> ColumnNames {
|
|
||||||
let mut output = vec![];
|
|
||||||
|
|
||||||
for command in commands {
|
|
||||||
if let ParseCommand::Column(c) = command {
|
|
||||||
output.push(c.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ColumnNames(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<&Regex> for ColumnNames {
|
|
||||||
fn from(regex: &Regex) -> ColumnNames {
|
|
||||||
let output = regex
|
|
||||||
.capture_names()
|
|
||||||
.enumerate()
|
|
||||||
.skip(1)
|
|
||||||
.map(|(i, name)| name.map(String::from).unwrap_or(format!("Capture{}", i)))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
ColumnNames(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_regex(commands: &[ParseCommand]) -> String {
|
|
||||||
let mut output = "(?s)\\A".to_string();
|
|
||||||
|
|
||||||
for command in commands {
|
|
||||||
match command {
|
|
||||||
ParseCommand::Text(s) => {
|
|
||||||
output.push_str(®ex::escape(&s));
|
|
||||||
}
|
|
||||||
ParseCommand::Column(_) => {
|
|
||||||
output.push_str("(.*?)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
output.push_str("\\z");
|
output.push_str("\\z");
|
||||||
|
Ok(output)
|
||||||
output
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
fn column_names(regex: &Regex) -> Vec<String> {
|
||||||
enum ParseCommand {
|
regex
|
||||||
Text(String),
|
.capture_names()
|
||||||
Column(String),
|
.enumerate()
|
||||||
|
.skip(1)
|
||||||
|
.map(|(i, name)| {
|
||||||
|
name.map(String::from)
|
||||||
|
.unwrap_or_else(|| format!("Capture{}", i))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
@ -5,11 +5,9 @@ pub struct Parse {
|
|||||||
pub regex: Regex,
|
pub regex: Regex,
|
||||||
pub name: Tag,
|
pub name: Tag,
|
||||||
pub pattern_tag: Tag,
|
pub pattern_tag: Tag,
|
||||||
pub column_names: ColumnNames,
|
pub column_names: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ColumnNames(pub Vec<String>);
|
|
||||||
|
|
||||||
impl Parse {
|
impl Parse {
|
||||||
#[allow(clippy::trivial_regex)]
|
#[allow(clippy::trivial_regex)]
|
||||||
pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
|
pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
|
||||||
@ -17,7 +15,7 @@ impl Parse {
|
|||||||
regex: Regex::new("")?,
|
regex: Regex::new("")?,
|
||||||
name: Tag::unknown(),
|
name: Tag::unknown(),
|
||||||
pattern_tag: Tag::unknown(),
|
pattern_tag: Tag::unknown(),
|
||||||
column_names: ColumnNames(vec![]),
|
column_names: vec![],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user