mirror of
https://github.com/nushell/nushell.git
synced 2025-03-30 10:37:29 +02:00
Add --regex
flag to parse
(#1863)
This commit is contained in:
parent
5de30d0ae5
commit
9e6ab33fd7
@ -1,11 +1,11 @@
|
|||||||
use nu_test_support::fs::Stub::FileWithContentToBeTrimmed;
|
use nu_test_support::fs::Stub;
|
||||||
use nu_test_support::playground::Playground;
|
use nu_test_support::playground::Playground;
|
||||||
use nu_test_support::{nu, pipeline};
|
use nu_test_support::{nu, pipeline};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn extracts_fields_from_the_given_the_pattern() {
|
fn extracts_fields_from_the_given_the_pattern() {
|
||||||
Playground::setup("parse_test_1", |dirs, sandbox| {
|
Playground::setup("parse_test_1", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![Stub::FileWithContentToBeTrimmed(
|
||||||
"key_value_separated_arepa_ingredients.txt",
|
"key_value_separated_arepa_ingredients.txt",
|
||||||
r#"
|
r#"
|
||||||
VAR1=Cheese
|
VAR1=Cheese
|
||||||
@ -28,3 +28,77 @@ fn extracts_fields_from_the_given_the_pattern() {
|
|||||||
assert_eq!(actual.out, "JonathanParsed");
|
assert_eq!(actual.out, "JonathanParsed");
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mod regex {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn nushell_git_log_oneline<'a>() -> Vec<Stub<'a>> {
|
||||||
|
vec![Stub::FileWithContentToBeTrimmed(
|
||||||
|
"nushell_git_log_oneline.txt",
|
||||||
|
r#"
|
||||||
|
ae87582c Fix missing invocation errors (#1846)
|
||||||
|
b89976da let format access variables also (#1842)
|
||||||
|
"#,
|
||||||
|
)]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extracts_fields_with_all_named_groups() {
|
||||||
|
Playground::setup("parse_test_regex_1", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(nushell_git_log_oneline());
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open nushell_git_log_oneline.txt
|
||||||
|
| parse --regex "(?P<Hash>\w+) (?P<Message>.+) \(#(?P<PR>\d+)\)"
|
||||||
|
| nth 1
|
||||||
|
| get PR
|
||||||
|
| echo $it
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "1842");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extracts_fields_with_all_unnamed_groups() {
|
||||||
|
Playground::setup("parse_test_regex_2", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(nushell_git_log_oneline());
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open nushell_git_log_oneline.txt
|
||||||
|
| parse --regex "(\w+) (.+) \(#(\d+)\)"
|
||||||
|
| nth 1
|
||||||
|
| get Capture1
|
||||||
|
| echo $it
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "b89976da");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extracts_fields_with_named_and_unnamed_groups() {
|
||||||
|
Playground::setup("parse_test_regex_3", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(nushell_git_log_oneline());
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open nushell_git_log_oneline.txt
|
||||||
|
| parse --regex "(?P<Hash>\w+) (.+) \(#(?P<PR>\d+)\)"
|
||||||
|
| nth 1
|
||||||
|
| get Capture2
|
||||||
|
| echo $it
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "let format access variables also");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
mod nu;
|
mod nu;
|
||||||
mod parse;
|
mod parse;
|
||||||
|
|
||||||
pub use parse::Parse;
|
pub use parse::{ColumnNames, Parse};
|
||||||
|
@ -5,12 +5,13 @@ use nu_protocol::{
|
|||||||
UntaggedValue, Value,
|
UntaggedValue, Value,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::Parse;
|
use crate::{ColumnNames, Parse};
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
|
||||||
impl Plugin for Parse {
|
impl Plugin for Parse {
|
||||||
fn config(&mut self) -> Result<Signature, ShellError> {
|
fn config(&mut self) -> Result<Signature, ShellError> {
|
||||||
Ok(Signature::build("parse")
|
Ok(Signature::build("parse")
|
||||||
|
.switch("regex", "use full regex syntax for patterns", Some('r'))
|
||||||
.required(
|
.required(
|
||||||
"pattern",
|
"pattern",
|
||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
@ -20,24 +21,34 @@ impl Plugin for Parse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
|
fn begin_filter(&mut self, call_info: CallInfo) -> Result<Vec<ReturnValue>, ShellError> {
|
||||||
if let Some(args) = call_info.args.positional {
|
if let Some(ref args) = &call_info.args.positional {
|
||||||
match &args[0] {
|
match &args[0] {
|
||||||
Value {
|
Value {
|
||||||
value: UntaggedValue::Primitive(Primitive::String(s)),
|
value: UntaggedValue::Primitive(Primitive::String(s)),
|
||||||
tag,
|
tag,
|
||||||
} => {
|
} => {
|
||||||
self.pattern_tag = tag.clone();
|
self.pattern_tag = tag.clone();
|
||||||
let parse_pattern = parse(&s);
|
if call_info.args.has("regex") {
|
||||||
let parse_regex = build_regex(&parse_pattern);
|
self.regex = Regex::new(&s).map_err(|_| {
|
||||||
self.column_names = column_names(&parse_pattern);
|
ShellError::labeled_error(
|
||||||
|
"Could not parse regex",
|
||||||
self.regex = Regex::new(&parse_regex).map_err(|_| {
|
"could not parse regex",
|
||||||
ShellError::labeled_error(
|
tag.span,
|
||||||
"Could not parse regex",
|
)
|
||||||
"could not parse regex",
|
})?;
|
||||||
tag.span,
|
self.column_names = ColumnNames::from(&self.regex);
|
||||||
)
|
} else {
|
||||||
})?;
|
let parse_pattern = parse(&s);
|
||||||
|
let parse_regex = build_regex(&parse_pattern);
|
||||||
|
self.column_names = ColumnNames::from(parse_pattern.as_slice());
|
||||||
|
self.regex = Regex::new(&parse_regex).map_err(|_| {
|
||||||
|
ShellError::labeled_error(
|
||||||
|
"Could not parse regex",
|
||||||
|
"could not parse regex",
|
||||||
|
tag.span,
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
Value { tag, .. } => {
|
Value { tag, .. } => {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
@ -58,11 +69,11 @@ impl Plugin for Parse {
|
|||||||
for caps in self.regex.captures_iter(&s) {
|
for caps in self.regex.captures_iter(&s) {
|
||||||
let group_count = caps.len() - 1;
|
let group_count = caps.len() - 1;
|
||||||
|
|
||||||
if self.column_names.len() != group_count {
|
if self.column_names.0.len() != group_count {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
format!(
|
format!(
|
||||||
"There are {} column(s) specified in the pattern, but could only match the first {}: [{}]",
|
"There are {} column(s) specified in the pattern, but could only match the first {}: [{}]",
|
||||||
self.column_names.len(),
|
self.column_names.0.len(),
|
||||||
group_count,
|
group_count,
|
||||||
caps.iter()
|
caps.iter()
|
||||||
.skip(1)
|
.skip(1)
|
||||||
@ -89,7 +100,7 @@ impl Plugin for Parse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut dict = TaggedDictBuilder::new(&input.tag);
|
let mut dict = TaggedDictBuilder::new(&input.tag);
|
||||||
for (idx, column_name) in self.column_names.iter().enumerate() {
|
for (idx, column_name) in self.column_names.0.iter().enumerate() {
|
||||||
dict.insert_untagged(
|
dict.insert_untagged(
|
||||||
column_name,
|
column_name,
|
||||||
UntaggedValue::string(caps[idx + 1].to_string()),
|
UntaggedValue::string(caps[idx + 1].to_string()),
|
||||||
@ -150,16 +161,30 @@ fn parse(input: &str) -> Vec<ParseCommand> {
|
|||||||
output
|
output
|
||||||
}
|
}
|
||||||
|
|
||||||
fn column_names(commands: &[ParseCommand]) -> Vec<String> {
|
impl From<&[ParseCommand]> for ColumnNames {
|
||||||
let mut output = vec![];
|
fn from(commands: &[ParseCommand]) -> ColumnNames {
|
||||||
|
let mut output = vec![];
|
||||||
|
|
||||||
for command in commands {
|
for command in commands {
|
||||||
if let ParseCommand::Column(c) = command {
|
if let ParseCommand::Column(c) = command {
|
||||||
output.push(c.clone());
|
output.push(c.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
output
|
ColumnNames(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&Regex> for ColumnNames {
|
||||||
|
fn from(regex: &Regex) -> ColumnNames {
|
||||||
|
let output = regex
|
||||||
|
.capture_names()
|
||||||
|
.enumerate()
|
||||||
|
.skip(1)
|
||||||
|
.map(|(i, name)| name.map(String::from).unwrap_or(format!("Capture{}", i)))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
ColumnNames(output)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_regex(commands: &[ParseCommand]) -> String {
|
fn build_regex(commands: &[ParseCommand]) -> String {
|
||||||
|
@ -5,9 +5,11 @@ pub struct Parse {
|
|||||||
pub regex: Regex,
|
pub regex: Regex,
|
||||||
pub name: Tag,
|
pub name: Tag,
|
||||||
pub pattern_tag: Tag,
|
pub pattern_tag: Tag,
|
||||||
pub column_names: Vec<String>,
|
pub column_names: ColumnNames,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ordered names of the columns produced by a parse pattern.
///
/// The name at position `i` labels capture group `i + 1` of the compiled regex (group 0 is
/// the whole match and has no column). Built via `From` from either a parsed simple pattern
/// or a full regex.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ColumnNames(pub Vec<String>);
|
||||||
|
|
||||||
impl Parse {
|
impl Parse {
|
||||||
#[allow(clippy::trivial_regex)]
|
#[allow(clippy::trivial_regex)]
|
||||||
pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
|
pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
|
||||||
@ -15,7 +17,7 @@ impl Parse {
|
|||||||
regex: Regex::new("")?,
|
regex: Regex::new("")?,
|
||||||
name: Tag::unknown(),
|
name: Tag::unknown(),
|
||||||
pattern_tag: Tag::unknown(),
|
pattern_tag: Tag::unknown(),
|
||||||
column_names: vec![],
|
column_names: ColumnNames(vec![]),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user