mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 22:50:14 +02:00
replace the regex crate with the fancy-regex crate (#6227)
This commit is contained in:
@ -1,3 +1,4 @@
|
||||
use fancy_regex::Regex;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
@ -5,7 +6,6 @@ use nu_protocol::{
|
||||
Category, Example, ListStream, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
|
||||
Value,
|
||||
};
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Parse;
|
||||
@ -55,6 +55,61 @@ impl Command for Parse {
|
||||
example: "echo \"hi there\" | parse -r '(?P<foo>\\w+) (?P<bar>\\w+)'",
|
||||
result: Some(result),
|
||||
},
|
||||
Example {
|
||||
description: "Parse a string using fancy-regex named capture group pattern",
|
||||
example: "echo \"foo bar.\" | parse -r '\\s*(?<name>\\w+)(?=\\.)'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![Value::Record {
|
||||
cols: vec!["name".to_string()],
|
||||
vals: vec![Value::test_string("bar")],
|
||||
span: Span::test_data()
|
||||
}],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Parse a string using fancy-regex capture group pattern",
|
||||
example: "echo \"foo! bar.\" | parse -r '(\\w+)(?=\\.)|(\\w+)(?=!)'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::Record {
|
||||
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
|
||||
vals: vec![Value::test_string(""), Value::test_string("foo")],
|
||||
span: Span::test_data()
|
||||
},
|
||||
Value::Record {
|
||||
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
|
||||
vals: vec![Value::test_string("bar"), Value::test_string("")],
|
||||
span: Span::test_data(),
|
||||
}],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Parse a string using fancy-regex look behind pattern",
|
||||
example: "echo \" @another(foo bar) \" | parse -r '\\s*(?<=[() ])(@\\w+)(\\([^)]*\\))?\\s*'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![Value::Record {
|
||||
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
|
||||
vals: vec![Value::test_string("@another"), Value::test_string("(foo bar)")],
|
||||
span: Span::test_data()
|
||||
}],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Parse a string using fancy-regex look ahead atomic group pattern",
|
||||
example: "echo \"abcd\" | parse -r '^a(bc(?=d)|b)cd$'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![Value::Record {
|
||||
cols: vec!["Capture1".to_string()],
|
||||
vals: vec![Value::test_string("b")],
|
||||
span: Span::test_data()
|
||||
}],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
|
||||
]
|
||||
}
|
||||
|
||||
@ -89,8 +144,15 @@ fn operate(
|
||||
build_regex(&pattern_item, pattern_span)?
|
||||
};
|
||||
|
||||
let regex_pattern =
|
||||
Regex::new(&item_to_parse).map_err(|e| parse_regex_error(e, pattern_span))?;
|
||||
let regex_pattern = Regex::new(&item_to_parse).map_err(|err| {
|
||||
ShellError::GenericError(
|
||||
"Error with regular expression".into(),
|
||||
err.to_string(),
|
||||
Some(pattern_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let columns = column_names(&regex_pattern);
|
||||
let mut parsed: Vec<Value> = Vec::new();
|
||||
@ -102,9 +164,21 @@ fn operate(
|
||||
|
||||
for c in results {
|
||||
let mut cols = Vec::with_capacity(columns.len());
|
||||
let mut vals = Vec::with_capacity(c.len());
|
||||
let captures = match c {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
return Err(ShellError::GenericError(
|
||||
"Error with regular expression captures".into(),
|
||||
e.to_string(),
|
||||
None,
|
||||
None,
|
||||
Vec::new(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let mut vals = Vec::with_capacity(captures.len());
|
||||
|
||||
for (column_name, cap) in columns.iter().zip(c.iter().skip(1)) {
|
||||
for (column_name, cap) in columns.iter().zip(captures.iter().skip(1)) {
|
||||
let cap_string = cap.map(|v| v.as_str()).unwrap_or("").to_string();
|
||||
cols.push(column_name.clone());
|
||||
vals.push(Value::String {
|
||||
@ -156,7 +230,7 @@ fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
|
||||
}
|
||||
|
||||
if !before.is_empty() {
|
||||
output.push_str(&regex::escape(&before));
|
||||
output.push_str(&fancy_regex::escape(&before));
|
||||
}
|
||||
|
||||
// Look for column as we're now at one
|
||||
@ -202,35 +276,6 @@ fn column_names(regex: &Regex) -> Vec<String> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn parse_regex_error(e: regex::Error, base_span: Span) -> ShellError {
|
||||
match e {
|
||||
regex::Error::Syntax(msg) => {
|
||||
let mut lines = msg.lines();
|
||||
|
||||
let main_msg = lines
|
||||
.next()
|
||||
.map(|l| l.replace(':', ""))
|
||||
.expect("invalid regex pattern");
|
||||
|
||||
let span = lines.nth(1).and_then(|l| l.find('^')).map(|space| {
|
||||
let start = base_span.start + space - 3;
|
||||
Span::new(start, start + 1)
|
||||
});
|
||||
|
||||
let msg = lines
|
||||
.next()
|
||||
.and_then(|l| l.split(':').nth(1))
|
||||
.map(|s| format!("{}: {}", main_msg, s.trim()));
|
||||
|
||||
match (msg, span) {
|
||||
(Some(msg), Some(span)) => ShellError::DelimiterError(msg, span),
|
||||
_ => ShellError::DelimiterError("Invalid regex".to_owned(), base_span),
|
||||
}
|
||||
}
|
||||
_ => ShellError::DelimiterError("Invalid regex".to_owned(), base_span),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -1,3 +1,4 @@
|
||||
use fancy_regex::Regex;
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, Span, Value};
|
||||
@ -265,10 +266,13 @@ impl Count for Counter {
|
||||
// use regex here because it can search for CRLF first and not duplicate the count
|
||||
let line_ending_types = [CRLF, LF, CR, NEL, FF, LS, PS];
|
||||
let pattern = &line_ending_types.join("|");
|
||||
let newline_pattern = regex::Regex::new(pattern).expect("Unable to create regex");
|
||||
let newline_pattern = Regex::new(pattern).expect("Unable to create regex");
|
||||
let line_endings = newline_pattern
|
||||
.find_iter(s)
|
||||
.map(|f| f.as_str().to_string())
|
||||
.map(|f| match f {
|
||||
Ok(mat) => mat.as_str().to_string(),
|
||||
Err(_) => "".to_string(),
|
||||
})
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let has_line_ending_suffix =
|
||||
|
@ -1,10 +1,10 @@
|
||||
use fancy_regex::{NoExpand, Regex};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::{Call, CellPath},
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
use regex::{NoExpand, Regex};
|
||||
use std::sync::Arc;
|
||||
|
||||
struct Arguments {
|
||||
@ -133,6 +133,23 @@ impl Command for SubCommand {
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Find and replace with fancy-regex",
|
||||
example: r#"'a sucessful b' | str replace '\b([sS])uc(?:cs|s?)e(ed(?:ed|ing|s?)|ss(?:es|ful(?:ly)?|i(?:ons?|ve(?:ly)?)|ors?)?)\b' '${1}ucce$2'"#,
|
||||
result: Some(Value::String {
|
||||
val: "a successful b".to_string(),
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Find and replace with fancy-regex",
|
||||
example: r#"'GHIKK-9+*' | str replace '[*[:xdigit:]+]' 'z'"#,
|
||||
result: Some(Value::String {
|
||||
val: "GHIKK-z+*".to_string(),
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
|
||||
]
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
use fancy_regex::Regex;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::{Call, CellPath},
|
||||
@ -311,7 +312,7 @@ fn trim(s: &str, char_: Option<char>, closure_flags: &ClosureFlags) -> String {
|
||||
// create a regex string that looks for 2 or more of each of these characters
|
||||
let re_str = format!("{}{{2,}}", reg);
|
||||
// create the regex
|
||||
let re = regex::Regex::new(&re_str).expect("Error creating regular expression");
|
||||
let re = Regex::new(&re_str).expect("Error creating regular expression");
|
||||
// replace all multiple occurrences with single occurrences represented by r
|
||||
let new_str = re.replace_all(&return_string, r.to_string());
|
||||
// update the return string so the next loop has the latest changes
|
||||
|
Reference in New Issue
Block a user