mirror of
https://github.com/nushell/nushell.git
synced 2025-01-11 08:48:23 +01:00
Add regex separators for split row/list/column (#8707)
# Description Verified on discord with maintainer Change adds regex separators in split rows/column/list. The primary motivating reason was to make it easier to split on separators with unbounded whitespace without requiring a lot of trim jiggery. But, secondary motivation is the same as the set of all motivations for adding split regex features to most languages. # User-Facing Changes Adds -r option to split rows/column/list. # Tests + Formatting Ran tests, however tests.nu fails with unrelated errors: ``` ~/src/nushell> cargo run -- crates/nu-utils/standard_library/tests.nu 04/02/2023 02:07:25 AM Finished dev [unoptimized + debuginfo] target(s) in 0.24s Running `target/debug/nu crates/nu-utils/standard_library/tests.nu` INF|2023-04-02T02:07:27.060|Running tests in test_asserts INF|2023-04-02T02:07:27.141|Running tests in test_dirs Error: × list is just pwd after initialization INF|2023-04-02T02:07:27.167|Running tests in test_logger INF|2023-04-02T02:07:27.286|Running tests in test_std Error: × some tests did not pass (see complete errors above): │ │ test_asserts test_assert │ test_asserts test_assert_equal │ test_asserts test_assert_error │ test_asserts test_assert_greater │ test_asserts test_assert_greater_or_equal │ test_asserts test_assert_length │ test_asserts test_assert_less │ test_asserts test_assert_less_or_equal │ test_asserts test_assert_not_equal │ ⨯ test_dirs test_dirs_command │ test_logger test_critical │ test_logger test_debug │ test_logger test_error │ test_logger test_info │ test_logger test_warning │ test_std test_path_add │ ``` Upon investigating seeing this difference: ``` ╭───┬─────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ 0 │ /var/folders/1f/ltbr1m8s5s1811k6n1rhpc0r0000gn/T/test_dirs_c1ed89d6-19f7-47c7-9e1f-74c39f3623b5 │ │ 1 │ /private/var/folders/1f/ltbr1m8s5s1811k6n1rhpc0r0000gn/T/test_dirs_c1ed89d6-19f7-47c7-9e1f-74c39f3623b5 │ 
╰───┴─────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` This seems unrelated to my changes, but can investigate further if desired. # After Submitting If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. Co-authored-by: Robert Waugh <robert@waugh.io>
This commit is contained in:
parent
771e24913d
commit
4fda6d7eaa
@ -5,6 +5,7 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SubCommand;
|
||||
@ -30,6 +31,7 @@ impl Command for SubCommand {
|
||||
"the character or string that denotes what separates columns",
|
||||
)
|
||||
.switch("collapse-empty", "remove empty columns", Some('c'))
|
||||
.switch("regex", "separator is a regular expression", Some('r'))
|
||||
.rest(
|
||||
"rest",
|
||||
SyntaxShape::String,
|
||||
@ -117,6 +119,25 @@ impl Command for SubCommand {
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Split a list of strings into a table, ignoring padding",
|
||||
example: r"['a - b' 'c - d'] | split column -r '\s*-\s*'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::Record {
|
||||
cols: vec!["column1".to_string(), "column2".to_string()],
|
||||
vals: vec![Value::test_string("a"), Value::test_string("b")],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::Record {
|
||||
cols: vec!["column1".to_string(), "column2".to_string()],
|
||||
vals: vec![Value::test_string("c"), Value::test_string("d")],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -132,30 +153,43 @@ fn split_column(
|
||||
let rest: Vec<Spanned<String>> = call.rest(engine_state, stack, 1)?;
|
||||
let collapse_empty = call.has_flag("collapse-empty");
|
||||
|
||||
let regex = if call.has_flag("regex") {
|
||||
Regex::new(&separator.item)
|
||||
} else {
|
||||
let escaped = regex::escape(&separator.item);
|
||||
Regex::new(&escaped)
|
||||
}
|
||||
.map_err(|err| {
|
||||
ShellError::GenericError(
|
||||
"Error with regular expression".into(),
|
||||
err.to_string(),
|
||||
Some(separator.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
input.flat_map(
|
||||
move |x| split_column_helper(&x, &separator, &rest, collapse_empty, name_span),
|
||||
move |x| split_column_helper(&x, &regex, &rest, collapse_empty, name_span),
|
||||
engine_state.ctrlc.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
fn split_column_helper(
|
||||
v: &Value,
|
||||
separator: &Spanned<String>,
|
||||
separator: &Regex,
|
||||
rest: &[Spanned<String>],
|
||||
collapse_empty: bool,
|
||||
head: Span,
|
||||
) -> Vec<Value> {
|
||||
if let Ok(s) = v.as_string() {
|
||||
let split_result: Vec<_> = if collapse_empty {
|
||||
s.split(&separator.item).filter(|s| !s.is_empty()).collect()
|
||||
} else {
|
||||
s.split(&separator.item).collect()
|
||||
};
|
||||
|
||||
let split_result: Vec<_> = separator
|
||||
.split(&s)
|
||||
.filter(|x| !(collapse_empty && x.is_empty()))
|
||||
.collect();
|
||||
let positional: Vec<_> = rest.iter().map(|f| f.item.clone()).collect();
|
||||
|
||||
// If they didn't provide column names, make up our own
|
||||
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
if positional.is_empty() {
|
||||
|
@ -5,6 +5,7 @@ use nu_protocol::{
|
||||
Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, SyntaxShape,
|
||||
Type, Value,
|
||||
};
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SubCommand;
|
||||
@ -25,6 +26,10 @@ impl Command for SubCommand {
|
||||
SyntaxShape::Any,
|
||||
"the value that denotes what separates the list",
|
||||
)
|
||||
.switch(
|
||||
"regex",
|
||||
"separator is a regular expression, matching values that can be coerced into a string",
|
||||
Some('r'))
|
||||
.category(Category::Filters)
|
||||
}
|
||||
|
||||
@ -121,10 +126,76 @@ impl Command for SubCommand {
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Split a list of chars into lists based on multiple characters",
|
||||
example: r"[a, b, c, d, a, e, f, g] | split list -r '(b|e)'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::List {
|
||||
vals: vec![Value::test_string("a")],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::List {
|
||||
vals: vec![
|
||||
Value::test_string("c"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("a"),
|
||||
],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::List {
|
||||
vals: vec![Value::test_string("f"), Value::test_string("g")],
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
enum Matcher {
|
||||
Regex(Regex),
|
||||
Direct(Value),
|
||||
}
|
||||
|
||||
impl Matcher {
|
||||
pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> {
|
||||
if regex {
|
||||
Ok(Matcher::Regex(Regex::new(&lhs.as_string()?).map_err(
|
||||
|err| {
|
||||
ShellError::GenericError(
|
||||
"Error with regular expression".into(),
|
||||
err.to_string(),
|
||||
match lhs {
|
||||
Value::Error { error: _ } => None,
|
||||
_ => Some(lhs.expect_span()),
|
||||
},
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
},
|
||||
)?))
|
||||
} else {
|
||||
Ok(Matcher::Direct(lhs))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compare(&self, rhs: &Value) -> Result<bool, ShellError> {
|
||||
Ok(match self {
|
||||
Matcher::Regex(regex) => {
|
||||
if let Ok(rhs_str) = rhs.as_string() {
|
||||
regex.is_match(&rhs_str)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
Matcher::Direct(lhs) => rhs == lhs,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn split_list(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
@ -134,9 +205,11 @@ fn split_list(
|
||||
let separator: Value = call.req(engine_state, stack, 0)?;
|
||||
let mut temp_list = Vec::new();
|
||||
let mut returned_list = Vec::new();
|
||||
|
||||
let iter = input.into_interruptible_iter(engine_state.ctrlc.clone());
|
||||
let matcher = Matcher::new(call.has_flag("regex"), separator)?;
|
||||
for val in iter {
|
||||
if val == separator {
|
||||
if matcher.compare(&val)? {
|
||||
if !temp_list.is_empty() {
|
||||
returned_list.push(Value::List {
|
||||
vals: temp_list.clone(),
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
#[derive(Clone)]
|
||||
pub struct SubCommand;
|
||||
|
||||
@ -21,7 +21,7 @@ impl Command for SubCommand {
|
||||
.required(
|
||||
"separator",
|
||||
SyntaxShape::String,
|
||||
"the character that denotes what separates rows",
|
||||
"a character or regex that denotes what separates rows",
|
||||
)
|
||||
.named(
|
||||
"number",
|
||||
@ -29,6 +29,7 @@ impl Command for SubCommand {
|
||||
"Split into maximum number of items",
|
||||
Some('n'),
|
||||
)
|
||||
.switch("regex", "use regex syntax for separator", Some('r'))
|
||||
.category(Category::Strings)
|
||||
}
|
||||
|
||||
@ -92,6 +93,18 @@ impl Command for SubCommand {
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
Example {
|
||||
description: "Split a string by regex",
|
||||
example: r"'a b c' | split row -r '\s+'",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -104,30 +117,40 @@ fn split_row(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let name_span = call.head;
|
||||
let separator: Spanned<String> = call.req(engine_state, stack, 0)?;
|
||||
let regex = if call.has_flag("regex") {
|
||||
Regex::new(&separator.item)
|
||||
} else {
|
||||
let escaped = regex::escape(&separator.item);
|
||||
Regex::new(&escaped)
|
||||
}
|
||||
.map_err(|err| {
|
||||
ShellError::GenericError(
|
||||
"Error with regular expression".into(),
|
||||
err.to_string(),
|
||||
Some(separator.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
let max_split: Option<usize> = call.get_flag(engine_state, stack, "number")?;
|
||||
input.flat_map(
|
||||
move |x| split_row_helper(&x, &separator, max_split, name_span),
|
||||
move |x| split_row_helper(&x, &regex, max_split, name_span),
|
||||
engine_state.ctrlc.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
fn split_row_helper(
|
||||
v: &Value,
|
||||
separator: &Spanned<String>,
|
||||
max_split: Option<usize>,
|
||||
name: Span,
|
||||
) -> Vec<Value> {
|
||||
fn split_row_helper(v: &Value, regex: &Regex, max_split: Option<usize>, name: Span) -> Vec<Value> {
|
||||
match v.span() {
|
||||
Ok(v_span) => {
|
||||
if let Ok(s) = v.as_string() {
|
||||
match max_split {
|
||||
Some(max_split) => s
|
||||
.splitn(max_split, &separator.item)
|
||||
.map(|s| Value::string(s, v_span))
|
||||
Some(max_split) => regex
|
||||
.splitn(&s, max_split)
|
||||
.map(|x: &str| Value::string(x, v_span))
|
||||
.collect(),
|
||||
None => s
|
||||
.split(&separator.item)
|
||||
.map(|s| Value::string(s, v_span))
|
||||
None => regex
|
||||
.split(&s)
|
||||
.map(|x: &str| Value::string(x, v_span))
|
||||
.collect(),
|
||||
}
|
||||
} else {
|
||||
|
@ -5,12 +5,20 @@ use nu_test_support::{nu, pipeline};
|
||||
#[test]
|
||||
fn to_column() {
|
||||
Playground::setup("split_column_test_1", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"sample.txt",
|
||||
r#"
|
||||
sandbox.with_files(vec![
|
||||
FileWithContentToBeTrimmed(
|
||||
"sample.txt",
|
||||
r#"
|
||||
importer,shipper,tariff_item,name,origin
|
||||
"#,
|
||||
)]);
|
||||
),
|
||||
FileWithContentToBeTrimmed(
|
||||
"sample2.txt",
|
||||
r#"
|
||||
importer , shipper , tariff_item , name , origin
|
||||
"#,
|
||||
),
|
||||
]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
@ -24,5 +32,18 @@ fn to_column() {
|
||||
));
|
||||
|
||||
assert!(actual.out.contains("shipper"));
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open sample2.txt
|
||||
| lines
|
||||
| str trim
|
||||
| split column -r '\s*,\s*'
|
||||
| get column2
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.out.contains("shipper"));
|
||||
})
|
||||
}
|
||||
|
@ -5,12 +5,20 @@ use nu_test_support::{nu, pipeline};
|
||||
#[test]
|
||||
fn to_row() {
|
||||
Playground::setup("split_row_test_1", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"sample.txt",
|
||||
r#"
|
||||
sandbox.with_files(vec![
|
||||
FileWithContentToBeTrimmed(
|
||||
"sample.txt",
|
||||
r#"
|
||||
importer,shipper,tariff_item,name,origin
|
||||
"#,
|
||||
)]);
|
||||
),
|
||||
FileWithContentToBeTrimmed(
|
||||
"sample2.txt",
|
||||
r#"
|
||||
importer , shipper , tariff_item,name , origin
|
||||
"#,
|
||||
),
|
||||
]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
@ -24,5 +32,18 @@ fn to_row() {
|
||||
));
|
||||
|
||||
assert!(actual.out.contains('5'));
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open sample2.txt
|
||||
| lines
|
||||
| str trim
|
||||
| split row -r '\s*,\s*'
|
||||
| length
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.out.contains('5'));
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user