forked from extern/nushell
feat: allow from csv
to accept 4 byte unicode separator chars (#10138)
- this PR should close #10132 # Description * added a flag to `from csv --ascii` that replaces the given `separator` with the Unicode separator x1f https://www.codetable.net/hex/1f (aka Information Separator One) # User-Facing Changes New flags are available for `from csv` (`--ascii` or short `-a`) # Tests + Formatting There are no tests at the moment. Code has been formatted. - `cargo test --workspace` (breaks with an unrelated test on my machine)
This commit is contained in:
parent
02318cf3a7
commit
93f20b406e
@ -21,7 +21,7 @@ impl Command for FromCsv {
|
|||||||
.named(
|
.named(
|
||||||
"separator",
|
"separator",
|
||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
"a character to separate columns, defaults to ','",
|
"a character to separate columns (either single char or 4 byte unicode sequence), defaults to ','",
|
||||||
Some('s'),
|
Some('s'),
|
||||||
)
|
)
|
||||||
.named(
|
.named(
|
||||||
@ -134,11 +134,22 @@ fn from_csv(
|
|||||||
) -> Result<PipelineData, ShellError> {
|
) -> Result<PipelineData, ShellError> {
|
||||||
let name = call.head;
|
let name = call.head;
|
||||||
|
|
||||||
let separator = call
|
let separator = match call.get_flag::<String>(engine_state, stack, "separator")? {
|
||||||
.get_flag(engine_state, stack, "separator")?
|
Some(sep) => {
|
||||||
.map(|v: Value| v.as_char())
|
if sep.len() == 1 {
|
||||||
.transpose()?
|
sep.chars().next().unwrap_or(',')
|
||||||
.unwrap_or(',');
|
} else if sep.len() == 4 {
|
||||||
|
let unicode_sep = u32::from_str_radix(&sep, 16);
|
||||||
|
char::from_u32(unicode_sep.unwrap_or(b'\x1f' as u32)).unwrap_or(',')
|
||||||
|
} else {
|
||||||
|
return Err(ShellError::NonUtf8Custom(
|
||||||
|
"separator should be a single char or a 4-byte unicode".to_string(),
|
||||||
|
call.span(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => ',',
|
||||||
|
};
|
||||||
let comment = call
|
let comment = call
|
||||||
.get_flag(engine_state, stack, "comment")?
|
.get_flag(engine_state, stack, "comment")?
|
||||||
.map(|v: Value| v.as_char())
|
.map(|v: Value| v.as_char())
|
||||||
|
@ -341,7 +341,9 @@ fn from_csv_text_with_multiple_char_separator() {
|
|||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert!(actual.err.contains("single character separator"));
|
assert!(actual
|
||||||
|
.err
|
||||||
|
.contains("separator should be a single char or a 4-byte unicode"));
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,7 +368,7 @@ fn from_csv_text_with_wrong_type_separator() {
|
|||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert!(actual.err.contains("can't convert int to char"));
|
assert!(actual.err.contains("can't convert int to string"));
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -404,3 +406,57 @@ fn string_to_csv_error() {
|
|||||||
|
|
||||||
assert!(actual.err.contains("command doesn't support"))
|
assert!(actual.err.contains("command doesn't support"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parses_csv_with_unicode_sep() {
|
||||||
|
Playground::setup("filter_from_csv_unicode_sep_test_3", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"los_tres_caballeros.txt",
|
||||||
|
r#"
|
||||||
|
first_name;last_name;rusty_luck
|
||||||
|
Andrés;Robalino;1
|
||||||
|
JT;Turner;1
|
||||||
|
Yehuda;Katz;1
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open los_tres_caballeros.txt
|
||||||
|
| from csv --separator "003B"
|
||||||
|
| get rusty_luck
|
||||||
|
| length
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "3");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parses_csv_with_unicode_x1f_sep() {
|
||||||
|
Playground::setup("filter_from_csv_unicode_sep_x1f_test_3", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"los_tres_caballeros.txt",
|
||||||
|
r#"
|
||||||
|
first_namelast_namerusty_luck
|
||||||
|
AndrésRobalino1
|
||||||
|
JTTurner1
|
||||||
|
YehudaKatz1
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open los_tres_caballeros.txt
|
||||||
|
| from csv --separator "001F"
|
||||||
|
| get rusty_luck
|
||||||
|
| length
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual.out, "3");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user