forked from extern/nushell
feat: allow from csv
to accept 4 byte unicode separator chars (#10138)
- This PR should close #10132. # Description * Added a flag to `from csv --ascii` that replaces the given `separator` with the unicode separator x1f https://www.codetable.net/hex/1f (aka Information Separator One). # User-Facing Changes New flags are available for `from csv` (`--ascii` or short `-a`). # Tests + Formatting There are no tests at the moment. Code has been formatted. - `cargo test --workspace` (breaks with an unrelated test on my machine).
This commit is contained in:
parent
02318cf3a7
commit
93f20b406e
@ -21,7 +21,7 @@ impl Command for FromCsv {
|
||||
.named(
|
||||
"separator",
|
||||
SyntaxShape::String,
|
||||
"a character to separate columns, defaults to ','",
|
||||
"a character to separate columns (either single char or 4 byte unicode sequence), defaults to ','",
|
||||
Some('s'),
|
||||
)
|
||||
.named(
|
||||
@ -134,11 +134,22 @@ fn from_csv(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let name = call.head;
|
||||
|
||||
let separator = call
|
||||
.get_flag(engine_state, stack, "separator")?
|
||||
.map(|v: Value| v.as_char())
|
||||
.transpose()?
|
||||
.unwrap_or(',');
|
||||
let separator = match call.get_flag::<String>(engine_state, stack, "separator")? {
|
||||
Some(sep) => {
|
||||
if sep.len() == 1 {
|
||||
sep.chars().next().unwrap_or(',')
|
||||
} else if sep.len() == 4 {
|
||||
let unicode_sep = u32::from_str_radix(&sep, 16);
|
||||
char::from_u32(unicode_sep.unwrap_or(b'\x1f' as u32)).unwrap_or(',')
|
||||
} else {
|
||||
return Err(ShellError::NonUtf8Custom(
|
||||
"separator should be a single char or a 4-byte unicode".to_string(),
|
||||
call.span(),
|
||||
));
|
||||
}
|
||||
}
|
||||
None => ',',
|
||||
};
|
||||
let comment = call
|
||||
.get_flag(engine_state, stack, "comment")?
|
||||
.map(|v: Value| v.as_char())
|
||||
|
@ -341,7 +341,9 @@ fn from_csv_text_with_multiple_char_separator() {
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.err.contains("single character separator"));
|
||||
assert!(actual
|
||||
.err
|
||||
.contains("separator should be a single char or a 4-byte unicode"));
|
||||
})
|
||||
}
|
||||
|
||||
@ -366,7 +368,7 @@ fn from_csv_text_with_wrong_type_separator() {
|
||||
"#
|
||||
));
|
||||
|
||||
assert!(actual.err.contains("can't convert int to char"));
|
||||
assert!(actual.err.contains("can't convert int to string"));
|
||||
})
|
||||
}
|
||||
|
||||
@ -404,3 +406,57 @@ fn string_to_csv_error() {
|
||||
|
||||
assert!(actual.err.contains("command doesn't support"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_csv_with_unicode_sep() {
|
||||
Playground::setup("filter_from_csv_unicode_sep_test_3", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.txt",
|
||||
r#"
|
||||
first_name;last_name;rusty_luck
|
||||
Andrés;Robalino;1
|
||||
JT;Turner;1
|
||||
Yehuda;Katz;1
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open los_tres_caballeros.txt
|
||||
| from csv --separator "003B"
|
||||
| get rusty_luck
|
||||
| length
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "3");
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_csv_with_unicode_x1f_sep() {
|
||||
Playground::setup("filter_from_csv_unicode_sep_x1f_test_3", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"los_tres_caballeros.txt",
|
||||
r#"
|
||||
first_namelast_namerusty_luck
|
||||
AndrésRobalino1
|
||||
JTTurner1
|
||||
YehudaKatz1
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open los_tres_caballeros.txt
|
||||
| from csv --separator "001F"
|
||||
| get rusty_luck
|
||||
| length
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "3");
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user