feat: allow from csv to accept 4 byte unicode separator chars (#10138)

- this PR should close #10132

# Description
* Added a flag to `from csv --ascii` that replaces the given `separator`
with the Unicode separator x1f https://www.codetable.net/hex/1f (aka
Information Separator One)

# User-Facing Changes
New flags are available for `from csv` (`--ascii` or short `-a`)

# Tests + Formatting
There are no tests at the moment. Code has been formatted.
- `cargo test --workspace` (breaks with an unrelated test on my
machine)
This commit is contained in:
Matthias Q
2023-08-31 18:55:39 +02:00
committed by GitHub
parent 02318cf3a7
commit 93f20b406e
2 changed files with 75 additions and 8 deletions

View File

@ -21,7 +21,7 @@ impl Command for FromCsv {
.named(
"separator",
SyntaxShape::String,
"a character to separate columns, defaults to ','",
"a character to separate columns (either single char or 4 byte unicode sequence), defaults to ','",
Some('s'),
)
.named(
@ -134,11 +134,22 @@ fn from_csv(
) -> Result<PipelineData, ShellError> {
let name = call.head;
let separator = call
.get_flag(engine_state, stack, "separator")?
.map(|v: Value| v.as_char())
.transpose()?
.unwrap_or(',');
let separator = match call.get_flag::<String>(engine_state, stack, "separator")? {
Some(sep) => {
if sep.len() == 1 {
sep.chars().next().unwrap_or(',')
} else if sep.len() == 4 {
let unicode_sep = u32::from_str_radix(&sep, 16);
char::from_u32(unicode_sep.unwrap_or(b'\x1f' as u32)).unwrap_or(',')
} else {
return Err(ShellError::NonUtf8Custom(
"separator should be a single char or a 4-byte unicode".to_string(),
call.span(),
));
}
}
None => ',',
};
let comment = call
.get_flag(engine_state, stack, "comment")?
.map(|v: Value| v.as_char())