Merge pull request #943 from drmason13/from_csv-add-separator-arg

Add --separator argument to from_csv
This commit is contained in:
Jonathan Turner 2019-11-09 15:09:32 -08:00 committed by GitHub
commit 9043970e97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 122 additions and 1 deletions

View File

@ -3,7 +3,9 @@
Converts csv data into table. Use this when nushell cannot dertermine the input file extension.
## Example
Let's say we have the following file :
```shell
> cat pets.txt
animal, name, age
@ -36,6 +38,7 @@ To get a table from `pets.txt` we need to use the `from-csv` command :
```
To ignore the csv headers use `--headerless` :
```shell
━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━━━━
# │ Column1 │ Column2 │ Column3
@ -45,3 +48,65 @@ To ignore the csv headers use `--headerless` :
━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━━━━
```
To split on a character other than ',' use `--separator` :
```shell
> open pets.txt
animal; name; age
cat; Tom; 7
dog; Alfred; 10
chameleon; Linda; 1
```
```shell
> open pets.txt | from-csv --separator ';'
━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━
# │ animal │ name │ age
───┼───────────┼─────────┼──────
0 │ cat │ Tom │ 7
1 │ dog │ Alfred │ 10
2 │ chameleon │ Linda │ 1
━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━
```
To use this command to open a csv with separators other than a comma, use the `--raw` switch of `open` to open the csv, othewise the csv will enter `from-csv` as a table split on commas rather than raw text.
```shell
> mv pets.txt pets.csv
> open pets.csv | from-csv --separator ';'
error: Expected a string from pipeline
- shell:1:16
1 | open pets.csv | from-csv --separator ';'
| ^^^^^^^^ requires string input
- shell:1:0
1 | open pets.csv | from-csv --separator ';'
| value originates from here
> open pets.csv --raw | from-csv --separator ';'
━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━
# │ animal │ name │ age
───┼───────────┼─────────┼──────
0 │ cat │ Tom │ 7
1 │ dog │ Alfred │ 10
2 │ chameleon │ Linda │ 1
━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━
```
Note that separators are currently provided as strings and need to be wrapped in quotes.
```shell
> open pets.csv --raw | from-csv --separator ;
- shell:1:43
1 | open pets.csv --raw | from-csv --separator ;
| ^
```
It is also considered an error to use a separator greater than one char :
```shell
> open pets.txt | from-csv --separator '123'
error: Expected a single separator char from --separator
- shell:1:37
1 | open pets.txt | from-csv --separator '123'
| ^^^^^ requires a single character string input
```

View File

@ -8,6 +8,7 @@ pub struct FromCSV;
#[derive(Deserialize)]
pub struct FromCSVArgs {
headerless: bool,
separator: Option<Tagged<Value>>,
}
impl WholeStreamCommand for FromCSV {
@ -17,6 +18,11 @@ impl WholeStreamCommand for FromCSV {
fn signature(&self) -> Signature {
Signature::build("from-csv")
.named(
"separator",
SyntaxShape::String,
"a character to separate columns, defaults to ','",
)
.switch("headerless", "don't treat the first row as column names")
}
@ -36,10 +42,12 @@ impl WholeStreamCommand for FromCSV {
pub fn from_csv_string_to_value(
s: String,
headerless: bool,
separator: char,
tag: impl Into<Tag>,
) -> Result<Tagged<Value>, csv::Error> {
let mut reader = ReaderBuilder::new()
.has_headers(false)
.delimiter(separator as u8)
.from_reader(s.as_bytes());
let tag = tag.into();
@ -84,10 +92,29 @@ pub fn from_csv_string_to_value(
fn from_csv(
FromCSVArgs {
headerless: skip_headers,
separator,
}: FromCSVArgs,
RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
let name_tag = name;
let sep = match separator {
Some(Tagged {
item: Value::Primitive(Primitive::String(s)),
tag,
..
}) => {
let vec_s: Vec<char> = s.chars().collect();
if vec_s.len() != 1 {
return Err(ShellError::labeled_error(
"Expected a single separator char from --separator",
"requires a single character string input",
tag,
));
};
vec_s[0]
}
_ => ',',
};
let stream = async_stream! {
let values: Vec<Tagged<Value>> = input.values.collect().await;
@ -114,7 +141,7 @@ fn from_csv(
}
}
match from_csv_string_to_value(concat_string, skip_headers, name_tag.clone()) {
match from_csv_string_to_value(concat_string, skip_headers, sep, name_tag.clone()) {
Ok(x) => match x {
Tagged { item: Value::Table(list), .. } => {
for l in list {

View File

@ -100,6 +100,35 @@ fn converts_from_csv_text_to_structured_table() {
})
}
#[test]
fn converts_from_csv_text_with_separator_to_structured_table() {
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.txt",
r#"
first_name;last_name;rusty_luck
Andrés;Robalino;1
Jonathan;Turner;1
Yehuda;Katz;1
"#,
)]);
let actual = nu!(
cwd: dirs.test(), h::pipeline(
r#"
open los_tres_caballeros.txt
| from-csv --separator ';'
| get rusty_luck
| str --to-int
| sum
| echo $it
"#
));
assert_eq!(actual, "3");
})
}
#[test]
fn converts_from_csv_text_skipping_headers_to_structured_table() {
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {