From 15986c598ae653c8d1d2415a0b06b2235cebd235 Mon Sep 17 00:00:00 2001 From: David Mason Date: Fri, 8 Nov 2019 13:11:04 +0000 Subject: [PATCH 1/2] Add --separator command to from_csv The command takes a string, checks it is a single character and then passes it to csv::ReaderBuilder via .delimiter() method as a u8. --- docs/commands/from-csv.md | 65 +++++++++++++++++++++++++++++++++++++++ src/commands/from_csv.rs | 23 +++++++++++++- tests/filters_test.rs | 29 +++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/docs/commands/from-csv.md b/docs/commands/from-csv.md index 86d309d86b..b72818eefc 100644 --- a/docs/commands/from-csv.md +++ b/docs/commands/from-csv.md @@ -3,7 +3,9 @@ Converts csv data into table. Use this when nushell cannot dertermine the input file extension. ## Example + Let's say we have the following file : + ```shell > cat pets.txt animal, name, age @@ -36,6 +38,7 @@ To get a table from `pets.txt` we need to use the `from-csv` command : ``` To ignore the csv headers use `--headerless` : + ```shell ━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━━━━ # │ Column1 │ Column2 │ Column3 @@ -45,3 +48,65 @@ To ignore the csv headers use `--headerless` : ━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━━━━ ``` +To split on a character other than ',' use `--separator` : + +```shell +> open pets.txt +animal; name; age +cat; Tom; 7 +dog; Alfred; 10 +chameleon; Linda; 1 +``` + +```shell +> open pets.txt | from-csv --separator ';' +━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━ + # │ animal │ name │ age +───┼───────────┼─────────┼────── + 0 │ cat │ Tom │ 7 + 1 │ dog │ Alfred │ 10 + 2 │ chameleon │ Linda │ 1 +━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━ +``` + +To use this command to open a csv with separators other than a comma, use the `--raw` switch of `open` to open the csv, otherwise the csv will enter `from-csv` as a table split on commas rather than raw text. 
+ +```shell +> mv pets.txt pets.csv +> open pets.csv | from-csv --separator ';' +error: Expected a string from pipeline +- shell:1:16 +1 | open pets.csv | from-csv --separator ';' + | ^^^^^^^^ requires string input +- shell:1:0 +1 | open pets.csv | from-csv --separator ';' + | value originates from here + +> open pets.csv --raw | from-csv --separator ';' +━━━┯━━━━━━━━━━━┯━━━━━━━━━┯━━━━━━ + # │ animal │ name │ age +───┼───────────┼─────────┼────── + 0 │ cat │ Tom │ 7 + 1 │ dog │ Alfred │ 10 + 2 │ chameleon │ Linda │ 1 +━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━ +``` + +Note that separators are currently provided as strings and need to be wrapped in quotes. + +```shell +> open pets.csv --raw | from-csv --separator ; +- shell:1:43 +1 | open pets.csv --raw | from-csv --separator ; + | ^ +``` + +It is also considered an error to use a separator greater than one char : + +```shell +> open pets.txt | from-csv --separator '123' +error: Expected a single separator char from --separator +- shell:1:37 +1 | open pets.txt | from-csv --separator '123' + | ^^^^^ requires a single character string input +``` diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs index 7442a07fc9..de2b0d0593 100644 --- a/src/commands/from_csv.rs +++ b/src/commands/from_csv.rs @@ -8,6 +8,7 @@ pub struct FromCSV; #[derive(Deserialize)] pub struct FromCSVArgs { headerless: bool, + separator: Option>, } impl WholeStreamCommand for FromCSV { @@ -17,6 +18,7 @@ impl WholeStreamCommand for FromCSV { fn signature(&self) -> Signature { Signature::build("from-csv") + .named("separator", SyntaxShape::String, "a character to separate columns, defaults to ','") .switch("headerless", "don't treat the first row as column names") } @@ -36,10 +38,12 @@ impl WholeStreamCommand for FromCSV { pub fn from_csv_string_to_value( s: String, headerless: bool, + separator: char, tag: impl Into, ) -> Result, csv::Error> { let mut reader = ReaderBuilder::new() .has_headers(false) + .delimiter(separator as u8) 
.from_reader(s.as_bytes()); let tag = tag.into(); @@ -84,10 +88,27 @@ pub fn from_csv_string_to_value( fn from_csv( FromCSVArgs { headerless: skip_headers, + separator, }: FromCSVArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let name_tag = name; + let sep = match separator { + Some(Tagged { item: Value::Primitive(Primitive::String(s)), tag, .. }) => { + let vec_s: Vec = s.chars().collect(); + if vec_s.len() != 1 { + return Err(ShellError::labeled_error( + "Expected a single separator char from --separator", + "requires a single character string input", + tag, + )) + }; + vec_s[0] + } + _ => { + ',' + } + }; let stream = async_stream! { let values: Vec> = input.values.collect().await; @@ -114,7 +135,7 @@ fn from_csv( } } - match from_csv_string_to_value(concat_string, skip_headers, name_tag.clone()) { + match from_csv_string_to_value(concat_string, skip_headers, sep, name_tag.clone()) { Ok(x) => match x { Tagged { item: Value::Table(list), .. } => { for l in list { diff --git a/tests/filters_test.rs b/tests/filters_test.rs index 1eb55448b7..e410e99e65 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -100,6 +100,35 @@ fn converts_from_csv_text_to_structured_table() { }) } +#[test] +fn converts_from_csv_text_with_separator_to_structured_table() { + Playground::setup("filter_from_csv_test_1", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name;last_name;rusty_luck + Andrés;Robalino;1 + Jonathan;Turner;1 + Yehuda;Katz;1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open los_tres_caballeros.txt + | from-csv --separator ';' + | get rusty_luck + | str --to-int + | sum + | echo $it + "# + )); + + assert_eq!(actual, "3"); + }) +} + #[test] fn converts_from_csv_text_skipping_headers_to_structured_table() { Playground::setup("filter_from_csv_test_2", |dirs, sandbox| { From 4a6122905b69435a36c6d66e212820e4aab13fd6 Mon Sep 17 00:00:00 
2001 From: David Mason Date: Fri, 8 Nov 2019 15:27:29 +0000 Subject: [PATCH 2/2] fmt: cargo fmt --all --- src/commands/from_csv.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs index de2b0d0593..9483fed521 100644 --- a/src/commands/from_csv.rs +++ b/src/commands/from_csv.rs @@ -18,7 +18,11 @@ impl WholeStreamCommand for FromCSV { fn signature(&self) -> Signature { Signature::build("from-csv") - .named("separator", SyntaxShape::String, "a character to separate columns, defaults to ','") + .named( + "separator", + SyntaxShape::String, + "a character to separate columns, defaults to ','", + ) .switch("headerless", "don't treat the first row as column names") } @@ -94,20 +98,22 @@ fn from_csv( ) -> Result { let name_tag = name; let sep = match separator { - Some(Tagged { item: Value::Primitive(Primitive::String(s)), tag, .. }) => { + Some(Tagged { + item: Value::Primitive(Primitive::String(s)), + tag, + .. + }) => { let vec_s: Vec = s.chars().collect(); if vec_s.len() != 1 { return Err(ShellError::labeled_error( "Expected a single separator char from --separator", "requires a single character string input", tag, - )) + )); }; vec_s[0] } - _ => { - ',' - } + _ => ',', }; let stream = async_stream! {