diff --git a/crates/nu-command/src/formats/from/csv.rs b/crates/nu-command/src/formats/from/csv.rs index ac3237195..ab1b11cb9 100644 --- a/crates/nu-command/src/formats/from/csv.rs +++ b/crates/nu-command/src/formats/from/csv.rs @@ -1,4 +1,4 @@ -use super::delimited::{from_delimited_data, trim_from_str}; +use super::delimited::{from_delimited_data, trim_from_str, DelimitedReaderConfig}; use nu_engine::CallExt; use nu_protocol::ast::Call; @@ -24,11 +24,34 @@ impl Command for FromCsv { "a character to separate columns, defaults to ','", Some('s'), ) + .named( + "comment", + SyntaxShape::String, + "a comment character to ignore lines starting with it", + Some('c'), + ) + .named( + "quote", + SyntaxShape::String, + "a quote character to ignore separators in strings, defaults to '\"'", + Some('q'), + ) + .named( + "escape", + SyntaxShape::String, + "an escape character for strings containing the quote character", + Some('e'), + ) .switch( "noheaders", "don't treat the first row as column names", Some('n'), ) + .switch( + "flexible", + "allow the number of fields in records to be variable", + None, + ) .switch("no-infer", "no field type inferencing", None) .named( "trim", @@ -75,28 +98,28 @@ impl Command for FromCsv { example: "open data.txt | from csv --noheaders", result: None, }, - Example { - description: "Convert comma-separated data to a table, ignoring headers", - example: "open data.txt | from csv -n", - result: None, - }, Example { description: "Convert semicolon-separated data to a table", example: "open data.txt | from csv --separator ';'", result: None, }, Example { - description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names and field values", + description: "Convert comma-separated data to a table, ignoring lines starting with '#'", + example: "open data.txt | from csv --comment '#'", + result: None, + }, + Example { + description: "Convert comma-separated data to a table, dropping all possible 
whitespaces around header names and field values", example: "open data.txt | from csv --trim all", result: None, }, Example { - description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around header names", + description: "Convert comma-separated data to a table, dropping all possible whitespaces around header names", example: "open data.txt | from csv --trim headers", result: None, }, Example { - description: "Convert semicolon-separated data to a table, dropping all possible whitespaces around field values", + description: "Convert comma-separated data to a table, dropping all possible whitespaces around field values", example: "open data.txt | from csv --trim fields", result: None, }, @@ -112,32 +135,41 @@ fn from_csv( ) -> Result { let name = call.head; + let separator = call + .get_flag(engine_state, stack, "separator")? + .map(|v: Value| v.as_char()) + .transpose()? + .unwrap_or(','); + let comment = call + .get_flag(engine_state, stack, "comment")? + .map(|v: Value| v.as_char()) + .transpose()?; + let quote = call + .get_flag(engine_state, stack, "quote")? + .map(|v: Value| v.as_char()) + .transpose()? + .unwrap_or('"'); + let escape = call + .get_flag(engine_state, stack, "escape")? 
+ .map(|v: Value| v.as_char()) + .transpose()?; let no_infer = call.has_flag("no-infer"); let noheaders = call.has_flag("noheaders"); - let separator: Option = call.get_flag(engine_state, stack, "separator")?; - let trim: Option = call.get_flag(engine_state, stack, "trim")?; + let flexible = call.has_flag("flexible"); + let trim = trim_from_str(call.get_flag(engine_state, stack, "trim")?)?; - let sep = match separator { - Some(Value::String { val: s, span }) => { - if s == r"\t" { - '\t' - } else { - let vec_s: Vec = s.chars().collect(); - if vec_s.len() != 1 { - return Err(ShellError::MissingParameter { - param_name: "single character separator".into(), - span, - }); - }; - vec_s[0] - } - } - _ => ',', + let config = DelimitedReaderConfig { + separator, + comment, + quote, + escape, + noheaders, + flexible, + no_infer, + trim, }; - let trim = trim_from_str(trim)?; - - from_delimited_data(noheaders, no_infer, sep, trim, input, name) + from_delimited_data(config, input, name) } #[cfg(test)] diff --git a/crates/nu-command/src/formats/from/delimited.rs b/crates/nu-command/src/formats/from/delimited.rs index 1b0d2dc86..e191d9218 100644 --- a/crates/nu-command/src/formats/from/delimited.rs +++ b/crates/nu-command/src/formats/from/delimited.rs @@ -2,16 +2,26 @@ use csv::{ReaderBuilder, Trim}; use nu_protocol::{IntoPipelineData, PipelineData, ShellError, Span, Value}; fn from_delimited_string_to_value( + DelimitedReaderConfig { + separator, + comment, + quote, + escape, + noheaders, + flexible, + no_infer, + trim, + }: DelimitedReaderConfig, s: String, - noheaders: bool, - no_infer: bool, - separator: char, - trim: Trim, span: Span, ) -> Result { let mut reader = ReaderBuilder::new() .has_headers(!noheaders) + .flexible(flexible) .delimiter(separator as u8) + .comment(comment.map(|c| c as u8)) + .quote(quote as u8) + .escape(escape.map(|c| c as u8)) .trim(trim) .from_reader(s.as_bytes()); @@ -56,24 +66,30 @@ fn from_delimited_string_to_value( Ok(Value::List { vals: rows, 
span }) } -pub fn from_delimited_data( - noheaders: bool, - no_infer: bool, - sep: char, - trim: Trim, +pub(super) struct DelimitedReaderConfig { + pub separator: char, + pub comment: Option, + pub quote: char, + pub escape: Option, + pub noheaders: bool, + pub flexible: bool, + pub no_infer: bool, + pub trim: Trim, +} + +pub(super) fn from_delimited_data( + config: DelimitedReaderConfig, input: PipelineData, name: Span, ) -> Result { let (concat_string, _span, metadata) = input.collect_string_strict(name)?; - Ok( - from_delimited_string_to_value(concat_string, noheaders, no_infer, sep, trim, name) - .map_err(|x| ShellError::DelimiterError { - msg: x.to_string(), - span: name, - })? - .into_pipeline_data_with_metadata(metadata), - ) + Ok(from_delimited_string_to_value(config, concat_string, name) + .map_err(|x| ShellError::DelimiterError { + msg: x.to_string(), + span: name, + })? + .into_pipeline_data_with_metadata(metadata)) } pub fn trim_from_str(trim: Option) -> Result { diff --git a/crates/nu-command/src/formats/from/tsv.rs b/crates/nu-command/src/formats/from/tsv.rs index eb48474dc..1b77300cf 100644 --- a/crates/nu-command/src/formats/from/tsv.rs +++ b/crates/nu-command/src/formats/from/tsv.rs @@ -1,4 +1,4 @@ -use super::delimited::{from_delimited_data, trim_from_str}; +use super::delimited::{from_delimited_data, trim_from_str, DelimitedReaderConfig}; use nu_engine::CallExt; use nu_protocol::ast::Call; @@ -18,11 +18,34 @@ impl Command for FromTsv { fn signature(&self) -> Signature { Signature::build("from tsv") .input_output_types(vec![(Type::String, Type::Table(vec![]))]) + .named( + "comment", + SyntaxShape::String, + "a comment character to ignore lines starting with it", + Some('c'), + ) + .named( + "quote", + SyntaxShape::String, + "a quote character to ignore separators in strings, defaults to '\"'", + Some('q'), + ) + .named( + "escape", + SyntaxShape::String, + "an escape character for strings containing the quote character", + Some('e'), + ) .switch( 
"noheaders", "don't treat the first row as column names", Some('n'), ) + .switch( + "flexible", + "allow the number of fields in records to be variable", + None, + ) .switch("no-infer", "no field type inferencing", None) .named( "trim", @@ -101,12 +124,36 @@ fn from_tsv( ) -> Result { let name = call.head; + let comment = call + .get_flag(engine_state, stack, "comment")? + .map(|v: Value| v.as_char()) + .transpose()?; + let quote = call + .get_flag(engine_state, stack, "quote")? + .map(|v: Value| v.as_char()) + .transpose()? + .unwrap_or('"'); + let escape = call + .get_flag(engine_state, stack, "escape")? + .map(|v: Value| v.as_char()) + .transpose()?; let no_infer = call.has_flag("no-infer"); let noheaders = call.has_flag("noheaders"); - let trim: Option = call.get_flag(engine_state, stack, "trim")?; - let trim = trim_from_str(trim)?; + let flexible = call.has_flag("flexible"); + let trim = trim_from_str(call.get_flag(engine_state, stack, "trim")?)?; - from_delimited_data(noheaders, no_infer, '\t', trim, input, name) + let config = DelimitedReaderConfig { + separator: '\t', + comment, + quote, + escape, + noheaders, + flexible, + no_infer, + trim, + }; + + from_delimited_data(config, input, name) } #[cfg(test)] diff --git a/crates/nu-command/tests/format_conversions/csv.rs b/crates/nu-command/tests/format_conversions/csv.rs index 61372a410..cacb796f2 100644 --- a/crates/nu-command/tests/format_conversions/csv.rs +++ b/crates/nu-command/tests/format_conversions/csv.rs @@ -183,8 +183,92 @@ fn from_csv_text_with_tab_separator_to_table() { } #[test] -fn from_csv_text_skipping_headers_to_table() { +fn from_csv_text_with_comments_to_table() { Playground::setup("filter_from_csv_test_5", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + # This is a comment + first_name,last_name,rusty_luck + # This one too + Andrés,Robalino,1 + Jonathan,Turner,1 + Yehuda,Katz,1 + # This one also + "#, + )]); + + let actual = nu!( 
+ cwd: dirs.test(), pipeline( + r##" + open los_tres_caballeros.txt + | from csv --comment "#" + | get rusty_luck + | length + "## + )); + + assert_eq!(actual.out, "3"); + }) +} + +#[test] +fn from_csv_text_with_custom_quotes_to_table() { + Playground::setup("filter_from_csv_test_6", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name,last_name,rusty_luck + 'And''rés',Robalino,1 + Jonathan,Turner,1 + Yehuda,Katz,1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from csv --quote "'" + | first + | get first_name + "# + )); + + assert_eq!(actual.out, "And'rés"); + }) +} + +#[test] +fn from_csv_text_with_custom_escapes_to_table() { + Playground::setup("filter_from_csv_test_7", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name,last_name,rusty_luck + "And\"rés",Robalino,1 + Jonathan,Turner,1 + Yehuda,Katz,1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from csv --escape '\' + | first + | get first_name + "# + )); + + assert_eq!(actual.out, "And\"rés"); + }) +} + +#[test] +fn from_csv_text_skipping_headers_to_table() { + Playground::setup("filter_from_csv_test_8", |dirs, sandbox| { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_amigos.txt", r#" @@ -208,6 +292,84 @@ fn from_csv_text_skipping_headers_to_table() { }) } +#[test] +fn from_csv_text_with_missing_columns_to_table() { + Playground::setup("filter_from_csv_test_9", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name,last_name,rusty_luck + Andrés,Robalino + Jonathan,Turner,1 + Yehuda,Katz,1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from csv --flexible + | get -i rusty_luck + | compact + | length + "# + )); + + 
assert_eq!(actual.out, "2"); + }) +} + +#[test] +fn from_csv_text_with_multiple_char_separator() { + Playground::setup("filter_from_csv_test_10", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name,last_name,rusty_luck + Andrés,Robalino,1 + Jonathan,Turner,1 + Yehuda,Katz,1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from csv --separator "li" + "# + )); + + assert!(actual.err.contains("single character separator")); + }) +} + +#[test] +fn from_csv_text_with_wrong_type_separator() { + Playground::setup("filter_from_csv_test_11", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name,last_name,rusty_luck + Andrés,Robalino,1 + Jonathan,Turner,1 + Yehuda,Katz,1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from csv --separator ('123' | into int) + "# + )); + + assert!(actual.err.contains("can't convert int to char")); + }) +} + #[test] fn table_with_record_error() { let actual = nu!( diff --git a/crates/nu-command/tests/format_conversions/tsv.rs b/crates/nu-command/tests/format_conversions/tsv.rs index 0670dfafa..2427f7453 100644 --- a/crates/nu-command/tests/format_conversions/tsv.rs +++ b/crates/nu-command/tests/format_conversions/tsv.rs @@ -16,7 +16,7 @@ fn table_to_tsv_text_and_from_tsv_text_back_into_table() { fn table_to_tsv_text_and_from_tsv_text_back_into_table_using_csv_separator() { let actual = nu!( cwd: "tests/fixtures/formats", - r"open caco3_plastics.tsv | to tsv | from csv --separator '\t' | first | get origin" + r#"open caco3_plastics.tsv | to tsv | from csv --separator "\t" | first | get origin"# ); assert_eq!(actual.out, "SPAIN"); @@ -106,8 +106,92 @@ fn from_tsv_text_to_table() { } #[test] -fn from_tsv_text_skipping_headers_to_table() { +fn from_tsv_text_with_comments_to_table() { 
Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + # This is a comment + first_name last_name rusty_luck + # This one too + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + # This one also + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r##" + open los_tres_caballeros.txt + | from tsv --comment "#" + | get rusty_luck + | length + "## + )); + + assert_eq!(actual.out, "3"); + }) +} + +#[test] +fn from_tsv_text_with_custom_quotes_to_table() { + Playground::setup("filter_from_tsv_test_3", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + 'And''rés' Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from tsv --quote "'" + | first + | get first_name + "# + )); + + assert_eq!(actual.out, "And'rés"); + }) +} + +#[test] +fn from_tsv_text_with_custom_escapes_to_table() { + Playground::setup("filter_from_tsv_test_4", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + "And\"rés" Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from tsv --escape '\' + | first + | get first_name + "# + )); + + assert_eq!(actual.out, "And\"rés"); + }) +} + +#[test] +fn from_tsv_text_skipping_headers_to_table() { + Playground::setup("filter_from_tsv_test_5", |dirs, sandbox| { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_amigos.txt", r#" @@ -130,3 +214,81 @@ fn from_tsv_text_skipping_headers_to_table() { assert_eq!(actual.out, "3"); }) } + +#[test] +fn from_tsv_text_with_missing_columns_to_table() { + Playground::setup("filter_from_tsv_test_6", |dirs, sandbox| { + 
sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + Andrés Robalino + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from tsv --flexible + | get -i rusty_luck + | compact + | length + "# + )); + + assert_eq!(actual.out, "2"); + }) +} + +#[test] +fn from_tsv_text_with_multiple_char_comment() { + Playground::setup("filter_from_tsv_test_7", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from tsv --comment "li" + "# + )); + + assert!(actual.err.contains("single character separator")); + }) +} + +#[test] +fn from_tsv_text_with_wrong_type_comment() { + Playground::setup("filter_from_tsv_test_8", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open los_tres_caballeros.txt + | from tsv --comment ('123' | into int) + "# + )); + + assert!(actual.err.contains("can't convert int to char")); + }) +} diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 60bd1ec25..3193c02a9 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -190,6 +190,27 @@ impl Clone for Value { } impl Value { + pub fn as_char(&self) -> Result { + match self { + Value::String { val, span } => { + let mut chars = val.chars(); + match (chars.next(), chars.next()) { + (Some(c), None) => Ok(c), + _ => Err(ShellError::MissingParameter { + param_name: "single character separator".into(), + span: *span, +
}), + } + } + x => Err(ShellError::CantConvert { + to_type: "char".into(), + from_type: x.get_type().to_string(), + span: self.span()?, + help: None, + }), + } + } + /// Converts into string values that can be changed into string natively pub fn as_string(&self) -> Result { match self {