diff --git a/src/cli.rs b/src/cli.rs index b650787d8d..fc10dc0d0d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -180,6 +180,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(ToCSV), whole_stream_command(ToJSON), whole_stream_command(ToTOML), + whole_stream_command(ToTSV), whole_stream_command(ToYAML), whole_stream_command(SortBy), whole_stream_command(Tags), @@ -188,6 +189,7 @@ pub async fn cli() -> Result<(), Box> { whole_stream_command(FromArray), whole_stream_command(FromArray), whole_stream_command(FromCSV), + whole_stream_command(FromTSV), whole_stream_command(FromINI), whole_stream_command(FromBSON), whole_stream_command(FromJSON), diff --git a/src/commands.rs b/src/commands.rs index d1d9297fd4..eb5d29b7ba 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -20,6 +20,7 @@ crate mod from_csv; crate mod from_ini; crate mod from_json; crate mod from_toml; +crate mod from_tsv; crate mod from_xml; crate mod from_yaml; crate mod get; @@ -52,6 +53,7 @@ crate mod to_bson; crate mod to_csv; crate mod to_json; crate mod to_toml; +crate mod to_tsv; crate mod to_yaml; crate mod trim; crate mod version; @@ -78,6 +80,7 @@ crate use from_csv::FromCSV; crate use from_ini::FromINI; crate use from_json::FromJSON; crate use from_toml::FromTOML; +crate use from_tsv::FromTSV; crate use from_xml::FromXML; crate use from_yaml::FromYAML; crate use get::Get; @@ -109,6 +112,7 @@ crate use to_bson::ToBSON; crate use to_csv::ToCSV; crate use to_json::ToJSON; crate use to_toml::ToTOML; +crate use to_tsv::ToTSV; crate use to_yaml::ToYAML; crate use trim::Trim; crate use version::Version; diff --git a/src/commands/from_tsv.rs b/src/commands/from_tsv.rs new file mode 100644 index 0000000000..ed37bf4531 --- /dev/null +++ b/src/commands/from_tsv.rs @@ -0,0 +1,135 @@ +use crate::commands::WholeStreamCommand; +use crate::object::{Primitive, TaggedDictBuilder, Value}; +use crate::prelude::*; +use csv::ReaderBuilder; + +pub struct FromTSV; + +#[derive(Deserialize)] +pub struct FromTSVArgs { + headerless: bool, +} + +impl WholeStreamCommand for FromTSV { + fn name(&self) -> &str { + "from-tsv" + } + + fn signature(&self) -> Signature { + Signature::build("from-tsv").switch("headerless") + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, from_tsv)?.run() + } +} + +pub fn from_tsv_string_to_value( + s: String, + headerless: bool, + tag: impl Into, +) -> Result, csv::Error> { + let mut reader = ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(s.as_bytes()); + let tag = tag.into(); + + let mut fields: VecDeque = VecDeque::new(); + let mut iter = reader.records(); + let mut rows = vec![]; + + if let Some(result) = iter.next() { + let line = result?; + + for (idx, item) in line.iter().enumerate() { + if headerless { + fields.push_back(format!("Column{}", idx + 1)); + } else { + fields.push_back(item.to_string()); + } + } + } + + loop { + if let Some(row_values) = iter.next() { + let row_values = row_values?; + + let mut row = TaggedDictBuilder::new(tag); + + for (idx, entry) in row_values.iter().enumerate() { + row.insert_tagged( + fields.get(idx).unwrap(), + Value::Primitive(Primitive::String(String::from(entry))).tagged(tag), + ); + } + + rows.push(row.into_tagged_value()); + } else { + break; + } + } + + Ok(Tagged::from_item(Value::List(rows), tag)) +} + +fn from_tsv( + FromTSVArgs { + headerless: skip_headers, + }: FromTSVArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let name_span = name; + + let stream = async_stream_block! { + let values: Vec> = input.values.collect().await; + + let mut concat_string = String::new(); + let mut latest_tag: Option = None; + + for value in values { + let value_tag = value.tag(); + latest_tag = Some(value_tag); + match value.item { + Value::Primitive(Primitive::String(s)) => { + concat_string.push_str(&s); + concat_string.push_str("\n"); + } + _ => yield Err(ShellError::labeled_error_with_secondary( + "Expected a string from pipeline", + "requires string input", + name_span, + "value originates from here", + value_tag.span, + )), + + } + } + + match from_tsv_string_to_value(concat_string, skip_headers, name_span) { + Ok(x) => match x { + Tagged { item: Value::List(list), .. } => { + for l in list { + yield ReturnSuccess::value(l); + } + } + x => yield ReturnSuccess::value(x), + }, + Err(_) => if let Some(last_tag) = latest_tag { + yield Err(ShellError::labeled_error_with_secondary( + "Could not parse as TSV", + "input cannot be parsed as TSV", + name_span, + "value originates from here", + last_tag.span, + )) + } , + } + }; + + Ok(stream.to_output_stream()) +} diff --git a/src/commands/open.rs b/src/commands/open.rs index cd3056e81d..c4cd057147 100644 --- a/src/commands/open.rs +++ b/src/commands/open.rs @@ -437,6 +437,16 @@ pub fn parse_string_as_value( ) }) } + Some(ref x) if x == "tsv" => { + crate::commands::from_tsv::from_tsv_string_to_value(contents, false, contents_tag) + .map_err(move |_| { + ShellError::labeled_error( + "Could not open as TSV", + "could not open as TSV", + name_span, + ) + }) + } Some(ref x) if x == "toml" => { crate::commands::from_toml::from_toml_string_to_value(contents, contents_tag).map_err( move |_| { diff --git a/src/commands/save.rs b/src/commands/save.rs index da5c093502..9a28931f59 100644 --- a/src/commands/save.rs +++ b/src/commands/save.rs @@ -1,4 +1,5 @@ use crate::commands::to_csv::{to_string as to_csv_to_string, value_to_csv_value}; +use crate::commands::to_tsv::{to_string as to_tsv_to_string, value_to_tsv_value}; use crate::commands::to_json::value_to_json_value; use crate::commands::to_toml::value_to_toml_value; use crate::commands::to_yaml::value_to_yaml_value; @@ -166,6 +167,14 @@ fn to_string_for( } to_csv_to_string(&value_to_csv_value(&input[0]))? } + Some(x) if x == "tsv" => { + if input.len() != 1 { + return Err(ShellError::string( + "saving to tsv requires a single object (or use --raw)", + )); + } + to_tsv_to_string(&value_to_tsv_value(&input[0]))? + } Some(x) if x == "toml" => { if input.len() != 1 { return Err(ShellError::string( diff --git a/src/commands/to_tsv.rs b/src/commands/to_tsv.rs new file mode 100644 index 0000000000..5a8cb1de41 --- /dev/null +++ b/src/commands/to_tsv.rs @@ -0,0 +1,108 @@ +use crate::commands::WholeStreamCommand; +use crate::object::{Primitive, Value}; +use crate::prelude::*; +use csv::WriterBuilder; + +pub struct ToTSV; + +#[derive(Deserialize)] +pub struct ToTSVArgs { + headerless: bool, +} + +impl WholeStreamCommand for ToTSV { + fn name(&self) -> &str { + "to-tsv" + } + + fn signature(&self) -> Signature { + Signature::build("to-tsv").switch("headerless") + } + + fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + args.process(registry, to_tsv)?.run() + } +} + +pub fn value_to_tsv_value(v: &Value) -> Value { + match v { + Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())), + Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing), + Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())), + Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())), + Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())), + Value::Object(o) => Value::Object(o.clone()), + Value::List(l) => Value::List(l.clone()), + Value::Block(_) => Value::Primitive(Primitive::Nothing), + _ => Value::Primitive(Primitive::Nothing), + } +} + +fn to_string_helper(v: &Value) -> Result> { + match v { + Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()), + Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", *b as u64)), + Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?), + Value::List(_) => return Ok(String::from("[list list]")), + Value::Object(_) => return Ok(String::from("[object]")), + Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()), + _ => return Err("Bad input".into()), + } +} + +pub fn to_string(v: &Value) -> Result> { + match v { + Value::Object(o) => { + let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]); + let mut fields: VecDeque = VecDeque::new(); + let mut values: VecDeque = VecDeque::new(); + + for (k, v) in o.entries.iter() { + fields.push_back(k.clone()); + values.push_back(to_string_helper(&v)?); + } + + wtr.write_record(fields).expect("can not write."); + wtr.write_record(values).expect("can not write."); + + return Ok(String::from_utf8(wtr.into_inner()?)?); + } + _ => return to_string_helper(&v), + } +} + +fn to_tsv( + ToTSVArgs { headerless }: ToTSVArgs, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let name_span = name; + let out = input; + + Ok(out + .values + .map(move |a| match to_string(&value_to_tsv_value(&a.item)) { + Ok(x) => { + let converted = if headerless { + x.lines().skip(1).collect() + } else { + x + }; + + ReturnSuccess::value( + Value::Primitive(Primitive::String(converted)).simple_spanned(name_span), + ) + } + _ => Err(ShellError::labeled_error_with_secondary( + "Expected an object with TSV-compatible structure from pipeline", + "requires TSV-compatible input", + name_span, + format!("{} originates from here", a.item.type_name()), + a.span(), + )), + }) + .to_output_stream()) +} diff --git a/src/utils.rs b/src/utils.rs index 71d0b50fe6..271c799b33 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -250,6 +250,10 @@ mod tests { loc: fixtures().join("caco3_plastics.csv"), at: 0 }, + Res { + loc: fixtures().join("caco3_plastics.tsv"), + at: 0 + }, Res { loc: fixtures().join("cargo_sample.toml"), at: 0 diff --git a/tests/command_open_tests.rs b/tests/command_open_tests.rs index a19873e7b3..0b01cc6b8e 100644 --- a/tests/command_open_tests.rs +++ b/tests/command_open_tests.rs @@ -68,6 +68,21 @@ fn open_can_parse_toml() { assert_eq!(actual, "2018"); } +#[test] +fn open_can_parse_tsv() { + let actual = nu!( + cwd: "tests/fixtures/formats", h::pipeline( + r#" + open caco3_plastics.tsv + | first 1 + | get origin + | echo $it + "# + )); + + assert_eq!(actual, "SPAIN") +} + #[test] fn open_can_parse_json() { let actual = nu!( diff --git a/tests/filters_test.rs b/tests/filters_test.rs index 4258f7b109..deabdaa257 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -237,6 +237,136 @@ fn converts_structured_table_to_json_text() { }) } +#[test] +fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table() { + let actual = nu!( + cwd: "tests/fixtures/formats", + "open caco3_plastics.tsv | to-tsv | from-tsv | first 1 | get origin | echo $it" + ); + + assert_eq!(actual, "SPAIN"); +} + +#[test] +fn converts_structured_table_to_tsv_text() { + Playground::setup("filter_to_tsv_test_1", |dirs, sandbox| { + sandbox + .with_files(vec![FileWithContentToBeTrimmed( + "tsv_text_sample.txt", + r#" + importer shipper tariff_item name origin + Plasticos Rival Reverte 2509000000 Calcium carbonate Spain + Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia + "# + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open tsv_text_sample.txt + | lines + | split-column "\t" a b c d origin + | last 1 + | to-tsv + | lines + | nth 1 + | echo "$it" + "# + )); + + assert!(actual.contains("Colombia")); + }) +} + +#[test] +fn converts_structured_table_to_tsv_text_skipping_headers_after_conversion() { + Playground::setup("filter_to_tsv_test_2", |dirs, sandbox| { + sandbox + .with_files(vec![FileWithContentToBeTrimmed( + "tsv_text_sample.txt", + r#" + importer shipper tariff_item name origin + Plasticos Rival Reverte 2509000000 Calcium carbonate Spain + Tigre Ecuador OMYA Andina 3824909999 Calcium carbonate Colombia + "# + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open tsv_text_sample.txt + | lines + | split-column "\t" a b c d origin + | last 1 + | to-tsv --headerless + | echo "$it" + "# + )); + + assert!(actual.contains("Colombia")); + }) +} + +#[test] +fn converts_from_tsv_text_to_structured_table() { + Playground::setup("filter_from_tsv_test_1", |dirs, sandbox| { + sandbox + .with_files(vec![FileWithContentToBeTrimmed( + "los_tres_amigos.txt", + r#" + first Name Last Name rusty_luck + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open los_tres_amigos.txt + | from-tsv + | get rusty_luck + | str --to-int + | sum + | echo $it + "# + )); + + assert_eq!(actual, "3"); + }) +} + +#[test] +fn converts_from_tsv_text_skipping_headers_to_structured_table() { + Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| { + sandbox + .with_files(vec![FileWithContentToBeTrimmed( + "los_tres_amigos.txt", + r#" + first Name Last Name rusty_luck + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open los_tres_amigos.txt + | from-tsv --headerless + | get Column3 + | str --to-int + | sum + | echo $it + "# + )); + + assert_eq!(actual, "3"); + }) +} + #[test] fn can_convert_json_text_to_bson_and_back_into_table() { let actual = nu!( @@ -333,10 +463,10 @@ fn can_sum() { fn can_filter_by_unit_size_comparison() { let actual = nu!( cwd: "tests/fixtures/formats", - "ls | where size > 1kb | sort-by size | get name | skip 1 | trim | echo $it" + "ls | where size > 1kb | sort-by size | get name | first 1 | trim | echo $it" ); - assert_eq!(actual, "caco3_plastics.csv"); + assert_eq!(actual, "cargo_sample.toml"); } #[test] diff --git a/tests/fixtures/formats/caco3_plastics.tsv b/tests/fixtures/formats/caco3_plastics.tsv new file mode 100644 index 0000000000..071baaae30 --- /dev/null +++ b/tests/fixtures/formats/caco3_plastics.tsv @@ -0,0 +1,10 @@ +importer shipper tariff_item name origin shipped_at arrived_at net_weight fob_price cif_price cif_per_net_weight +PLASTICOS RIVAL CIA LTDA S A REVERTE 2509000000 CARBONATO DE CALCIO TIPO CALCIPORE 160 T AL SPAIN 18/03/2016 17/04/2016 81,000.00 14,417.58 18,252.34 0.23 +MEXICHEM ECUADOR S.A. OMYA ANDINA S A 2836500000 CARBONATO COLOMBIA 07/07/2016 10/07/2016 26,000.00 7,072.00 8,127.18 0.31 +PLASTIAZUAY SA SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 27/07/2016 09/08/2016 81,000.00 8,100.00 11,474.55 0.14 +PLASTICOS RIVAL CIA LTDA AND ENDUSTRIYEL HAMMADDELER DIS TCARET LTD.STI. 2836500000 CALCIUM CARBONATE ANADOLU ANDCARB CT-1 TURKEY 04/10/2016 11/11/2016 100,000.00 17,500.00 22,533.75 0.23 +QUIMICA COMERCIAL QUIMICIAL CIA. LTDA. SA REVERTE 2836500000 CARBONATO DE CALCIO SPAIN 24/06/2016 12/07/2016 27,000.00 3,258.90 5,585.00 0.21 +PICA PLASTICOS INDUSTRIALES C.A. OMYA ANDINA S.A 3824909999 CARBONATO DE CALCIO COLOMBIA 01/01/1900 18/01/2016 66,500.00 12,635.00 18,670.52 0.28 +PLASTIQUIM S.A. OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYA CARB 1T CG BBS 1000 COLOMBIA 01/01/1900 25/10/2016 33,000.00 6,270.00 9,999.00 0.30 +QUIMICOS ANDINOS QUIMANDI S.A. SIBELCO COLOMBIA SAS 3824909999 CARBONATO DE CALCIO RECUBIERTO COLOMBIA 01/11/2016 03/11/2016 52,000.00 8,944.00 13,039.05 0.25 +TIGRE ECUADOR S.A. ECUATIGRE OMYA ANDINA S.A NIT 830.027.386-6 3824909999 CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO COLOMBIA 01/01/1900 28/10/2016 66,000.00 11,748.00 18,216.00 0.28