diff --git a/docs/commands/from-csv.md b/docs/commands/from-csv.md index b72818eefc..82f38c7c20 100644 --- a/docs/commands/from-csv.md +++ b/docs/commands/from-csv.md @@ -92,6 +92,10 @@ error: Expected a string from pipeline ━━━┷━━━━━━━━━━━┷━━━━━━━━━┷━━━━━━ ``` +The string '\t' can be used to separate on tabs. Note that this is the same as using the from-tsv command. + +Newlines '\n' are not acceptable separators. + Note that separators are currently provided as strings and need to be wrapped in quotes. ```shell diff --git a/docs/commands/to-csv.md b/docs/commands/to-csv.md index 2be6390fa8..ef316c45d8 100644 --- a/docs/commands/to-csv.md +++ b/docs/commands/to-csv.md @@ -7,11 +7,11 @@ Converts table data into csv text. ```shell > shells ━━━┯━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━ - # │ │ name │ path + # │ │ name │ path ───┼───┼────────────┼──────────────────────── - 0 │ X │ filesystem │ /home/shaurya - 1 │ │ filesystem │ /home/shaurya/Pictures - 2 │ │ filesystem │ /home/shaurya/Desktop + 0 │ X │ filesystem │ /home/shaurya + 1 │ │ filesystem │ /home/shaurya/Pictures + 2 │ │ filesystem │ /home/shaurya/Desktop ━━━┷━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━ > shells | to-csv ,name,path @@ -23,48 +23,48 @@ X,filesystem,/home/shaurya ```shell > open caco3_plastics.csv ━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━━━━ - # │ importer │ shipper │ tariff_item │ name │ origin │ shipped_at │ arrived_at │ net_weight │ fob_price │ cif_price │ cif_per_net_ - │ │ │ │ │ │ │ │ │ │ │ weight + # │ importer │ shipper │ tariff_item │ name │ origin │ shipped_at │ arrived_at │ net_weight │ fob_price │ cif_price │ cif_per_net_ + │ │ │ │ │ │ │ │ │ │ │ weight ───┼──────────────┼──────────────┼─────────────┼──────────────┼──────────┼────────────┼────────────┼────────────┼───────────┼───────────┼────────────── - 0 │ PLASTICOS │ S A REVERTE │ 2509000000 │ CARBONATO DE │ SPAIN │ 18/03/2016 │ 
17/04/2016 │ 81,000.00 │ 14,417.58 │ 18,252.34 │ 0.23 - │ RIVAL CIA │ │ │ CALCIO TIPO │ │ │ │ │ │ │ - │ LTDA │ │ │ CALCIPORE │ │ │ │ │ │ │ - │ │ │ │ 160 T AL │ │ │ │ │ │ │ - 1 │ MEXICHEM │ OMYA ANDINA │ 2836500000 │ CARBONATO │ COLOMBIA │ 07/07/2016 │ 10/07/2016 │ 26,000.00 │ 7,072.00 │ 8,127.18 │ 0.31 - │ ECUADOR S.A. │ S A │ │ │ │ │ │ │ │ │ - 2 │ PLASTIAZUAY │ SA REVERTE │ 2836500000 │ CARBONATO DE │ SPAIN │ 27/07/2016 │ 09/08/2016 │ 81,000.00 │ 8,100.00 │ 11,474.55 │ 0.14 - │ SA │ │ │ CALCIO │ │ │ │ │ │ │ - 3 │ PLASTICOS │ AND │ 2836500000 │ CALCIUM │ TURKEY │ 04/10/2016 │ 11/11/2016 │ 100,000.00 │ 17,500.00 │ 22,533.75 │ 0.23 - │ RIVAL CIA │ ENDUSTRIYEL │ │ CARBONATE │ │ │ │ │ │ │ - │ LTDA │ HAMMADDELER │ │ ANADOLU │ │ │ │ │ │ │ - │ │ DIS TCARET │ │ ANDCARB CT-1 │ │ │ │ │ │ │ - │ │ LTD.STI. │ │ │ │ │ │ │ │ │ - 4 │ QUIMICA │ SA REVERTE │ 2836500000 │ CARBONATO DE │ SPAIN │ 24/06/2016 │ 12/07/2016 │ 27,000.00 │ 3,258.90 │ 5,585.00 │ 0.21 - │ COMERCIAL │ │ │ CALCIO │ │ │ │ │ │ │ - │ QUIMICIAL │ │ │ │ │ │ │ │ │ │ - │ CIA. LTDA. │ │ │ │ │ │ │ │ │ │ - 5 │ PICA │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 18/01/2016 │ 66,500.00 │ 12,635.00 │ 18,670.52 │ 0.28 - │ PLASTICOS │ S.A │ │ CALCIO │ │ │ │ │ │ │ - │ INDUSTRIALES │ │ │ │ │ │ │ │ │ │ - │ C.A. │ │ │ │ │ │ │ │ │ │ - 6 │ PLASTIQUIM │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 25/10/2016 │ 33,000.00 │ 6,270.00 │ 9,999.00 │ 0.30 - │ S.A. │ S.A NIT │ │ CALCIO │ │ │ │ │ │ │ - │ │ 830.027.386- │ │ RECUBIERTO │ │ │ │ │ │ │ - │ │ 6 │ │ CON ACIDO │ │ │ │ │ │ │ - │ │ │ │ ESTEARICO │ │ │ │ │ │ │ - │ │ │ │ OMYA CARB 1T │ │ │ │ │ │ │ - │ │ │ │ CG BBS 1000 │ │ │ │ │ │ │ - 7 │ QUIMICOS │ SIBELCO │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/11/2016 │ 03/11/2016 │ 52,000.00 │ 8,944.00 │ 13,039.05 │ 0.25 - │ ANDINOS │ COLOMBIA SAS │ │ CALCIO │ │ │ │ │ │ │ - │ QUIMANDI │ │ │ RECUBIERTO │ │ │ │ │ │ │ - │ S.A. 
│ │ │ │ │ │ │ │ │ │ - 8 │ TIGRE │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 28/10/2016 │ 66,000.00 │ 11,748.00 │ 18,216.00 │ 0.28 - │ ECUADOR S.A. │ S.A NIT │ │ CALCIO │ │ │ │ │ │ │ - │ ECUATIGRE │ 830.027.386- │ │ RECUBIERTO │ │ │ │ │ │ │ - │ │ 6 │ │ CON ACIDO │ │ │ │ │ │ │ - │ │ │ │ ESTEARICO │ │ │ │ │ │ │ - │ │ │ │ OMYACARB 1T │ │ │ │ │ │ │ - │ │ │ │ CG BPA 25 NO │ │ │ │ │ │ │ + 0 │ PLASTICOS │ S A REVERTE │ 2509000000 │ CARBONATO DE │ SPAIN │ 18/03/2016 │ 17/04/2016 │ 81,000.00 │ 14,417.58 │ 18,252.34 │ 0.23 + │ RIVAL CIA │ │ │ CALCIO TIPO │ │ │ │ │ │ │ + │ LTDA │ │ │ CALCIPORE │ │ │ │ │ │ │ + │ │ │ │ 160 T AL │ │ │ │ │ │ │ + 1 │ MEXICHEM │ OMYA ANDINA │ 2836500000 │ CARBONATO │ COLOMBIA │ 07/07/2016 │ 10/07/2016 │ 26,000.00 │ 7,072.00 │ 8,127.18 │ 0.31 + │ ECUADOR S.A. │ S A │ │ │ │ │ │ │ │ │ + 2 │ PLASTIAZUAY │ SA REVERTE │ 2836500000 │ CARBONATO DE │ SPAIN │ 27/07/2016 │ 09/08/2016 │ 81,000.00 │ 8,100.00 │ 11,474.55 │ 0.14 + │ SA │ │ │ CALCIO │ │ │ │ │ │ │ + 3 │ PLASTICOS │ AND │ 2836500000 │ CALCIUM │ TURKEY │ 04/10/2016 │ 11/11/2016 │ 100,000.00 │ 17,500.00 │ 22,533.75 │ 0.23 + │ RIVAL CIA │ ENDUSTRIYEL │ │ CARBONATE │ │ │ │ │ │ │ + │ LTDA │ HAMMADDELER │ │ ANADOLU │ │ │ │ │ │ │ + │ │ DIS TCARET │ │ ANDCARB CT-1 │ │ │ │ │ │ │ + │ │ LTD.STI. │ │ │ │ │ │ │ │ │ + 4 │ QUIMICA │ SA REVERTE │ 2836500000 │ CARBONATO DE │ SPAIN │ 24/06/2016 │ 12/07/2016 │ 27,000.00 │ 3,258.90 │ 5,585.00 │ 0.21 + │ COMERCIAL │ │ │ CALCIO │ │ │ │ │ │ │ + │ QUIMICIAL │ │ │ │ │ │ │ │ │ │ + │ CIA. LTDA. │ │ │ │ │ │ │ │ │ │ + 5 │ PICA │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 18/01/2016 │ 66,500.00 │ 12,635.00 │ 18,670.52 │ 0.28 + │ PLASTICOS │ S.A │ │ CALCIO │ │ │ │ │ │ │ + │ INDUSTRIALES │ │ │ │ │ │ │ │ │ │ + │ C.A. │ │ │ │ │ │ │ │ │ │ + 6 │ PLASTIQUIM │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 25/10/2016 │ 33,000.00 │ 6,270.00 │ 9,999.00 │ 0.30 + │ S.A. 
│ S.A NIT │ │ CALCIO │ │ │ │ │ │ │ + │ │ 830.027.386- │ │ RECUBIERTO │ │ │ │ │ │ │ + │ │ 6 │ │ CON ACIDO │ │ │ │ │ │ │ + │ │ │ │ ESTEARICO │ │ │ │ │ │ │ + │ │ │ │ OMYA CARB 1T │ │ │ │ │ │ │ + │ │ │ │ CG BBS 1000 │ │ │ │ │ │ │ + 7 │ QUIMICOS │ SIBELCO │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/11/2016 │ 03/11/2016 │ 52,000.00 │ 8,944.00 │ 13,039.05 │ 0.25 + │ ANDINOS │ COLOMBIA SAS │ │ CALCIO │ │ │ │ │ │ │ + │ QUIMANDI │ │ │ RECUBIERTO │ │ │ │ │ │ │ + │ S.A. │ │ │ │ │ │ │ │ │ │ + 8 │ TIGRE │ OMYA ANDINA │ 3824909999 │ CARBONATO DE │ COLOMBIA │ 01/01/1900 │ 28/10/2016 │ 66,000.00 │ 11,748.00 │ 18,216.00 │ 0.28 + │ ECUADOR S.A. │ S.A NIT │ │ CALCIO │ │ │ │ │ │ │ + │ ECUATIGRE │ 830.027.386- │ │ RECUBIERTO │ │ │ │ │ │ │ + │ │ 6 │ │ CON ACIDO │ │ │ │ │ │ │ + │ │ │ │ ESTEARICO │ │ │ │ │ │ │ + │ │ │ │ OMYACARB 1T │ │ │ │ │ │ │ + │ │ │ │ CG BPA 25 NO │ │ │ │ │ │ │ ━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━━━ > open caco3_plastics.csv | to-csv importer,shipper,tariff_item,name,origin,shipped_at,arrived_at,net_weight,fob_price,cif_price,cif_per_net_weight @@ -78,3 +78,37 @@ PLASTIQUIM S.A.,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO QUIMICOS ANDINOS QUIMANDI S.A.,SIBELCO COLOMBIA SAS,3824909999,CARBONATO DE CALCIO RECUBIERTO,COLOMBIA,01/11/2016,03/11/2016,"52,000.00","8,944.00","13,039.05",0.25 TIGRE ECUADOR S.A. 
ECUATIGRE,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO,COLOMBIA,01/01/1900,28/10/2016,"66,000.00","11,748.00","18,216.00",0.28 ``` + +To use a character other than ',' to separate records, use `--separator`: + +```shell +> shells +━━━┯━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━ + # │ │ name │ path +───┼───┼────────────┼──────────────────────── + 0 │ X │ filesystem │ /home/shaurya + 1 │ │ filesystem │ /home/shaurya/Pictures + 2 │ │ filesystem │ /home/shaurya/Desktop +━━━┷━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━ +> shells | to-csv --separator ';' + ;name;path +X;filesystem;/home/shaurya + ;filesystem;/home/shaurya/Pictures + ;filesystem;/home/shaurya/Desktop +``` + +The string '\t' can be used to separate on tabs. Note that this is the same as using the to-tsv command. + +Newlines '\n' are not acceptable separators. + +Note that separators are currently provided as strings and need to be wrapped in quotes. + +It is also considered an error to use a separator longer than one character: + +```shell +> open pets.txt | from-csv --separator '123' +error: Expected a single separator char from --separator +- shell:1:37 +1 | open pets.txt | from-csv --separator '123' + | ^^^^^ requires a single character string input +``` diff --git a/src/commands.rs b/src/commands.rs index bf25e1bb23..22a5f90b34 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,7 +1,8 @@ #[macro_use] pub(crate) mod macros; -mod from_structured_data; +mod from_delimited_data; +mod to_delimited_data; pub(crate) mod append; pub(crate) mod args; diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs index 4bada42dfb..df09c9dbe3 100644 --- a/src/commands/from_csv.rs +++ b/src/commands/from_csv.rs @@ -1,4 +1,4 @@ -use crate::commands::from_structured_data::from_structured_data; +use crate::commands::from_delimited_data::from_delimited_data; use crate::commands::WholeStreamCommand; use crate::data::{Primitive, Value}; use 
crate::prelude::*; @@ -52,18 +52,22 @@ fn from_csv( tag, .. }) => { - let vec_s: Vec = s.chars().collect(); - if vec_s.len() != 1 { - return Err(ShellError::labeled_error( - "Expected a single separator char from --separator", - "requires a single character string input", - tag, - )); - }; - vec_s[0] + if s == r"\t" { + '\t' + } else { + let vec_s: Vec = s.chars().collect(); + if vec_s.len() != 1 { + return Err(ShellError::labeled_error( + "Expected a single separator char from --separator", + "requires a single character string input", + tag, + )); + }; + vec_s[0] + } } _ => ',', }; - from_structured_data(headerless, sep, "CSV", runnable_context) + from_delimited_data(headerless, sep, "CSV", runnable_context) } diff --git a/src/commands/from_structured_data.rs b/src/commands/from_delimited_data.rs similarity index 95% rename from src/commands/from_structured_data.rs rename to src/commands/from_delimited_data.rs index 4799a40993..e499323774 100644 --- a/src/commands/from_structured_data.rs +++ b/src/commands/from_delimited_data.rs @@ -2,7 +2,7 @@ use crate::data::{Primitive, TaggedDictBuilder, Value}; use crate::prelude::*; use csv::ReaderBuilder; -fn from_stuctured_string_to_value( +fn from_delimited_string_to_value( s: String, headerless: bool, separator: char, @@ -37,7 +37,7 @@ fn from_stuctured_string_to_value( Ok(Value::Table(rows).tagged(&tag)) } -pub fn from_structured_data( +pub fn from_delimited_data( headerless: bool, sep: char, format_name: &'static str, @@ -70,7 +70,7 @@ pub fn from_structured_data( } } - match from_stuctured_string_to_value(concat_string, headerless, sep, name_tag.clone()) { + match from_delimited_string_to_value(concat_string, headerless, sep, name_tag.clone()) { Ok(x) => match x { Tagged { item: Value::Table(list), .. 
} => { for l in list { diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 37bba215f1..269d4d1d72 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -488,7 +488,7 @@ mod tests { #[test] fn input_is_parsed_correctly_if_either_option_works() { - let input = r#" + let input = r#" docker-registry docker-registry=default docker-registry=default 172.30.78.158 5000/TCP kubernetes component=apiserver,provider=kubernetes 172.30.0.2 443/TCP kubernetes-ro component=apiserver,provider=kubernetes 172.30.0.1 80/TCP diff --git a/src/commands/from_tsv.rs b/src/commands/from_tsv.rs index 7931b8ef38..38af5e4333 100644 --- a/src/commands/from_tsv.rs +++ b/src/commands/from_tsv.rs @@ -1,4 +1,4 @@ -use crate::commands::from_structured_data::from_structured_data; +use crate::commands::from_delimited_data::from_delimited_data; use crate::commands::WholeStreamCommand; use crate::prelude::*; @@ -36,5 +36,5 @@ fn from_tsv( FromTSVArgs { headerless }: FromTSVArgs, runnable_context: RunnableContext, ) -> Result { - from_structured_data(headerless, '\t', "TSV", runnable_context) + from_delimited_data(headerless, '\t', "TSV", runnable_context) } diff --git a/src/commands/to_csv.rs b/src/commands/to_csv.rs index d2b46d9f88..e013c6c8e3 100644 --- a/src/commands/to_csv.rs +++ b/src/commands/to_csv.rs @@ -1,13 +1,14 @@ +use crate::commands::to_delimited_data::to_delimited_data; use crate::commands::WholeStreamCommand; use crate::data::{Primitive, Value}; use crate::prelude::*; -use csv::WriterBuilder; pub struct ToCSV; #[derive(Deserialize)] pub struct ToCSVArgs { headerless: bool, + separator: Option>, } impl WholeStreamCommand for ToCSV { @@ -35,170 +36,34 @@ impl WholeStreamCommand for ToCSV { } } -pub fn value_to_csv_value(v: &Tagged) -> Tagged { - match &v.item { - Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())), - Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing), - 
Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())), - Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())), - Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())), - Value::Primitive(Primitive::Path(x)) => Value::Primitive(Primitive::Path(x.clone())), - Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())), - Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())), - Value::Row(o) => Value::Row(o.clone()), - Value::Table(l) => Value::Table(l.clone()), - Value::Block(_) => Value::Primitive(Primitive::Nothing), - _ => Value::Primitive(Primitive::Nothing), - } - .tagged(v.tag.clone()) -} - -fn to_string_helper(v: &Tagged) -> Result { - match &v.item { - Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()), - Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", b)), - Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?), - Value::Primitive(Primitive::Decimal(_)) => Ok(v.as_string()?), - Value::Primitive(Primitive::Int(_)) => Ok(v.as_string()?), - Value::Primitive(Primitive::Path(_)) => Ok(v.as_string()?), - Value::Table(_) => return Ok(String::from("[Table]")), - Value::Row(_) => return Ok(String::from("[Row]")), - Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()), - _ => { - return Err(ShellError::labeled_error( - "Unexpected value", - "", - v.tag.clone(), - )) - } - } -} - -fn merge_descriptors(values: &[Tagged]) -> Vec { - let mut ret = vec![]; - for value in values { - for desc in value.data_descriptors() { - if !ret.contains(&desc) { - ret.push(desc); - } - } - } - ret -} - -pub fn to_string(tagged_value: &Tagged) -> Result { - let v = &tagged_value.item; - - match v { - Value::Row(o) => { - let mut wtr = WriterBuilder::new().from_writer(vec![]); - let mut fields: VecDeque = VecDeque::new(); - let mut values: VecDeque = VecDeque::new(); - - for (k, v) in 
o.entries.iter() { - fields.push_back(k.clone()); - - values.push_back(to_string_helper(&v)?); - } - - wtr.write_record(fields).expect("can not write."); - wtr.write_record(values).expect("can not write."); - - return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?) - .map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?); - } - Value::Table(list) => { - let mut wtr = WriterBuilder::new().from_writer(vec![]); - - let merged_descriptors = merge_descriptors(&list); - wtr.write_record(&merged_descriptors) - .expect("can not write."); - - for l in list { - let mut row = vec![]; - for desc in &merged_descriptors { - match l.item.get_data_by_key(&desc) { - Some(s) => { - row.push(to_string_helper(s)?); - } - None => { - row.push(String::new()); - } - } - } - wtr.write_record(&row).expect("can not write"); - } - - return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?) - .map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?); - } - _ => return to_string_helper(tagged_value), - } -} - fn to_csv( - ToCSVArgs { headerless }: ToCSVArgs, - RunnableContext { input, name, .. }: RunnableContext, + ToCSVArgs { + separator, + headerless, + }: ToCSVArgs, + runnable_context: RunnableContext, ) -> Result { - let name_tag = name; - let stream = async_stream! 
{ - let input: Vec> = input.values.collect().await; - - let to_process_input = if input.len() > 1 { - let tag = input[0].tag.clone(); - vec![Tagged { item: Value::Table(input), tag } ] - } else if input.len() == 1 { - input - } else { - vec![] - }; - - for value in to_process_input { - match to_string(&value_to_csv_value(&value)) { - Ok(x) => { - let converted = if headerless { - x.lines().skip(1).collect() - } else { - x - }; - yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag)) - } - _ => { - yield Err(ShellError::labeled_error_with_secondary( - "Expected a table with CSV-compatible structure.tag() from pipeline", - "requires CSV-compatible input", - &name_tag, - "originates from here".to_string(), - value.tag(), - )) - } - } - } + let sep = match separator { + Some(Tagged { + item: Value::Primitive(Primitive::String(s)), + tag, + .. + }) => { + if s == r"\t" { + '\t' + } else { + let vec_s: Vec = s.chars().collect(); + if vec_s.len() != 1 { + return Err(ShellError::labeled_error( + "Expected a single separator char from --separator", + "requires a single character string input", + tag, + )); + }; + vec_s[0] + } + } + _ => ',', }; - - Ok(stream.to_output_stream()) + to_delimited_data(headerless, sep, "CSV", runnable_context) } diff --git a/src/commands/to_delimited_data.rs b/src/commands/to_delimited_data.rs new file mode 100644 index 0000000000..68d2ecb943 --- /dev/null +++ b/src/commands/to_delimited_data.rs @@ -0,0 +1,188 @@ +use crate::data::{Primitive, Value}; +use crate::prelude::*; +use csv::WriterBuilder; + +fn from_value_to_delimited_string( + tagged_value: &Tagged, + separator: char, +) -> Result { + let v = &tagged_value.item; + + match v { + Value::Row(o) => { + let mut wtr = WriterBuilder::new() + .delimiter(separator as u8) + .from_writer(vec![]); + let mut fields: VecDeque = VecDeque::new(); + let mut values: VecDeque = VecDeque::new(); + + for (k, v) in o.entries.iter() { + fields.push_back(k.clone()); + + 
values.push_back(to_string_tagged_value(&v)?); + } + + wtr.write_record(fields).expect("can not write."); + wtr.write_record(values).expect("can not write."); + + return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { + ShellError::labeled_error( + "Could not convert record", + "original value", + &tagged_value.tag, + ) + })?) + .map_err(|_| { + ShellError::labeled_error( + "Could not convert record", + "original value", + &tagged_value.tag, + ) + })?); + } + Value::Table(list) => { + let mut wtr = WriterBuilder::new() + .delimiter(separator as u8) + .from_writer(vec![]); + + let merged_descriptors = merge_descriptors(&list); + wtr.write_record(&merged_descriptors) + .expect("can not write."); + + for l in list { + let mut row = vec![]; + for desc in &merged_descriptors { + match l.item.get_data_by_key(&desc) { + Some(s) => { + row.push(to_string_tagged_value(s)?); + } + None => { + row.push(String::new()); + } + } + } + wtr.write_record(&row).expect("can not write"); + } + + return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { + ShellError::labeled_error( + "Could not convert record", + "original value", + &tagged_value.tag, + ) + })?) + .map_err(|_| { + ShellError::labeled_error( + "Could not convert record", + "original value", + &tagged_value.tag, + ) + })?); + } + _ => return to_string_tagged_value(tagged_value), + } +} + +// NOTE: could this be useful more widely and implemented on Tagged ? 
+pub fn clone_tagged_value(v: &Tagged) -> Tagged { + match &v.item { + Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())), + Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing), + Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())), + Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())), + Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())), + Value::Primitive(Primitive::Path(x)) => Value::Primitive(Primitive::Path(x.clone())), + Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())), + Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())), + Value::Row(o) => Value::Row(o.clone()), + Value::Table(l) => Value::Table(l.clone()), + Value::Block(_) => Value::Primitive(Primitive::Nothing), + _ => Value::Primitive(Primitive::Nothing), + } + .tagged(v.tag.clone()) +} + +// NOTE: could this be useful more widely and implemented on Tagged ? 
+fn to_string_tagged_value(v: &Tagged) -> Result { + match &v.item { + Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()), + Value::Primitive(Primitive::Bytes(b)) => { + let tmp = format!("{}", b); + Ok(tmp) + } + Value::Primitive(Primitive::Boolean(_)) => Ok(v.as_string()?), + Value::Primitive(Primitive::Decimal(_)) => Ok(v.as_string()?), + Value::Primitive(Primitive::Int(_)) => Ok(v.as_string()?), + Value::Primitive(Primitive::Path(_)) => Ok(v.as_string()?), + Value::Table(_) => return Ok(String::from("[Table]")), + Value::Row(_) => return Ok(String::from("[Row]")), + Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()), + _ => { + return Err(ShellError::labeled_error( + "Unexpected value", + "", + v.tag.clone(), + )) + } + } +} + +fn merge_descriptors(values: &[Tagged]) -> Vec { + let mut ret = vec![]; + for value in values { + for desc in value.data_descriptors() { + if !ret.contains(&desc) { + ret.push(desc); + } + } + } + ret +} + +pub fn to_delimited_data( + headerless: bool, + sep: char, + format_name: &'static str, + RunnableContext { input, name, .. }: RunnableContext, +) -> Result { + let name_tag = name; + + let stream = async_stream! 
{ + let input: Vec> = input.values.collect().await; + + let to_process_input = if input.len() > 1 { + let tag = input[0].tag.clone(); + vec![Tagged { item: Value::Table(input), tag } ] + } else if input.len() == 1 { + input + } else { + vec![] + }; + + for value in to_process_input { + match from_value_to_delimited_string(&clone_tagged_value(&value), sep) { + Ok(x) => { + let converted = if headerless { + x.lines().skip(1).collect() + } else { + x + }; + yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag)) + } + _ => { + let expected = format!("Expected a table with {}-compatible structure.tag() from pipeline", format_name); + let requires = format!("requires {}-compatible input", format_name); + yield Err(ShellError::labeled_error_with_secondary( + expected, + requires, + &name_tag, + "originates from here".to_string(), + value.tag(), + )) + } + } + } + }; + + Ok(stream.to_output_stream()) +} diff --git a/src/commands/to_tsv.rs b/src/commands/to_tsv.rs index 7857d1eeec..f567215e59 100644 --- a/src/commands/to_tsv.rs +++ b/src/commands/to_tsv.rs @@ -1,7 +1,6 @@ +use crate::commands::to_delimited_data::to_delimited_data; use crate::commands::WholeStreamCommand; -use crate::data::{Primitive, Value}; use crate::prelude::*; -use csv::WriterBuilder; pub struct ToTSV; @@ -35,172 +34,9 @@ impl WholeStreamCommand for ToTSV { } } -pub fn value_to_tsv_value(tagged_value: &Tagged) -> Tagged { - let v = &tagged_value.item; - - match v { - Value::Primitive(Primitive::String(s)) => Value::Primitive(Primitive::String(s.clone())), - Value::Primitive(Primitive::Nothing) => Value::Primitive(Primitive::Nothing), - Value::Primitive(Primitive::Boolean(b)) => Value::Primitive(Primitive::Boolean(b.clone())), - Value::Primitive(Primitive::Decimal(f)) => Value::Primitive(Primitive::Decimal(f.clone())), - Value::Primitive(Primitive::Int(i)) => Value::Primitive(Primitive::Int(i.clone())), - Value::Primitive(Primitive::Path(x)) => 
Value::Primitive(Primitive::Path(x.clone())), - Value::Primitive(Primitive::Bytes(b)) => Value::Primitive(Primitive::Bytes(b.clone())), - Value::Primitive(Primitive::Date(d)) => Value::Primitive(Primitive::Date(d.clone())), - Value::Row(o) => Value::Row(o.clone()), - Value::Table(l) => Value::Table(l.clone()), - Value::Block(_) => Value::Primitive(Primitive::Nothing), - _ => Value::Primitive(Primitive::Nothing), - } - .tagged(&tagged_value.tag) -} - -fn to_string_helper(tagged_value: &Tagged) -> Result { - let v = &tagged_value.item; - match v { - Value::Primitive(Primitive::Date(d)) => Ok(d.to_string()), - Value::Primitive(Primitive::Bytes(b)) => Ok(format!("{}", b)), - Value::Primitive(Primitive::Boolean(_)) => Ok(tagged_value.as_string()?), - Value::Primitive(Primitive::Decimal(_)) => Ok(tagged_value.as_string()?), - Value::Primitive(Primitive::Int(_)) => Ok(tagged_value.as_string()?), - Value::Primitive(Primitive::Path(_)) => Ok(tagged_value.as_string()?), - Value::Table(_) => return Ok(String::from("[table]")), - Value::Row(_) => return Ok(String::from("[row]")), - Value::Primitive(Primitive::String(s)) => return Ok(s.to_string()), - _ => { - return Err(ShellError::labeled_error( - "Unexpected value", - "original value", - &tagged_value.tag, - )) - } - } -} - -fn merge_descriptors(values: &[Tagged]) -> Vec { - let mut ret = vec![]; - for value in values { - for desc in value.data_descriptors() { - if !ret.contains(&desc) { - ret.push(desc); - } - } - } - ret -} - -pub fn to_string(tagged_value: &Tagged) -> Result { - let v = &tagged_value.item; - - match v { - Value::Row(o) => { - let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]); - let mut fields: VecDeque = VecDeque::new(); - let mut values: VecDeque = VecDeque::new(); - - for (k, v) in o.entries.iter() { - fields.push_back(k.clone()); - values.push_back(to_string_helper(&v)?); - } - - wtr.write_record(fields).expect("can not write."); - wtr.write_record(values).expect("can not 
write."); - - return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?) - .map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?); - } - Value::Table(list) => { - let mut wtr = WriterBuilder::new().delimiter(b'\t').from_writer(vec![]); - - let merged_descriptors = merge_descriptors(&list); - wtr.write_record(&merged_descriptors) - .expect("can not write."); - - for l in list { - let mut row = vec![]; - for desc in &merged_descriptors { - match l.item.get_data_by_key(&desc) { - Some(s) => { - row.push(to_string_helper(s)?); - } - None => { - row.push(String::new()); - } - } - } - wtr.write_record(&row).expect("can not write"); - } - - return Ok(String::from_utf8(wtr.into_inner().map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?) - .map_err(|_| { - ShellError::labeled_error( - "Could not convert record", - "original value", - &tagged_value.tag, - ) - })?); - } - _ => return to_string_helper(tagged_value), - } -} - fn to_tsv( ToTSVArgs { headerless }: ToTSVArgs, - RunnableContext { input, name, .. }: RunnableContext, + runnable_context: RunnableContext, ) -> Result { - let name_tag = name; - let stream = async_stream! 
{ - let input: Vec> = input.values.collect().await; - - let to_process_input = if input.len() > 1 { - let tag = input[0].tag.clone(); - vec![Tagged { item: Value::Table(input), tag } ] - } else if input.len() == 1 { - input - } else { - vec![] - }; - - for value in to_process_input { - match to_string(&value_to_tsv_value(&value)) { - Ok(x) => { - let converted = if headerless { - x.lines().skip(1).collect() - } else { - x - }; - yield ReturnSuccess::value(Value::Primitive(Primitive::String(converted)).tagged(&name_tag)) - } - _ => { - yield Err(ShellError::labeled_error_with_secondary( - "Expected a table with TSV-compatible structure.tag() from pipeline", - "requires TSV-compatible input", - &name_tag, - "originates from here".to_string(), - value.tag(), - )) - } - } - } - }; - - Ok(stream.to_output_stream()) + to_delimited_data(headerless, '\t', "TSV", runnable_context) } diff --git a/tests/filters_test.rs b/tests/filters_test.rs index b31fe78654..311bf93175 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -129,6 +129,35 @@ fn converts_from_csv_text_with_separator_to_structured_table() { }) } +#[test] +fn converts_from_csv_text_with_tab_separator_to_structured_table() { + Playground::setup("filter_from_csv_test_1", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "los_tres_caballeros.txt", + r#" + first_name last_name rusty_luck + Andrés Robalino 1 + Jonathan Turner 1 + Yehuda Katz 1 + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), h::pipeline( + r#" + open los_tres_caballeros.txt + | from-csv --separator '\t' + | get rusty_luck + | str --to-int + | sum + | echo $it + "# + )); + + assert_eq!(actual, "3"); + }) +} + #[test] fn converts_from_csv_text_skipping_headers_to_structured_table() { Playground::setup("filter_from_csv_test_2", |dirs, sandbox| { @@ -267,6 +296,16 @@ fn can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table() { assert_eq!(actual, "SPAIN"); } +#[test] +fn 
can_convert_table_to_tsv_text_and_from_tsv_text_back_into_table_using_csv_separator() { + let actual = nu!( + cwd: "tests/fixtures/formats", + r"open caco3_plastics.tsv | to-tsv | from-csv --separator '\t' | first 1 | get origin | echo $it" + ); + + assert_eq!(actual, "SPAIN"); +} + #[test] fn converts_structured_table_to_tsv_text() { Playground::setup("filter_to_tsv_test_1", |dirs, sandbox| {