From 895a1b2d72f2e7025b7761fe5ef0e4d13b63c202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Fri, 19 Jul 2019 15:11:49 -0500 Subject: [PATCH] Introduced ability to open csv documents. --- Cargo.lock | 37 +++++++++--- Cargo.toml | 1 + README.md | 1 + src/cli.rs | 1 + src/commands.rs | 1 + src/commands/from_csv.rs | 68 +++++++++++++++++++++++ src/commands/open.rs | 11 ++++ src/object.rs | 2 +- tests/commands_test.rs | 9 +++ tests/filters_test.rs | 2 +- tests/fixtures/formats/caco3_plastics.csv | 10 ++++ 11 files changed, 134 insertions(+), 9 deletions(-) create mode 100644 src/commands/from_csv.rs create mode 100644 tests/fixtures/formats/caco3_plastics.csv diff --git a/Cargo.lock b/Cargo.lock index 3215ea850..bd7d5a462 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,17 @@ name = "block" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "bstr" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "build_const" version = "0.2.1" @@ -535,18 +546,19 @@ dependencies = [ [[package]] name = "csv" -version = "1.0.7" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "bstr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", - "ryu 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "csv-core" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1644,6 +1656,7 @@ dependencies = [ "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "clipboard 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossterm 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "ctrlc 3.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "derive-new 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", "derive_more 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2075,7 +2088,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "encode_unicode 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2347,6 +2360,14 @@ dependencies = [ "utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "regex-automata" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex-syntax" version = "0.6.7" @@ -3394,6 +3415,7 @@ dependencies = [ "checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" "checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" "checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +"checksum bstr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fc0662252f9bba48c251a16d16a768b9fcd959593bde07544710bce6efe60b7a" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum bumpalo 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2cd43d82f27d68911e6ee11ee791fb248f138f5d69424dc02e098d4f152b0b05" "checksum byte-unit 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6754bb4703aa167bed5381f0c6842f1cc31a9ecde3b9443f726dde3ad3afb841" @@ -3436,8 +3458,8 @@ dependencies = [ "checksum crossterm_terminal 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "18792c97c5cdcc5fd3582df58188a793bf290af4a53d5fc8442c7d17e003b356" "checksum crossterm_utils 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8321d40908d0ee77cb29335f591eae2b4f7225152f81b9dfa35a161ca3b077dc" "checksum crossterm_winapi 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c061e4a1c47a53952ba0f2396c00a61cd7ab74482eba99b9c9cc77fdca71932" -"checksum csv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9044e25afb0924b5a5fc5511689b0918629e85d68ea591e5e87fbf1e85ea1b3b" -"checksum csv-core 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa5cdef62f37e6ffe7d1f07a381bc0db32b7a3ff1cac0de56cb0d81e71f53d65" +"checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d" +"checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c" "checksum ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3b4c17619643c1252b5f690084b82639dd7fac141c57c8e77a00e0148132092c" "checksum ctrlc 3.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c7dfd2d8b4c82121dfdff120f818e09fc4380b0b7e17a742081a89b94853e87f" "checksum darling 0.8.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9158d690bc62a3a57c3e45b85e4d50de2008b39345592c64efd79345c7e24be0" @@ -3625,6 +3647,7 @@ dependencies = [ "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum redox_users 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3fe5204c3a17e97dde73f285d49be585df59ed84b50a872baf416e73b62c3828" "checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad" +"checksum regex-automata 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3ed09217220c272b29ef237a974ad58515bde75f194e3ffa7e6d0bf0f3b01f86" "checksum regex-syntax 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d76410686f9e3a17f06128962e0ecc5755870bb890c34820c7af7f1db2e1d48" "checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" "checksum render-tree 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "68ed587df09cfb7ce1bc6fe8f77e24db219f222c049326ccbfb948ec67e31664" diff --git a/Cargo.toml b/Cargo.toml index 21d32ad17..041d5b11a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ logos = "0.10.0-rc2" logos-derive = "0.10.0-rc2" language-reporting = "0.3.1" app_dirs = "1.2.1" +csv = "1.1" toml = "0.5.1" toml-query = "0.9.2" clap = "2.33.0" diff --git a/README.md b/README.md index 01d5083c7..fd382bfa1 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat ## Filters on text (unstructured data) | command | description | | ------------- | ------------- | +| from-csv | Parse text as .csv and create table | | from-ini | Parse text as .ini and create table | | from-json | Parse text as .json and create table | | from-toml | Parse text as .toml and create table | diff --git a/src/cli.rs b/src/cli.rs index a36e28e07..1d09ab655 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -156,6 +156,7 @@ pub async fn cli() -> Result<(), Box> { command("cd", Box::new(cd::cd)), command("first", Box::new(first::first)), command("size", Box::new(size::size)), + command("from-csv", Box::new(from_csv::from_csv)), command("from-ini", Box::new(from_ini::from_ini)), command("from-json", Box::new(from_json::from_json)), command("from-toml", Box::new(from_toml::from_toml)), diff --git a/src/commands.rs b/src/commands.rs index 206fd1d69..dc1d9704a 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -11,6 +11,7 @@ crate mod command; crate mod config; crate mod exit; crate mod first; +crate mod from_csv; crate mod from_ini; crate mod from_json; crate mod from_toml; diff --git a/src/commands/from_csv.rs b/src/commands/from_csv.rs new file mode 100644 index 000000000..c6e0fa9ec --- /dev/null +++ b/src/commands/from_csv.rs @@ -0,0 +1,68 @@ +use crate::object::{Primitive, SpannedDictBuilder, SpannedListBuilder, Value}; +use crate::prelude::*; +use csv::ReaderBuilder; + +pub fn from_csv_string_to_value( + s: String, + span: impl Into, +) -> Result, Box> { + + let mut reader = ReaderBuilder::new().has_headers(false).from_reader(s.as_bytes()); + let span = span.into(); + + let mut fields: VecDeque = VecDeque::new(); + let mut iter = reader.records(); + let mut root = SpannedDictBuilder::new(span); + let mut rows = SpannedListBuilder::new(span); + + if let Some(result) = iter.next() { + let line = result?; + + for item in line.iter() { + fields.push_back(item.to_string()); + } + } + + loop { + if let Some(row_values) = iter.next() { + let row_values = row_values?; + + let mut row = SpannedDictBuilder::new(span); + + for (idx, entry) in row_values.iter().enumerate() { + row.insert_spanned(fields.get(idx).unwrap(), Value::Primitive(Primitive::String(String::from(entry))).spanned(span)); + } + + rows.insert_spanned(row.into_spanned_value()); + } else { + break; + } + } + + root.insert_spanned("root", rows.into_spanned_value()); + Ok(root.into_spanned_value()) +} + +pub fn from_csv(args: CommandArgs) -> Result { + let out = args.input; + let span = args.name_span; + + Ok(out + .values + .map(move |a| match a.item { + Value::Primitive(Primitive::String(s)) => match from_csv_string_to_value(s, span) { + Ok(x) => ReturnSuccess::value(x.spanned(a.span)), + Err(_) => Err(ShellError::maybe_labeled_error( + "Could not parse as CSV", + "piped data failed CSV parse", + span, + )), + }, + _ => Err(ShellError::maybe_labeled_error( + "Expected string values from pipeline", + "expects strings from pipeline", + span, + )), + }) + .to_output_stream()) +} diff --git a/src/commands/open.rs b/src/commands/open.rs index fda4fa335..1d47032d3 100644 --- a/src/commands/open.rs +++ b/src/commands/open.rs @@ -169,6 +169,17 @@ pub fn parse_as_value( name_span: Option, ) -> Result, ShellError> { match extension { + Some(x) if x == "csv" => { + crate::commands::from_csv::from_csv_string_to_value(contents, contents_span) + .map(|c| c.spanned(contents_span)) + .map_err(move |_| { + ShellError::maybe_labeled_error( + "Could not open as CSV", + "could not open as CSV", + name_span, + ) + }) + } Some(x) if x == "toml" => { crate::commands::from_toml::from_toml_string_to_value(contents, contents_span) .map(|c| c.spanned(contents_span)) diff --git a/src/object.rs b/src/object.rs index f094c63a1..61d22b9ff 100644 --- a/src/object.rs +++ b/src/object.rs @@ -7,5 +7,5 @@ crate mod process; crate mod types; crate use base::{Block, Primitive, Switch, Value}; -crate use dict::{Dictionary, SpannedDictBuilder}; +crate use dict::{Dictionary, SpannedDictBuilder, SpannedListBuilder}; crate use files::dir_entry_dict; diff --git a/tests/commands_test.rs b/tests/commands_test.rs index c4f65e6ce..30aa84799 100644 --- a/tests/commands_test.rs +++ b/tests/commands_test.rs @@ -12,6 +12,15 @@ fn lines() { assert_eq!(output, "rustyline"); } +#[test] +fn open_csv() { + nu!(output, + cwd("tests/fixtures/formats"), + "open caco3_plastics.csv | get root | first 1 | get origin | echo $it"); + + assert_eq!(output, "SPAIN"); +} + #[test] fn open_toml() { nu!(output, diff --git a/tests/filters_test.rs b/tests/filters_test.rs index c9eb4baff..55a2d9ef8 100644 --- a/tests/filters_test.rs +++ b/tests/filters_test.rs @@ -78,7 +78,7 @@ fn can_filter_by_unit_size_comparison() { nu!( output, cwd("tests/fixtures/formats"), - "ls | where size > 1kb | get name | trim | echo $it" + "ls | where size > 1kb | get name | skip 1 | trim | echo $it" ); assert_eq!(output, "cargo_sample.toml"); diff --git a/tests/fixtures/formats/caco3_plastics.csv b/tests/fixtures/formats/caco3_plastics.csv new file mode 100644 index 000000000..c4f2fdfc6 --- /dev/null +++ b/tests/fixtures/formats/caco3_plastics.csv @@ -0,0 +1,10 @@ +importer,shipper,tariff_item,name,origin,shipped_at,arrived_at,net_weight,fob_price,cif_price,cif_per_net_weight +PLASTICOS RIVAL CIA LTDA,S A REVERTE,2509000000,CARBONATO DE CALCIO TIPO CALCIPORE 160 T AL,SPAIN,18/03/2016,17/04/2016,"81,000.00","14,417.58","18,252.34",0.23 +MEXICHEM ECUADOR S.A.,OMYA ANDINA S A,2836500000,CARBONATO,COLOMBIA,07/07/2016,10/07/2016,"26,000.00","7,072.00","8,127.18",0.31 +PLASTIAZUAY SA,SA REVERTE,2836500000,CARBONATO DE CALCIO,SPAIN,27/07/2016,09/08/2016,"81,000.00","8,100.00","11,474.55",0.14 +PLASTICOS RIVAL CIA LTDA,AND ENDUSTRIYEL HAMMADDELER DIS TCARET LTD.STI.,2836500000,CALCIUM CARBONATE ANADOLU ANDCARB CT-1,TURKEY,04/10/2016,11/11/2016,"100,000.00","17,500.00","22,533.75",0.23 +QUIMICA COMERCIAL QUIMICIAL CIA. LTDA.,SA REVERTE,2836500000,CARBONATO DE CALCIO,SPAIN,24/06/2016,12/07/2016,"27,000.00","3,258.90","5,585.00",0.21 +PICA PLASTICOS INDUSTRIALES C.A.,OMYA ANDINA S.A,3824909999,CARBONATO DE CALCIO,COLOMBIA,01/01/1900,18/01/2016,"66,500.00","12,635.00","18,670.52",0.28 +PLASTIQUIM S.A.,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYA CARB 1T CG BBS 1000,COLOMBIA,01/01/1900,25/10/2016,"33,000.00","6,270.00","9,999.00",0.30 +QUIMICOS ANDINOS QUIMANDI S.A.,SIBELCO COLOMBIA SAS,3824909999,CARBONATO DE CALCIO RECUBIERTO,COLOMBIA,01/11/2016,03/11/2016,"52,000.00","8,944.00","13,039.05",0.25 +TIGRE ECUADOR S.A. ECUATIGRE,OMYA ANDINA S.A NIT 830.027.386-6,3824909999,CARBONATO DE CALCIO RECUBIERTO CON ACIDO ESTEARICO OMYACARB 1T CG BPA 25 NO,COLOMBIA,01/01/1900,28/10/2016,"66,000.00","11,748.00","18,216.00",0.28 \ No newline at end of file