From 00aac850fdf78ac3695bead865f6375b8425d914 Mon Sep 17 00:00:00 2001 From: Luccas Mateus Date: Fri, 19 Nov 2021 16:23:35 -0300 Subject: [PATCH] `from xlsx` `from ods` and `from toml` (#352) * MathEval Variance and Stddev * Fix tests and linting * Typo * Deal with streams when they are not tables * `from toml` command * From ods * From XLSX --- Cargo.lock | 87 +++++++++ crates/nu-command/Cargo.toml | 2 + crates/nu-command/src/default_context.rs | 3 + crates/nu-command/src/formats/from/mod.rs | 6 + crates/nu-command/src/formats/from/ods.rs | 210 +++++++++++++++++++++ crates/nu-command/src/formats/from/toml.rs | 141 ++++++++++++++ crates/nu-command/src/formats/from/xlsx.rs | 210 +++++++++++++++++++++ 7 files changed, 659 insertions(+) create mode 100644 crates/nu-command/src/formats/from/ods.rs create mode 100644 crates/nu-command/src/formats/from/toml.rs create mode 100644 crates/nu-command/src/formats/from/xlsx.rs diff --git a/Cargo.lock b/Cargo.lock index 5c8e76c77b..9ac1f29fa8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,6 +177,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70" +[[package]] +name = "calamine" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b86ca78da4bdce5ac0f0bdbc0218ad14232f1e668376e044233f64c527cf5abb" +dependencies = [ + "byteorder", + "codepage", + "encoding_rs", + "log", + "quick-xml", + "serde", + "zip", +] + [[package]] name = "capnp" version = "0.14.3" @@ -249,6 +264,15 @@ dependencies = [ "phf_codegen", ] +[[package]] +name = "codepage" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0e9222c0cdf2c6ac27d73f664f9520266fa911c3106329d359f8861cb8bde9" +dependencies = [ + "encoding_rs", +] + [[package]] name = "console" version = "0.15.0" @@ -270,6 +294,15 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.1" @@ -491,6 +524,15 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encoding_rs" +version = "0.8.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a74ea89a0a1b98f6332de42c95baff457ada66d1cb4030f9ff151b2041a1c746" +dependencies = [ + "cfg-if", +] + [[package]] name = "engine-q" version = "0.1.0" @@ -515,6 +557,18 @@ dependencies = [ "tempfile", ] +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -884,6 +938,7 @@ version = "0.1.0" dependencies = [ "Inflector", "bytesize", + "calamine", "chrono", "chrono-humanize", "chrono-tz", @@ -912,6 +967,7 @@ dependencies = [ "terminal_size", "thiserror", "titlecase", + "toml", "trash", "unicode-segmentation", ] @@ -1219,6 +1275,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "quick-xml" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d72d5477478f85bd00b6521780dfba1ec6cdaadcf90b8b181c36d7de561f9b" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quote" version = "1.0.10" @@ -1691,6 +1757,15 @@ dependencies = [ "regex", ] +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + [[package]] name = "trash" version = "1.3.0" @@ -1844,3 +1919,15 @@ name = "zeroize" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d68d9dcec5f9b43a30d38c49f91dfedfaac384cb8f085faca366c26207dd1619" + +[[package]] +name = "zip" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" +dependencies = [ + "byteorder", + "crc32fast", + "flate2", + "thiserror", +] diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 9982f8e985..bf8bd2cea1 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -39,7 +39,9 @@ serde = { version="1.0.123", features=["derive"] } serde_yaml = "0.8.16" serde_urlencoded = "0.7.0" eml-parser = "0.1.0" +toml = "0.5.8" itertools = "0.10.0" +calamine = "0.18.0" rand = "0.8" [features] diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index c7ba22451c..99e8f9741e 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -55,8 +55,11 @@ pub fn create_default_context() -> EngineState { FromYaml, FromYml, FromTsv, + FromToml, FromUrl, FromEml, + FromOds, + FromXlsx, Get, Griddle, Help, diff --git a/crates/nu-command/src/formats/from/mod.rs b/crates/nu-command/src/formats/from/mod.rs index 6692f791c8..9fab2206b6 100644 --- a/crates/nu-command/src/formats/from/mod.rs +++ b/crates/nu-command/src/formats/from/mod.rs @@ -3,15 +3,21 @@ mod csv; mod delimited; mod eml; mod json; +mod ods; +mod toml; mod tsv; mod url; +mod xlsx; mod yaml; pub use self::csv::FromCsv; +pub use self::toml::FromToml; pub use command::From; pub use eml::FromEml; pub use json::FromJson; +pub use ods::FromOds; pub use tsv::FromTsv; pub use url::FromUrl; +pub use xlsx::FromXlsx; pub use yaml::FromYaml; pub use yaml::FromYml; diff --git a/crates/nu-command/src/formats/from/ods.rs b/crates/nu-command/src/formats/from/ods.rs new file mode 100644 index 0000000000..2997f98728 --- /dev/null +++ b/crates/nu-command/src/formats/from/ods.rs @@ -0,0 +1,210 @@ +use calamine::*; +use indexmap::map::IndexMap; +use nu_engine::CallExt; +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EngineState, Stack}; +use nu_protocol::{ + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; +use std::io::Cursor; + +#[derive(Clone)] +pub struct FromOds; + +impl Command for FromOds { + fn name(&self) -> &str { + "from ods" + } + + fn signature(&self) -> Signature { + Signature::build("from ods") + .named( + "sheets", + SyntaxShape::List(Box::new(SyntaxShape::String)), + "Only convert specified sheets", + Some('s'), + ) + .category(Category::Formats) + } + + fn usage(&self) -> &str { + "Parse OpenDocument Spreadsheet(.ods) data and create table." + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + + let sel_sheets = if let Some(Value::List { vals: columns, .. }) = + call.get_flag(engine_state, stack, "sheets")? + { + convert_columns(columns.as_slice())? + } else { + vec![] + }; + + from_ods(input, head, sel_sheets) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Convert binary .ods data to a table", + example: "open test.txt | from ods", + result: None, + }, + Example { + description: "Convert binary .ods data to a table, specifying the tables", + example: "open test.txt | from ods -s [Spreadsheet1]", + result: None, + }, + ] + } +} + +fn convert_columns(columns: &[Value]) -> Result, ShellError> { + let res = columns + .iter() + .map(|value| match &value { + Value::String { val: s, .. } => Ok(s.clone()), + _ => Err(ShellError::IncompatibleParametersSingle( + "Incorrect column format, Only string as column name".to_string(), + value.span().unwrap_or_else(|_| Span::unknown()), + )), + }) + .collect::, _>>()?; + + Ok(res) +} + +fn collect_binary(input: PipelineData) -> Result, ShellError> { + let mut bytes = vec![]; + let mut values = input.into_iter(); + + loop { + match values.next() { + Some(Value::Binary { val: b, .. }) => { + bytes.extend_from_slice(&b); + } + Some(x) => { + return Err(ShellError::UnsupportedInput( + "Expected binary from pipeline".to_string(), + x.span().unwrap_or_else(|_| Span::unknown()), + )) + } + None => break, + } + } + + Ok(bytes) +} + +fn from_ods( + input: PipelineData, + head: Span, + sel_sheets: Vec, +) -> Result { + let bytes = collect_binary(input)?; + let buf: Cursor> = Cursor::new(bytes); + let mut ods = Ods::<_>::new(buf) + .map_err(|_| ShellError::UnsupportedInput("Could not load ods file".to_string(), head))?; + + let mut dict = IndexMap::new(); + + let mut sheet_names = ods.sheet_names().to_owned(); + if !sel_sheets.is_empty() { + sheet_names.retain(|e| sel_sheets.contains(e)); + } + + for sheet_name in &sheet_names { + let mut sheet_output = vec![]; + + if let Some(Ok(current_sheet)) = ods.worksheet_range(sheet_name) { + for row in current_sheet.rows() { + let mut row_output = IndexMap::new(); + for (i, cell) in row.iter().enumerate() { + let value = match cell { + DataType::Empty => Value::nothing(head), + DataType::String(s) => Value::string(s, head), + DataType::Float(f) => Value::Float { + val: *f, + span: head, + }, + DataType::Int(i) => Value::Int { + val: *i, + span: head, + }, + DataType::Bool(b) => Value::Bool { + val: *b, + span: head, + }, + _ => Value::nothing(head), + }; + + row_output.insert(format!("Column{}", i), value); + } + + let (cols, vals) = + row_output + .into_iter() + .fold((vec![], vec![]), |mut acc, (k, v)| { + acc.0.push(k); + acc.1.push(v); + acc + }); + + let record = Value::Record { + cols, + vals, + span: head, + }; + + sheet_output.push(record); + } + + dict.insert( + sheet_name, + Value::List { + vals: sheet_output, + span: head, + }, + ); + } else { + return Err(ShellError::UnsupportedInput( + "Could not load sheet".to_string(), + head, + )); + } + } + + let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| { + acc.0.push(k.clone()); + acc.1.push(v); + acc + }); + + let record = Value::Record { + cols, + vals, + span: head, + }; + + Ok(PipelineData::Value(record)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(FromOds {}) + } +} diff --git a/crates/nu-command/src/formats/from/toml.rs b/crates/nu-command/src/formats/from/toml.rs new file mode 100644 index 0000000000..813bdd2872 --- /dev/null +++ b/crates/nu-command/src/formats/from/toml.rs @@ -0,0 +1,141 @@ +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EngineState, Stack}; +use nu_protocol::{ + Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Value, +}; + +#[derive(Clone)] +pub struct FromToml; + +impl Command for FromToml { + fn name(&self) -> &str { + "from toml" + } + + fn signature(&self) -> Signature { + Signature::build("from toml").category(Category::Formats) + } + + fn usage(&self) -> &str { + "Parse text as .toml and create table." + } + + fn examples(&self) -> Vec { + vec![ + Example { + example: "'a = 1' | from toml", + description: "Converts toml formatted string to table", + result: Some(Value::Record { + cols: vec!["a".to_string()], + vals: vec![Value::Int { + val: 1, + span: Span::unknown(), + }], + span: Span::unknown(), + }), + }, + Example { + example: "'a = 1 +b = [1, 2]' | from toml", + description: "Converts toml formatted string to table", + result: Some(Value::Record { + cols: vec!["a".to_string(), "b".to_string()], + vals: vec![ + Value::Int { + val: 1, + span: Span::unknown(), + }, + Value::List { + vals: vec![ + Value::Int { + val: 1, + span: Span::unknown(), + }, + Value::Int { + val: 2, + span: Span::unknown(), + }, + ], + span: Span::unknown(), + }, + ], + span: Span::unknown(), + }), + }, + ] + } + + fn run( + &self, + _engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let span = call.head; + let config = stack.get_config()?; + let mut string_input = input.collect_string("", &config); + string_input.push('\n'); + Ok(convert_string_to_value(string_input, span)?.into_pipeline_data()) + } +} + +fn convert_toml_to_value(value: &toml::Value, span: Span) -> Value { + match value { + toml::Value::Array(array) => { + let v: Vec = array + .iter() + .map(|x| convert_toml_to_value(x, span)) + .collect(); + + Value::List { vals: v, span } + } + toml::Value::Boolean(b) => Value::Bool { val: *b, span }, + toml::Value::Float(f) => Value::Float { val: *f, span }, + toml::Value::Integer(i) => Value::Int { val: *i, span }, + toml::Value::Table(k) => { + let mut cols = vec![]; + let mut vals = vec![]; + + for item in k { + cols.push(item.0.clone()); + vals.push(convert_toml_to_value(item.1, span)); + } + + Value::Record { cols, vals, span } + } + toml::Value::String(s) => Value::String { + val: s.clone(), + span, + }, + toml::Value::Datetime(d) => Value::String { + val: d.to_string(), + span, + }, + } +} + +pub fn convert_string_to_value(string_input: String, span: Span) -> Result { + let result: Result = toml::from_str(&string_input); + match result { + Ok(value) => Ok(convert_toml_to_value(&value, span)), + + Err(_x) => Err(ShellError::CantConvert( + "structured data from toml".into(), + "string".into(), + span, + )), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(FromToml {}) + } +} diff --git a/crates/nu-command/src/formats/from/xlsx.rs b/crates/nu-command/src/formats/from/xlsx.rs new file mode 100644 index 0000000000..032ad363cb --- /dev/null +++ b/crates/nu-command/src/formats/from/xlsx.rs @@ -0,0 +1,210 @@ +use calamine::*; +use indexmap::map::IndexMap; +use nu_engine::CallExt; +use nu_protocol::ast::Call; +use nu_protocol::engine::{Command, EngineState, Stack}; +use nu_protocol::{ + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; +use std::io::Cursor; + +#[derive(Clone)] +pub struct FromXlsx; + +impl Command for FromXlsx { + fn name(&self) -> &str { + "from xlsx" + } + + fn signature(&self) -> Signature { + Signature::build("from xlsx") + .named( + "sheets", + SyntaxShape::List(Box::new(SyntaxShape::String)), + "Only convert specified sheets", + Some('s'), + ) + .category(Category::Formats) + } + + fn usage(&self) -> &str { + "Parse binary Excel(.xlsx) data and create table." + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + + let sel_sheets = if let Some(Value::List { vals: columns, .. }) = + call.get_flag(engine_state, stack, "sheets")? + { + convert_columns(columns.as_slice())? + } else { + vec![] + }; + + from_xlsx(input, head, sel_sheets) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Convert binary .xlsx data to a table", + example: "open test.txt | from xlsx", + result: None, + }, + Example { + description: "Convert binary .xlsx data to a table, specifying the tables", + example: "open test.txt | from xlsx -s [Spreadsheet1]", + result: None, + }, + ] + } +} + +fn convert_columns(columns: &[Value]) -> Result, ShellError> { + let res = columns + .iter() + .map(|value| match &value { + Value::String { val: s, .. } => Ok(s.clone()), + _ => Err(ShellError::IncompatibleParametersSingle( + "Incorrect column format, Only string as column name".to_string(), + value.span().unwrap_or_else(|_| Span::unknown()), + )), + }) + .collect::, _>>()?; + + Ok(res) +} + +fn collect_binary(input: PipelineData) -> Result, ShellError> { + let mut bytes = vec![]; + let mut values = input.into_iter(); + + loop { + match values.next() { + Some(Value::Binary { val: b, .. }) => { + bytes.extend_from_slice(&b); + } + Some(x) => { + return Err(ShellError::UnsupportedInput( + "Expected binary from pipeline".to_string(), + x.span().unwrap_or_else(|_| Span::unknown()), + )) + } + None => break, + } + } + + Ok(bytes) +} + +fn from_xlsx( + input: PipelineData, + head: Span, + sel_sheets: Vec, +) -> Result { + let bytes = collect_binary(input)?; + let buf: Cursor> = Cursor::new(bytes); + let mut xlsx = Xlsx::<_>::new(buf) + .map_err(|_| ShellError::UnsupportedInput("Could not load xlsx file".to_string(), head))?; + + let mut dict = IndexMap::new(); + + let mut sheet_names = xlsx.sheet_names().to_owned(); + if !sel_sheets.is_empty() { + sheet_names.retain(|e| sel_sheets.contains(e)); + } + + for sheet_name in &sheet_names { + let mut sheet_output = vec![]; + + if let Some(Ok(current_sheet)) = xlsx.worksheet_range(sheet_name) { + for row in current_sheet.rows() { + let mut row_output = IndexMap::new(); + for (i, cell) in row.iter().enumerate() { + let value = match cell { + DataType::Empty => Value::nothing(head), + DataType::String(s) => Value::string(s, head), + DataType::Float(f) => Value::Float { + val: *f, + span: head, + }, + DataType::Int(i) => Value::Int { + val: *i, + span: head, + }, + DataType::Bool(b) => Value::Bool { + val: *b, + span: head, + }, + _ => Value::nothing(head), + }; + + row_output.insert(format!("Column{}", i), value); + } + + let (cols, vals) = + row_output + .into_iter() + .fold((vec![], vec![]), |mut acc, (k, v)| { + acc.0.push(k); + acc.1.push(v); + acc + }); + + let record = Value::Record { + cols, + vals, + span: head, + }; + + sheet_output.push(record); + } + + dict.insert( + sheet_name, + Value::List { + vals: sheet_output, + span: head, + }, + ); + } else { + return Err(ShellError::UnsupportedInput( + "Could not load sheet".to_string(), + head, + )); + } + } + + let (cols, vals) = dict.into_iter().fold((vec![], vec![]), |mut acc, (k, v)| { + acc.0.push(k.clone()); + acc.1.push(v); + acc + }); + + let record = Value::Record { + cols, + vals, + span: head, + }; + + Ok(PipelineData::Value(record)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(FromXlsx {}) + } +}