Add `from csv` and `from tsv` commands to nu-command.

* nu-command: new `formats/from/csv.rs`, `tsv.rs` and shared `delimited.rs` (built on the
  `csv` crate), registered in `default_context.rs` and `formats/from/mod.rs`.
* nu-protocol: `PipelineData::collect_string` now takes the separator as an argument,
  `ValueStream::into_string` no longer wraps its output in `[...]`, and
  `ShellError::DelimiterError` is added.
* Existing `collect_string` callers (from json, eval, main) pass an explicit separator.

Review changes relative to the patch as submitted:
* `from tsv`: `Signature::build` and `usage` said "csv" (copy-paste from csv.rs); both now
  say "tsv", matching `name()`.
* Doc comments added to the three new files; their hunk line counts are updated to match.

NOTE(review): this text was re-expanded from a copy whose line breaks, indentation and
generic arguments had been stripped. Generic arguments and the leading whitespace of
context lines are reconstructed and must be confirmed against the tree (or apply with
`git apply --ignore-whitespace`). The `index` blob ids of the three new files predate
the edits above.

diff --git a/Cargo.lock b/Cargo.lock
index c9faebd149..9ff264368c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -121,6 +121,7 @@ dependencies = [
  "lazy_static",
  "memchr",
  "regex-automata",
+ "serde",
 ]
 
 [[package]]
@@ -326,6 +327,28 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "csv"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
+dependencies = [
+ "bstr",
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "ctor"
 version = "0.1.21"
@@ -719,6 +742,7 @@ dependencies = [
  "chrono",
  "chrono-humanize",
  "chrono-tz",
+ "csv",
  "dialoguer",
  "glob",
  "lscolors",
diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml
index 1326ec7534..78102f0f29 100644
--- a/crates/nu-command/Cargo.toml
+++ b/crates/nu-command/Cargo.toml
@@ -18,6 +18,7 @@ trash = { version = "1.3.0", optional = true }
 unicode-segmentation = "1.8.0"
 
 # Potential dependencies for extras
+csv = "1.1.3"
 glob = "0.3.0"
 Inflector = "0.11"
 thiserror = "1.0.29"
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 19ef023236..b5291941f1 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -45,7 +45,9 @@ pub fn create_default_context() -> EngineState {
             For,
             Format,
             From,
+            FromCsv,
             FromJson,
+            FromTsv,
             Get,
             Griddle,
             Help,
diff --git a/crates/nu-command/src/formats/from/csv.rs b/crates/nu-command/src/formats/from/csv.rs
new file mode 100644
index 0000000000..25f8de5369
--- /dev/null
+++ b/crates/nu-command/src/formats/from/csv.rs
@@ -0,0 +1,120 @@
+use super::delimited::from_delimited_data;
+
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{Example, PipelineData, ShellError, Signature, SyntaxShape, Value};
+
+/// The `from csv` command: parses piped-in text as delimiter-separated values.
+#[derive(Clone)]
+pub struct FromCsv;
+
+impl Command for FromCsv {
+    fn name(&self) -> &str {
+        "from csv"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("from csv")
+            .named(
+                "separator",
+                SyntaxShape::String,
+                "a character to separate columns, defaults to ','",
+                Some('s'),
+            )
+            .switch(
+                "noheaders",
+                "don't treat the first row as column names",
+                Some('n'),
+            )
+    }
+
+    fn usage(&self) -> &str {
+        "Parse text as .csv and create table."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<nu_protocol::PipelineData, ShellError> {
+        from_csv(engine_state, stack, call, input)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Convert comma-separated data to a table",
+                example: "open data.txt | from csv",
+                result: None,
+            },
+            Example {
+                description: "Convert comma-separated data to a table, ignoring headers",
+                example: "open data.txt | from csv --noheaders",
+                result: None,
+            },
+            Example {
+                description: "Convert comma-separated data to a table, ignoring headers",
+                example: "open data.txt | from csv -n",
+                result: None,
+            },
+            Example {
+                description: "Convert semicolon-separated data to a table",
+                example: "open data.txt | from csv --separator ';'",
+                result: None,
+            },
+        ]
+    }
+}
+
+/// Reads the `--noheaders` and `--separator` flags from `call` and hands `input` to
+/// `from_delimited_data`.
+///
+/// The separator defaults to `,`. The two-character string `\t` (backslash, `t`) selects a
+/// tab; any other value must be exactly one character, otherwise
+/// `ShellError::MissingParameter` is returned.
+fn from_csv(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let name = call.head;
+
+    let noheaders = call.has_flag("noheaders");
+    let separator: Option<Value> = call.get_flag(engine_state, stack, "separator")?;
+
+    let sep = match separator {
+        Some(Value::String { val: s, span }) => {
+            if s == r"\t" {
+                '\t'
+            } else {
+                let vec_s: Vec<char> = s.chars().collect();
+                if vec_s.len() != 1 {
+                    return Err(ShellError::MissingParameter(
+                        "single character separator".into(),
+                        span,
+                    ));
+                };
+                vec_s[0]
+            }
+        }
+        _ => ',',
+    };
+
+    from_delimited_data(noheaders, sep, input, name)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(FromCsv {})
+    }
+}
diff --git a/crates/nu-command/src/formats/from/delimited.rs b/crates/nu-command/src/formats/from/delimited.rs
new file mode 100644
index 0000000000..4e17206589
--- /dev/null
+++ b/crates/nu-command/src/formats/from/delimited.rs
@@ -0,0 +1,74 @@
+use csv::ReaderBuilder;
+use nu_protocol::{IntoPipelineData, PipelineData, ShellError, Span, Value};
+
+/// Parses `s` as `separator`-delimited text into a `Value::List` of `Value::Record` rows.
+///
+/// With `noheaders` set, columns are named `Column1`, `Column2`, ... (one per entry of
+/// `reader.headers()`); otherwise the entries of `reader.headers()` are the column names.
+/// Each field becomes a `Value::Int` if it parses as `i64`, else a `Value::Float` if it
+/// parses as `f64`, else a `Value::String`. Every produced value carries `span`.
+///
+/// NOTE(review): `separator as u8` keeps only the low byte of the `char`, so a non-ASCII
+/// separator silently becomes a different delimiter byte.
+fn from_delimited_string_to_value(
+    s: String,
+    noheaders: bool,
+    separator: char,
+    span: Span,
+) -> Result<Value, csv::Error> {
+    let mut reader = ReaderBuilder::new()
+        .has_headers(!noheaders)
+        .delimiter(separator as u8)
+        .from_reader(s.as_bytes());
+
+    let headers = if noheaders {
+        (1..=reader.headers()?.len())
+            .map(|i| format!("Column{}", i))
+            .collect::<Vec<String>>()
+    } else {
+        reader.headers()?.iter().map(String::from).collect()
+    };
+
+    let mut rows = vec![];
+    for row in reader.records() {
+        let mut output_row = vec![];
+        for value in row?.iter() {
+            if let Ok(i) = value.parse::<i64>() {
+                output_row.push(Value::Int { val: i, span });
+            } else if let Ok(f) = value.parse::<f64>() {
+                output_row.push(Value::Float { val: f, span });
+            } else {
+                output_row.push(Value::String {
+                    val: value.into(),
+                    span,
+                });
+            }
+        }
+        rows.push(Value::Record {
+            cols: headers.clone(),
+            vals: output_row,
+            span,
+        });
+    }
+
+    Ok(Value::List { vals: rows, span })
+}
+
+/// Concatenates `input` into one string (with an empty separator), parses it with
+/// `from_delimited_string_to_value`, and wraps the result as `PipelineData`.
+///
+/// Any parse failure is reported as `ShellError::DelimiterError` labelled at `name`.
+pub fn from_delimited_data(
+    noheaders: bool,
+    sep: char,
+    input: PipelineData,
+    name: Span,
+) -> Result<PipelineData, ShellError> {
+    let concat_string = input.collect_string("");
+
+    Ok(
+        from_delimited_string_to_value(concat_string, noheaders, sep, name)
+            .map_err(|x| ShellError::DelimiterError(x.to_string(), name))?
+            .into_pipeline_data(),
+    )
+}
diff --git a/crates/nu-command/src/formats/from/json.rs b/crates/nu-command/src/formats/from/json.rs
index 35b610e67c..1cdbda80f3 100644
--- a/crates/nu-command/src/formats/from/json.rs
+++ b/crates/nu-command/src/formats/from/json.rs
@@ -77,7 +77,7 @@ impl Command for FromJson {
         input: PipelineData,
     ) -> Result<nu_protocol::PipelineData, ShellError> {
         let span = call.head;
-        let mut string_input = input.collect_string();
+        let mut string_input = input.collect_string("");
         string_input.push('\n');
 
         // TODO: turn this into a structured underline of the nu_json error
diff --git a/crates/nu-command/src/formats/from/mod.rs b/crates/nu-command/src/formats/from/mod.rs
index 78251e562d..713f71d9c4 100644
--- a/crates/nu-command/src/formats/from/mod.rs
+++ b/crates/nu-command/src/formats/from/mod.rs
@@ -1,5 +1,10 @@
 mod command;
+mod csv;
+mod delimited;
 mod json;
+mod tsv;
 
+pub use self::csv::FromCsv;
 pub use command::From;
 pub use json::FromJson;
+pub use tsv::FromTsv;
diff --git a/crates/nu-command/src/formats/from/tsv.rs b/crates/nu-command/src/formats/from/tsv.rs
new file mode 100644
index 0000000000..2c07a29b76
--- /dev/null
+++ b/crates/nu-command/src/formats/from/tsv.rs
@@ -0,0 +1,58 @@
+use super::delimited::from_delimited_data;
+
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{PipelineData, ShellError, Signature};
+
+/// The `from tsv` command: parses piped-in text as tab-separated values.
+#[derive(Clone)]
+pub struct FromTsv;
+
+impl Command for FromTsv {
+    fn name(&self) -> &str {
+        "from tsv"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("from tsv").switch(
+            "noheaders",
+            "don't treat the first row as column names",
+            Some('n'),
+        )
+    }
+
+    fn usage(&self) -> &str {
+        "Parse text as .tsv and create table."
+    }
+
+    fn run(
+        &self,
+        _engine_state: &EngineState,
+        _stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<nu_protocol::PipelineData, ShellError> {
+        from_tsv(call, input)
+    }
+}
+
+/// Reads the `--noheaders` flag from `call` and parses `input` with a tab delimiter.
+fn from_tsv(call: &Call, input: PipelineData) -> Result<PipelineData, ShellError> {
+    let name = call.head;
+
+    let noheaders = call.has_flag("noheaders");
+
+    from_delimited_data(noheaders, '\t', input, name)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(FromTsv {})
+    }
+}
diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs
index c124830fdd..3c30b20e71 100644
--- a/crates/nu-engine/src/eval.rs
+++ b/crates/nu-engine/src/eval.rs
@@ -415,7 +415,7 @@ pub fn eval_subexpression(
                             // to be used later
                             // FIXME: the trimming of the end probably needs to live in a better place
 
-                            let mut s = input.collect_string();
+                            let mut s = input.collect_string("");
                             if s.ends_with('\n') {
                                 s.pop();
                             }
diff --git a/crates/nu-protocol/src/pipeline_data.rs b/crates/nu-protocol/src/pipeline_data.rs
index 9d88e23f82..ba7a6978eb 100644
--- a/crates/nu-protocol/src/pipeline_data.rs
+++ b/crates/nu-protocol/src/pipeline_data.rs
@@ -51,10 +51,10 @@ impl PipelineData {
         }
     }
 
-    pub fn collect_string(self) -> String {
+    pub fn collect_string(self, separator: &str) -> String {
         match self {
-            PipelineData::Value(v) => v.into_string("\n"),
-            PipelineData::Stream(s) => s.into_string("\n"),
+            PipelineData::Value(v) => v.into_string(separator),
+            PipelineData::Stream(s) => s.into_string(separator),
         }
     }
 
diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs
index 033d720a31..fdbca5e547 100644
--- a/crates/nu-protocol/src/shell_error.rs
+++ b/crates/nu-protocol/src/shell_error.rs
@@ -60,6 +60,10 @@ pub enum ShellError {
         right_span: Span,
     },
 
+    #[error("Delimiter error")]
+    #[diagnostic(code(nu::shell::delimiter_error), url(docsrs))]
+    DelimiterError(String, #[label("{0}")] Span),
+
     #[error("Incompatible parameters.")]
     #[diagnostic(code(nu::shell::incompatible_parameters), url(docsrs))]
     IncompatibleParametersSingle(String, #[label = "{0}"] Span),
diff --git a/crates/nu-protocol/src/value/stream.rs b/crates/nu-protocol/src/value/stream.rs
index 5780b245a1..f49965a44d 100644
--- a/crates/nu-protocol/src/value/stream.rs
+++ b/crates/nu-protocol/src/value/stream.rs
@@ -20,12 +20,9 @@ pub struct ValueStream {
 
 impl ValueStream {
     pub fn into_string(self, separator: &str) -> String {
-        format!(
-            "[{}]",
-            self.map(|x: Value| x.into_string(", "))
-                .collect::<Vec<String>>()
-                .join(separator)
-        )
+        self.map(|x: Value| x.into_string(", "))
+            .collect::<Vec<String>>()
+            .join(separator)
     }
 
     pub fn from_stream(
diff --git a/src/main.rs b/src/main.rs
index 7f0683be7a..10cf327483 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -133,7 +133,7 @@ fn main() -> Result<()> {
             PipelineData::new(Span::unknown()),
         ) {
             Ok(pipeline_data) => {
-                println!("{}", pipeline_data.collect_string());
+                println!("{}", pipeline_data.collect_string("\n"));
             }
             Err(err) => {
                 let working_set = StateWorkingSet::new(&engine_state);
@@ -273,7 +273,7 @@ fn print_value(value: Value, engine_state: &EngineState) -> Result<(), ShellErro
                 &Call::new(),
                 value.into_pipeline_data(),
             )?;
-            table.collect_string()
+            table.collect_string("\n")
         }
         None => value.into_string(", "),
     };
@@ -323,7 +323,7 @@ fn update_prompt<'prompt>(
         &block,
         PipelineData::new(Span::unknown()),
     ) {
-        Ok(pipeline_data) => pipeline_data.collect_string(),
+        Ok(pipeline_data) => pipeline_data.collect_string(""),
         Err(err) => {
             let working_set = StateWorkingSet::new(engine_state);
             report_error(&working_set, &err);