From 2fb48bd6ac51c35e1fbd6efd36b370b51bd3d0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Wed, 14 Oct 2020 04:36:11 -0500 Subject: [PATCH] Flatten command. (#2670) --- crates/nu-cli/src/cli.rs | 1 + crates/nu-cli/src/commands.rs | 14 +- crates/nu-cli/src/commands/flatten.rs | 184 ++++++++++++++++++++++++ crates/nu-cli/src/examples.rs | 45 ++++-- crates/nu-cli/tests/commands/flatten.rs | 168 ++++++++++++++++++++++ crates/nu-cli/tests/commands/mod.rs | 1 + crates/nu-protocol/src/value/dict.rs | 2 +- 7 files changed, 400 insertions(+), 15 deletions(-) create mode 100644 crates/nu-cli/src/commands/flatten.rs create mode 100644 crates/nu-cli/tests/commands/flatten.rs diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index 37e26012c..f18c2caef 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -198,6 +198,7 @@ pub fn create_default_context(interactive: bool) -> Result Vec { + fn full_tests() -> Vec { vec![ whole_stream_command(Append), whole_stream_command(GroupBy), @@ -288,9 +290,15 @@ mod tests { ] } + fn only_examples() -> Vec { + let mut commands = full_tests(); + commands.extend(vec![whole_stream_command(Flatten)]); + commands + } + #[test] fn examples_work_as_expected() -> Result<(), ShellError> { - for cmd in commands() { + for cmd in only_examples() { test_examples(cmd)?; } @@ -299,7 +307,7 @@ mod tests { #[test] fn tracks_metadata() -> Result<(), ShellError> { - for cmd in commands() { + for cmd in full_tests() { test_anchors(cmd)?; } diff --git a/crates/nu-cli/src/commands/flatten.rs b/crates/nu-cli/src/commands/flatten.rs new file mode 100644 index 000000000..ac0963d32 --- /dev/null +++ b/crates/nu-cli/src/commands/flatten.rs @@ -0,0 +1,184 @@ +use crate::command_registry::CommandRegistry; +use crate::commands::WholeStreamCommand; +use crate::prelude::*; +use nu_errors::ShellError; +use nu_protocol::{ + Dictionary, ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, +}; +use nu_source::Tagged; + +pub struct Command; + +#[derive(Deserialize)] +pub struct Arguments { + rest: Vec>, +} + +#[async_trait] +impl WholeStreamCommand for Command { + fn name(&self) -> &str { + "flatten" + } + + fn signature(&self) -> Signature { + Signature::build("flatten").rest(SyntaxShape::String, "optionally flatten data by column") + } + + fn usage(&self) -> &str { + "Flatten the table." + } + + async fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + flatten(args, registry).await + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "flatten a table", + example: "echo [[N, u, s, h, e, l, l]] | flatten | first", + result: Some(vec![Value::from("N")]), + }, + Example { + description: "flatten a column having a nested table", + example: "echo [[origin, people]; [Ecuador, $(echo [[name, meal]; ['Andres', 'arepa']])]] | flatten | get meal", + result: Some(vec![Value::from("arepa")]), + }, + Example { + description: "restrict the flattening by passing column names", + example: "echo [[origin, crate, versions]; [World, $(echo [[name]; ['nu-cli']]), ['0.21', '0.22']]] | flatten versions | last | = $it.versions", + result: Some(vec![Value::from("0.22")]), + } + ] + } +} + +async fn flatten( + args: CommandArgs, + registry: &CommandRegistry, +) -> Result { + let tag = args.call_info.name_tag.clone(); + let registry = registry.clone(); + let (Arguments { rest: columns }, input) = args.process(®istry).await?; + + Ok(input + .map(move |item| { + futures::stream::iter(flat_value(&columns, &item, &tag).into_iter().flatten()) + }) + .flatten() + .to_output_stream()) +} + +enum TableInside<'a> { + Entries(&'a str, &'a Tag, Vec<&'a Value>), +} + +fn flat_value( + columns: &[Tagged], + item: &Value, + name_tag: impl Into, +) -> Result>, ShellError> { + let tag = item.tag.clone(); + let name_tag = name_tag.into(); + + let res = { + if item.is_row() { + let mut out = TaggedDictBuilder::new(tag); + let mut a_table = None; + let mut tables_explicitly_flattened = 0; + + for (column, value) in item.row_entries() { + let column_requested = columns.iter().find(|c| c.item == *column); + + if let Value { + value: UntaggedValue::Row(Dictionary { entries: mapa }), + .. + } = value + { + if column_requested.is_none() && !columns.is_empty() { + out.insert_value(column, value.clone()); + continue; + } + + for (k, v) in mapa.into_iter() { + out.insert_value(k, v.clone()); + } + } else if value.is_table() { + if tables_explicitly_flattened >= 1 && column_requested.is_some() { + let attempted = if let Some(name) = column_requested { + name.span() + } else { + name_tag.span + }; + + let already_flattened = + if let Some(TableInside::Entries(_, column_tag, _)) = a_table { + column_tag.span + } else { + name_tag.span + }; + + return Ok(vec![ReturnSuccess::value( + UntaggedValue::Error(ShellError::labeled_error_with_secondary( + "can only flatten one inner table at the same time", + "tried flattening more than one column with inner tables", + attempted, + "...but is flattened already", + already_flattened, + )) + .into_value(name_tag), + )]); + } + + if !columns.is_empty() { + if let Some(requested) = column_requested { + a_table = Some(TableInside::Entries( + &requested.item, + &requested.tag, + value.table_entries().collect(), + )); + + tables_explicitly_flattened += 1; + } else { + out.insert_value(column, value.clone()); + } + } else if a_table.is_none() { + a_table = Some(TableInside::Entries( + &column, + &value.tag, + value.table_entries().collect(), + )) + } else { + out.insert_value(column, value.clone()); + } + } else { + out.insert_value(column, value.clone()); + } + } + + let mut expanded = vec![]; + + if let Some(TableInside::Entries(column, _, entries)) = a_table { + for entry in entries.into_iter() { + let mut base = out.clone(); + base.insert_value(column, entry.clone()); + expanded.push(base.into_value()); + } + } else { + expanded.push(out.into_value()); + } + + expanded + } else if item.is_table() { + item.table_entries().map(Clone::clone).collect() + } else { + vec![item.clone()] + } + }; + + Ok(res.into_iter().map(ReturnSuccess::value).collect()) +} diff --git a/crates/nu-cli/src/examples.rs b/crates/nu-cli/src/examples.rs index 83bd6bacf..cca869249 100644 --- a/crates/nu-cli/src/examples.rs +++ b/crates/nu-cli/src/examples.rs @@ -16,8 +16,8 @@ use crate::command_registry::CommandRegistry; use crate::commands::classified::block::run_block; use crate::commands::command::CommandArgs; use crate::commands::{ - whole_stream_command, BuildString, Command, Each, Echo, Get, Keep, StrCollect, - WholeStreamCommand, Wrap, + whole_stream_command, BuildString, Command, Each, Echo, First, Get, Keep, Last, Nth, + StrCollect, WholeStreamCommand, Wrap, }; use crate::evaluation_context::EvaluationContext; use crate::stream::{InputStream, OutputStream}; @@ -37,9 +37,12 @@ pub fn test_examples(cmd: Command) -> Result<(), ShellError> { // Minimal restricted commands to aid in testing whole_stream_command(Echo {}), whole_stream_command(BuildString {}), + whole_stream_command(First {}), whole_stream_command(Get {}), whole_stream_command(Keep {}), whole_stream_command(Each {}), + whole_stream_command(Last {}), + whole_stream_command(Nth {}), whole_stream_command(StrCollect), whole_stream_command(Wrap), cmd, @@ -150,9 +153,12 @@ pub fn test_anchors(cmd: Command) -> Result<(), ShellError> { whole_stream_command(MockEcho {}), whole_stream_command(MockLs {}), whole_stream_command(BuildString {}), + whole_stream_command(First {}), whole_stream_command(Get {}), whole_stream_command(Keep {}), whole_stream_command(Each {}), + whole_stream_command(Last {}), + whole_stream_command(Nth {}), whole_stream_command(StrCollect), whole_stream_command(Wrap), cmd, @@ -351,16 +357,33 @@ impl WholeStreamCommand for MockEcho { Value { value: UntaggedValue::Table(table), .. - } => futures::stream::iter( - table - .into_iter() - .map(move |mut v| { + } => { + if table.len() == 1 && table[0].is_table() { + let mut values: Vec = + table[0].table_entries().map(Clone::clone).collect(); + + for v in values.iter_mut() { v.tag = base_value.tag(); - v - }) - .map(ReturnSuccess::value), - ) - .to_output_stream(), + } + + let subtable = + vec![UntaggedValue::Table(values).into_value(base_value.tag())]; + + futures::stream::iter(subtable.into_iter().map(ReturnSuccess::value)) + .to_output_stream() + } else { + futures::stream::iter( + table + .into_iter() + .map(move |mut v| { + v.tag = base_value.tag(); + v + }) + .map(ReturnSuccess::value), + ) + .to_output_stream() + } + } _ => OutputStream::one(Ok(ReturnSuccess::Value(Value { value: i.value.clone(), tag: base_value.tag, diff --git a/crates/nu-cli/tests/commands/flatten.rs b/crates/nu-cli/tests/commands/flatten.rs new file mode 100644 index 000000000..c65553d70 --- /dev/null +++ b/crates/nu-cli/tests/commands/flatten.rs @@ -0,0 +1,168 @@ +use nu_test_support::fs::Stub::FileWithContentToBeTrimmed; +use nu_test_support::playground::Playground; +use nu_test_support::{nu, pipeline}; + +#[test] +fn flatten_nested_tables_with_columns() { + let actual = nu!( + cwd: ".", pipeline( + r#" + echo [[origin, people]; [Ecuador, $(= 'Andres' | wrap name)]] + [[origin, people]; [Nu, $(= 'nuno' | wrap name)]] + | flatten + | get name + | str collect ',' + "# + )); + + assert_eq!(actual.out, "Andres,nuno"); +} + +#[test] +fn flatten_nested_tables_that_have_many_columns() { + let actual = nu!( + cwd: ".", pipeline( + r#" + echo [[origin, people]; [Ecuador, $(echo [[name, meal]; ['Andres', 'arepa']])]] + [[origin, people]; [USA, $(echo [[name, meal]; ['Katz', 'nurepa']])]] + | flatten + | get meal + | str collect ',' + "# + )); + + assert_eq!(actual.out, "arepa,nurepa"); +} + +#[test] +fn flatten_nested_tables() { + let actual = nu!( + cwd: ".", pipeline( + r#" + echo [[Andrés, Nicolás, Robalino]] | flatten | nth 1 + "# + )); + + assert_eq!(actual.out, "Nicolás"); +} + +#[test] +fn flatten_row_column_explictly() { + Playground::setup("flatten_test_1", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "katz.json", + r#" + [ + { + "origin": "Ecuador", + "people": { + "name": "Andres", + "meal": "arepa" + }, + "code": { "id": 1, "references": 2}, + "tags": ["carbohydrate", "corn", "maiz"], + "city": ["Guayaquil", "Samborondón"] + }, + { + "origin": "USA", + "people": { + "name": "Katz", + "meal": "nurepa" + }, + "code": { "id": 2, "references": 1}, + "tags": ["carbohydrate", "shell food", "amigos flavor"], + "city": ["Oregon", "Brooklin"] + } + ] + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), + "open katz.json | flatten people | where name == Andres | count" + ); + + assert_eq!(actual.out, "1"); + }) +} + +#[test] +fn flatten_table_columns_explictly() { + Playground::setup("flatten_test_2", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "katz.json", + r#" + [ + { + "origin": "Ecuador", + "people": { + "name": "Andres", + "meal": "arepa" + }, + "code": { "id": 1, "references": 2}, + "tags": ["carbohydrate", "corn", "maiz"], + "city": ["Guayaquil", "Samborondón"] + }, + { + "origin": "USA", + "people": { + "name": "Katz", + "meal": "nurepa" + }, + "code": { "id": 2, "references": 1}, + "tags": ["carbohydrate", "shell food", "amigos flavor"], + "city": ["Oregon", "Brooklin"] + } + ] + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), + "open katz.json | flatten city | where people.name == Katz | count" + ); + + assert_eq!(actual.out, "2"); + }) +} + +#[test] +fn flatten_more_than_one_column_that_are_subtables_not_supported() { + Playground::setup("flatten_test_3", |dirs, sandbox| { + sandbox.with_files(vec![FileWithContentToBeTrimmed( + "katz.json", + r#" + [ + { + "origin": "Ecuador", + "people": { + "name": "Andres", + "meal": "arepa" + }, + "code": { "id": 1, "references": 2}, + "tags": ["carbohydrate", "corn", "maiz"], + "city": ["Guayaquil", "Samborondón"] + }, + { + "origin": "USA", + "people": { + "name": "Katz", + "meal": "nurepa" + }, + "code": { "id": 2, "references": 1}, + "tags": ["carbohydrate", "shell food", "amigos flavor"], + "city": ["Oregon", "Brooklin"] + } + ] + "#, + )]); + + let actual = nu!( + cwd: dirs.test(), + "open katz.json | flatten tags city" + ); + + assert!(actual.err.contains("tried flattening")); + assert!(actual.err.contains("but is flattened already")); + }) +} diff --git a/crates/nu-cli/tests/commands/mod.rs b/crates/nu-cli/tests/commands/mod.rs index c8d83d550..2b90267d5 100644 --- a/crates/nu-cli/tests/commands/mod.rs +++ b/crates/nu-cli/tests/commands/mod.rs @@ -16,6 +16,7 @@ mod empty; mod enter; mod every; mod first; +mod flatten; mod format; mod get; mod group_by; diff --git a/crates/nu-protocol/src/value/dict.rs b/crates/nu-protocol/src/value/dict.rs index cd2ddf087..98201b9da 100644 --- a/crates/nu-protocol/src/value/dict.rs +++ b/crates/nu-protocol/src/value/dict.rs @@ -196,7 +196,7 @@ impl Dictionary { } /// A helper to help create dictionaries for you. It has the ability to insert values into the dictionary while maintaining the tags that need to be applied to the individual members -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TaggedDictBuilder { tag: Tag, dict: IndexMap,