From 0611f56776e68254712185a8d25697e8a31c2aa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?=
Date: Sun, 20 Oct 2019 18:42:07 -0500
Subject: [PATCH 1/2] Can group cells by given column name.

---
 README.md                |  2 ++
 src/cli.rs               |  1 +
 src/commands.rs          |  2 ++
 src/commands/count.rs    |  2 +-
 src/commands/group_by.rs | 59 ++++++++++++++++++++++++++++++++++++++++
 tests/commands_test.rs   | 28 +++++++++++++++++++
 6 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 src/commands/group_by.rs

diff --git a/README.md b/README.md
index cf36fd2cb1..c391b59903 100644
--- a/README.md
+++ b/README.md
@@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
+| count | Show the total number of cells |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
 | get column-or-column-path | Open column and get data from the corresponding cells |
+| group-by column | Creates a new table with the data from the table rows grouped by the column given |
 | inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
 | last amount | Show only the last number of rows |
 | nth row-number | Return only the selected row |
diff --git a/src/cli.rs b/src/cli.rs
index ad3eb8d39b..e88ee054fe 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
             whole_stream_command(ToURL),
             whole_stream_command(ToYAML),
             whole_stream_command(SortBy),
+            whole_stream_command(GroupBy),
             whole_stream_command(Tags),
             whole_stream_command(Count),
             whole_stream_command(First),
diff --git a/src/commands.rs b/src/commands.rs
index 0b155891cc..7f0fa0a25a 100644
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -30,6 +30,7 @@ pub(crate) mod from_url;
 pub(crate) mod from_xml;
 pub(crate) mod from_yaml;
 pub(crate) mod get;
+pub(crate) mod group_by;
 pub(crate) mod help;
 pub(crate) mod last;
 pub(crate) mod lines;
@@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
 pub(crate) use from_yaml::FromYAML;
 pub(crate) use from_yaml::FromYML;
 pub(crate) use get::Get;
+pub(crate) use group_by::GroupBy;
 pub(crate) use help::Help;
 pub(crate) use last::Last;
 pub(crate) use lines::Lines;
diff --git a/src/commands/count.rs b/src/commands/count.rs
index 5e44283737..6fe5a94633 100644
--- a/src/commands/count.rs
+++ b/src/commands/count.rs
@@ -20,7 +20,7 @@ impl WholeStreamCommand for Count {
     }
 
     fn usage(&self) -> &str {
-        "Show the total number of rows."
+        "Show the total number of cells."
     }
 
     fn run(
diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs
new file mode 100644
index 0000000000..e08ebb2afb
--- /dev/null
+++ b/src/commands/group_by.rs
@@ -0,0 +1,59 @@
+use crate::commands::WholeStreamCommand;
+use crate::data::TaggedDictBuilder;
+use crate::errors::ShellError;
+use crate::prelude::*;
+
+pub struct GroupBy;
+
+#[derive(Deserialize)]
+pub struct GroupByArgs {
+    column_name: Tagged<String>,
+}
+
+impl WholeStreamCommand for GroupBy {
+    fn name(&self) -> &str {
+        "group-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("group-by").required("column_name", SyntaxShape::String)
+    }
+
+    fn usage(&self) -> &str {
+        "Creates a new table with the data from the table rows grouped by the column given."
+    }
+
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        args.process(registry, group_by)?.run()
+    }
+}
+
+fn group_by(
+    GroupByArgs { column_name }: GroupByArgs,
+    RunnableContext { input, name, .. }: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let stream = async_stream! {
+        let values: Vec<Tagged<Value>> = input.values.collect().await;
+        let mut groups = indexmap::IndexMap::new();
+
+        for row in values {
+            let key = row.get_data_by_key(&column_name.item).unwrap().as_string()?;
+            let mut group = groups.entry(key).or_insert(vec![]);
+            group.push(row);
+        }
+
+        let mut out = TaggedDictBuilder::new(name.clone());
+
+        for (k,v) in groups.iter() {
+            out.insert(k, Value::table(v));
+        }
+
+        yield ReturnSuccess::value(out)
+    };
+
+    Ok(stream.to_output_stream())
+}
diff --git a/tests/commands_test.rs b/tests/commands_test.rs
index 4d6fa84a65..7733942811 100644
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@@ -3,6 +3,34 @@ mod helpers;
 use helpers as h;
 use helpers::{Playground, Stub::*};
 
+#[test]
+fn group_by() {
+    Playground::setup("group_by_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by type
+                | get A
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "2");
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
     Playground::setup("first_test_1", |dirs, sandbox| {

From f1630da2ccbcf38110bab7ff9e9c10956b3c7d06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?=
Date: Tue, 22 Oct 2019 00:00:06 -0500
Subject: [PATCH 2/2] Suggest a column name in case one unknown column is supplied.

---
 README.md                |  2 +-
 src/commands/count.rs    |  2 +-
 src/commands/group_by.rs | 39 +++++++++++++++++++++++++++++++++++----
 tests/commands_test.rs   | 25 +++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c391b59903..64ff0e8015 100644
--- a/README.md
+++ b/README.md
@@ -249,7 +249,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
-| count | Show the total number of cells |
+| count | Show the total number of rows |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
diff --git a/src/commands/count.rs b/src/commands/count.rs
index 6fe5a94633..5e44283737 100644
--- a/src/commands/count.rs
+++ b/src/commands/count.rs
@@ -20,7 +20,7 @@ impl WholeStreamCommand for Count {
     }
 
     fn usage(&self) -> &str {
-        "Show the total number of cells."
+        "Show the total number of rows."
     }
 
     fn run(
diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs
index e08ebb2afb..7f5f496408 100644
--- a/src/commands/group_by.rs
+++ b/src/commands/group_by.rs
@@ -40,10 +40,41 @@ fn group_by(
         let values: Vec<Tagged<Value>> = input.values.collect().await;
         let mut groups = indexmap::IndexMap::new();
 
-        for row in values {
-            let key = row.get_data_by_key(&column_name.item).unwrap().as_string()?;
-            let mut group = groups.entry(key).or_insert(vec![]);
-            group.push(row);
+        for value in values {
+            let group_key = value.get_data_by_key(&column_name.item);
+
+            if group_key.is_none() {
+
+                let possibilities = value.data_descriptors();
+
+                let mut possible_matches: Vec<_> = possibilities
+                    .iter()
+                    .map(|x| (natural::distance::levenshtein_distance(x, &column_name.item), x))
+                    .collect();
+
+                possible_matches.sort();
+
+                let err = {
+                    if possible_matches.len() > 0 {
+                        ShellError::labeled_error(
+                            "Unknown column",
+                            format!("did you mean '{}'?", possible_matches[0].1),
+                            &column_name.tag,)
+                    } else {
+                        ShellError::labeled_error(
+                            "Unknown column",
+                            "row does not contain this column",
+                            &column_name.tag,
+                        )
+                    }
+                };
+
+                yield Err(err)
+            } else {
+                let group_key = group_key.unwrap().as_string()?;
+                let mut group = groups.entry(group_key).or_insert(vec![]);
+                group.push(value);
+            }
         }
 
         let mut out = TaggedDictBuilder::new(name.clone());
diff --git a/tests/commands_test.rs b/tests/commands_test.rs
index 7733942811..45e4bcb228 100644
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@@ -31,6 +31,31 @@ fn group_by() {
     })
 }
 
+#[test]
+fn group_by_errors_if_unknown_column_name() {
+    Playground::setup("group_by_test_2", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu_error!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by ttype
+            "#
+        ));
+
+        assert!(actual.contains("Unknown column"));
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
     Playground::setup("first_test_1", |dirs, sandbox| {