diff --git a/README.md b/README.md
index cf36fd2cb..64ff0e801 100644
--- a/README.md
+++ b/README.md
@@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
+| count | Show the total number of rows |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
 | get column-or-column-path | Open column and get data from the corresponding cells |
+| group-by column | Creates a new table with the data from the table rows grouped by the column given |
 | inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
 | last amount | Show only the last number of rows |
 | nth row-number | Return only the selected row |
diff --git a/src/cli.rs b/src/cli.rs
index ad3eb8d39..e88ee054f 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
             whole_stream_command(ToURL),
             whole_stream_command(ToYAML),
             whole_stream_command(SortBy),
+            whole_stream_command(GroupBy),
             whole_stream_command(Tags),
             whole_stream_command(Count),
             whole_stream_command(First),
diff --git a/src/commands.rs b/src/commands.rs
index 0b155891c..7f0fa0a25 100644
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -30,6 +30,7 @@ pub(crate) mod from_url;
 pub(crate) mod from_xml;
 pub(crate) mod from_yaml;
 pub(crate) mod get;
+pub(crate) mod group_by;
 pub(crate) mod help;
 pub(crate) mod last;
 pub(crate) mod lines;
@@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
 pub(crate) use from_yaml::FromYAML;
 pub(crate) use from_yaml::FromYML;
 pub(crate) use get::Get;
+pub(crate) use group_by::GroupBy;
 pub(crate) use help::Help;
 pub(crate) use last::Last;
 pub(crate) use lines::Lines;
diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs
new file mode 100644
index 000000000..7f5f49640
--- /dev/null
+++ b/src/commands/group_by.rs
@@ -0,0 +1,98 @@
+use crate::commands::WholeStreamCommand;
+use crate::data::TaggedDictBuilder;
+use crate::errors::ShellError;
+use crate::prelude::*;
+
+/// The `group-by` command: buckets the rows of the input table by the value of one column.
+pub struct GroupBy;
+
+/// Arguments accepted by `group-by`.
+#[derive(Deserialize)]
+pub struct GroupByArgs {
+    /// Name of the column whose value decides which group a row lands in.
+    column_name: Tagged<String>,
+}
+
+impl WholeStreamCommand for GroupBy {
+    fn name(&self) -> &str {
+        "group-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("group-by").required("column_name", SyntaxShape::String)
+    }
+
+    fn usage(&self) -> &str {
+        "Creates a new table with the data from the table rows grouped by the column given."
+    }
+
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        args.process(registry, group_by)?.run()
+    }
+}
+
+/// Collects the whole input, then emits a single dictionary value keyed by the distinct
+/// values of `column_name` (in first-seen order), each holding a table of the matching rows.
+///
+/// If a row lacks the column, a single "Unknown column" error is emitted, suggesting the
+/// closest column name of that row when it has any, and no grouped table is produced.
+fn group_by(
+    GroupByArgs { column_name }: GroupByArgs,
+    RunnableContext { input, name, .. }: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let stream = async_stream! {
+        let values: Vec<Tagged<Value>> = input.values.collect().await;
+        let mut groups = indexmap::IndexMap::new();
+
+        for value in values {
+            let group_key = match value.get_data_by_key(&column_name.item) {
+                Some(group_key) => group_key.as_string()?,
+                None => {
+                    let possibilities = value.data_descriptors();
+
+                    // Rank the row's own column names by edit distance to the requested one.
+                    let mut possible_matches: Vec<_> = possibilities
+                        .iter()
+                        .map(|x| (natural::distance::levenshtein_distance(x, &column_name.item), x))
+                        .collect();
+
+                    possible_matches.sort();
+
+                    let err = match possible_matches.first() {
+                        Some((_, closest)) => ShellError::labeled_error(
+                            "Unknown column",
+                            format!("did you mean '{}'?", closest),
+                            &column_name.tag,
+                        ),
+                        None => ShellError::labeled_error(
+                            "Unknown column",
+                            "row does not contain this column",
+                            &column_name.tag,
+                        ),
+                    };
+
+                    // Stop at the first bad row: continuing would repeat the error for
+                    // every row and still emit a table built from partial data.
+                    yield Err(err);
+                    return;
+                }
+            };
+
+            groups.entry(group_key).or_insert_with(Vec::new).push(value);
+        }
+
+        let mut out = TaggedDictBuilder::new(name.clone());
+
+        for (k, v) in groups.iter() {
+            out.insert(k, Value::table(v));
+        }
+
+        yield ReturnSuccess::value(out)
+    };
+
+    Ok(stream.to_output_stream())
+}
diff --git a/tests/commands_test.rs b/tests/commands_test.rs
index 1a3e63ab4..87e1182b1 100644
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@@ -3,6 +3,59 @@ mod helpers;
 use helpers as h;
 use helpers::{Playground, Stub::*};
 
+#[test]
+fn group_by() {
+    Playground::setup("group_by_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by type
+                | get A
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "2");
+    })
+}
+
+#[test]
+fn group_by_errors_if_unknown_column_name() {
+    Playground::setup("group_by_test_2", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu_error!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by ttype
+            "#
+        ));
+
+        assert!(actual.contains("Unknown column"));
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
     Playground::setup("first_test_1", |dirs, sandbox| {