Merge pull request #857 from andrasio/group-by

Can group rows by given column name.
2019-10-23 18:25:52 +13:00
parent 07b90f4b4b f1630da2cc
commit 571b33a11c
5 changed files with 148 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
+| count | Show the total number of rows |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
 | get column-or-column-path | Open column and get data from the corresponding cells |
+| group-by column | Creates a new table with the data from the table rows grouped by the column given |
 | inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
 | last amount | Show only the last number of rows |
 | nth row-number | Return only the selected row |
--- a/src/cli.rs
+++ b/src/cli.rs
@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
            whole_stream_command(ToURL),
            whole_stream_command(ToYAML),
            whole_stream_command(SortBy),
+            whole_stream_command(GroupBy),
            whole_stream_command(Tags),
            whole_stream_command(Count),
            whole_stream_command(First),
--- a/src/commands.rs
+++ b/src/commands.rs
@ -30,6 +30,7 @@ pub(crate) mod from_url;
 pub(crate) mod from_xml;
 pub(crate) mod from_yaml;
 pub(crate) mod get;
+pub(crate) mod group_by;
 pub(crate) mod help;
 pub(crate) mod last;
 pub(crate) mod lines;
@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
 pub(crate) use from_yaml::FromYAML;
 pub(crate) use from_yaml::FromYML;
 pub(crate) use get::Get;
+pub(crate) use group_by::GroupBy;
 pub(crate) use help::Help;
 pub(crate) use last::Last;
 pub(crate) use lines::Lines;
--- a/src/commands/group_by.rs
+++ b/src/commands/group_by.rs
@ -0,0 +1,90 @@
+use crate::commands::WholeStreamCommand;
+use crate::data::TaggedDictBuilder;
+use crate::errors::ShellError;
+use crate::prelude::*;
+
+/// The `group-by` command. This unit struct carries no state; the command's
+/// behaviour lives in its `WholeStreamCommand` implementation.
+pub struct GroupBy;
+
+/// Arguments accepted by `group-by`.
+///
+/// NOTE(review): presumably populated by deserializing the evaluated command
+/// arguments in `args.process(..)` — confirm against `CommandArgs::process`.
+#[derive(Deserialize)]
+pub struct GroupByArgs {
+    // Name of the column to group by. The `.item` is the name itself; the
+    // `.tag` is used to label "Unknown column" errors.
+    column_name: Tagged<String>,
+}
+
+impl WholeStreamCommand for GroupBy {
+    /// The name the command is invoked by.
+    fn name(&self) -> &str {
+        "group-by"
+    }
+
+    /// Declares one required positional argument, `column_name`, with string shape.
+    fn signature(&self) -> Signature {
+        let base = Signature::build("group-by");
+        base.required("column_name", SyntaxShape::String)
+    }
+
+    /// Short help text describing the command.
+    fn usage(&self) -> &str {
+        "Creates a new table with the data from the table rows grouped by the column given."
+    }
+
+    /// Hands the arguments to `group_by` via `args.process` and runs the result.
+    /// An error from `process` is returned to the caller unchanged.
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        let runnable = args.process(registry, group_by)?;
+        runnable.run()
+    }
+}
+
+/// Groups the rows of the input stream by the value each row holds in
+/// `column_name`.
+///
+/// All input rows are collected first and then bucketed into an `IndexMap`
+/// keyed by the string form of the grouping value. On success a single row
+/// is emitted: one column per group key, each cell holding a table of the
+/// rows that belong to that group.
+///
+/// Errors (delivered through the returned stream):
+/// * If a row has no `column_name` column, a single "Unknown column" error
+///   is yielded, labelled at the argument's tag. When the row has any columns,
+///   the label suggests the one with the smallest Levenshtein distance to the
+///   requested name. Grouping stops at that row: no further rows are
+///   inspected and no partial table is emitted.
+/// * If the grouping value cannot be turned into a string, the error from
+///   `as_string()` is propagated with `?`.
+fn group_by(
+    GroupByArgs { column_name }: GroupByArgs,
+    RunnableContext { input, name, .. }: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let stream = async_stream! {
+        let values: Vec<Tagged<Value>> = input.values.collect().await;
+        let mut groups = indexmap::IndexMap::new();
+
+        // Holds the error for the first row that lacks the grouping column.
+        // Once set we stop scanning, so only one error is reported and no
+        // partially grouped table follows it.
+        let mut failure = None;
+
+        for value in values {
+            let group_key = match value.get_data_by_key(&column_name.item) {
+                Some(key) => {
+                    let key = key.as_string()?;
+                    key
+                }
+                None => {
+                    let possibilities = value.data_descriptors();
+
+                    // Closest existing column name, if the row has any columns at all.
+                    let closest = possibilities
+                        .iter()
+                        .map(|x| {
+                            (
+                                natural::distance::levenshtein_distance(x, &column_name.item),
+                                x,
+                            )
+                        })
+                        .min();
+
+                    let err = match closest {
+                        Some((_, suggestion)) => ShellError::labeled_error(
+                            "Unknown column",
+                            format!("did you mean '{}'?", suggestion),
+                            &column_name.tag,
+                        ),
+                        None => ShellError::labeled_error(
+                            "Unknown column",
+                            "row does not contain this column",
+                            &column_name.tag,
+                        ),
+                    };
+
+                    failure = Some(err);
+                    break;
+                }
+            };
+
+            groups
+                .entry(group_key)
+                .or_insert_with(Vec::new)
+                .push(value);
+        }
+
+        match failure {
+            Some(err) => {
+                yield Err(err);
+            }
+            None => {
+                let mut out = TaggedDictBuilder::new(name.clone());
+
+                for (k, v) in groups.iter() {
+                    out.insert(k, Value::table(v));
+                }
+
+                yield ReturnSuccess::value(out);
+            }
+        }
+    };
+
+    Ok(stream.to_output_stream())
+}
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@ -3,6 +3,59 @@ mod helpers;
 use helpers as h;
 use helpers::{Playground, Stub::*};

+/// `group-by type` should bucket the CSV rows by their `type` column;
+/// the `A` bucket is expected to contain two rows.
+#[test]
+fn group_by() {
+    Playground::setup("group_by_test_1", |dirs, sandbox| {
+        // Three rows, two of which have type `A`.
+        let csv_fixture = FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        );
+        sandbox.with_files(vec![csv_fixture]);
+
+        let rows_in_group_a = nu!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by type
+                | get A
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(rows_in_group_a, "2");
+    })
+}
+
+/// Grouping by a column that no row has (`ttype`) should report an
+/// "Unknown column" error.
+#[test]
+fn group_by_errors_if_unknown_column_name() {
+    Playground::setup("group_by_test_2", |dirs, sandbox| {
+        // Same fixture as the happy-path test; none of its columns is named `ttype`.
+        let csv_fixture = FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        );
+        sandbox.with_files(vec![csv_fixture]);
+
+        let error_output = nu_error!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by ttype
+            "#
+        ));
+
+        assert!(error_output.contains("Unknown column"));
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
    Playground::setup("first_test_1", |dirs, sandbox| {