From 0611f56776e68254712185a8d25697e8a31c2aa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= <andres@androbtech.com>
Date: Sun, 20 Oct 2019 18:42:07 -0500
Subject: [PATCH 1/2] Can group cells by given column name.

---
 README.md                |  2 ++
 src/cli.rs               |  1 +
 src/commands.rs          |  2 ++
 src/commands/count.rs    |  2 +-
 src/commands/group_by.rs | 59 ++++++++++++++++++++++++++++++++++++++++
 tests/commands_test.rs   | 28 +++++++++++++++++++
 6 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 src/commands/group_by.rs
diff --git a/README.md b/README.md
index cf36fd2cb1..c391b59903 100644
--- a/README.md
+++ b/README.md
@@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
+| count | Show the total number of cells |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
 | get column-or-column-path | Open column and get data from the corresponding cells |
+| group-by column | Creates a new table with the data from the table rows grouped by the column given |
 | inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
 | last amount | Show only the last number of rows |
 | nth row-number | Return only the selected row |
diff --git a/src/cli.rs b/src/cli.rs
index ad3eb8d39b..e88ee054fe 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
             whole_stream_command(ToURL),
             whole_stream_command(ToYAML),
             whole_stream_command(SortBy),
+            whole_stream_command(GroupBy),
             whole_stream_command(Tags),
             whole_stream_command(Count),
             whole_stream_command(First),
diff --git a/src/commands.rs b/src/commands.rs
index 0b155891cc..7f0fa0a25a 100644
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -30,6 +30,7 @@ pub(crate) mod from_url;
 pub(crate) mod from_xml;
 pub(crate) mod from_yaml;
 pub(crate) mod get;
+pub(crate) mod group_by;
 pub(crate) mod help;
 pub(crate) mod last;
 pub(crate) mod lines;
@@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
 pub(crate) use from_yaml::FromYAML;
 pub(crate) use from_yaml::FromYML;
 pub(crate) use get::Get;
+pub(crate) use group_by::GroupBy;
 pub(crate) use help::Help;
 pub(crate) use last::Last;
 pub(crate) use lines::Lines;
diff --git a/src/commands/count.rs b/src/commands/count.rs
index 5e44283737..6fe5a94633 100644
--- a/src/commands/count.rs
+++ b/src/commands/count.rs
@@ -20,7 +20,7 @@ impl WholeStreamCommand for Count {
     }
 
     fn usage(&self) -> &str {
-        "Show the total number of rows."
+        "Show the total number of cells."
     }
 
     fn run(
diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs
new file mode 100644
index 0000000000..e08ebb2afb
--- /dev/null
+++ b/src/commands/group_by.rs
@@ -0,0 +1,59 @@
+use crate::commands::WholeStreamCommand;
+use crate::data::TaggedDictBuilder;
+use crate::errors::ShellError;
+use crate::prelude::*;
+
+pub struct GroupBy;
+
+#[derive(Deserialize)]
+pub struct GroupByArgs {
+    column_name: Tagged<String>,
+}
+
+impl WholeStreamCommand for GroupBy {
+    fn name(&self) -> &str {
+        "group-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("group-by").required("column_name", SyntaxShape::String)
+    }
+
+    fn usage(&self) -> &str {
+        "Creates a new table with the data from the table rows grouped by the column given."
+    }
+
+    fn run(
+        &self,
+        args: CommandArgs,
+        registry: &CommandRegistry,
+    ) -> Result<OutputStream, ShellError> {
+        args.process(registry, group_by)?.run()
+    }
+}
+
+fn group_by(
+    GroupByArgs { column_name }: GroupByArgs,
+    RunnableContext { input, name, .. }: RunnableContext,
+) -> Result<OutputStream, ShellError> {
+    let stream = async_stream! {
+        let values: Vec<Tagged<Value>> = input.values.collect().await;
+        let mut groups = indexmap::IndexMap::new();
+
+        for row in values {
+            let key = row.get_data_by_key(&column_name.item).unwrap().as_string()?;
+            let mut group = groups.entry(key).or_insert(vec![]);
+            group.push(row);
+        }
+
+        let mut out = TaggedDictBuilder::new(name.clone());
+
+        for (k,v) in groups.iter() {
+            out.insert(k, Value::table(v));
+        }
+
+        yield ReturnSuccess::value(out)
+    };
+
+    Ok(stream.to_output_stream())
+}
diff --git a/tests/commands_test.rs b/tests/commands_test.rs
index 4d6fa84a65..7733942811 100644
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@@ -3,6 +3,34 @@ mod helpers;
 use helpers as h;
 use helpers::{Playground, Stub::*};
 
+#[test]
+fn group_by() {
+    Playground::setup("group_by_test_1", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by type
+                | get A
+                | count
+                | echo $it
+            "#
+        ));
+
+        assert_eq!(actual, "2");
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
     Playground::setup("first_test_1", |dirs, sandbox| {

From f1630da2ccbcf38110bab7ff9e9c10956b3c7d06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= <andres@androbtech.com>
Date: Tue, 22 Oct 2019 00:00:06 -0500
Subject: [PATCH 2/2] Suggest a column name when an unknown column is
 supplied.

---
 README.md                |  2 +-
 src/commands/count.rs    |  2 +-
 src/commands/group_by.rs | 39 +++++++++++++++++++++++++++++++++++----
 tests/commands_test.rs   | 25 +++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c391b59903..64ff0e8015 100644
--- a/README.md
+++ b/README.md
@@ -249,7 +249,7 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
 | command | description |
 | ------------- | ------------- |
 | add column-or-column-path value | Add a new column to the table |
-| count | Show the total number of cells |
+| count | Show the total number of rows |
 | edit column-or-column-path value | Edit an existing column to have a new value |
 | embed column | Creates a new table of one column with the given name, and places the current table inside of it |
 | first amount | Show only the first number of rows |
diff --git a/src/commands/count.rs b/src/commands/count.rs
index 6fe5a94633..5e44283737 100644
--- a/src/commands/count.rs
+++ b/src/commands/count.rs
@@ -20,7 +20,7 @@ impl WholeStreamCommand for Count {
     }
 
     fn usage(&self) -> &str {
-        "Show the total number of cells."
+        "Show the total number of rows."
     }
 
     fn run(
diff --git a/src/commands/group_by.rs b/src/commands/group_by.rs
index e08ebb2afb..7f5f496408 100644
--- a/src/commands/group_by.rs
+++ b/src/commands/group_by.rs
@@ -40,10 +40,41 @@ fn group_by(
         let values: Vec<Tagged<Value>> = input.values.collect().await;
         let mut groups = indexmap::IndexMap::new();
 
-        for row in values {
-            let key = row.get_data_by_key(&column_name.item).unwrap().as_string()?;
-            let mut group = groups.entry(key).or_insert(vec![]);
-            group.push(row);
+        for value in values {
+            let group_key = match value.get_data_by_key(&column_name.item) {
+                Some(group_key) => group_key.as_string()?,
+                None => {
+                    // Suggest the existing column whose name is closest by edit distance.
+                    let possibilities = value.data_descriptors();
+                    let closest_match = possibilities
+                        .iter()
+                        .map(|x| {
+                            (natural::distance::levenshtein_distance(x, &column_name.item), x)
+                        })
+                        .min();
+
+                    let err = match closest_match {
+                        Some((_, suggestion)) => ShellError::labeled_error(
+                            "Unknown column",
+                            format!("did you mean '{}'?", suggestion),
+                            &column_name.tag,
+                        ),
+                        None => ShellError::labeled_error(
+                            "Unknown column",
+                            "row does not contain this column",
+                            &column_name.tag,
+                        ),
+                    };
+
+                    // Stop at the first offending row: otherwise every such row repeats
+                    // the error and the incomplete grouped table is still emitted.
+                    yield Err(err);
+                    return;
+                }
+            };
+
+            groups.entry(group_key).or_insert_with(Vec::new).push(value);
+        }
         }
 
         let mut out = TaggedDictBuilder::new(name.clone());
diff --git a/tests/commands_test.rs b/tests/commands_test.rs
index 7733942811..45e4bcb228 100644
--- a/tests/commands_test.rs
+++ b/tests/commands_test.rs
@@ -31,6 +31,31 @@ fn group_by() {
     })
 }
 
+#[test]
+fn group_by_errors_if_unknown_column_name() {
+    Playground::setup("group_by_test_2", |dirs, sandbox| {
+        sandbox.with_files(vec![FileWithContentToBeTrimmed(
+            "los_tres_caballeros.csv",
+            r#"
+                first_name,last_name,rusty_luck,type
+                Andrés,Robalino,1,A
+                Jonathan,Turner,1,B
+                Yehuda,Katz,1,A
+            "#,
+        )]);
+
+        let actual = nu_error!(
+            cwd: dirs.test(), h::pipeline(
+            r#"
+                open los_tres_caballeros.csv
+                | group-by ttype
+            "#
+        ));
+
+        assert!(actual.contains("Unknown column"));
+    })
+}
+
 #[test]
 fn first_gets_first_rows_by_amount() {
     Playground::setup("first_test_1", |dirs, sandbox| {