Merge pull request #857 from andrasio/group-by

Can group rows by given column name.
This commit is contained in:
Jonathan Turner 2019-10-23 18:25:52 +13:00 committed by GitHub
commit 571b33a11c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 148 additions and 0 deletions

View File

@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
| command | description |
| ------------- | ------------- |
| add column-or-column-path value | Add a new column to the table |
| count | Show the total number of rows |
| edit column-or-column-path value | Edit an existing column to have a new value |
| embed column | Creates a new table of one column with the given name, and places the current table inside of it |
| first amount | Show only the first number of rows |
| get column-or-column-path | Open column and get data from the corresponding cells |
| group-by column | Creates a new table with the data from the table rows grouped by the column given |
| inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
| last amount | Show only the last number of rows |
| nth row-number | Return only the selected row |

View File

@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
whole_stream_command(ToURL),
whole_stream_command(ToYAML),
whole_stream_command(SortBy),
whole_stream_command(GroupBy),
whole_stream_command(Tags),
whole_stream_command(Count),
whole_stream_command(First),

View File

@ -30,6 +30,7 @@ pub(crate) mod from_url;
pub(crate) mod from_xml;
pub(crate) mod from_yaml;
pub(crate) mod get;
pub(crate) mod group_by;
pub(crate) mod help;
pub(crate) mod last;
pub(crate) mod lines;
@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
pub(crate) use from_yaml::FromYAML;
pub(crate) use from_yaml::FromYML;
pub(crate) use get::Get;
pub(crate) use group_by::GroupBy;
pub(crate) use help::Help;
pub(crate) use last::Last;
pub(crate) use lines::Lines;

90
src/commands/group_by.rs Normal file
View File

@ -0,0 +1,90 @@
use crate::commands::WholeStreamCommand;
use crate::data::TaggedDictBuilder;
use crate::errors::ShellError;
use crate::prelude::*;
/// Unit type for the `group-by` command; its behavior comes from its `WholeStreamCommand` impl.
pub struct GroupBy;
/// Arguments accepted by `group-by`, populated through the derived `Deserialize` impl.
#[derive(Deserialize)]
pub struct GroupByArgs {
/// Name of the column to group by; its tag is passed along when an error is reported.
column_name: Tagged<String>,
}
/// Exposes `GroupBy` through the whole-stream command interface.
impl WholeStreamCommand for GroupBy {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "group-by"
    }

    /// Declares one required argument, `column_name`, with the string syntax shape.
    fn signature(&self) -> Signature {
        let signature = Signature::build("group-by");
        signature.required("column_name", SyntaxShape::String)
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Creates a new table with the data from the table rows grouped by the column given."
    }

    /// Processes `args` against `registry` with `group_by` as the callback, then runs
    /// the result. A failure from `process` is returned to the caller via `?`.
    fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let runnable = args.process(registry, group_by)?;
        runnable.run()
    }
}
/// Groups the incoming rows by the string value of their `column_name` cell.
///
/// All input rows are collected first. Each row is then pushed onto the group
/// keyed by the string form of its `column_name` value. On success the stream
/// yields a single dictionary (built with `TaggedDictBuilder`) that maps every
/// group key to a table of the rows in that group.
///
/// # Errors
///
/// If a row has no `column_name` cell, grouping stops at that row and the
/// stream yields exactly one "Unknown column" error carrying the argument's
/// tag; no partial result is emitted. When the row exposes other columns, the
/// one with the smallest Levenshtein distance to `column_name` is suggested.
/// A key that cannot be converted with `as_string` is propagated with `?`.
fn group_by(
    GroupByArgs { column_name }: GroupByArgs,
    RunnableContext { input, name, .. }: RunnableContext,
) -> Result<OutputStream, ShellError> {
    let stream = async_stream! {
        let values: Vec<Tagged<Value>> = input.values.collect().await;
        let mut groups = indexmap::IndexMap::new();
        // Holds the first lookup failure; once set, no further rows are examined.
        let mut failure = None;

        for value in values {
            let group_key = match value.get_data_by_key(&column_name.item) {
                Some(key) => key.as_string()?,
                None => {
                    let possibilities = value.data_descriptors();
                    // Only the closest candidate is needed, so take the minimum
                    // (distance, name) pair instead of sorting the whole list.
                    let closest = possibilities
                        .iter()
                        .map(|x| (natural::distance::levenshtein_distance(x, &column_name.item), x))
                        .min();

                    failure = Some(match closest {
                        Some((_, suggestion)) => ShellError::labeled_error(
                            "Unknown column",
                            format!("did you mean '{}'?", suggestion),
                            &column_name.tag,
                        ),
                        None => ShellError::labeled_error(
                            "Unknown column",
                            "row does not contain this column",
                            &column_name.tag,
                        ),
                    });
                    break;
                }
            };

            groups.entry(group_key).or_insert_with(Vec::new).push(value);
        }

        if let Some(err) = failure {
            yield Err(err);
        } else {
            let mut out = TaggedDictBuilder::new(name.clone());
            for (k, v) in groups.iter() {
                out.insert(k, Value::table(v));
            }
            yield ReturnSuccess::value(out);
        }
    };

    Ok(stream.to_output_stream())
}

View File

@ -3,6 +3,59 @@ mod helpers;
use helpers as h;
use helpers::{Playground, Stub::*};
/// Groups a three-row CSV by its `type` column and checks that counting the
/// `A` group prints `2`.
#[test]
fn group_by() {
    Playground::setup("group_by_test_1", |dirs, sandbox| {
        // Fixture: two rows with type `A`, one with type `B`.
        let fixtures = vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
first_name,last_name,rusty_luck,type
Andrés,Robalino,1,A
Jonathan,Turner,1,B
Yehuda,Katz,1,A
"#,
        )];
        sandbox.with_files(fixtures);

        let row_count = nu!(
            cwd: dirs.test(), h::pipeline(
            r#"
open los_tres_caballeros.csv
| group-by type
| get A
| count
| echo $it
"#
        ));

        assert_eq!(row_count, "2");
    })
}
/// Runs `group-by` with a misspelled column name (`ttype`) and checks that the
/// captured error output mentions "Unknown column".
#[test]
fn group_by_errors_if_unknown_column_name() {
    Playground::setup("group_by_test_2", |dirs, sandbox| {
        // Fixture: the header has a `type` column but no `ttype` column.
        let fixtures = vec![FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
first_name,last_name,rusty_luck,type
Andrés,Robalino,1,A
Jonathan,Turner,1,B
Yehuda,Katz,1,A
"#,
        )];
        sandbox.with_files(fixtures);

        let error_output = nu_error!(
            cwd: dirs.test(), h::pipeline(
            r#"
open los_tres_caballeros.csv
| group-by ttype
"#
        ));

        assert!(error_output.contains("Unknown column"));
    })
}
#[test]
fn first_gets_first_rows_by_amount() {
Playground::setup("first_test_1", |dirs, sandbox| {