forked from extern/nushell
Merge pull request #857 from andrasio/group-by
Can group rows by given column name.
This commit is contained in:
commit
571b33a11c
@ -249,10 +249,12 @@ Nu adheres closely to a set of goals that make up its design philosophy. As feat
|
||||
| command | description |
|
||||
| ------------- | ------------- |
|
||||
| add column-or-column-path value | Add a new column to the table |
|
||||
| count | Show the total number of rows |
|
||||
| edit column-or-column-path value | Edit an existing column to have a new value |
|
||||
| embed column | Creates a new table of one column with the given name, and places the current table inside of it |
|
||||
| first amount | Show only the first number of rows |
|
||||
| get column-or-column-path | Open column and get data from the corresponding cells |
|
||||
| group-by column | Creates a new table with the data from the table rows grouped by the column given |
|
||||
| inc (column-or-column-path) | Increment a value or version. Optionally use the column of a table |
|
||||
| last amount | Show only the last number of rows |
|
||||
| nth row-number | Return only the selected row |
|
||||
|
@ -275,6 +275,7 @@ pub async fn cli() -> Result<(), Box<dyn Error>> {
|
||||
whole_stream_command(ToURL),
|
||||
whole_stream_command(ToYAML),
|
||||
whole_stream_command(SortBy),
|
||||
whole_stream_command(GroupBy),
|
||||
whole_stream_command(Tags),
|
||||
whole_stream_command(Count),
|
||||
whole_stream_command(First),
|
||||
|
@ -30,6 +30,7 @@ pub(crate) mod from_url;
|
||||
pub(crate) mod from_xml;
|
||||
pub(crate) mod from_yaml;
|
||||
pub(crate) mod get;
|
||||
pub(crate) mod group_by;
|
||||
pub(crate) mod help;
|
||||
pub(crate) mod last;
|
||||
pub(crate) mod lines;
|
||||
@ -103,6 +104,7 @@ pub(crate) use from_xml::FromXML;
|
||||
pub(crate) use from_yaml::FromYAML;
|
||||
pub(crate) use from_yaml::FromYML;
|
||||
pub(crate) use get::Get;
|
||||
pub(crate) use group_by::GroupBy;
|
||||
pub(crate) use help::Help;
|
||||
pub(crate) use last::Last;
|
||||
pub(crate) use lines::Lines;
|
||||
|
90
src/commands/group_by.rs
Normal file
90
src/commands/group_by.rs
Normal file
@ -0,0 +1,90 @@
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::data::TaggedDictBuilder;
|
||||
use crate::errors::ShellError;
|
||||
use crate::prelude::*;
|
||||
|
||||
pub struct GroupBy;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct GroupByArgs {
|
||||
column_name: Tagged<String>,
|
||||
}
|
||||
|
||||
impl WholeStreamCommand for GroupBy {
|
||||
fn name(&self) -> &str {
|
||||
"group-by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("group-by").required("column_name", SyntaxShape::String)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a new table with the data from the table rows grouped by the column given."
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
args: CommandArgs,
|
||||
registry: &CommandRegistry,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
args.process(registry, group_by)?.run()
|
||||
}
|
||||
}
|
||||
|
||||
fn group_by(
|
||||
GroupByArgs { column_name }: GroupByArgs,
|
||||
RunnableContext { input, name, .. }: RunnableContext,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let stream = async_stream! {
|
||||
let values: Vec<Tagged<Value>> = input.values.collect().await;
|
||||
let mut groups = indexmap::IndexMap::new();
|
||||
|
||||
for value in values {
|
||||
let group_key = value.get_data_by_key(&column_name.item);
|
||||
|
||||
if group_key.is_none() {
|
||||
|
||||
let possibilities = value.data_descriptors();
|
||||
|
||||
let mut possible_matches: Vec<_> = possibilities
|
||||
.iter()
|
||||
.map(|x| (natural::distance::levenshtein_distance(x, &column_name.item), x))
|
||||
.collect();
|
||||
|
||||
possible_matches.sort();
|
||||
|
||||
let err = {
|
||||
if possible_matches.len() > 0 {
|
||||
ShellError::labeled_error(
|
||||
"Unknown column",
|
||||
format!("did you mean '{}'?", possible_matches[0].1),
|
||||
&column_name.tag,)
|
||||
} else {
|
||||
ShellError::labeled_error(
|
||||
"Unknown column",
|
||||
"row does not contain this column",
|
||||
&column_name.tag,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
yield Err(err)
|
||||
} else {
|
||||
let group_key = group_key.unwrap().as_string()?;
|
||||
let mut group = groups.entry(group_key).or_insert(vec![]);
|
||||
group.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = TaggedDictBuilder::new(name.clone());
|
||||
|
||||
for (k,v) in groups.iter() {
|
||||
out.insert(k, Value::table(v));
|
||||
}
|
||||
|
||||
yield ReturnSuccess::value(out)
|
||||
};
|
||||
|
||||
Ok(stream.to_output_stream())
|
||||
}
|
@ -3,6 +3,59 @@ mod helpers;
|
||||
use helpers as h;
|
||||
use helpers::{Playground, Stub::*};
|
||||
|
||||
/// Grouping the sample CSV by `type` and picking group `A` must leave the two
/// rows whose `type` cell is `A`.
#[test]
fn group_by() {
    Playground::setup("group_by_test_1", |dirs, sandbox| {
        // Three rows; two of them carry type `A`, one carries type `B`.
        let fixture = FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
first_name,last_name,rusty_luck,type
Andrés,Robalino,1,A
Jonathan,Turner,1,B
Yehuda,Katz,1,A
"#,
        );
        sandbox.with_files(vec![fixture]);

        let script = r#"
open los_tres_caballeros.csv
| group-by type
| get A
| count
| echo $it
"#;

        let actual = nu!(cwd: dirs.test(), h::pipeline(script));

        assert_eq!(actual, "2");
    })
}
|
||||
|
||||
/// Asking `group-by` for a column the table does not have (`ttype`) must
/// produce error output mentioning "Unknown column".
#[test]
fn group_by_errors_if_unknown_column_name() {
    Playground::setup("group_by_test_2", |dirs, sandbox| {
        // Same sample table as the happy-path test; it has no `ttype` column.
        let fixture = FileWithContentToBeTrimmed(
            "los_tres_caballeros.csv",
            r#"
first_name,last_name,rusty_luck,type
Andrés,Robalino,1,A
Jonathan,Turner,1,B
Yehuda,Katz,1,A
"#,
        );
        sandbox.with_files(vec![fixture]);

        let script = r#"
open los_tres_caballeros.csv
| group-by ttype
"#;

        let actual = nu_error!(cwd: dirs.test(), h::pipeline(script));

        assert!(actual.contains("Unknown column"));
    })
}
|
||||
|
||||
#[test]
|
||||
fn first_gets_first_rows_by_amount() {
|
||||
Playground::setup("first_test_1", |dirs, sandbox| {
|
||||
|
Loading…
Reference in New Issue
Block a user