group-by can generate custom grouping key by block evaluation. (#2172)

This commit is contained in:
Andrés N. Robalino 2020-07-14 08:45:19 -05:00 committed by GitHub
parent 8551e06d9e
commit f2c4d22739
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 163 additions and 28 deletions

View File

@ -83,7 +83,7 @@ fn is_expanded_it_usage(head: &SpannedExpression) -> bool {
}
}
async fn process_row(
pub async fn process_row(
block: Arc<Block>,
scope: Arc<Scope>,
head: Arc<Box<SpannedExpression>>,

View File

@ -10,7 +10,7 @@ pub struct GroupBy;
#[derive(Deserialize)]
pub struct GroupByArgs {
column_name: Option<Tagged<String>>,
grouper: Option<Value>,
}
#[async_trait]
@ -21,14 +21,14 @@ impl WholeStreamCommand for GroupBy {
fn signature(&self) -> Signature {
Signature::build("group-by").optional(
"column_name",
SyntaxShape::String,
"the name of the column to group by",
"grouper",
SyntaxShape::Any,
"the grouper value to use",
)
}
fn usage(&self) -> &str {
"Creates a new table with the data from the table rows grouped by the column given."
"create a new table grouped."
}
async fn run(
@ -42,12 +42,17 @@ impl WholeStreamCommand for GroupBy {
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group items by type",
description: "group items by column named \"type\"",
example: r#"ls | group-by type"#,
result: None,
},
Example {
description: "Group items by their value",
description: "blocks can be used for generating a grouping key (same as above)",
example: r#"ls | group-by { get type }"#,
result: None,
},
Example {
description: "you can also group by raw values by leaving out the argument",
example: "echo [1 3 1 3 2 1 1] | group-by",
result: Some(vec![UntaggedValue::row(indexmap! {
"1".to_string() => UntaggedValue::Table(vec![
@ -68,26 +73,95 @@ impl WholeStreamCommand for GroupBy {
})
.into()]),
},
Example {
description: "write pipelines for a more involved grouping key",
example:
"echo [1 3 1 3 2 1 1] | group-by { echo `({{$it}} - 1) % 3` | calc | str from }",
result: None,
},
]
}
}
enum Grouper {
ByColumn(Option<Tagged<String>>),
ByBlock,
}
pub async fn group_by(
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
let registry = registry.clone();
let name = args.call_info.name_tag.clone();
let (GroupByArgs { column_name }, input) = args.process(&registry).await?;
let registry = registry.clone();
let head = Arc::new(args.call_info.args.head.clone());
let scope = Arc::new(args.call_info.scope.clone());
let context = Arc::new(Context::from_raw(&args, &registry));
let (GroupByArgs { grouper }, input) = args.process(&registry).await?;
let values: Vec<Value> = input.collect().await;
let mut keys: Vec<Result<String, ShellError>> = vec![];
let mut group_strategy = Grouper::ByColumn(None);
match grouper {
Some(Value {
value: UntaggedValue::Block(block_given),
..
}) => {
let block = Arc::new(block_given);
let error_key = "error";
for value in values.iter() {
let run = block.clone();
let scope = scope.clone();
let head = head.clone();
let context = context.clone();
match crate::commands::each::process_row(run, scope, head, context, value.clone())
.await
{
Ok(mut s) => {
let collection: Vec<Result<ReturnSuccess, ShellError>> =
s.drain_vec().await;
if collection.len() > 1 {
return Err(ShellError::labeled_error(
"expected one value from the block",
"requires a table with one value for grouping",
&name,
));
}
let value = match collection.get(0) {
Some(Ok(return_value)) => {
return_value.raw_value().unwrap_or_else(|| {
UntaggedValue::string(error_key).into_value(&name)
})
}
Some(Err(_)) | None => {
UntaggedValue::string(error_key).into_value(&name)
}
};
keys.push(as_string(&value));
}
Err(_) => {
keys.push(Ok(error_key.into()));
}
}
}
group_strategy = Grouper::ByBlock;
}
Some(other) => {
group_strategy = Grouper::ByColumn(Some(as_string(&other)?.tagged(&name)));
}
_ => {}
}
if values.is_empty() {
return Err(ShellError::labeled_error(
"Expected table from pipeline",
"expected table from pipeline",
"requires a table input",
name,
));
@ -95,9 +169,25 @@ pub async fn group_by(
let values = UntaggedValue::table(&values).into_value(&name);
match group(&column_name, &values, name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
match group_strategy {
Grouper::ByBlock => {
let map = keys.clone();
let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
Some(Ok(key)) => Ok(key.clone()),
Some(Err(reason)) => Err(reason.clone()),
None => as_string(row),
});
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
}
}
Grouper::ByColumn(column_name) => match group(&column_name, &values, name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
},
}
}
@ -141,7 +231,7 @@ pub fn group(
match grouper {
Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| {
let block = Box::new(move |_, row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -151,13 +241,16 @@ pub fn group(
crate::utils::data::group(&values, &Some(block), &name)
}
Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) {
let block = Box::new(move |_, row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason),
});
crate::utils::data::group(&values, &Some(block), &name)
}
Grouper::ByBlock => Err(ShellError::unimplemented(
"Block not implemented: This should never happen.",
)),
}
}

View File

@ -34,7 +34,7 @@ impl WholeStreamCommand for GroupByDate {
}
fn usage(&self) -> &str {
"Creates a new table with the data from the table rows grouped by the column given."
"creates a table grouped by date."
}
async fn run(
@ -100,7 +100,7 @@ pub async fn group_by_date(
match (grouper_date, grouper_column) {
(Grouper::ByDate(None), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format("%Y-%b-%d"));
let block = Box::new(move |_, row: &Value| row.format("%Y-%b-%d"));
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -108,7 +108,7 @@ pub async fn group_by_date(
}
}
(Grouper::ByDate(None), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| {
let block = Box::new(move |_, row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -123,7 +123,7 @@ pub async fn group_by_date(
}
}
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(None)) => {
let block = Box::new(move |row: &Value| row.format(&fmt));
let block = Box::new(move |_, row: &Value| row.format(&fmt));
match crate::utils::data::group(&values, &Some(block), &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
@ -131,7 +131,7 @@ pub async fn group_by_date(
}
}
(Grouper::ByDate(Some(fmt)), GroupByColumn::Name(Some(column_name))) => {
let block = Box::new(move |row: &Value| {
let block = Box::new(move |_, row: &Value| {
let group_key = match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(group_key),
None => Err(suggestions(column_name.borrow_tagged(), &row)),

View File

@ -81,7 +81,7 @@ pub fn split(
match grouper {
Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| {
let block = Box::new(move |_, row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
@ -91,7 +91,7 @@ pub fn split(
crate::utils::data::split(&values, &Some(block), &name)
}
Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) {
let block = Box::new(move |_, row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason),
});

View File

@ -7,16 +7,16 @@ use nu_value_ext::as_string;
#[allow(clippy::type_complexity)]
pub fn group(
values: &Value,
grouper: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();
let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
for value in values.table_entries() {
for (idx, value) in values.table_entries().enumerate() {
let group_key = if let Some(ref grouper) = grouper {
grouper(&value)
grouper(idx, &value)
} else {
as_string(&value)
};

View File

@ -7,7 +7,7 @@ use crate::utils::data::group;
#[allow(clippy::type_complexity)]
pub fn split(
value: &Value,
splitter: &Option<Box<dyn Fn(&Value) -> Result<String, ShellError> + Send>>,
splitter: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();

View File

@ -31,8 +31,50 @@ fn groups() {
}
#[test]
fn errors_if_given_unknown_column_name_is_missing() {
fn errors_if_given_unknown_column_name() {
Playground::setup("group_by_test_2", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.json",
r#"
{
"nu": {
"committers": [
{"name": "Andrés N. Robalino"},
{"name": "Jonathan Turner"},
{"name": "Yehuda Katz"}
],
"releases": [
{"version": "0.2"}
{"version": "0.8"},
{"version": "0.9999999"}
],
"0xATYKARNU": [
["Th", "e", " "],
["BIG", " ", "UnO"],
["punto", "cero"]
]
}
}
"#,
)]);
let actual = nu!(
cwd: dirs.test(), pipeline(
r#"
open los_tres_caballeros.json
| group-by { get nu.releases.version }
"#
));
assert!(actual
.err
.contains("requires a table with one value for grouping"));
})
}
#[test]
fn errors_if_block_given_evaluates_more_than_one_row() {
Playground::setup("group_by_test_3", |dirs, sandbox| {
sandbox.with_files(vec![FileWithContentToBeTrimmed(
"los_tres_caballeros.csv",
r#"