Refactoring and more group-by flexibility.

This commit is contained in:
Andrés N. Robalino
2020-06-17 12:33:50 -05:00
parent 6914099e28
commit 778e497903
10 changed files with 266 additions and 189 deletions

View File

@ -4,6 +4,7 @@ use indexmap::indexmap;
use nu_errors::ShellError;
use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
use nu_source::Tagged;
use nu_value_ext::as_string;
pub struct GroupBy;
@ -71,6 +72,10 @@ impl WholeStreamCommand for GroupBy {
}
}
enum Grouper {
ByColumn(Option<Tagged<String>>),
}
pub async fn group_by(
args: CommandArgs,
registry: &CommandRegistry,
@ -81,30 +86,84 @@ pub async fn group_by(
let values: Vec<Value> = input.collect().await;
if values.is_empty() {
Err(ShellError::labeled_error(
return Err(ShellError::labeled_error(
"Expected table from pipeline",
"requires a table input",
name,
))
));
}
let values = UntaggedValue::table(&values).into_value(&name);
match group(&column_name, &values, name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(reason) => Err(reason),
}
}
pub fn suggestions(tried: Tagged<&str>, for_value: &Value) -> ShellError {
let possibilities = for_value.data_descriptors();
let mut possible_matches: Vec<_> = possibilities
.iter()
.map(|x| (natural::distance::levenshtein_distance(x, &tried), x))
.collect();
possible_matches.sort();
if !possible_matches.is_empty() {
ShellError::labeled_error(
"Unknown column",
format!("did you mean '{}'?", possible_matches[0].1),
tried.tag(),
)
} else {
match crate::utils::data::group(column_name, &values, None, &name) {
Ok(grouped) => Ok(OutputStream::one(ReturnSuccess::value(grouped))),
Err(err) => Err(err),
}
ShellError::labeled_error(
"Unknown column",
"row does not contain this column",
tried.tag(),
)
}
}
pub fn group(
column_name: &Tagged<String>,
values: Vec<Value>,
column_name: &Option<Tagged<String>>,
values: &Value,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
crate::utils::data::group(Some(column_name.clone()), &values, None, tag)
let name = tag.into();
let grouper = if let Some(column_name) = column_name {
Grouper::ByColumn(Some(column_name.clone()))
} else {
Grouper::ByColumn(None)
};
match grouper {
Grouper::ByColumn(Some(column_name)) => {
let block = Box::new(move |row: &Value| {
match row.get_data_by_key(column_name.borrow_spanned()) {
Some(group_key) => Ok(as_string(&group_key)?),
None => Err(suggestions(column_name.borrow_tagged(), &row)),
}
});
crate::utils::data::group(&values, &Some(block), &name)
}
Grouper::ByColumn(None) => {
let block = Box::new(move |row: &Value| match as_string(row) {
Ok(group_key) => Ok(group_key),
Err(reason) => Err(reason),
});
crate::utils::data::group(&values, &Some(block), &name)
}
}
}
#[cfg(test)]
mod tests {
use crate::commands::group_by::group;
use super::group;
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
@ -122,7 +181,7 @@ mod tests {
UntaggedValue::table(list).into_untagged_value()
}
fn nu_releases_commiters() -> Vec<Value> {
fn nu_releases_committers() -> Vec<Value> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
@ -156,10 +215,11 @@ mod tests {
#[test]
fn groups_table_by_date_column() -> Result<(), ShellError> {
let for_key = String::from("date").tagged_unknown();
let for_key = Some(String::from("date").tagged_unknown());
let sample = table(&nu_releases_committers());
assert_eq!(
group(&for_key, nu_releases_commiters(), Tag::unknown())?,
group(&for_key, &sample, Tag::unknown())?,
row(indexmap! {
"August 23-2019".into() => table(&[
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),
@ -184,10 +244,11 @@ mod tests {
#[test]
fn groups_table_by_country_column() -> Result<(), ShellError> {
let for_key = String::from("country").tagged_unknown();
let for_key = Some(String::from("country").tagged_unknown());
let sample = table(&nu_releases_committers());
assert_eq!(
group(&for_key, nu_releases_commiters(), Tag::unknown())?,
group(&for_key, &sample, Tag::unknown())?,
row(indexmap! {
"EC".into() => table(&[
row(indexmap!{"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}),