From f93ff9ec33eac200da25afc57165f40752d1d936 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Sat, 9 May 2020 12:15:47 -0500 Subject: [PATCH] Make grouping more flexible. (#1741) --- crates/nu-cli/src/commands/group_by.rs | 114 +++++++++++++------------ crates/nu-cli/src/utils.rs | 1 + crates/nu-cli/src/utils/data/group.rs | 62 ++++++++++++++ crates/nu-cli/src/utils/data/mod.rs | 3 + crates/nu-protocol/src/value.rs | 7 ++ 5 files changed, 132 insertions(+), 55 deletions(-) create mode 100644 crates/nu-cli/src/utils/data/group.rs create mode 100644 crates/nu-cli/src/utils/data/mod.rs diff --git a/crates/nu-cli/src/commands/group_by.rs b/crates/nu-cli/src/commands/group_by.rs index db407673bc..372da74ca0 100644 --- a/crates/nu-cli/src/commands/group_by.rs +++ b/crates/nu-cli/src/commands/group_by.rs @@ -1,15 +1,16 @@ use crate::commands::WholeStreamCommand; use crate::prelude::*; use nu_errors::ShellError; -use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value}; +use nu_protocol::{ReturnSuccess, Signature, SyntaxShape, Value}; use nu_source::Tagged; -use nu_value_ext::{as_string, get_data_by_key}; pub struct GroupBy; #[derive(Deserialize)] pub struct GroupByArgs { column_name: Tagged, + date: Tagged, + format: Option>, } impl WholeStreamCommand for GroupBy { @@ -18,11 +19,19 @@ impl WholeStreamCommand for GroupBy { } fn signature(&self) -> Signature { - Signature::build("group-by").required( - "column_name", - SyntaxShape::String, - "the name of the column to group by", - ) + Signature::build("group-by") + .required( + "column_name", + SyntaxShape::String, + "the name of the column to group by", + ) + .named( + "format", + SyntaxShape::String, + "Specify date and time formatting", + Some('f'), + ) + .switch("date", "by date", Some('d')) } fn usage(&self) -> &str { @@ -38,8 +47,17 @@ impl WholeStreamCommand for GroupBy { } } +enum Grouper { + Default, + ByDate(Option), +} + pub fn group_by( - GroupByArgs { column_name }: GroupByArgs, + GroupByArgs { + column_name, + date, + format, + }: GroupByArgs, RunnableContext { input, name, .. }: RunnableContext, ) -> Result { let stream = async_stream! { @@ -52,9 +70,38 @@ pub fn group_by( column_name.span() )) } else { - match group(&column_name, values, name) { - Ok(grouped) => yield ReturnSuccess::value(grouped), - Err(err) => yield Err(err) + + let grouper = if let Tagged { item: true, tag } = date { + if let Some(Tagged { item: fmt, tag }) = format { + Grouper::ByDate(Some(fmt)) + } else { + Grouper::ByDate(None) + } + } else { + Grouper::Default + }; + + match grouper { + Grouper::Default => { + match crate::utils::data::group(column_name, &values, None, &name) { + Ok(grouped) => yield ReturnSuccess::value(grouped), + Err(err) => yield Err(err), + } + } + Grouper::ByDate(None) => { + match crate::utils::data::group(column_name, &values, Some(Box::new(|row: &Value| row.format("%Y-%b-%d"))), &name) { + Ok(grouped) => yield ReturnSuccess::value(grouped), + Err(err) => yield Err(err), + } + } + Grouper::ByDate(Some(fmt)) => { + match crate::utils::data::group(column_name, &values, Some(Box::new(move |row: &Value| { + row.format(&fmt) + })), &name) { + Ok(grouped) => yield ReturnSuccess::value(grouped), + Err(err) => yield Err(err), + } + } } } }; @@ -67,50 +114,7 @@ pub fn group( values: Vec, tag: impl Into, ) -> Result { - let tag = tag.into(); - - let mut groups: indexmap::IndexMap> = indexmap::IndexMap::new(); - - for value in values { - let group_key = get_data_by_key(&value, column_name.borrow_spanned()); - - if let Some(group_key) = group_key { - let group_key = as_string(&group_key)?; - let group = groups.entry(group_key).or_insert(vec![]); - group.push(value); - } else { - let possibilities = value.data_descriptors(); - - let mut possible_matches: Vec<_> = possibilities - .iter() - .map(|x| (natural::distance::levenshtein_distance(x, column_name), x)) - .collect(); - - possible_matches.sort(); - - if !possible_matches.is_empty() { - return Err(ShellError::labeled_error( - "Unknown column", - format!("did you mean '{}'?", possible_matches[0].1), - column_name.tag(), - )); - } else { - return Err(ShellError::labeled_error( - "Unknown column", - "row does not contain this column", - column_name.tag(), - )); - } - } - } - - let mut out = TaggedDictBuilder::new(&tag); - - for (k, v) in groups.iter() { - out.insert_untagged(k, UntaggedValue::table(v)); - } - - Ok(out.into_value()) + crate::utils::data::group(column_name.clone(), &values, None, tag) } #[cfg(test)] diff --git a/crates/nu-cli/src/utils.rs b/crates/nu-cli/src/utils.rs index 4c151bdfaa..a32cec0a8f 100644 --- a/crates/nu-cli/src/utils.rs +++ b/crates/nu-cli/src/utils.rs @@ -1,3 +1,4 @@ +pub mod data; pub mod data_processing; use crate::path::canonicalize; diff --git a/crates/nu-cli/src/utils/data/group.rs b/crates/nu-cli/src/utils/data/group.rs new file mode 100644 index 0000000000..3c468033a3 --- /dev/null +++ b/crates/nu-cli/src/utils/data/group.rs @@ -0,0 +1,62 @@ +use indexmap::IndexMap; +use nu_errors::ShellError; +use nu_protocol::{TaggedDictBuilder, UntaggedValue, Value}; +use nu_source::{Tag, Tagged}; +use nu_value_ext::{as_string, get_data_by_key}; + +#[allow(clippy::type_complexity)] +pub fn group( + column_name: Tagged, + values: &[Value], + grouper: Option Result + Send>>, + tag: impl Into, +) -> Result { + let tag = tag.into(); + + let mut groups: IndexMap> = IndexMap::new(); + + for value in values { + let group_key = get_data_by_key(&value, column_name.borrow_spanned()); + + if let Some(group_key) = group_key { + let group_key = if let Some(ref grouper) = grouper { + grouper(&group_key) + } else { + as_string(&group_key) + }; + let group = groups.entry(group_key?).or_insert(vec![]); + group.push((*value).clone()); + } else { + let possibilities = value.data_descriptors(); + + let mut possible_matches: Vec<_> = possibilities + .iter() + .map(|x| (natural::distance::levenshtein_distance(x, &column_name), x)) + .collect(); + + possible_matches.sort(); + + if !possible_matches.is_empty() { + return Err(ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", possible_matches[0].1), + column_name.tag(), + )); + } else { + return Err(ShellError::labeled_error( + "Unknown column", + "row does not contain this column", + column_name.tag(), + )); + } + } + } + + let mut out = TaggedDictBuilder::new(&tag); + + for (k, v) in groups.iter() { + out.insert_untagged(k, UntaggedValue::table(v)); + } + + Ok(out.into_value()) +} diff --git a/crates/nu-cli/src/utils/data/mod.rs b/crates/nu-cli/src/utils/data/mod.rs new file mode 100644 index 0000000000..f90d93bdbb --- /dev/null +++ b/crates/nu-cli/src/utils/data/mod.rs @@ -0,0 +1,3 @@ +pub mod group; + +pub use crate::utils::data::group::group; diff --git a/crates/nu-protocol/src/value.rs b/crates/nu-protocol/src/value.rs index fc2c5bf077..233a1f4704 100644 --- a/crates/nu-protocol/src/value.rs +++ b/crates/nu-protocol/src/value.rs @@ -272,6 +272,13 @@ impl Value { } } + pub fn format(&self, fmt: &str) -> Result { + match &self.value { + UntaggedValue::Primitive(Primitive::Date(dt)) => Ok(dt.format(fmt).to_string()), + _ => Err(ShellError::type_error("date", self.spanned_type_name())), + } + } + /// View into the borrowed string contents of a Value, if possible pub fn as_forgiving_string(&self) -> Result<&str, ShellError> { match &self.value {