Data summarize reporting overhaul. (#2299)

Refactored out most of the internal work for summarizing data, opening
the door for generating charts from it. A model is introduced
to hold the information needed for a summary; the Histogram command is
an example of partial usage. This is the beginning.

Removed the implicit arithmetic traits on Value and Primitive to avoid
mixed-type panics. The std operation traits can't fail, and we
can't guarantee that. We can handle this gracefully now, since compute_values
was introduced after the parser changes four months ago. The handling
logic should be taken care of either explicitly or in compute_values.

The zero identity trait was also removed (implementing it forced
us to also implement Add, Mult, etc.)

Also: the `math` operations now omit a column from the output if it is not computable:

```
> ls | math sum
──────┬──────────
 size │ 150.9 KB
──────┴──────────
```
This commit is contained in:
Andrés N. Robalino
2020-08-03 17:47:19 -05:00
committed by GitHub
parent eeb9b4edcb
commit 028fc9b9cd
29 changed files with 1396 additions and 1512 deletions

View File

@ -0,0 +1,274 @@
#![allow(clippy::type_complexity)]
use crate::data::value::compute_values;
use derive_new::new;
use nu_errors::ShellError;
use nu_protocol::hir::Operator;
use nu_protocol::{UntaggedValue, Value};
use nu_source::{SpannedItem, Tag, TaggedItem};
use nu_value_ext::ValueExt;
/// Two sets of labels used by a report.
///
/// `x` is the set iterated by `grouped()` and `y` the set iterated by `splits()`.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, new)]
pub struct Labels {
/// Labels of the groups.
pub x: Vec<String>,
/// Labels of the splits.
pub y: Vec<String>,
}
impl Labels {
    /// Returns the group label stored at position `idx`, or `None` when `idx` is out of range.
    pub fn at(&self, idx: usize) -> Option<&str> {
        self.x.get(idx).map(|label| &label[..])
    }

    /// Iterates over the group labels (`x`) in stored order.
    pub fn grouped(&self) -> impl Iterator<Item = &String> {
        self.x.iter()
    }

    /// Number of group labels, wrapped as an untagged integer value.
    pub fn grouping_total(&self) -> Value {
        let total = self.x.len();
        UntaggedValue::int(total).into_untagged_value()
    }

    /// Iterates over the split labels (`y`) in stored order.
    pub fn splits(&self) -> impl Iterator<Item = &String> {
        self.y.iter()
    }

    /// Number of split labels, wrapped as an untagged integer value.
    pub fn splits_total(&self) -> Value {
        let total = self.y.len();
        UntaggedValue::int(total).into_untagged_value()
    }
}
/// A pair of values marking where a range starts and where it ends.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Clone, new)]
pub struct Range {
/// Value at which the range begins.
pub start: Value,
/// Value at which the range ends.
pub end: Value,
}
/// Builds a reducing step from a starting factor and a calculator.
///
/// The returned closure evaluates `acc * acc_begin + calculator(datax)`. With an `acc_begin`
/// of one the previous accumulator is carried forward; with zero it is discarded.
///
/// The closure fails with a coercion error when either arithmetic step cannot be computed
/// for the operand types involved, and forwards any error raised by `calculator`.
fn formula(
    acc_begin: Value,
    calculator: Box<dyn Fn(Vec<&Value>) -> Result<Value, ShellError> + Send + Sync + 'static>,
) -> Box<dyn Fn(&Value, Vec<&Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    Box::new(move |acc, datax| -> Result<Value, ShellError> {
        // The multiplication is attempted first so its failure wins over a calculator failure.
        let carried = compute_values(Operator::Multiply, acc, &acc_begin)
            .map_err(|(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned_unknown(),
                    right_type.spanned_unknown(),
                )
            })?
            .into_untagged_value();

        let total = calculator(datax)?;

        let combined = compute_values(Operator::Plus, &carried, &total).map_err(
            |(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned_unknown(),
                    right_type.spanned_unknown(),
                )
            },
        )?;

        Ok(combined.into_untagged_value())
    })
}
/// Returns the reducing closure that implements `command`.
///
/// Every reduction sums the values it is handed; the variants differ only in the factor the
/// previous accumulator is multiplied by before the sum is added:
///
/// * `Reduction::Accumulate` uses `1`, so each step adds to the running total.
/// * `Reduction::Count` uses `0`, so each step starts again from nothing.
pub fn reducer_for(
    command: Reduction,
) -> Box<dyn Fn(&Value, Vec<&Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    // Listing every variant makes the compiler flag this spot when a new one is added.
    let acc_begin = match command {
        Reduction::Accumulate => 1,
        Reduction::Count => 0,
    };

    // `formula` already hands back a boxed closure of the right type; boxing it a second time
    // would only add an extra allocation and indirection.
    formula(
        UntaggedValue::int(acc_begin).into_untagged_value(),
        Box::new(sum),
    )
}
/// Finds the greatest value among all entries of all datasets in `values`.
///
/// `values` is walked as a table whose entries are themselves tables. The greatest entry of
/// each inner table is taken, and the greatest of those is returned by reference.
///
/// # Errors
///
/// Returns a labeled error pointing at `tag` when no entry is found, that is when `values`
/// has no datasets or every dataset is empty.
pub fn max(values: &Value, tag: impl Into<Tag>) -> Result<&Value, ShellError> {
    let tag = tag.into();
    values
        .table_entries()
        .filter_map(|dataset| dataset.table_entries().max())
        .max()
        .ok_or_else(|| {
            // Replaces the former placeholder text ("err") with an actionable message.
            ShellError::labeled_error(
                "Cannot compute the maximum of empty data",
                "no values found here",
                &tag,
            )
        })
}
/// Adds up every value in `data`, starting from the integer zero.
///
/// # Errors
///
/// * A labeled error pointing at the offending value when it is not a primitive.
/// * A coercion error when two operands cannot be added together.
pub fn sum(data: Vec<&Value>) -> Result<Value, ShellError> {
    let mut total = UntaggedValue::int(0);

    for entry in data {
        if let UntaggedValue::Primitive(_) = entry.value {
            total = compute_values(Operator::Plus, &total, entry).map_err(
                |(left_type, right_type)| {
                    ShellError::coerce_error(
                        left_type.spanned_unknown(),
                        right_type.spanned_unknown(),
                    )
                },
            )?;
        } else {
            return Err(ShellError::labeled_error(
                "Attempted to compute the sum of a value that cannot be summed.",
                "value appears here",
                entry.tag.span,
            ));
        }
    }

    Ok(total.into_untagged_value())
}
/// Produces the sorted list of column labels.
///
/// Without a `format` the labels are copied and sorted as they are. With one, each label is
/// first parsed as a date, then handed to the formatter together with the original label, and
/// the formatter's output is what gets sorted.
///
/// # Errors
///
/// Fails with the first error raised while parsing a label as a date or while formatting it.
pub fn sort_columns(
    values: &[String],
    format: &Option<Box<dyn Fn(&Value, String) -> Result<String, ShellError>>>,
) -> Result<Vec<String>, ShellError> {
    let mut keys = match format {
        None => values.to_vec(),
        Some(fmt) => {
            let mut formatted = vec![];
            for raw in values {
                let label = raw.clone().tagged_unknown();
                let date = crate::data::value::Date::naive_from_str(label.borrow_tagged())?
                    .into_untagged_value();
                formatted.push(fmt(&date, label.to_string())?);
            }
            formatted
        }
    };

    keys.sort();
    Ok(keys)
}
/// Lays the grouped-and-split data out in label order.
///
/// For each split label (in `planes.splits()` order) the matching column of `values` is looked
/// up, and the entries of each of its groups are appended in `planes.grouped()` order. The
/// result is a table holding one flat table of entries per split.
///
/// # Errors
///
/// Returns an "unknown column" error when `values` has no column for one of the split labels.
pub fn sort(planes: &Labels, values: &Value, tag: impl Into<Tag>) -> Result<Value, ShellError> {
    let tag = tag.into();
    let mut x = vec![];

    for column in planes.splits() {
        let key = column.clone().tagged_unknown();
        let groups = values
            .get_data_by_key(key.borrow_spanned())
            .ok_or_else(|| {
                ShellError::labeled_error("unknown column", "unknown column", key.span())
            })?;

        // Entries are appended straight into one vector instead of collecting a vector per
        // group and cloning them all again just to flatten them.
        let mut y: Vec<Value> = vec![];
        for inner_column in planes.grouped() {
            let key = inner_column.clone().tagged_unknown();
            // NOTE(review): a group missing from this split contributes no entries at all, so
            // positions in the flattened table may not line up with the group labels —
            // confirm this is intended.
            if let Some(grouped) = groups.get_data_by_key(key.borrow_spanned()) {
                y.extend(grouped.table_entries().cloned());
            }
        }

        x.push(UntaggedValue::table(&y).into_value(&tag));
    }

    Ok(UntaggedValue::table(&x).into_value(&tag))
}
/// Replaces every entry of every split with the value it should contribute.
///
/// `values` is walked as a table of tables. Each inner entry becomes a one-element table
/// holding either the result of `evaluator(index, entry)` or, when no evaluator is given, the
/// integer `1`. The nesting of the input is otherwise preserved.
///
/// # Errors
///
/// Propagates the first error returned by `evaluator`.
pub fn evaluate(
    values: &Value,
    evaluator: &Option<Box<dyn Fn(usize, &Value) -> Result<Value, ShellError> + Send>>,
    tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
    let tag = tag.into();
    let mut splits = vec![];

    for split in values.table_entries() {
        let mut subsets = vec![];

        for (idx, subset) in split.table_entries().enumerate() {
            let value = match evaluator {
                Some(evaluate_with) => evaluate_with(idx, subset)?,
                None => UntaggedValue::int(1).into_value(&tag),
            };
            subsets.push(UntaggedValue::table(&[value]).into_value(&tag));
        }

        splits.push(UntaggedValue::table(&subsets).into_value(&tag));
    }

    Ok(UntaggedValue::table(&splits).into_value(&tag))
}
/// Selects which reducing closure `reducer_for` builds.
pub enum Reduction {
/// Reduction whose steps do not carry the previous accumulator forward
/// (`reducer_for` builds it with a starting factor of zero).
#[allow(dead_code)]
Count,
/// Reduction whose steps add to the previous accumulator
/// (`reducer_for` builds it with a starting factor of one).
Accumulate,
}
/// Turns every dataset of `values` into a table of running totals.
///
/// Within a dataset the accumulator starts at the integer zero and is updated with the
/// `Reduction::Accumulate` reducer for each subset in turn; the accumulator after each subset
/// is what gets recorded. The accumulator is reset for every dataset.
///
/// # Errors
///
/// Propagates the first error returned by the reducer.
pub fn reduce(values: &Value, tag: impl Into<Tag>) -> Result<Value, ShellError> {
    let tag = tag.into();
    let accumulate = reducer_for(Reduction::Accumulate);
    let mut reduced = vec![];

    for dataset in values.table_entries() {
        let mut running = UntaggedValue::int(0).into_value(&tag);
        let mut totals = vec![];

        for subset in dataset.table_entries() {
            let entries = subset.table_entries().collect::<Vec<_>>();
            running = accumulate(&running, entries)?;
            totals.push(running.clone());
        }

        reduced.push(UntaggedValue::table(&totals).into_value(&tag));
    }

    Ok(UntaggedValue::table(&reduced).into_value(&tag))
}
/// Expresses every entry of `values` as a percentage of `maxima`.
///
/// `values` is walked as a table of tables and each entry is multiplied by `100 / maxima`.
/// Entries for which that cannot be computed are left out of the result; if `100 / maxima`
/// itself cannot be computed, every inner table comes out empty.
pub fn percentages(
    maxima: &Value,
    values: &Value,
    tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
    let tag = tag.into();

    // The scale factor does not depend on the entry, so it is worked out once up front
    // rather than once per entry.
    let hundred = UntaggedValue::decimal(100);
    let scale = compute_values(Operator::Divide, &hundred, maxima).ok();

    let mut x = vec![];
    for split in values.table_entries() {
        let scaled = split
            .table_entries()
            .filter_map(|entry| {
                let scale = scale.as_ref()?;
                compute_values(Operator::Multiply, entry, scale)
                    .ok()
                    .map(|v| v.into_untagged_value())
            })
            .collect::<Vec<_>>();

        x.push(UntaggedValue::table(&scaled).into_value(&tag));
    }

    Ok(UntaggedValue::table(&x).into_value(&tag))
}

View File

@ -1,5 +1,296 @@
pub mod group;
pub mod split;
mod internal;
pub use crate::utils::data::group::group;
pub use crate::utils::data::split::split;
use crate::utils::data::internal::*;
use derive_new::new;
use getset::Getters;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
use nu_source::Tag;
/// Everything `report` computes about a data set.
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Getters, Clone, new)]
pub struct Model {
/// Group (`x`) and split (`y`) labels, both sorted.
pub labels: Labels,
/// Two ranges: `report` fills the first with zero up to the number of group labels and the
/// second with zero up to the greatest reduced value.
pub ranges: (Range, Range),
/// The reduced values, one table per split.
pub data: Value,
/// The reduced values expressed as percentages of the greatest reduced value.
pub percentages: Value,
}
/// The callbacks that drive `report`.
// The boxed closure signatures are long by nature, hence the lint exemption.
#[allow(clippy::type_complexity)]
pub struct Operation<'a> {
/// Handed to `group` to decide the group key of a row.
pub grouper: Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
/// Handed to `split` to decide the split key of a row.
pub splitter: Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
/// Handed to `sort_columns` to format the group labels before they are sorted.
pub format: Option<Box<dyn Fn(&Value, String) -> Result<String, ShellError>>>,
/// Handed to `evaluate` to pick the value each entry contributes.
pub eval: &'a Option<Box<dyn Fn(usize, &Value) -> Result<Value, ShellError> + Send>>,
}
/// Summarizes `values` into a `Model`.
///
/// The steps, in order:
///
/// 1. group the values with `options.grouper`, then split the groups with `options.splitter`;
/// 2. collect and sort the group labels (through `options.format`, if any) and split labels;
/// 3. arrange the data in label order, evaluate each entry with `options.eval` and reduce
///    the results into running totals;
/// 4. take the greatest total and express every total as a percentage of it.
///
/// # Errors
///
/// Propagates the first error raised by any of the steps above.
pub fn report(
    values: &Value,
    options: Operation,
    tag: impl Into<Tag>,
) -> Result<Model, ShellError> {
    let tag = tag.into();

    let grouped = group(values, &options.grouper, &tag)?;
    let splitted = split(&grouped, &options.splitter, &tag)?;

    let x = grouped
        .row_entries()
        .map(|(key, _)| key.clone())
        .collect::<Vec<_>>();
    // `sort_columns` already treats a missing formatter as "sort the labels as they are".
    let x = sort_columns(&x, &options.format)?;

    let mut y = splitted
        .row_entries()
        .map(|(key, _)| key.clone())
        .collect::<Vec<_>>();
    y.sort();

    let planes = Labels { x, y };

    let sorted = sort(&planes, &splitted, &tag)?;
    // `evaluate` already falls back to a default when no evaluator is supplied.
    let evaluated = evaluate(&sorted, options.eval, &tag)?;

    let group_labels = planes.grouping_total();

    let reduced = reduce(&evaluated, &tag)?;

    // Cloned once because `reduced` is moved into the model below.
    let maxima = max(&reduced, &tag)?.clone();
    let percents = percentages(&maxima, &reduced, &tag)?;

    Ok(Model {
        labels: planes,
        ranges: (
            Range {
                start: UntaggedValue::int(0).into_untagged_value(),
                end: group_labels,
            },
            Range {
                start: UntaggedValue::int(0).into_untagged_value(),
                end: maxima,
            },
        ),
        data: reduced,
        percentages: percents,
    })
}
// Test-only fixtures and value builders, plus the test for `report`.
#[cfg(test)]
pub mod helpers {
use super::{report, Labels, Model, Operation, Range};
use bigdecimal::BigDecimal;
use indexmap::indexmap;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
use nu_source::{Tag, TaggedItem};
use nu_value_ext::ValueExt;
use num_bigint::BigInt;
use indexmap::IndexMap;
/// Builds an untagged integer value.
pub fn int(s: impl Into<BigInt>) -> Value {
UntaggedValue::int(s).into_untagged_value()
}
/// Builds an untagged decimal value.
pub fn decimal(f: impl Into<BigDecimal>) -> Value {
UntaggedValue::decimal(f.into()).into_untagged_value()
}
/// Builds an untagged string value.
pub fn string(input: impl Into<String>) -> Value {
UntaggedValue::string(input.into()).into_untagged_value()
}
/// Builds an untagged row value from the given entries.
pub fn row(entries: IndexMap<String, Value>) -> Value {
UntaggedValue::row(entries).into_untagged_value()
}
/// Builds an untagged table value from the given list.
pub fn table(list: &[Value]) -> Value {
UntaggedValue::table(list).into_untagged_value()
}
/// Builds an untagged date value by parsing `input`; panics if it cannot be parsed.
pub fn date(input: impl Into<String>) -> Value {
let key = input.into().tagged_unknown();
crate::data::value::Date::naive_from_str(key.borrow_tagged())
.unwrap()
.into_untagged_value()
}
/// Sample rows: three committers (AR, JT, YK), each appearing once on each of three dates,
/// with a country and a chicken count.
pub fn committers() -> Vec<Value> {
vec![
row(indexmap! {
"date".into() => date("2019-07-23"),
"name".into() => string("AR"),
"country".into() => string("EC"),
"chickens".into() => int(10),
}),
row(indexmap! {
"date".into() => date("2019-07-23"),
"name".into() => string("JT"),
"country".into() => string("NZ"),
"chickens".into() => int(5),
}),
row(indexmap! {
"date".into() => date("2019-10-10"),
"name".into() => string("YK"),
"country".into() => string("US"),
"chickens".into() => int(6),
}),
row(indexmap! {
"date".into() => date("2019-09-24"),
"name".into() => string("AR"),
"country".into() => string("EC"),
"chickens".into() => int(20),
}),
row(indexmap! {
"date".into() => date("2019-10-10"),
"name".into() => string("JT"),
"country".into() => string("NZ"),
"chickens".into() => int(15),
}),
row(indexmap! {
"date".into() => date("2019-09-24"),
"name".into() => string("YK"),
"country".into() => string("US"),
"chickens".into() => int(4),
}),
row(indexmap! {
"date".into() => date("2019-10-10"),
"name".into() => string("AR"),
"country".into() => string("EC"),
"chickens".into() => int(30),
}),
row(indexmap! {
"date".into() => date("2019-09-24"),
"name".into() => string("JT"),
"country".into() => string("NZ"),
"chickens".into() => int(10),
}),
row(indexmap! {
"date".into() => date("2019-07-23"),
"name".into() => string("YK"),
"country".into() => string("US"),
"chickens".into() => int(2),
}),
]
}
/// The sample rows grouped by their `date` column formatted as `%Y-%m-%d`.
pub fn committers_grouped_by_date() -> Value {
let sample = table(&committers());
let grouper = Box::new(move |_, row: &Value| {
let key = String::from("date").tagged_unknown();
let group_key = row.get_data_by_key(key.borrow_spanned()).unwrap();
group_key.format("%Y-%m-%d")
});
crate::utils::data::group(&sample, &Some(grouper), Tag::unknown()).unwrap()
}
/// Returns a formatter that renders a date value with `fmt`, ignoring its string argument.
pub fn date_formatter(
fmt: &'static str,
) -> Box<dyn Fn(&Value, String) -> Result<String, ShellError>> {
Box::new(move |date: &Value, _: String| date.format(&fmt))
}
// Compares labels, ranges and data of two models; percentages are deliberately left out.
fn assert_without_checking_percentages(report_a: Model, report_b: Model) {
assert_eq!(report_a.labels.x, report_b.labels.x);
assert_eq!(report_a.labels.y, report_b.labels.y);
assert_eq!(report_a.ranges, report_b.ranges);
assert_eq!(report_a.data, report_b.data);
}
#[test]
fn prepares_report_using_accumulating_value() {
let committers = table(&committers());
// Groups rows by their formatted date.
let by_date = Box::new(move |_, row: &Value| {
let key = String::from("date").tagged_unknown();
let key = row.get_data_by_key(key.borrow_spanned()).unwrap();
let callback = date_formatter("%Y-%m-%d");
callback(&key, "nothing".to_string())
});
// Splits rows by their country.
let by_country = Box::new(move |_, row: &Value| {
let key = String::from("country").tagged_unknown();
let key = row.get_data_by_key(key.borrow_spanned()).unwrap();
nu_value_ext::as_string(&key)
});
let options = Operation {
grouper: Some(by_date),
splitter: Some(by_country),
format: Some(date_formatter("%Y-%m-%d")),
eval: /* value to be used for accumulation */ &Some(Box::new(move |_, value: &Value| {
let chickens_key = String::from("chickens").tagged_unknown();
value
.get_data_by_key(chickens_key.borrow_spanned())
.ok_or_else(|| {
ShellError::labeled_error(
"unknown column",
"unknown column",
chickens_key.span(),
)
})
})),
};
assert_without_checking_percentages(
report(&committers, options, Tag::unknown()).unwrap(),
Model {
labels: Labels {
x: vec![
String::from("2019-07-23"),
String::from("2019-09-24"),
String::from("2019-10-10"),
],
y: vec![String::from("EC"), String::from("NZ"), String::from("US")],
},
ranges: (
Range {
start: int(0),
end: int(3),
},
Range {
start: int(0),
end: int(60),
},
),
// One table per country, each holding the running total of chickens across the dates.
data: table(&[
table(&[int(10), int(30), int(60)]),
table(&[int(5), int(15), int(30)]),
table(&[int(2), int(6), int(12)]),
]),
// Not compared by `assert_without_checking_percentages`.
percentages: table(&[
table(&[decimal(16.66), decimal(50), decimal(100)]),
table(&[decimal(8.33), decimal(25), decimal(50)]),
table(&[decimal(3.33), decimal(10), decimal(20)]),
]),
},
);
}
}

View File

@ -13,6 +13,12 @@ pub fn split(
let tag = tag.into();
let mut splits = indexmap::IndexMap::new();
let mut out = TaggedDictBuilder::new(&tag);
if splitter.is_none() {
out.insert_untagged("table", UntaggedValue::table(&[value.clone()]));
return Ok(out.into_value());
}
for (column, value) in value.row_entries() {
if !&value.is_table() {

View File

@ -1,675 +0,0 @@
use crate::data::value::compare_values;
use crate::data::TaggedListBuilder;
use chrono::{DateTime, NaiveDate, Utc};
use nu_errors::ShellError;
use nu_protocol::hir::Operator;
use nu_protocol::{Primitive, TaggedDictBuilder, UntaggedValue, Value};
use nu_source::{SpannedItem, Tag, Tagged, TaggedItem};
use nu_value_ext::{get_data_by_key, ValueExt};
use num_traits::Zero;
// Re-usable error messages
// Reported by `sum`, `max` and `min` when they are handed no data.
const ERR_EMPTY_DATA: &str = "Cannot perform aggregate math operation on empty data";
/// Returns the keys of a row value as labels tagged with `tag`, in sorted order.
///
/// Keys that parse with the `%B %d-%Y` date format are turned into date values before
/// sorting and rendered back with that same format afterwards; all other keys are sorted as
/// string values. A `value` that is not a row yields the single label `default`.
///
/// `_group_by_name` is accepted but not used.
pub fn columns_sorted(
    _group_by_name: Option<Tagged<String>>,
    value: &Value,
    tag: impl Into<Tag>,
) -> Vec<Tagged<String>> {
    let origin_tag = tag.into();

    let rows = match value {
        Value {
            value: UntaggedValue::Row(rows),
            ..
        } => rows,
        _ => return vec!["default".to_owned().tagged(&origin_tag)],
    };

    let mut sortable: Vec<Value> = Vec::new();
    for name in rows.entries.keys() {
        let name: &str = name.as_ref();
        let candidate = match NaiveDate::parse_from_str(name, "%B %d-%Y") {
            Ok(parsed) => UntaggedValue::Primitive(Primitive::Date(
                DateTime::<Utc>::from_utc(parsed.and_hms(12, 34, 56), Utc),
            )),
            Err(_) => UntaggedValue::string(name),
        };
        sortable.push(candidate.into_untagged_value());
    }
    sortable.sort();

    let mut labels = Vec::new();
    for k in sortable {
        let text = match k {
            Value {
                value: UntaggedValue::Primitive(Primitive::Date(d)),
                ..
            } => d.format("%B %d-%Y").to_string(),
            _ => k.as_string().unwrap_or_else(|_| String::from("<string>")),
        };
        labels.push(text.tagged(&origin_tag));
    }
    labels
}
/// Arranges grouped data into a table of tables ordered by the sorted group labels.
///
/// A result is only produced when `group_by_name` is given and `split_by_name` is not; every
/// other combination returns a "nothing" value. In the supported case `value` is wrapped in a
/// row under the key `default`, and for each key of that row one table is emitted holding,
/// per sorted label, the matching group (or an empty table when the group is missing).
pub fn t_sort(
group_by_name: Option<Tagged<String>>,
split_by_name: Option<String>,
value: &Value,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let origin_tag = tag.into();
match group_by_name {
Some(column_name) => {
let sorted_labels: Vec<Tagged<String>> =
columns_sorted(Some(column_name), value, &origin_tag);
match split_by_name {
None => {
// With no split requested, everything lives under one synthetic split, "default".
let mut dataset = TaggedDictBuilder::new(&origin_tag);
dataset.insert_value("default", value.clone());
let dataset = dataset.into_value();
let split_labels: Vec<Tagged<String>> = match &dataset {
Value {
value: UntaggedValue::Row(rows),
..
} => {
let mut keys: Vec<Tagged<String>> = rows
.entries
.keys()
.map(|k| k.clone().tagged_unknown())
.collect();
keys.sort();
keys
}
_ => vec![],
};
// For every split, pick the groups out in sorted-label order.
let results: Vec<Vec<Value>> = split_labels
.iter()
.map(|split| {
let groups = get_data_by_key(&dataset, split.borrow_spanned());
sorted_labels
.clone()
.into_iter()
.map(|label| match &groups {
Some(Value {
value: UntaggedValue::Row(dict),
..
}) => {
dict.get_data_by_key(label.borrow_spanned()).unwrap_or_else(
|| UntaggedValue::Table(vec![]).into_value(&origin_tag),
)
}
_ => UntaggedValue::Table(vec![]).into_value(&origin_tag),
})
.collect()
})
.collect();
let mut outer = TaggedListBuilder::new(&origin_tag);
for i in results {
outer.push_value(UntaggedValue::Table(i).into_value(&origin_tag));
}
Ok(UntaggedValue::Table(outer.list).into_value(&origin_tag))
}
// Splitting by a column is not handled here.
Some(_) => Ok(UntaggedValue::nothing().into_value(&origin_tag)),
}
}
None => Ok(UntaggedValue::nothing().into_value(&origin_tag)),
}
}
/// Builds the closure used to evaluate a single value.
///
/// With a key, the closure looks that key up in the value it is given and returns whatever is
/// found, if anything. Without a key, it always returns the integer `1` carrying the tag it
/// was called with.
pub fn fetch(key: Option<String>) -> Box<dyn Fn(Value, Tag) -> Option<Value> + 'static> {
    Box::new(move |value: Value, tag| {
        if let Some(key_given) = &key {
            value.get_data_by_key(key_given[..].spanned(tag.span))
        } else {
            Some(UntaggedValue::int(1).into_value(tag))
        }
    })
}
/// Maps every innermost value of a three-level table through an evaluator.
///
/// The evaluator comes from `fetch`: with `evaluator` set it looks that column up, otherwise
/// it yields the integer `1`. A value the evaluator returns nothing for also becomes `1`.
/// Anything that is not a table where a table is expected, at any of the three levels, is
/// replaced by an empty table. This function never returns an error.
pub fn evaluate(
values: &Value,
evaluator: Option<String>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();
let evaluate_with = match evaluator {
Some(keyfn) => fetch(Some(keyfn)),
None => fetch(None),
};
let results: Value = match values {
Value {
value: UntaggedValue::Table(datasets),
..
} => {
// Level 1: datasets.
let datasets: Vec<_> = datasets
.iter()
.map(|subsets| match subsets {
Value {
value: UntaggedValue::Table(subsets),
..
} => {
// Level 2: subsets of a dataset.
let subsets: Vec<_> = subsets
.clone()
.into_iter()
.map(|data| match data {
Value {
value: UntaggedValue::Table(data),
..
} => {
// Level 3: the individual values being evaluated.
let data: Vec<_> = data
.into_iter()
.map(|x| match evaluate_with(x, tag.clone()) {
Some(val) => val,
None => UntaggedValue::int(1).into_value(tag.clone()),
})
.collect();
UntaggedValue::Table(data).into_value(&tag)
}
_ => UntaggedValue::Table(vec![]).into_value(&tag),
})
.collect();
UntaggedValue::Table(subsets).into_value(&tag)
}
_ => UntaggedValue::Table(vec![]).into_value(&tag),
})
.collect();
UntaggedValue::Table(datasets).into_value(&tag)
}
_ => UntaggedValue::Table(vec![]).into_value(&tag),
};
Ok(results)
}
/// Adds up every value in `data`, starting from `Value::zero()`.
///
/// # Errors
///
/// * An "unexpected" error carrying `ERR_EMPTY_DATA` when `data` is empty.
/// * A labeled error pointing at the first value that is not a primitive.
pub fn sum(data: Vec<Value>) -> Result<Value, ShellError> {
    if data.is_empty() {
        return Err(ShellError::unexpected(ERR_EMPTY_DATA));
    }

    data.into_iter()
        .try_fold(Value::zero(), |total, value| match value.value {
            UntaggedValue::Primitive(_) => Ok(total + value),
            _ => Err(ShellError::labeled_error(
                "Attempted to compute the sum of a value that cannot be summed.",
                "value appears here",
                value.tag.span,
            )),
        })
}
/// Returns the greatest value in `data`, carrying an unknown tag.
///
/// # Errors
///
/// * An "unexpected" error carrying `ERR_EMPTY_DATA` when `data` is empty.
/// * An "unexpected" error naming both operands when two values cannot be compared.
pub fn max(data: Vec<Value>) -> Result<Value, ShellError> {
    let first = data
        .first()
        .ok_or_else(|| ShellError::unexpected(ERR_EMPTY_DATA))?;
    let mut biggest = first.value.clone();

    for candidate in &data {
        match compare_values(Operator::GreaterThan, &candidate.value, &biggest) {
            Ok(true) => biggest = candidate.value.clone(),
            Ok(false) => {}
            Err(_) => {
                return Err(ShellError::unexpected(format!(
                    "Could not compare\nleft: {:?}\nright: {:?}",
                    biggest, candidate.value
                )))
            }
        }
    }

    Ok(Value {
        value: biggest,
        tag: Tag::unknown(),
    })
}
/// Returns the smallest value in `data`, carrying an unknown tag.
///
/// # Errors
///
/// * An "unexpected" error carrying `ERR_EMPTY_DATA` when `data` is empty.
/// * An "unexpected" error naming both operands when two values cannot be compared.
pub fn min(data: Vec<Value>) -> Result<Value, ShellError> {
    let first = data
        .first()
        .ok_or_else(|| ShellError::unexpected(ERR_EMPTY_DATA))?;
    let mut smallest = first.value.clone();

    for candidate in &data {
        match compare_values(Operator::LessThan, &candidate.value, &smallest) {
            Ok(true) => smallest = candidate.value.clone(),
            Ok(false) => {}
            Err(_) => {
                return Err(ShellError::unexpected(format!(
                    "Could not compare\nleft: {:?}\nright: {:?}",
                    smallest, candidate.value
                )))
            }
        }
    }

    Ok(Value {
        value: smallest,
        tag: Tag::unknown(),
    })
}
/// Builds a reducing step from a starting factor and a calculator.
///
/// The returned closure evaluates `acc * acc_begin + calculator(datax)` and forwards any
/// error raised by `calculator`.
fn formula(
    acc_begin: Value,
    calculator: Box<dyn Fn(Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static>,
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    Box::new(move |acc, datax| -> Result<Value, ShellError> {
        // The product is formed before the calculator runs, as the arithmetic may panic.
        let carried = acc * acc_begin.clone();
        let total = calculator(datax)?;
        Ok(carried + total)
    })
}
/// Returns the reducing closure that implements `command`.
///
/// Summation (also the default) adds the values up via `formula` with a zero starting factor;
/// minimum and maximum ignore the accumulator and delegate to `min` and `max`.
pub fn reducer_for(
    command: Reduce,
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    match command {
        Reduce::Minimum => Box::new(|_, values| min(values)),
        Reduce::Maximum => Box::new(|_, values| max(values)),
        Reduce::Summation | Reduce::Default => {
            let summation = formula(Value::zero(), Box::new(sum));
            Box::new(summation)
        }
    }
}
/// Selects which reducing closure `reducer_for` builds.
pub enum Reduce {
/// Add the values up.
Summation,
/// Keep the smallest value.
Minimum,
/// Keep the greatest value.
Maximum,
/// Handled by `reducer_for` exactly like `Summation`.
Default,
}
/// Collapses every innermost table of a three-level table into a single integer.
///
/// `reducer` picks the reduction by name: `"sum"`, `"min"` or `"max"`; any other name, or no
/// name, selects the default. Each innermost table is reduced with a zero accumulator. Only
/// integer results are kept: a reducer error, a non-integer result, or an entry that is not a
/// table all become `0`. A dataset that is not a table becomes an empty table, as does a
/// `values` that is not a table. This function never returns an error.
pub fn reduce(
values: &Value,
reducer: Option<String>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();
let reduce_with = match reducer {
Some(cmd) if cmd == "sum" => reducer_for(Reduce::Summation),
Some(cmd) if cmd == "min" => reducer_for(Reduce::Minimum),
Some(cmd) if cmd == "max" => reducer_for(Reduce::Maximum),
Some(_) | None => reducer_for(Reduce::Default),
};
let results: Value = match values {
Value {
value: UntaggedValue::Table(datasets),
..
} => {
let datasets: Vec<_> = datasets
.iter()
.map(|subsets| {
// The accumulator is never updated: every reduction below starts from zero.
let acc = Value::zero();
match subsets {
Value {
value: UntaggedValue::Table(data),
..
} => {
let data = data
.iter()
.map(|d| {
if let Value {
value: UntaggedValue::Table(x),
..
} = d
{
// Anything other than a successful integer result falls back to 0.
if let Ok(Value {
value:
UntaggedValue::Primitive(Primitive::Int(computed)),
..
}) = reduce_with(acc.clone(), x.clone())
{
UntaggedValue::int(computed).into_value(&tag)
} else {
UntaggedValue::int(0).into_value(&tag)
}
} else {
UntaggedValue::int(0).into_value(&tag)
}
})
.collect::<Vec<_>>();
UntaggedValue::Table(data).into_value(&tag)
}
_ => UntaggedValue::Table(vec![]).into_value(&tag),
}
})
.collect();
UntaggedValue::Table(datasets).into_value(&tag)
}
_ => UntaggedValue::Table(vec![]).into_value(&tag),
};
Ok(results)
}
/// Finds the greatest value across all datasets of a two-level table.
///
/// Each dataset is folded to its greatest value starting from `Value::zero()`, and the
/// per-dataset results are folded the same way. A comparison that fails leaves the current
/// best untouched. A dataset that is not a table counts as `0`; a `values` that is not a
/// table yields `-1`. `_map_by_column_name` is accepted but not used. This function never
/// returns an error.
pub fn map_max(
values: &Value,
_map_by_column_name: Option<String>,
tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
let tag = tag.into();
let results: Value = match values {
Value {
value: UntaggedValue::Table(datasets),
..
} => {
// First pass: greatest value within each dataset.
let datasets: Vec<Value> = datasets
.iter()
.map(|subsets| match subsets {
Value {
value: UntaggedValue::Table(data),
..
} => data.iter().fold(Value::zero(), |acc, value| {
let left = &value.value;
let right = &acc.value;
if let Ok(is_greater_than) =
compare_values(Operator::GreaterThan, left, right)
{
if is_greater_than {
value.clone()
} else {
acc
}
} else {
acc
}
}),
_ => UntaggedValue::int(0).into_value(&tag),
})
.collect();
// Second pass: greatest of the per-dataset results.
datasets.into_iter().fold(Value::zero(), |max, value| {
let left = &value.value;
let right = &max.value;
if let Ok(is_greater_than) = compare_values(Operator::GreaterThan, left, right) {
if is_greater_than {
value
} else {
max
}
} else {
max
}
})
}
_ => UntaggedValue::int(-1).into_value(&tag),
};
Ok(results)
}
// Tests for the sorting, evaluating and reducing helpers, run against a small sample of
// committers tagged with a name, a country and a release date.
#[cfg(test)]
mod tests {
use super::{columns_sorted, evaluate, fetch, map_max, reduce, reducer_for, t_sort, Reduce};
use crate::commands::group_by::group;
use indexmap::IndexMap;
use nu_errors::ShellError;
use nu_protocol::{UntaggedValue, Value};
use nu_source::*;
use num_bigint::BigInt;
use num_traits::Zero;
// Builds an untagged integer value.
fn int(s: impl Into<BigInt>) -> Value {
UntaggedValue::int(s).into_untagged_value()
}
// Builds an untagged string value.
fn string(input: impl Into<String>) -> Value {
UntaggedValue::string(input.into()).into_untagged_value()
}
// Builds an untagged row value from the given entries.
fn row(entries: IndexMap<String, Value>) -> Value {
UntaggedValue::row(entries).into_untagged_value()
}
// Builds an untagged table value from the given list.
fn table(list: &[Value]) -> Value {
UntaggedValue::table(list).into_untagged_value()
}
// The sample rows grouped by their `date` column.
fn nu_releases_grouped_by_date() -> Result<Value, ShellError> {
let key = Some(String::from("date").tagged_unknown());
let sample = table(&nu_releases_committers());
group(&key, &sample, Tag::unknown())
}
// The grouped sample passed through `t_sort` with no split.
fn nu_releases_sorted_by_date() -> Result<Value, ShellError> {
let key = String::from("date").tagged(Tag::unknown());
t_sort(
Some(key),
None,
&nu_releases_grouped_by_date()?,
Tag::unknown(),
)
}
// The sorted sample evaluated without an evaluator column.
fn nu_releases_evaluated_by_default_one() -> Result<Value, ShellError> {
evaluate(&nu_releases_sorted_by_date()?, None, Tag::unknown())
}
// The evaluated sample reduced with the "sum" reducer.
fn nu_releases_reduced_by_sum() -> Result<Value, ShellError> {
reduce(
&nu_releases_evaluated_by_default_one()?,
Some(String::from("sum")),
Tag::unknown(),
)
}
// Sample rows: three committers (AR, JT, YK), each appearing once on each of three dates.
fn nu_releases_committers() -> Vec<Value> {
vec![
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")},
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")},
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")},
),
]
}
// Labels are expected in chronological order, not alphabetical order.
#[test]
fn show_columns_sorted_given_a_column_to_sort_by() -> Result<(), ShellError> {
let by_column = String::from("date").tagged(Tag::unknown());
assert_eq!(
columns_sorted(
Some(by_column),
&nu_releases_grouped_by_date()?,
Tag::unknown()
),
vec![
"August 23-2019".to_string().tagged_unknown(),
"September 24-2019".to_string().tagged_unknown(),
"October 10-2019".to_string().tagged_unknown()
]
);
Ok(())
}
// One outer table (the single split) holding one table of rows per date, in date order.
#[test]
fn sorts_the_tables() -> Result<(), ShellError> {
let group_by = String::from("date").tagged(Tag::unknown());
assert_eq!(
t_sort(
Some(group_by),
None,
&nu_releases_grouped_by_date()?,
Tag::unknown()
)?,
table(&[table(&[
table(&[
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("August 23-2019")}
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("August 23-2019")}
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("August 23-2019")}
)
]),
table(&[
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("September 24-2019")}
),
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("September 24-2019")}
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("September 24-2019")}
)
]),
table(&[
row(
indexmap! {"name".into() => string("YK"), "country".into() => string("US"), "date".into() => string("October 10-2019")}
),
row(
indexmap! {"name".into() => string("JT"), "country".into() => string("NZ"), "date".into() => string("October 10-2019")}
),
row(
indexmap! {"name".into() => string("AR"), "country".into() => string("EC"), "date".into() => string("October 10-2019")}
)
]),
]),])
);
Ok(())
}
#[test]
fn evaluator_fetches_by_column_if_supplied_a_column_name() -> Result<(), ShellError> {
let subject = row(indexmap! { "name".into() => string("andres") });
let evaluator = fetch(Some(String::from("name")));
assert_eq!(evaluator(subject, Tag::unknown()), Some(string("andres")));
Ok(())
}
#[test]
fn evaluator_returns_1_if_no_column_name_given() -> Result<(), ShellError> {
let subject = row(indexmap! { "name".into() => string("andres") });
let evaluator = fetch(None);
assert_eq!(
evaluator(subject, Tag::unknown()),
Some(UntaggedValue::int(1).into_untagged_value())
);
Ok(())
}
// Without an evaluator column every row counts as 1.
#[test]
fn evaluates_the_tables() -> Result<(), ShellError> {
assert_eq!(
evaluate(&nu_releases_sorted_by_date()?, None, Tag::unknown())?,
table(&[table(&[
table(&[int(1), int(1), int(1)]),
table(&[int(1), int(1), int(1)]),
table(&[int(1), int(1), int(1)]),
]),])
);
Ok(())
}
// With an evaluator column every row is replaced by that column's value.
#[test]
fn evaluates_the_tables_with_custom_evaluator() -> Result<(), ShellError> {
let eval = String::from("name");
assert_eq!(
evaluate(&nu_releases_sorted_by_date()?, Some(eval), Tag::unknown())?,
table(&[table(&[
table(&[string("AR"), string("JT"), string("YK")]),
table(&[string("AR"), string("YK"), string("JT")]),
table(&[string("YK"), string("JT"), string("AR")]),
]),])
);
Ok(())
}
#[test]
fn reducer_computes_given_a_sum_command() -> Result<(), ShellError> {
let subject = vec![int(1), int(1), int(1)];
let action = reducer_for(Reduce::Summation);
assert_eq!(action(Value::zero(), subject)?, int(3));
Ok(())
}
// Three rows per date, each counting as 1, sum to 3 per date.
#[test]
fn reducer_computes() -> Result<(), ShellError> {
assert_eq!(
reduce(
&nu_releases_evaluated_by_default_one()?,
Some(String::from("sum")),
Tag::unknown()
)?,
table(&[table(&[int(3), int(3), int(3)])])
);
Ok(())
}
#[test]
fn maps_and_gets_max_value() -> Result<(), ShellError> {
assert_eq!(
map_max(&nu_releases_reduced_by_sum()?, None, Tag::unknown())?,
int(3)
);
Ok(())
}
}