Data summarize reporting overhaul. (#2299)

Refactored out most of the internal work for summarizing data, opening
the door for generating charts from it. A model is introduced
to hold the information needed for a summary; the Histogram command is
an example of partial usage. This is the beginning.

Removed the implicit arithmetic traits on Value and Primitive to avoid
mixed-type panics. The std operation traits can't fail, and we
can't guarantee that. We can handle this gracefully now, since compute_values
was introduced after the parser changes four months ago. The handling
logic should be taken care of either explicitly or in compute_values.

The zero identity trait was also removed (and implementing this forced
us to also implement Add, Mult, etc)

Also: the `math` operations now remove a column from the output if that column is not computable:

```
> ls | math sum
──────┬──────────
 size │ 150.9 KB
──────┴──────────
```
This commit is contained in:
Andrés N. Robalino
2020-08-03 17:47:19 -05:00
committed by GitHub
parent eeb9b4edcb
commit 028fc9b9cd
29 changed files with 1396 additions and 1512 deletions

View File

@ -1,14 +1,17 @@
use crate::prelude::*;
use crate::commands::math::reducers::{reducer_for, Reduce};
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use bigdecimal::{FromPrimitive, Zero};
use nu_errors::ShellError;
use nu_protocol::{
hir::{convert_number_to_u64, Number, Operator},
Primitive, Signature, UntaggedValue, Value,
};
use bigdecimal::FromPrimitive;
pub struct SubCommand;
#[async_trait]
@ -55,19 +58,59 @@ impl WholeStreamCommand for SubCommand {
}
}
/// Re-wraps an integer value as a filesize value.
///
/// Returns `Some` holding a `Primitive::Filesize` built from the integer via
/// `convert_number_to_u64`, or `None` when `value` is not a `Primitive::Int`.
/// The resulting value is untagged.
fn to_byte(value: &Value) -> Option<Value> {
    if let UntaggedValue::Primitive(Primitive::Int(num)) = &value.value {
        // `Number::Int` takes ownership, so the borrowed integer is cloned.
        let size = convert_number_to_u64(&Number::Int(num.clone()));
        Some(UntaggedValue::Primitive(Primitive::Filesize(size)).into_untagged_value())
    } else {
        None
    }
}
pub fn average(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let sum = reducer_for(Reduce::Summation);
let number = BigDecimal::from_usize(values.len()).ok_or_else(|| {
ShellError::labeled_error(
"could not convert to big decimal",
"could not convert to big decimal",
&name.span,
)
ShellError::labeled_error("nothing to average", "nothing to average", &name.span)
})?;
let total_rows = UntaggedValue::decimal(number);
let total = sum(Value::zero(), values.to_vec())?;
let are_bytes = values
.get(0)
.ok_or_else(|| {
ShellError::unexpected("Cannot perform aggregate math operation on empty data")
})?
.is_filesize();
let total = if are_bytes {
to_byte(&sum(
UntaggedValue::int(0).into_untagged_value(),
values
.to_vec()
.iter()
.map(|v| match v {
Value {
value: UntaggedValue::Primitive(Primitive::Filesize(num)),
..
} => UntaggedValue::int(*num as usize).into_untagged_value(),
other => other.clone(),
})
.collect::<Vec<_>>(),
)?)
.ok_or_else(|| {
ShellError::labeled_error(
"could not convert to big decimal",
"could not convert to big decimal",
&name.span,
)
})
} else {
sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec())
}?;
match total {
Value {

View File

@ -1,7 +1,7 @@
use crate::commands::math::reducers::{reducer_for, Reduce};
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use nu_errors::ShellError;
use nu_protocol::{Signature, UntaggedValue, Value};

View File

@ -1,8 +1,8 @@
use crate::commands::math::reducers::{reducer_for, Reduce};
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use bigdecimal::{FromPrimitive, Zero};
use bigdecimal::FromPrimitive;
use nu_errors::ShellError;
use nu_protocol::{
hir::{convert_number_to_u64, Number, Operator},
@ -130,7 +130,7 @@ fn compute_average(values: &[Value], name: impl Into<Tag>) -> Result<Value, Shel
)
})?;
let total_rows = UntaggedValue::decimal(number);
let total = sum(Value::zero(), values.to_vec())?;
let total = sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec())?;
match total {
Value {

View File

@ -1,7 +1,7 @@
use crate::commands::math::reducers::{reducer_for, Reduce};
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use nu_errors::ShellError;
use nu_protocol::{Signature, UntaggedValue, Value};

View File

@ -7,9 +7,11 @@ pub mod min;
pub mod mode;
pub mod stddev;
pub mod sum;
pub mod utils;
pub mod variance;
mod reducers;
mod utils;
pub use avg::SubCommand as MathAverage;
pub use command::Command as Math;
pub use eval::SubCommand as MathEval;

View File

@ -0,0 +1,135 @@
use crate::data::value::{compare_values, compute_values};
use nu_errors::ShellError;
use nu_protocol::hir::Operator;
use nu_protocol::{UntaggedValue, Value};
use nu_source::{SpannedItem, Tag};
// Re-usable error messages
const ERR_EMPTY_DATA: &str = "Cannot perform aggregate math operation on empty data";
/// Builds a reducer closure around `calculator`.
///
/// The returned closure takes an accumulator and a batch of values and:
/// 1. multiplies the accumulator by `acc_begin`,
/// 2. runs `calculator` over the batch,
/// 3. adds the calculator's result to the product from step 1.
///
/// # Errors
///
/// If `compute_values` rejects either arithmetic step, the pair of type names
/// it reports is turned into a `ShellError::coerce_error` with unknown spans.
/// An error returned by `calculator` is propagated unchanged. The
/// multiplication is attempted first, so `calculator` is not invoked when it
/// fails.
fn formula(
    acc_begin: Value,
    calculator: Box<dyn Fn(Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static>,
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    Box::new(move |acc, datax| -> Result<Value, ShellError> {
        let seeded = compute_values(Operator::Multiply, &acc, &acc_begin)
            .map_err(|(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned_unknown(),
                    right_type.spanned_unknown(),
                )
            })?
            .into_untagged_value();

        let total = calculator(datax)?;

        compute_values(Operator::Plus, &seeded, &total)
            .map(|combined| combined.into_untagged_value())
            .map_err(|(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned_unknown(),
                    right_type.spanned_unknown(),
                )
            })
    })
}
/// Returns the reducer closure that implements `command`.
///
/// Every reducer has the shape `(accumulator, values) -> Result<Value, ShellError>`:
///
/// * `Reduce::Summation` and `Reduce::Default` use `formula` with an integer
///   zero seed and `sum` as the calculator.
/// * `Reduce::Minimum` and `Reduce::Maximum` ignore the accumulator argument
///   and delegate to `min` / `max` over the values.
pub fn reducer_for(
    command: Reduce,
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
    match command {
        // `formula` already hands back the boxed closure type, so it is
        // returned as-is instead of being wrapped in a second `Box`.
        Reduce::Summation | Reduce::Default => {
            formula(UntaggedValue::int(0).into_untagged_value(), Box::new(sum))
        }
        Reduce::Minimum => Box::new(|_, values| min(values)),
        Reduce::Maximum => Box::new(|_, values| max(values)),
    }
}
/// Selects which aggregate reducer `reducer_for` should build.
pub enum Reduce {
/// Add the values together (built from `sum`).
Summation,
/// Pick the smallest value (built from `min`).
Minimum,
/// Pick the largest value (built from `max`).
Maximum,
/// Handled by `reducer_for` exactly like `Summation`.
Default,
}
/// Adds up `data`, starting from an untagged integer zero.
///
/// Values are folded left to right with `compute_values(Operator::Plus, ..)`,
/// and the result of each step is re-wrapped as an untagged value. An empty
/// `data` yields the zero seed.
///
/// # Errors
///
/// * A non-primitive value stops the fold with a labeled error pointing at
///   that value's span.
/// * A pair of operands that `compute_values` rejects stops the fold with a
///   `ShellError::coerce_error` carrying the reported type names.
pub fn sum(data: Vec<Value>) -> Result<Value, ShellError> {
    data.into_iter().try_fold(
        UntaggedValue::int(0).into_untagged_value(),
        |running, item| match item.value {
            UntaggedValue::Primitive(_) => compute_values(Operator::Plus, &running, &item)
                .map(|added| added.into_untagged_value())
                .map_err(|(left_type, right_type)| {
                    ShellError::coerce_error(
                        left_type.spanned_unknown(),
                        right_type.spanned_unknown(),
                    )
                }),
            _ => Err(ShellError::labeled_error(
                "Attempted to compute the sum of a value that cannot be summed.",
                "value appears here",
                item.tag.span,
            )),
        },
    )
}
/// Returns the largest value in `data`, tagged with `Tag::unknown()`.
///
/// The first element is the initial candidate. Every element (the first one
/// included) is then compared against the current candidate with
/// `Operator::GreaterThan`, and replaces it only when strictly greater.
///
/// # Errors
///
/// * `data` is empty.
/// * `compare_values` cannot compare an element with the current candidate.
pub fn max(data: Vec<Value>) -> Result<Value, ShellError> {
    let mut biggest = match data.first() {
        Some(head) => head.value.clone(),
        None => return Err(ShellError::unexpected(ERR_EMPTY_DATA)),
    };

    for candidate in &data {
        match compare_values(Operator::GreaterThan, &candidate.value, &biggest) {
            Ok(true) => biggest = candidate.value.clone(),
            Ok(false) => {}
            Err(_) => {
                return Err(ShellError::unexpected(format!(
                    "Could not compare\nleft: {:?}\nright: {:?}",
                    biggest, candidate.value
                )))
            }
        }
    }

    Ok(Value {
        value: biggest,
        tag: Tag::unknown(),
    })
}
/// Returns the smallest value in `data`, tagged with `Tag::unknown()`.
///
/// The first element is the initial candidate. Every element (the first one
/// included) is then compared against the current candidate with
/// `Operator::LessThan`, and replaces it only when strictly smaller.
///
/// # Errors
///
/// * `data` is empty.
/// * `compare_values` cannot compare an element with the current candidate.
pub fn min(data: Vec<Value>) -> Result<Value, ShellError> {
    let mut smallest = match data.first() {
        Some(head) => head.value.clone(),
        None => return Err(ShellError::unexpected(ERR_EMPTY_DATA)),
    };

    for candidate in &data {
        let is_smaller = match compare_values(Operator::LessThan, &candidate.value, &smallest) {
            Ok(outcome) => outcome,
            Err(_) => {
                return Err(ShellError::unexpected(format!(
                    "Could not compare\nleft: {:?}\nright: {:?}",
                    smallest, candidate.value
                )))
            }
        };

        if is_smaller {
            smallest = candidate.value.clone();
        }
    }

    Ok(Value {
        value: smallest,
        tag: Tag::unknown(),
    })
}

View File

@ -1,10 +1,13 @@
use crate::commands::math::reducers::{reducer_for, Reduce};
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use nu_errors::ShellError;
use nu_protocol::{Dictionary, Signature, UntaggedValue, Value};
use num_traits::identities::Zero;
use nu_protocol::{
hir::{convert_number_to_u64, Number},
Primitive, Signature, UntaggedValue, Value,
};
pub struct SubCommand;
@ -59,37 +62,63 @@ impl WholeStreamCommand for SubCommand {
}
}
/// Converts an integer value into the equivalent filesize value.
///
/// Only `Primitive::Int` is accepted; any other value yields `None`. The
/// integer is passed through `convert_number_to_u64` and returned as an
/// untagged `Primitive::Filesize`.
fn to_byte(value: &Value) -> Option<Value> {
    // Guard clause: anything that is not an integer cannot be converted.
    let num = match &value.value {
        UntaggedValue::Primitive(Primitive::Int(num)) => num,
        _ => return None,
    };

    let bytes = convert_number_to_u64(&Number::Int(num.clone()));
    Some(UntaggedValue::Primitive(Primitive::Filesize(bytes)).into_untagged_value())
}
pub fn summation(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let sum = reducer_for(Reduce::Summation);
if values.iter().all(|v| v.is_primitive()) {
Ok(sum(Value::zero(), values.to_vec())?)
} else {
let mut column_values = IndexMap::new();
let first = values.get(0).ok_or_else(|| {
ShellError::unexpected("Cannot perform aggregate math operation on empty data")
})?;
for value in values {
if let UntaggedValue::Row(row_dict) = value.value.clone() {
for (key, value) in row_dict.entries.iter() {
column_values
.entry(key.clone())
.and_modify(|v: &mut Vec<Value>| v.push(value.clone()))
.or_insert(vec![value.clone()]);
}
};
}
let mut column_totals = IndexMap::new();
for (col_name, col_vals) in column_values {
let sum = sum(Value::zero(), col_vals)?;
column_totals.insert(col_name, sum);
}
Ok(UntaggedValue::Row(Dictionary {
entries: column_totals,
})
.into_value(name))
match first {
v if v.is_filesize() => to_byte(&sum(
UntaggedValue::int(0).into_untagged_value(),
values
.to_vec()
.iter()
.map(|v| match v {
Value {
value: UntaggedValue::Primitive(Primitive::Filesize(num)),
..
} => UntaggedValue::int(*num as usize).into_untagged_value(),
other => other.clone(),
})
.collect::<Vec<_>>(),
)?)
.ok_or_else(|| {
ShellError::labeled_error(
"could not convert to big decimal",
"could not convert to big decimal",
&name.span,
)
}),
// v is nothing primitive
v if v.is_none() => sum(
UntaggedValue::int(0).into_untagged_value(),
values
.to_vec()
.iter()
.map(|v| match v {
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..
} => UntaggedValue::int(0).into_untagged_value(),
other => other.clone(),
})
.collect::<Vec<_>>(),
),
_ => sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec()),
}
}

View File

@ -13,6 +13,7 @@ pub async fn run_with_function(
mf: MathFunction,
) -> Result<OutputStream, ShellError> {
let values: Vec<Value> = input.drain_vec().await;
let res = calculate(&values, &name, mf);
match res {
Ok(v) => {
@ -50,7 +51,17 @@ pub fn calculate(values: &[Value], name: &Tag, mf: MathFunction) -> Result<Value
// The mathematical function operates over the columns of the table
let mut column_totals = IndexMap::new();
for (col_name, col_vals) in column_values {
column_totals.insert(col_name, mf(&col_vals, &name)?);
if let Ok(out) = mf(&col_vals, &name) {
column_totals.insert(col_name, out);
}
}
if column_totals.keys().len() == 0 {
return Err(ShellError::labeled_error(
"Attempted to compute values that can't be operated on",
"value appears here",
name.span,
));
}
Ok(UntaggedValue::Row(Dictionary {

View File

@ -2,7 +2,7 @@ use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::data::value::compute_values;
use crate::prelude::*;
use bigdecimal::{FromPrimitive, Zero};
use bigdecimal::FromPrimitive;
use nu_errors::ShellError;
use nu_protocol::{hir::Operator, Primitive, Signature, UntaggedValue, Value};
@ -60,8 +60,8 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
&name.span,
)
})?;
let mut sum_x = Value::zero();
let mut sum_x2 = Value::zero();
let mut sum_x = UntaggedValue::int(0).into_untagged_value();
let mut sum_x2 = UntaggedValue::int(0).into_untagged_value();
for value in values {
let v = match value {
Value {
@ -87,7 +87,17 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let v_squared = compute_values(Operator::Multiply, &v, &v);
match v_squared {
// X^2
Ok(x2) => sum_x2 = sum_x2 + x2.into_untagged_value(),
Ok(x2) => {
sum_x2 = match compute_values(Operator::Plus, &sum_x2, &x2) {
Ok(v) => v.into_untagged_value(),
Err((left_type, right_type)) => {
return Err(ShellError::coerce_error(
left_type.spanned(name.span),
right_type.spanned(name.span),
))
}
};
}
Err((left_type, right_type)) => {
return Err(ShellError::coerce_error(
left_type.spanned(value.tag.span),
@ -95,7 +105,15 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
))
}
};
sum_x = sum_x + v.into_untagged_value();
sum_x = match compute_values(Operator::Plus, &sum_x, &v) {
Ok(v) => v.into_untagged_value(),
Err((left_type, right_type)) => {
return Err(ShellError::coerce_error(
left_type.spanned(name.span),
right_type.spanned(name.span),
))
}
};
}
let sum_x_squared = match compute_values(Operator::Multiply, &sum_x, &sum_x) {