mirror of
https://github.com/nushell/nushell.git
synced 2025-08-10 09:48:18 +02:00
Data summarize reporting overhaul. (#2299)
Refactored out most of internal work for summarizing data opening the door for generating charts from it. A model is introduced to hold information needed for a summary, Histogram command is an example of a partial usage. This is the beginning. Removed implicit arithmetic traits on Value and Primitive to avoid mixed types panics. The std operations traits can't fail and we can't guarantee that. We can handle gracefully now since compute_values was introduced after the parser changes four months ago. The handling logic should be taken care of either explicitly or in compute_values. The zero identity trait was also removed (and implementing this forced us to also implement Add, Mult, etc) Also: the `math` operations now remove in the output if a given column is not computable: ``` > ls | math sum ──────┬────────── size │ 150.9 KB ──────┴────────── ```
This commit is contained in:
committed by
GitHub
parent
eeb9b4edcb
commit
028fc9b9cd
@ -1,14 +1,17 @@
|
||||
use crate::prelude::*;
|
||||
|
||||
use crate::commands::math::reducers::{reducer_for, Reduce};
|
||||
use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use crate::utils::data_processing::{reducer_for, Reduce};
|
||||
use bigdecimal::{FromPrimitive, Zero};
|
||||
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
hir::{convert_number_to_u64, Number, Operator},
|
||||
Primitive, Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use bigdecimal::FromPrimitive;
|
||||
|
||||
pub struct SubCommand;
|
||||
|
||||
#[async_trait]
|
||||
@ -55,19 +58,59 @@ impl WholeStreamCommand for SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
fn to_byte(value: &Value) -> Option<Value> {
|
||||
match &value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(num)) => Some(
|
||||
UntaggedValue::Primitive(Primitive::Filesize(convert_number_to_u64(&Number::Int(
|
||||
num.clone(),
|
||||
))))
|
||||
.into_untagged_value(),
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn average(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
|
||||
let sum = reducer_for(Reduce::Summation);
|
||||
|
||||
let number = BigDecimal::from_usize(values.len()).ok_or_else(|| {
|
||||
ShellError::labeled_error(
|
||||
"could not convert to big decimal",
|
||||
"could not convert to big decimal",
|
||||
&name.span,
|
||||
)
|
||||
ShellError::labeled_error("nothing to average", "nothing to average", &name.span)
|
||||
})?;
|
||||
|
||||
let total_rows = UntaggedValue::decimal(number);
|
||||
let total = sum(Value::zero(), values.to_vec())?;
|
||||
|
||||
let are_bytes = values
|
||||
.get(0)
|
||||
.ok_or_else(|| {
|
||||
ShellError::unexpected("Cannot perform aggregate math operation on empty data")
|
||||
})?
|
||||
.is_filesize();
|
||||
|
||||
let total = if are_bytes {
|
||||
to_byte(&sum(
|
||||
UntaggedValue::int(0).into_untagged_value(),
|
||||
values
|
||||
.to_vec()
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Filesize(num)),
|
||||
..
|
||||
} => UntaggedValue::int(*num as usize).into_untagged_value(),
|
||||
other => other.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)?)
|
||||
.ok_or_else(|| {
|
||||
ShellError::labeled_error(
|
||||
"could not convert to big decimal",
|
||||
"could not convert to big decimal",
|
||||
&name.span,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec())
|
||||
}?;
|
||||
|
||||
match total {
|
||||
Value {
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::commands::math::reducers::{reducer_for, Reduce};
|
||||
use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use crate::utils::data_processing::{reducer_for, Reduce};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{Signature, UntaggedValue, Value};
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
use crate::commands::math::reducers::{reducer_for, Reduce};
|
||||
use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use crate::utils::data_processing::{reducer_for, Reduce};
|
||||
use bigdecimal::{FromPrimitive, Zero};
|
||||
use bigdecimal::FromPrimitive;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
hir::{convert_number_to_u64, Number, Operator},
|
||||
@ -130,7 +130,7 @@ fn compute_average(values: &[Value], name: impl Into<Tag>) -> Result<Value, Shel
|
||||
)
|
||||
})?;
|
||||
let total_rows = UntaggedValue::decimal(number);
|
||||
let total = sum(Value::zero(), values.to_vec())?;
|
||||
let total = sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec())?;
|
||||
|
||||
match total {
|
||||
Value {
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::commands::math::reducers::{reducer_for, Reduce};
|
||||
use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use crate::utils::data_processing::{reducer_for, Reduce};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{Signature, UntaggedValue, Value};
|
||||
|
||||
|
@ -7,9 +7,11 @@ pub mod min;
|
||||
pub mod mode;
|
||||
pub mod stddev;
|
||||
pub mod sum;
|
||||
pub mod utils;
|
||||
pub mod variance;
|
||||
|
||||
mod reducers;
|
||||
mod utils;
|
||||
|
||||
pub use avg::SubCommand as MathAverage;
|
||||
pub use command::Command as Math;
|
||||
pub use eval::SubCommand as MathEval;
|
||||
|
135
crates/nu-cli/src/commands/math/reducers.rs
Normal file
135
crates/nu-cli/src/commands/math/reducers.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use crate::data::value::{compare_values, compute_values};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::hir::Operator;
|
||||
use nu_protocol::{UntaggedValue, Value};
|
||||
use nu_source::{SpannedItem, Tag};
|
||||
|
||||
// Re-usable error messages
|
||||
const ERR_EMPTY_DATA: &str = "Cannot perform aggregate math operation on empty data";
|
||||
|
||||
fn formula(
|
||||
acc_begin: Value,
|
||||
calculator: Box<dyn Fn(Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static>,
|
||||
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
|
||||
Box::new(move |acc, datax| -> Result<Value, ShellError> {
|
||||
let result = match compute_values(Operator::Multiply, &acc, &acc_begin) {
|
||||
Ok(v) => v.into_untagged_value(),
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned_unknown(),
|
||||
right_type.spanned_unknown(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
match calculator(datax) {
|
||||
Ok(total) => Ok(match compute_values(Operator::Plus, &result, &total) {
|
||||
Ok(v) => v.into_untagged_value(),
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned_unknown(),
|
||||
right_type.spanned_unknown(),
|
||||
))
|
||||
}
|
||||
}),
|
||||
Err(reason) => Err(reason),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn reducer_for(
|
||||
command: Reduce,
|
||||
) -> Box<dyn Fn(Value, Vec<Value>) -> Result<Value, ShellError> + Send + Sync + 'static> {
|
||||
match command {
|
||||
Reduce::Summation | Reduce::Default => Box::new(formula(
|
||||
UntaggedValue::int(0).into_untagged_value(),
|
||||
Box::new(sum),
|
||||
)),
|
||||
Reduce::Minimum => Box::new(|_, values| min(values)),
|
||||
Reduce::Maximum => Box::new(|_, values| max(values)),
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Reduce {
|
||||
Summation,
|
||||
Minimum,
|
||||
Maximum,
|
||||
Default,
|
||||
}
|
||||
|
||||
pub fn sum(data: Vec<Value>) -> Result<Value, ShellError> {
|
||||
let mut acc = UntaggedValue::int(0).into_untagged_value();
|
||||
for value in data {
|
||||
match value.value {
|
||||
UntaggedValue::Primitive(_) => {
|
||||
acc = match compute_values(Operator::Plus, &acc, &value) {
|
||||
Ok(v) => v.into_untagged_value(),
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned_unknown(),
|
||||
right_type.spanned_unknown(),
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Attempted to compute the sum of a value that cannot be summed.",
|
||||
"value appears here",
|
||||
value.tag.span,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(acc)
|
||||
}
|
||||
|
||||
pub fn max(data: Vec<Value>) -> Result<Value, ShellError> {
|
||||
let mut biggest = data
|
||||
.first()
|
||||
.ok_or_else(|| ShellError::unexpected(ERR_EMPTY_DATA))?
|
||||
.value
|
||||
.clone();
|
||||
|
||||
for value in data.iter() {
|
||||
if let Ok(greater_than) = compare_values(Operator::GreaterThan, &value.value, &biggest) {
|
||||
if greater_than {
|
||||
biggest = value.value.clone();
|
||||
}
|
||||
} else {
|
||||
return Err(ShellError::unexpected(format!(
|
||||
"Could not compare\nleft: {:?}\nright: {:?}",
|
||||
biggest, value.value
|
||||
)));
|
||||
}
|
||||
}
|
||||
Ok(Value {
|
||||
value: biggest,
|
||||
tag: Tag::unknown(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn min(data: Vec<Value>) -> Result<Value, ShellError> {
|
||||
let mut smallest = data
|
||||
.first()
|
||||
.ok_or_else(|| ShellError::unexpected(ERR_EMPTY_DATA))?
|
||||
.value
|
||||
.clone();
|
||||
|
||||
for value in data.iter() {
|
||||
if let Ok(greater_than) = compare_values(Operator::LessThan, &value.value, &smallest) {
|
||||
if greater_than {
|
||||
smallest = value.value.clone();
|
||||
}
|
||||
} else {
|
||||
return Err(ShellError::unexpected(format!(
|
||||
"Could not compare\nleft: {:?}\nright: {:?}",
|
||||
smallest, value.value
|
||||
)));
|
||||
}
|
||||
}
|
||||
Ok(Value {
|
||||
value: smallest,
|
||||
tag: Tag::unknown(),
|
||||
})
|
||||
}
|
@ -1,10 +1,13 @@
|
||||
use crate::commands::math::reducers::{reducer_for, Reduce};
|
||||
use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::prelude::*;
|
||||
use crate::utils::data_processing::{reducer_for, Reduce};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{Dictionary, Signature, UntaggedValue, Value};
|
||||
use num_traits::identities::Zero;
|
||||
|
||||
use nu_protocol::{
|
||||
hir::{convert_number_to_u64, Number},
|
||||
Primitive, Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct SubCommand;
|
||||
|
||||
@ -59,37 +62,63 @@ impl WholeStreamCommand for SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
fn to_byte(value: &Value) -> Option<Value> {
|
||||
match &value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(num)) => Some(
|
||||
UntaggedValue::Primitive(Primitive::Filesize(convert_number_to_u64(&Number::Int(
|
||||
num.clone(),
|
||||
))))
|
||||
.into_untagged_value(),
|
||||
),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn summation(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
|
||||
let sum = reducer_for(Reduce::Summation);
|
||||
|
||||
if values.iter().all(|v| v.is_primitive()) {
|
||||
Ok(sum(Value::zero(), values.to_vec())?)
|
||||
} else {
|
||||
let mut column_values = IndexMap::new();
|
||||
let first = values.get(0).ok_or_else(|| {
|
||||
ShellError::unexpected("Cannot perform aggregate math operation on empty data")
|
||||
})?;
|
||||
|
||||
for value in values {
|
||||
if let UntaggedValue::Row(row_dict) = value.value.clone() {
|
||||
for (key, value) in row_dict.entries.iter() {
|
||||
column_values
|
||||
.entry(key.clone())
|
||||
.and_modify(|v: &mut Vec<Value>| v.push(value.clone()))
|
||||
.or_insert(vec![value.clone()]);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let mut column_totals = IndexMap::new();
|
||||
|
||||
for (col_name, col_vals) in column_values {
|
||||
let sum = sum(Value::zero(), col_vals)?;
|
||||
|
||||
column_totals.insert(col_name, sum);
|
||||
}
|
||||
|
||||
Ok(UntaggedValue::Row(Dictionary {
|
||||
entries: column_totals,
|
||||
})
|
||||
.into_value(name))
|
||||
match first {
|
||||
v if v.is_filesize() => to_byte(&sum(
|
||||
UntaggedValue::int(0).into_untagged_value(),
|
||||
values
|
||||
.to_vec()
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Filesize(num)),
|
||||
..
|
||||
} => UntaggedValue::int(*num as usize).into_untagged_value(),
|
||||
other => other.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)?)
|
||||
.ok_or_else(|| {
|
||||
ShellError::labeled_error(
|
||||
"could not convert to big decimal",
|
||||
"could not convert to big decimal",
|
||||
&name.span,
|
||||
)
|
||||
}),
|
||||
// v is nothing primitive
|
||||
v if v.is_none() => sum(
|
||||
UntaggedValue::int(0).into_untagged_value(),
|
||||
values
|
||||
.to_vec()
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
..
|
||||
} => UntaggedValue::int(0).into_untagged_value(),
|
||||
other => other.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
),
|
||||
_ => sum(UntaggedValue::int(0).into_untagged_value(), values.to_vec()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@ pub async fn run_with_function(
|
||||
mf: MathFunction,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let values: Vec<Value> = input.drain_vec().await;
|
||||
|
||||
let res = calculate(&values, &name, mf);
|
||||
match res {
|
||||
Ok(v) => {
|
||||
@ -50,7 +51,17 @@ pub fn calculate(values: &[Value], name: &Tag, mf: MathFunction) -> Result<Value
|
||||
// The mathematical function operates over the columns of the table
|
||||
let mut column_totals = IndexMap::new();
|
||||
for (col_name, col_vals) in column_values {
|
||||
column_totals.insert(col_name, mf(&col_vals, &name)?);
|
||||
if let Ok(out) = mf(&col_vals, &name) {
|
||||
column_totals.insert(col_name, out);
|
||||
}
|
||||
}
|
||||
|
||||
if column_totals.keys().len() == 0 {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Attempted to compute values that can't be operated on",
|
||||
"value appears here",
|
||||
name.span,
|
||||
));
|
||||
}
|
||||
|
||||
Ok(UntaggedValue::Row(Dictionary {
|
||||
|
@ -2,7 +2,7 @@ use crate::commands::math::utils::run_with_function;
|
||||
use crate::commands::WholeStreamCommand;
|
||||
use crate::data::value::compute_values;
|
||||
use crate::prelude::*;
|
||||
use bigdecimal::{FromPrimitive, Zero};
|
||||
use bigdecimal::FromPrimitive;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{hir::Operator, Primitive, Signature, UntaggedValue, Value};
|
||||
|
||||
@ -60,8 +60,8 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
|
||||
&name.span,
|
||||
)
|
||||
})?;
|
||||
let mut sum_x = Value::zero();
|
||||
let mut sum_x2 = Value::zero();
|
||||
let mut sum_x = UntaggedValue::int(0).into_untagged_value();
|
||||
let mut sum_x2 = UntaggedValue::int(0).into_untagged_value();
|
||||
for value in values {
|
||||
let v = match value {
|
||||
Value {
|
||||
@ -87,7 +87,17 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
|
||||
let v_squared = compute_values(Operator::Multiply, &v, &v);
|
||||
match v_squared {
|
||||
// X^2
|
||||
Ok(x2) => sum_x2 = sum_x2 + x2.into_untagged_value(),
|
||||
Ok(x2) => {
|
||||
sum_x2 = match compute_values(Operator::Plus, &sum_x2, &x2) {
|
||||
Ok(v) => v.into_untagged_value(),
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned(name.span),
|
||||
right_type.spanned(name.span),
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned(value.tag.span),
|
||||
@ -95,7 +105,15 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
|
||||
))
|
||||
}
|
||||
};
|
||||
sum_x = sum_x + v.into_untagged_value();
|
||||
sum_x = match compute_values(Operator::Plus, &sum_x, &v) {
|
||||
Ok(v) => v.into_untagged_value(),
|
||||
Err((left_type, right_type)) => {
|
||||
return Err(ShellError::coerce_error(
|
||||
left_type.spanned(name.span),
|
||||
right_type.spanned(name.span),
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let sum_x_squared = match compute_values(Operator::Multiply, &sum_x, &sum_x) {
|
||||
|
Reference in New Issue
Block a user