Add variance and stddev subcommands to math command (#2154)

* add variance (population)
subcommand to math

* impl variance subcommand with spanning errors for invalid types

* add stddev subcommand to math

* rename bytes to filesize

* clippy fix -- use expect instead of unwrap in variance tests
This commit is contained in:
Ali Mousa 2020-07-14 12:15:02 -07:00 committed by GitHub
parent e9313a61af
commit 8fd22b61be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 278 additions and 5 deletions

View File

@ -371,7 +371,9 @@ pub fn create_default_context(
whole_stream_command(MathMinimum),
whole_stream_command(MathMode),
whole_stream_command(MathMaximum),
whole_stream_command(MathStddev),
whole_stream_command(MathSummation),
whole_stream_command(MathVariance),
// File format output
whole_stream_command(To),
whole_stream_command(ToBSON),

View File

@ -217,7 +217,8 @@ pub(crate) use ls::Ls;
#[allow(unused_imports)]
pub(crate) use map_max_by::MapMaxBy;
pub(crate) use math::{
Math, MathAverage, MathMaximum, MathMedian, MathMinimum, MathMode, MathSummation,
Math, MathAverage, MathMaximum, MathMedian, MathMinimum, MathMode, MathStddev, MathSummation,
MathVariance,
};
pub(crate) use merge::Merge;
pub(crate) use mkdir::Mkdir;

View File

@ -35,12 +35,13 @@ impl WholeStreamCommand for Command {
mod tests {
use super::*;
use crate::commands::math::{
avg::average, max::maximum, median::median, min::minimum, mode::mode, sum::summation,
utils::calculate, utils::MathFunction,
avg::average, max::maximum, median::median, min::minimum, mode::mode, stddev::stddev,
sum::summation, utils::calculate, utils::MathFunction, variance::variance,
};
use nu_plugin::row;
use nu_plugin::test_helpers::value::{decimal, int, table};
use nu_protocol::Value;
use std::str::FromStr;
#[test]
fn examples_work_as_expected() {
@ -75,7 +76,9 @@ mod tests {
Ok(int(10)),
Ok(int(10)),
Ok(table(&[int(10)])),
Ok(decimal(0)),
Ok(int(10)),
Ok(decimal(0)),
],
},
TestCase {
@ -88,7 +91,9 @@ mod tests {
Ok(int(30)),
Ok(int(20)),
Ok(table(&[int(10), int(20), int(30)])),
Ok(decimal(BigDecimal::from_str("8.164965809277260327324280249019637973219824935522233761442308557503201258191050088466198110348800783").expect("Could not convert to decimal from string"))),
Ok(int(60)),
Ok(decimal(BigDecimal::from_str("66.66666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666667").expect("Could not convert to decimal from string"))),
],
},
TestCase {
@ -101,7 +106,9 @@ mod tests {
Ok(decimal(26.5)),
Ok(decimal(26.5)),
Ok(table(&[decimal(26.5)])),
Ok(decimal(BigDecimal::from_str("7.77817459305202276840928798315333943213319531457321440247173855894902863154158871367713143880202865").expect("Could not convert to decimal from string"))),
Ok(decimal(63)),
Ok(decimal(60.5)),
],
},
TestCase {
@ -114,7 +121,9 @@ mod tests {
Ok(int(10)),
Ok(int(-11)),
Ok(table(&[int(-14), int(-11), int(10)])),
Ok(decimal(BigDecimal::from_str("10.67707825203131121081152396559571062628228776946058011397810604284900898365140801704064843595778374").expect("Could not convert to decimal from string"))),
Ok(int(-15)),
Ok(decimal(114)),
],
},
TestCase {
@ -127,7 +136,9 @@ mod tests {
Ok(int(10)),
Ok(decimal(-11.5)),
Ok(table(&[decimal(-13.5), decimal(-11.5), int(10)])),
Ok(decimal(BigDecimal::from_str("10.63798226482196513098036125801342585449179971588207816421068645273754903468375890632981926875247027").expect("Could not convert to decimal from string"))),
Ok(decimal(-15)),
Ok(decimal(BigDecimal::from_str("113.1666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666666667").expect("Could not convert to decimal from string"))),
],
},
TestCase {
@ -147,8 +158,13 @@ mod tests {
Ok(row![
"col1".to_owned() => table(&[int(1), int(2), int(3), int(4)]),
"col2".to_owned() => table(&[int(5), int(6), int(7), int(8)])
]),
Ok(row![
"col1".to_owned() => decimal(BigDecimal::from_str("1.118033988749894848204586834365638117720309179805762862135448622705260462818902449707207204189391137").expect("Could not convert to decimal from string")),
"col2".to_owned() => decimal(BigDecimal::from_str("1.118033988749894848204586834365638117720309179805762862135448622705260462818902449707207204189391137").expect("Could not convert to decimal from string"))
]),
Ok(row!["col1".to_owned() => int(10), "col2".to_owned() => int(26)]),
Ok(row!["col1".to_owned() => decimal(1.25), "col2".to_owned() => decimal(1.25)]),
],
},
// TODO-Uncomment once Issue: https://github.com/nushell/nushell/issues/1883 is resolved
@ -162,8 +178,9 @@ mod tests {
let test_tag = Tag::unknown();
for tc in tt.iter() {
let tc: &TestCase = tc; // Just for type annotations
let math_functions: Vec<MathFunction> =
vec![average, minimum, maximum, median, mode, summation];
let math_functions: Vec<MathFunction> = vec![
average, minimum, maximum, median, mode, stddev, summation, variance,
];
let results = math_functions
.into_iter()
.map(|mf| calculate(&tc.values, &test_tag, mf))

View File

@ -4,8 +4,10 @@ pub mod max;
pub mod median;
pub mod min;
pub mod mode;
pub mod stddev;
pub mod sum;
pub mod utils;
pub mod variance;
pub use avg::SubCommand as MathAverage;
pub use command::Command as Math;
@ -13,4 +15,6 @@ pub use max::SubCommand as MathMaximum;
pub use median::SubCommand as MathMedian;
pub use min::SubCommand as MathMinimum;
pub use mode::SubCommand as MathMode;
pub use stddev::SubCommand as MathStddev;
pub use sum::SubCommand as MathSummation;
pub use variance::SubCommand as MathVariance;

View File

@ -0,0 +1,87 @@
use super::variance::variance;
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::{Primitive, Signature, UntaggedValue, Value};
use std::str::FromStr;
/// The `math stddev` subcommand.
pub struct SubCommand;

#[async_trait]
impl WholeStreamCommand for SubCommand {
    /// The name under which this subcommand is invoked.
    fn name(&self) -> &str {
        "math stddev"
    }

    /// The signature is built from the command name alone; no flags or
    /// positional arguments are declared.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
    }

    /// One-line help text for the subcommand.
    fn usage(&self) -> &str {
        "Finds the stddev of a list of numbers or tables"
    }

    /// Hands the incoming stream to `run_with_function`, using [`stddev`]
    /// as the math function to apply.
    async fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let context = RunnableContext {
            input: args.input,
            registry: registry.clone(),
            shell_manager: args.shell_manager,
            host: args.host,
            ctrl_c: args.ctrl_c,
            current_errors: args.current_errors,
            name: args.call_info.name_tag,
            raw_input: args.raw_input,
        };

        run_with_function(context, stddev).await
    }

    /// A single documented example together with its expected result.
    fn examples(&self) -> Vec<Example> {
        // Expected result of the example below, written out as a decimal literal.
        let expected = BigDecimal::from_str("1.414213562373095048801688724209698078569671875376948073176679737990732478462107038850387534327641573")
            .expect("Could not convert to decimal from string");

        vec![Example {
            description: "Get the stddev of a list of numbers",
            example: "echo [1 2 3 4 5] | math stddev",
            result: Some(vec![UntaggedValue::decimal(expected).into()]),
        }]
    }
}
/// Computes the standard deviation of `values` as the square root of their
/// variance (as returned by [`variance`]).
///
/// The resulting decimal is tagged with `name`.
///
/// # Errors
///
/// * Propagates any error from `variance` or from extracting its primitive.
/// * Returns a labeled error when the variance is not a decimal primitive.
/// * Returns a labeled error when the square root yields no value.
pub fn stddev(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
    let variance_primitive = variance(values, name)?.as_primitive()?;

    let root = match variance_primitive {
        Primitive::Decimal(var) => var.sqrt(),
        _ => {
            return Err(ShellError::labeled_error(
                "Could not take square root of variance",
                "error occurred here",
                name.span,
            ))
        }
    };

    // `sqrt` returns an `Option`; a missing root is reported as an error at
    // the command's span.
    root.map(|stddev| UntaggedValue::from(Primitive::Decimal(stddev)).into_value(name))
        .ok_or_else(|| {
            ShellError::labeled_error(
                "Could not calculate stddev",
                "error occurred here",
                name.span,
            )
        })
}
#[cfg(test)]
mod tests {
    use super::SubCommand;
    use crate::examples::test as test_examples;

    /// Runs the examples declared by `SubCommand::examples` through the
    /// shared example test helper.
    #[test]
    fn examples_work_as_expected() {
        test_examples(SubCommand)
    }
}

View File

@ -0,0 +1,162 @@
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand;
use crate::data::value::compute_values;
use crate::prelude::*;
use bigdecimal::{FromPrimitive, Zero};
use nu_errors::ShellError;
use nu_protocol::{hir::Operator, Primitive, Signature, UntaggedValue, Value};
/// The `math variance` subcommand.
pub struct SubCommand;

#[async_trait]
impl WholeStreamCommand for SubCommand {
    /// The name under which this subcommand is invoked.
    fn name(&self) -> &str {
        "math variance"
    }

    /// The signature is built from the command name alone; no flags or
    /// positional arguments are declared.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
    }

    /// One-line help text for the subcommand.
    fn usage(&self) -> &str {
        "Finds the variance of a list of numbers or tables"
    }

    /// Hands the incoming stream to `run_with_function`, using [`variance`]
    /// as the math function to apply.
    async fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        let context = RunnableContext {
            input: args.input,
            registry: registry.clone(),
            shell_manager: args.shell_manager,
            host: args.host,
            ctrl_c: args.ctrl_c,
            current_errors: args.current_errors,
            name: args.call_info.name_tag,
            raw_input: args.raw_input,
        };

        run_with_function(context, variance).await
    }

    /// A single documented example together with its expected result.
    fn examples(&self) -> Vec<Example> {
        let example = Example {
            description: "Get the variance of a list of numbers",
            example: "echo [1 2 3 4 5] | math variance",
            result: Some(vec![UntaggedValue::decimal(2).into()]),
        };

        vec![example]
    }
}
/// Computes `sum(x^2) - (sum(x))^2 / n` over `values`, where `n` is the
/// number of values.
///
/// Filesize primitives are converted to integers before any arithmetic;
/// every other primitive is used as-is. Non-primitive values are rejected.
///
/// # Errors
///
/// * A labeled error when the element count cannot be converted to a
///   `BigDecimal`.
/// * A labeled error, spanned at the offending value, when a value is not a
///   primitive.
/// * A coerce error when `compute_values` rejects the operand types, spanned
///   at the value (inside the loop) or at `name` (for the final steps).
fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
    let count = BigDecimal::from_usize(values.len()).ok_or_else(|| {
        ShellError::labeled_error(
            "could not convert to big decimal",
            "could not convert to big decimal",
            &name.span,
        )
    })?;

    let mut total = Value::zero();
    let mut total_of_squares = Value::zero();

    for item in values {
        let operand = match &item.value {
            UntaggedValue::Primitive(Primitive::Filesize(size)) => {
                UntaggedValue::from(Primitive::Int(size.clone().into()))
            }
            UntaggedValue::Primitive(primitive) => UntaggedValue::from(primitive.clone()),
            _ => {
                return Err(ShellError::labeled_error(
                    "Attempted to compute the sum of squared values of a value that cannot be summed or squared.",
                    "value appears here",
                    item.tag.span,
                ))
            }
        };

        // X^2
        let squared = compute_values(Operator::Multiply, &operand, &operand).map_err(
            |(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned(item.tag.span),
                    right_type.spanned(item.tag.span),
                )
            },
        )?;

        total_of_squares = total_of_squares + squared.into_untagged_value();
        total = total + operand.into_untagged_value();
    }

    // (sum of X)^2
    let square_of_total = compute_values(Operator::Multiply, &total, &total)
        .map_err(|(left_type, right_type)| {
            ShellError::coerce_error(
                left_type.spanned(name.span),
                right_type.spanned(name.span),
            )
        })?
        .into_untagged_value();

    // (sum of X)^2 / n
    let square_of_total_div_count =
        compute_values(Operator::Divide, &square_of_total, &count.into())
            .map_err(|(left_type, right_type)| {
                ShellError::coerce_error(
                    left_type.spanned(name.span),
                    right_type.spanned(name.span),
                )
            })?
            .into_untagged_value();

    // sum of X^2 - (sum of X)^2 / n
    let result = compute_values(
        Operator::Minus,
        &total_of_squares,
        &square_of_total_div_count,
    )
    .map_err(|(left_type, right_type)| {
        ShellError::coerce_error(
            left_type.spanned(name.span),
            right_type.spanned(name.span),
        )
    })?
    .into_untagged_value();

    Ok(result)
}
/// Computes the population variance of `values`: the result of
/// `sum_of_squares` divided by the number of values.
///
/// The result is tagged with `name`.
///
/// # Errors
///
/// * A labeled error when `values` is empty. The element count would be
///   zero, and both the mean correction inside `sum_of_squares` and the
///   final division below divide by that count.
/// * Any error propagated from `sum_of_squares`.
/// * A labeled error when the element count cannot be converted to a
///   `BigDecimal`.
/// * A labeled error when the final division is rejected by
///   `compute_values`.
pub fn variance(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
    // Reject empty input up front rather than dividing by a zero count.
    if values.is_empty() {
        return Err(ShellError::labeled_error(
            "could not calculate variance of an empty list",
            "no values were provided",
            name,
        ));
    }

    let ss = sum_of_squares(values, name)?;
    let n = BigDecimal::from_usize(values.len()).ok_or_else(|| {
        ShellError::labeled_error(
            "could not convert to big decimal",
            "could not convert to big decimal",
            &name.span,
        )
    })?;

    compute_values(Operator::Divide, &ss, &n.into())
        .map(|value| value.into_value(name))
        .map_err(|_| {
            ShellError::labeled_error(
                "could not calculate variance of non-integer or unrelated types",
                "source",
                name,
            )
        })
}
#[cfg(test)]
mod tests {
    use super::SubCommand;
    use crate::examples::test as test_examples;

    /// Runs the examples declared by `SubCommand::examples` through the
    /// shared example test helper.
    #[test]
    fn examples_work_as_expected() {
        test_examples(SubCommand)
    }
}