Sample variance and Sample standard deviation. (#2310)

This commit is contained in:
Andrés N. Robalino 2020-08-06 23:56:19 -05:00 committed by GitHub
parent 50343f2d6a
commit 724b177c97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 161 additions and 37 deletions

View File

@ -1,13 +1,18 @@
use super::variance::variance; use super::variance::compute_variance as variance;
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand; use crate::commands::WholeStreamCommand;
use crate::prelude::*; use crate::prelude::*;
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_protocol::{Primitive, Signature, UntaggedValue, Value}; use nu_protocol::{Dictionary, Primitive, ReturnSuccess, Signature, UntaggedValue, Value};
use nu_source::Tagged;
use std::str::FromStr; use std::str::FromStr;
pub struct SubCommand; pub struct SubCommand;
/// Arguments of the `math stddev` subcommand, deserialized by `args.process` in `run`.
#[derive(Deserialize)]
struct Arguments {
/// Value of the `sample` switch (short form `s`) declared in `signature`.
/// When true, `run` passes `values.len() - 1` instead of `values.len()` as the
/// count `n` handed to `compute_stddev`.
// NOTE(review): with this switch set and an empty input, `values.len() - 1` in `run`
// would underflow `usize` — confirm that case is guarded.
sample: Tagged<bool>,
}
#[async_trait] #[async_trait]
impl WholeStreamCommand for SubCommand { impl WholeStreamCommand for SubCommand {
fn name(&self) -> &str { fn name(&self) -> &str {
@ -15,7 +20,11 @@ impl WholeStreamCommand for SubCommand {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("math stddev") Signature::build("math stddev").switch(
"sample",
"calculate sample standard deviation",
Some('s'),
)
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -27,20 +36,69 @@ impl WholeStreamCommand for SubCommand {
args: CommandArgs, args: CommandArgs,
registry: &CommandRegistry, registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
run_with_function( let name = args.call_info.name_tag.clone();
RunnableContext { let (Arguments { sample }, mut input) = args.process(&registry).await?;
input: args.input,
registry: registry.clone(), let values: Vec<Value> = input.drain_vec().await;
shell_manager: args.shell_manager,
host: args.host, let n = if let Tagged { item: true, .. } = sample {
ctrl_c: args.ctrl_c, values.len() - 1
current_errors: args.current_errors, } else {
name: args.call_info.name_tag, values.len()
raw_input: args.raw_input, };
},
stddev, let res = if values.iter().all(|v| v.is_primitive()) {
) compute_stddev(&values, n, &name)
.await } else {
// If we are not dealing with Primitives, then perhaps we are dealing with a table
// Create a key for each column name
let mut column_values = IndexMap::new();
for value in values {
if let UntaggedValue::Row(row_dict) = &value.value {
for (key, value) in row_dict.entries.iter() {
column_values
.entry(key.clone())
.and_modify(|v: &mut Vec<Value>| v.push(value.clone()))
.or_insert(vec![value.clone()]);
}
}
}
// The mathematical function operates over the columns of the table
let mut column_totals = IndexMap::new();
for (col_name, col_vals) in column_values {
if let Ok(out) = compute_stddev(&col_vals, n, &name) {
column_totals.insert(col_name, out);
}
}
if column_totals.keys().len() == 0 {
return Err(ShellError::labeled_error(
"Attempted to compute values that can't be operated on",
"value appears here",
name.span,
));
}
Ok(UntaggedValue::Row(Dictionary {
entries: column_totals,
})
.into_untagged_value())
};
match res {
Ok(v) => {
if v.value.is_table() {
Ok(OutputStream::from(
v.table_entries()
.map(|v| ReturnSuccess::value(v.clone()))
.collect::<Vec<_>>(),
))
} else {
Ok(OutputStream::one(ReturnSuccess::value(v)))
}
}
Err(e) => Err(e),
}
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
@ -52,8 +110,13 @@ impl WholeStreamCommand for SubCommand {
} }
} }
#[cfg(test)]
pub fn stddev(values: &[Value], name: &Tag) -> Result<Value, ShellError> { pub fn stddev(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let variance = variance(values, name)?.as_primitive()?; compute_stddev(values, values.len(), name)
}
pub fn compute_stddev(values: &[Value], n: usize, name: &Tag) -> Result<Value, ShellError> {
let variance = variance(values, n, name)?.as_primitive()?;
let sqrt_var = match variance { let sqrt_var = match variance {
Primitive::Decimal(var) => var.sqrt(), Primitive::Decimal(var) => var.sqrt(),
_ => { _ => {

View File

@ -1,13 +1,20 @@
use crate::commands::math::utils::run_with_function;
use crate::commands::WholeStreamCommand; use crate::commands::WholeStreamCommand;
use crate::data::value::compute_values; use crate::data::value::compute_values;
use crate::prelude::*; use crate::prelude::*;
use bigdecimal::FromPrimitive; use bigdecimal::FromPrimitive;
use nu_errors::ShellError; use nu_errors::ShellError;
use nu_protocol::{hir::Operator, Primitive, Signature, UntaggedValue, Value}; use nu_protocol::{
hir::Operator, Dictionary, Primitive, ReturnSuccess, Signature, UntaggedValue, Value,
};
use nu_source::Tagged;
pub struct SubCommand; pub struct SubCommand;
/// Arguments of the `math variance` subcommand, deserialized by `args.process` in `run`.
#[derive(Deserialize)]
struct Arguments {
/// Value of the `sample` switch (short form `s`) declared in `signature`.
/// When true, `run` passes `values.len() - 1` instead of `values.len()` as the
/// count `n` handed to `compute_variance`.
// NOTE(review): with this switch set and an empty input, `values.len() - 1` in `run`
// would underflow `usize` — confirm that case is guarded.
sample: Tagged<bool>,
}
#[async_trait] #[async_trait]
impl WholeStreamCommand for SubCommand { impl WholeStreamCommand for SubCommand {
fn name(&self) -> &str { fn name(&self) -> &str {
@ -15,7 +22,7 @@ impl WholeStreamCommand for SubCommand {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build("math variance") Signature::build("math variance").switch("sample", "calculate sample variance", Some('s'))
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -27,20 +34,69 @@ impl WholeStreamCommand for SubCommand {
args: CommandArgs, args: CommandArgs,
registry: &CommandRegistry, registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> { ) -> Result<OutputStream, ShellError> {
run_with_function( let name = args.call_info.name_tag.clone();
RunnableContext { let (Arguments { sample }, mut input) = args.process(&registry).await?;
input: args.input,
registry: registry.clone(), let values: Vec<Value> = input.drain_vec().await;
shell_manager: args.shell_manager,
host: args.host, let n = if let Tagged { item: true, .. } = sample {
ctrl_c: args.ctrl_c, values.len() - 1
current_errors: args.current_errors, } else {
name: args.call_info.name_tag, values.len()
raw_input: args.raw_input, };
},
variance, let res = if values.iter().all(|v| v.is_primitive()) {
) compute_variance(&values, n, &name)
.await } else {
// If we are not dealing with Primitives, then perhaps we are dealing with a table
// Create a key for each column name
let mut column_values = IndexMap::new();
for value in values {
if let UntaggedValue::Row(row_dict) = &value.value {
for (key, value) in row_dict.entries.iter() {
column_values
.entry(key.clone())
.and_modify(|v: &mut Vec<Value>| v.push(value.clone()))
.or_insert(vec![value.clone()]);
}
}
}
// The mathematical function operates over the columns of the table
let mut column_totals = IndexMap::new();
for (col_name, col_vals) in column_values {
if let Ok(out) = compute_variance(&col_vals, n, &name) {
column_totals.insert(col_name, out);
}
}
if column_totals.keys().len() == 0 {
return Err(ShellError::labeled_error(
"Attempted to compute values that can't be operated on",
"value appears here",
name.span,
));
}
Ok(UntaggedValue::Row(Dictionary {
entries: column_totals,
})
.into_untagged_value())
};
match res {
Ok(v) => {
if v.value.is_table() {
Ok(OutputStream::from(
v.table_entries()
.map(|v| ReturnSuccess::value(v.clone()))
.collect::<Vec<_>>(),
))
} else {
Ok(OutputStream::one(ReturnSuccess::value(v)))
}
}
Err(e) => Err(e),
}
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
@ -147,9 +203,14 @@ fn sum_of_squares(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
Ok(ss) Ok(ss)
} }
#[cfg(test)]
pub fn variance(values: &[Value], name: &Tag) -> Result<Value, ShellError> { pub fn variance(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
compute_variance(values, values.len(), name)
}
pub fn compute_variance(values: &[Value], n: usize, name: &Tag) -> Result<Value, ShellError> {
let ss = sum_of_squares(values, name)?; let ss = sum_of_squares(values, name)?;
let n = BigDecimal::from_usize(values.len()).ok_or_else(|| { let n = BigDecimal::from_usize(n).ok_or_else(|| {
ShellError::labeled_error( ShellError::labeled_error(
"could not convert to big decimal", "could not convert to big decimal",
"could not convert to big decimal", "could not convert to big decimal",