Math#media - ability to compute median value.

This commit is contained in:
Andrés N. Robalino 2020-06-18 16:37:18 -05:00
parent 353b33be1b
commit 5f9de80d9b
10 changed files with 266 additions and 22 deletions

View File

@ -348,9 +348,10 @@ pub fn create_default_context(
// Data processing
whole_stream_command(Histogram),
whole_stream_command(Math),
whole_stream_command(Average),
whole_stream_command(Minimum),
whole_stream_command(Maximum),
whole_stream_command(MathAverage),
whole_stream_command(MathMedian),
whole_stream_command(MathMinimum),
whole_stream_command(MathMaximum),
whole_stream_command(Sum),
// File format output
whole_stream_command(To),

View File

@ -151,7 +151,6 @@ pub(crate) use du::Du;
pub(crate) use each::Each;
pub(crate) use echo::Echo;
pub(crate) use is_empty::IsEmpty;
pub(crate) use math::Math;
pub(crate) use update::Update;
pub(crate) mod kill;
pub(crate) use kill::Kill;
@ -200,7 +199,7 @@ pub(crate) use lines::Lines;
pub(crate) use ls::Ls;
#[allow(unused_imports)]
pub(crate) use map_max_by::MapMaxBy;
pub(crate) use math::{Average, Maximum, Minimum};
pub(crate) use math::{Math, MathAverage, MathMaximum, MathMedian, MathMinimum};
pub(crate) use merge::Merge;
pub(crate) use mkdir::Mkdir;
pub(crate) use mv::Move;

View File

@ -58,7 +58,13 @@ impl WholeStreamCommand for SubCommand {
pub fn average(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let sum = reducer_for(Reduce::Sum);
let number = BigDecimal::from_usize(values.len()).expect("expected a usize-sized bigdecimal");
let number = BigDecimal::from_usize(values.len()).ok_or_else(|| {
ShellError::labeled_error(
"could not convert to big decimal",
"could not convert to big decimal",
&name.span,
)
})?;
let total_rows = UntaggedValue::decimal(number);
let total = sum(Value::zero(), values.to_vec())?;

View File

@ -0,0 +1,193 @@
use crate::commands::math::utils::calculate;
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use crate::utils::data_processing::{reducer_for, Reduce};
use bigdecimal::{FromPrimitive, Zero};
use nu_errors::ShellError;
use nu_protocol::{
hir::{convert_number_to_u64, Number, Operator},
Primitive, Signature, UntaggedValue, Value,
};
pub struct SubCommand;
#[async_trait]
impl WholeStreamCommand for SubCommand {
fn name(&self) -> &str {
"math median"
}
fn signature(&self) -> Signature {
Signature::build("math median")
}
fn usage(&self) -> &str {
"Gets the median of a list of numbers"
}
async fn run(
&self,
args: CommandArgs,
registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
calculate(
RunnableContext {
input: args.input,
registry: registry.clone(),
shell_manager: args.shell_manager,
host: args.host,
ctrl_c: args.ctrl_c,
current_errors: args.current_errors,
name: args.call_info.name_tag,
raw_input: args.raw_input,
},
median,
)
.await
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Get the median of a list of numbers",
example: "echo [3 8 9 12 12 15] | math median",
result: Some(vec![UntaggedValue::decimal(10.5).into()]),
}]
}
}
enum Pick {
MedianAverage,
Median,
}
pub fn median(values: &[Value], name: &Tag) -> Result<Value, ShellError> {
let take = if values.len() % 2 == 0 {
Pick::MedianAverage
} else {
Pick::Median
};
let mut sorted = vec![];
for item in values {
sorted.push(item.clone());
}
crate::commands::sort_by::sort(&mut sorted, &[], name)?;
match take {
Pick::Median => {
let idx = (values.len() as f64 / 2.0).floor() as usize;
let out = sorted.get(idx).ok_or_else(|| {
ShellError::labeled_error(
"could not extract value",
"could not extract value",
&name.span,
)
})?;
Ok(out.clone())
}
Pick::MedianAverage => {
let idx_end = (values.len() / 2) as usize;
let idx_start = idx_end - 1;
let left = sorted
.get(idx_start)
.ok_or_else(|| {
ShellError::labeled_error(
"could not extract value",
"could not extract value",
&name.span,
)
})?
.clone();
let right = sorted
.get(idx_end)
.ok_or_else(|| {
ShellError::labeled_error(
"could not extract value",
"could not extract value",
&name.span,
)
})?
.clone();
compute_average(&[left, right], name)
}
}
}
fn compute_average(values: &[Value], name: impl Into<Tag>) -> Result<Value, ShellError> {
let name = name.into();
let sum = reducer_for(Reduce::Sum);
let number = BigDecimal::from_usize(2).ok_or_else(|| {
ShellError::labeled_error(
"could not convert to big decimal",
"could not convert to big decimal",
&name,
)
})?;
let total_rows = UntaggedValue::decimal(number);
let total = sum(Value::zero(), values.to_vec())?;
match total {
Value {
value: UntaggedValue::Primitive(Primitive::Bytes(num)),
..
} => {
let left = UntaggedValue::from(Primitive::Int(num.into()));
let result = crate::data::value::compute_values(Operator::Divide, &left, &total_rows);
match result {
Ok(UntaggedValue::Primitive(Primitive::Decimal(result))) => {
let number = Number::Decimal(result);
let number = convert_number_to_u64(&number);
Ok(UntaggedValue::bytes(number).into_value(name))
}
Ok(_) => Err(ShellError::labeled_error(
"could not calculate median of non-numeric or unrelated types",
"source",
name,
)),
Err((left_type, right_type)) => Err(ShellError::coerce_error(
left_type.spanned(name.span),
right_type.spanned(name.span),
)),
}
}
Value {
value: UntaggedValue::Primitive(other),
..
} => {
let left = UntaggedValue::from(other);
let result = crate::data::value::compute_values(Operator::Divide, &left, &total_rows);
match result {
Ok(value) => Ok(value.into_value(name)),
Err((left_type, right_type)) => Err(ShellError::coerce_error(
left_type.spanned(name.span),
right_type.spanned(name.span),
)),
}
}
_ => Err(ShellError::labeled_error(
"could not calculate median of non-numeric or unrelated types",
"source",
name,
)),
}
}
#[cfg(test)]
mod tests {
use super::SubCommand;
#[test]
fn examples_work_as_expected() {
use crate::examples::test as test_examples;
test_examples(SubCommand {})
}
}

View File

@ -1,10 +1,12 @@
pub mod average;
pub mod command;
pub mod max;
pub mod median;
pub mod min;
pub mod utils;
pub use average::SubCommand as Average;
pub use average::SubCommand as MathAverage;
pub use command::Command as Math;
pub use max::SubCommand as Maximum;
pub use min::SubCommand as Minimum;
pub use max::SubCommand as MathMaximum;
pub use median::SubCommand as MathMedian;
pub use min::SubCommand as MathMinimum;

View File

@ -70,15 +70,33 @@ async fn sort_by(
let (SortByArgs { rest }, mut input) = args.process(&registry).await?;
let mut vec = input.drain_vec().await;
sort(&mut vec, &rest, &tag)?;
let mut values_vec_deque: VecDeque<Value> = VecDeque::new();
for item in vec {
values_vec_deque.push_back(item);
}
Ok(futures::stream::iter(values_vec_deque).to_output_stream())
}
pub fn sort(
vec: &mut [Value],
keys: &[Tagged<String>],
tag: impl Into<Tag>,
) -> Result<(), ShellError> {
let tag = tag.into();
if vec.is_empty() {
return Err(ShellError::labeled_error(
"Error performing sort-by command",
"sort-by error",
"no values to work with",
"no values to work with",
tag,
));
}
for sort_arg in rest.iter() {
for sort_arg in keys.iter() {
let match_test = get_data_by_key(&vec[0], sort_arg.borrow_spanned());
if match_test == None {
return Err(ShellError::labeled_error(
@ -98,7 +116,7 @@ async fn sort_by(
}
_ => {
let calc_key = |item: &Value| {
rest.iter()
keys.iter()
.map(|f| get_data_by_key(item, f.borrow_spanned()))
.collect::<Vec<Option<Value>>>()
};
@ -106,13 +124,7 @@ async fn sort_by(
}
};
let mut values_vec_deque: VecDeque<Value> = VecDeque::new();
for item in vec {
values_vec_deque.push_back(item);
}
Ok(futures::stream::iter(values_vec_deque).to_output_stream())
Ok(())
}
#[cfg(test)]

View File

@ -11,7 +11,7 @@ fn can_average_numbers() {
| echo $it
"#
));
println!("{:?}", actual.err);
assert_eq!(actual.out, "101.5")
}

View File

@ -0,0 +1,29 @@
use nu_test_support::{nu, pipeline};
#[test]
fn median_numbers_with_even_rows() {
let actual = nu!(
cwd: ".", pipeline(
r#"
echo [10 6 19 21 4]
| math median
| echo $it
"#
));
assert_eq!(actual.out, "10")
}
#[test]
fn median_numbers_with_odd_rows() {
let actual = nu!(
cwd: ".", pipeline(
r#"
echo [3 8 9 12 12 15]
| math median
| echo $it
"#
));
assert_eq!(actual.out, "10.5")
}

View File

@ -1,3 +1,6 @@
mod average;
mod median;
use nu_test_support::{nu, pipeline};
#[test]

View File

@ -1,6 +1,5 @@
mod alias;
mod append;
mod average;
mod cal;
mod calc;
mod cd;