From 5f9de80d9b8ff427c94cb448f467efff0e9eb25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Thu, 18 Jun 2020 16:37:18 -0500 Subject: [PATCH] Math#median - ability to compute median value. --- crates/nu-cli/src/cli.rs | 7 +- crates/nu-cli/src/commands.rs | 3 +- crates/nu-cli/src/commands/math/average.rs | 8 +- crates/nu-cli/src/commands/math/median.rs | 193 ++++++++++++++++++ crates/nu-cli/src/commands/math/mod.rs | 8 +- crates/nu-cli/src/commands/sort_by.rs | 34 ++- .../tests/commands/{ => math}/average.rs | 2 +- crates/nu-cli/tests/commands/math/median.rs | 29 +++ .../tests/commands/{math.rs => math/mod.rs} | 3 + crates/nu-cli/tests/commands/mod.rs | 1 - 10 files changed, 266 insertions(+), 22 deletions(-) create mode 100644 crates/nu-cli/src/commands/math/median.rs rename crates/nu-cli/tests/commands/{ => math}/average.rs (94%) create mode 100644 crates/nu-cli/tests/commands/math/median.rs rename crates/nu-cli/tests/commands/{math.rs => math/mod.rs} (99%) diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index 5136e875b..e3cf9100c 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -348,9 +348,10 @@ pub fn create_default_context( // Data processing whole_stream_command(Histogram), whole_stream_command(Math), - whole_stream_command(Average), - whole_stream_command(Minimum), - whole_stream_command(Maximum), + whole_stream_command(MathAverage), + whole_stream_command(MathMedian), + whole_stream_command(MathMinimum), + whole_stream_command(MathMaximum), whole_stream_command(Sum), // File format output whole_stream_command(To), diff --git a/crates/nu-cli/src/commands.rs b/crates/nu-cli/src/commands.rs index dbb6b605f..d24be5572 100644 --- a/crates/nu-cli/src/commands.rs +++ b/crates/nu-cli/src/commands.rs @@ -151,7 +151,6 @@ pub(crate) use du::Du; pub(crate) use each::Each; pub(crate) use echo::Echo; pub(crate) use is_empty::IsEmpty; -pub(crate) use math::Math; pub(crate) use update::Update; pub(crate) 
mod kill; pub(crate) use kill::Kill; @@ -200,7 +199,7 @@ pub(crate) use lines::Lines; pub(crate) use ls::Ls; #[allow(unused_imports)] pub(crate) use map_max_by::MapMaxBy; -pub(crate) use math::{Average, Maximum, Minimum}; +pub(crate) use math::{Math, MathAverage, MathMaximum, MathMedian, MathMinimum}; pub(crate) use merge::Merge; pub(crate) use mkdir::Mkdir; pub(crate) use mv::Move; diff --git a/crates/nu-cli/src/commands/math/average.rs b/crates/nu-cli/src/commands/math/average.rs index 627a072d1..0e02ab1a3 100644 --- a/crates/nu-cli/src/commands/math/average.rs +++ b/crates/nu-cli/src/commands/math/average.rs @@ -58,7 +58,13 @@ impl WholeStreamCommand for SubCommand { pub fn average(values: &[Value], name: &Tag) -> Result { let sum = reducer_for(Reduce::Sum); - let number = BigDecimal::from_usize(values.len()).expect("expected a usize-sized bigdecimal"); + let number = BigDecimal::from_usize(values.len()).ok_or_else(|| { + ShellError::labeled_error( + "could not convert to big decimal", + "could not convert to big decimal", + &name.span, + ) + })?; let total_rows = UntaggedValue::decimal(number); let total = sum(Value::zero(), values.to_vec())?; diff --git a/crates/nu-cli/src/commands/math/median.rs b/crates/nu-cli/src/commands/math/median.rs new file mode 100644 index 000000000..5a212be95 --- /dev/null +++ b/crates/nu-cli/src/commands/math/median.rs @@ -0,0 +1,193 @@ +use crate::commands::math::utils::calculate; +use crate::commands::WholeStreamCommand; +use crate::prelude::*; +use crate::utils::data_processing::{reducer_for, Reduce}; +use bigdecimal::{FromPrimitive, Zero}; +use nu_errors::ShellError; +use nu_protocol::{ + hir::{convert_number_to_u64, Number, Operator}, + Primitive, Signature, UntaggedValue, Value, +}; + +pub struct SubCommand; + +#[async_trait] +impl WholeStreamCommand for SubCommand { + fn name(&self) -> &str { + "math median" + } + + fn signature(&self) -> Signature { + Signature::build("math median") + } + + fn usage(&self) -> &str { + "Gets 
the median of a list of numbers" + } + + async fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + calculate( + RunnableContext { + input: args.input, + registry: registry.clone(), + shell_manager: args.shell_manager, + host: args.host, + ctrl_c: args.ctrl_c, + current_errors: args.current_errors, + name: args.call_info.name_tag, + raw_input: args.raw_input, + }, + median, + ) + .await + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Get the median of a list of numbers", + example: "echo [3 8 9 12 12 15] | math median", + result: Some(vec![UntaggedValue::decimal(10.5).into()]), + }] + } +} + +enum Pick { + MedianAverage, + Median, +} + +pub fn median(values: &[Value], name: &Tag) -> Result { + let take = if values.len() % 2 == 0 { + Pick::MedianAverage + } else { + Pick::Median + }; + + let mut sorted = vec![]; + + for item in values { + sorted.push(item.clone()); + } + + crate::commands::sort_by::sort(&mut sorted, &[], name)?; + + match take { + Pick::Median => { + let idx = (values.len() as f64 / 2.0).floor() as usize; + let out = sorted.get(idx).ok_or_else(|| { + ShellError::labeled_error( + "could not extract value", + "could not extract value", + &name.span, + ) + })?; + Ok(out.clone()) + } + Pick::MedianAverage => { + let idx_end = (values.len() / 2) as usize; + let idx_start = idx_end - 1; + + let left = sorted + .get(idx_start) + .ok_or_else(|| { + ShellError::labeled_error( + "could not extract value", + "could not extract value", + &name.span, + ) + })? + .clone(); + + let right = sorted + .get(idx_end) + .ok_or_else(|| { + ShellError::labeled_error( + "could not extract value", + "could not extract value", + &name.span, + ) + })? 
+ .clone(); + + compute_average(&[left, right], name) + } + } +} + +fn compute_average(values: &[Value], name: impl Into) -> Result { + let name = name.into(); + + let sum = reducer_for(Reduce::Sum); + let number = BigDecimal::from_usize(2).ok_or_else(|| { + ShellError::labeled_error( + "could not convert to big decimal", + "could not convert to big decimal", + &name, + ) + })?; + let total_rows = UntaggedValue::decimal(number); + let total = sum(Value::zero(), values.to_vec())?; + + match total { + Value { + value: UntaggedValue::Primitive(Primitive::Bytes(num)), + .. + } => { + let left = UntaggedValue::from(Primitive::Int(num.into())); + let result = crate::data::value::compute_values(Operator::Divide, &left, &total_rows); + + match result { + Ok(UntaggedValue::Primitive(Primitive::Decimal(result))) => { + let number = Number::Decimal(result); + let number = convert_number_to_u64(&number); + Ok(UntaggedValue::bytes(number).into_value(name)) + } + Ok(_) => Err(ShellError::labeled_error( + "could not calculate median of non-numeric or unrelated types", + "source", + name, + )), + Err((left_type, right_type)) => Err(ShellError::coerce_error( + left_type.spanned(name.span), + right_type.spanned(name.span), + )), + } + } + Value { + value: UntaggedValue::Primitive(other), + .. 
+ } => { + let left = UntaggedValue::from(other); + let result = crate::data::value::compute_values(Operator::Divide, &left, &total_rows); + + match result { + Ok(value) => Ok(value.into_value(name)), + Err((left_type, right_type)) => Err(ShellError::coerce_error( + left_type.spanned(name.span), + right_type.spanned(name.span), + )), + } + } + _ => Err(ShellError::labeled_error( + "could not calculate median of non-numeric or unrelated types", + "source", + name, + )), + } +} + +#[cfg(test)] +mod tests { + use super::SubCommand; + + #[test] + fn examples_work_as_expected() { + use crate::examples::test as test_examples; + + test_examples(SubCommand {}) + } +} diff --git a/crates/nu-cli/src/commands/math/mod.rs b/crates/nu-cli/src/commands/math/mod.rs index c8fee95f2..2b0ae6d5d 100644 --- a/crates/nu-cli/src/commands/math/mod.rs +++ b/crates/nu-cli/src/commands/math/mod.rs @@ -1,10 +1,12 @@ pub mod average; pub mod command; pub mod max; +pub mod median; pub mod min; pub mod utils; -pub use average::SubCommand as Average; +pub use average::SubCommand as MathAverage; pub use command::Command as Math; -pub use max::SubCommand as Maximum; -pub use min::SubCommand as Minimum; +pub use max::SubCommand as MathMaximum; +pub use median::SubCommand as MathMedian; +pub use min::SubCommand as MathMinimum; diff --git a/crates/nu-cli/src/commands/sort_by.rs b/crates/nu-cli/src/commands/sort_by.rs index cde0c5fc8..bf273d115 100644 --- a/crates/nu-cli/src/commands/sort_by.rs +++ b/crates/nu-cli/src/commands/sort_by.rs @@ -70,15 +70,33 @@ async fn sort_by( let (SortByArgs { rest }, mut input) = args.process(&registry).await?; let mut vec = input.drain_vec().await; + sort(&mut vec, &rest, &tag)?; + + let mut values_vec_deque: VecDeque = VecDeque::new(); + + for item in vec { + values_vec_deque.push_back(item); + } + + Ok(futures::stream::iter(values_vec_deque).to_output_stream()) +} + +pub fn sort( + vec: &mut [Value], + keys: &[Tagged], + tag: impl Into, +) -> Result<(), ShellError> { 
+ let tag = tag.into(); + if vec.is_empty() { return Err(ShellError::labeled_error( - "Error performing sort-by command", - "sort-by error", + "no values to work with", + "no values to work with", tag, )); } - for sort_arg in rest.iter() { + for sort_arg in keys.iter() { let match_test = get_data_by_key(&vec[0], sort_arg.borrow_spanned()); if match_test == None { return Err(ShellError::labeled_error( @@ -98,7 +116,7 @@ async fn sort_by( } _ => { let calc_key = |item: &Value| { - rest.iter() + keys.iter() .map(|f| get_data_by_key(item, f.borrow_spanned())) .collect::>>() }; @@ -106,13 +124,7 @@ async fn sort_by( } }; - let mut values_vec_deque: VecDeque = VecDeque::new(); - - for item in vec { - values_vec_deque.push_back(item); - } - - Ok(futures::stream::iter(values_vec_deque).to_output_stream()) + Ok(()) } #[cfg(test)] diff --git a/crates/nu-cli/tests/commands/average.rs b/crates/nu-cli/tests/commands/math/average.rs similarity index 94% rename from crates/nu-cli/tests/commands/average.rs rename to crates/nu-cli/tests/commands/math/average.rs index 7c1e8a60b..caa586ff7 100644 --- a/crates/nu-cli/tests/commands/average.rs +++ b/crates/nu-cli/tests/commands/math/average.rs @@ -11,7 +11,7 @@ fn can_average_numbers() { | echo $it "# )); - println!("{:?}", actual.err); + assert_eq!(actual.out, "101.5") } diff --git a/crates/nu-cli/tests/commands/math/median.rs b/crates/nu-cli/tests/commands/math/median.rs new file mode 100644 index 000000000..79f4da6d4 --- /dev/null +++ b/crates/nu-cli/tests/commands/math/median.rs @@ -0,0 +1,29 @@ +use nu_test_support::{nu, pipeline}; + +#[test] +fn median_numbers_with_even_rows() { + let actual = nu!( + cwd: ".", pipeline( + r#" + echo [10 6 19 21 4] + | math median + | echo $it + "# + )); + + assert_eq!(actual.out, "10") +} + +#[test] +fn median_numbers_with_odd_rows() { + let actual = nu!( + cwd: ".", pipeline( + r#" + echo [3 8 9 12 12 15] + | math median + | echo $it + "# + )); + + assert_eq!(actual.out, "10.5") +} diff --git 
a/crates/nu-cli/tests/commands/math.rs b/crates/nu-cli/tests/commands/math/mod.rs similarity index 99% rename from crates/nu-cli/tests/commands/math.rs rename to crates/nu-cli/tests/commands/math/mod.rs index 680b1b641..ff682ab3a 100644 --- a/crates/nu-cli/tests/commands/math.rs +++ b/crates/nu-cli/tests/commands/math/mod.rs @@ -1,3 +1,6 @@ +mod average; +mod median; + use nu_test_support::{nu, pipeline}; #[test] diff --git a/crates/nu-cli/tests/commands/mod.rs b/crates/nu-cli/tests/commands/mod.rs index dd43d7620..22febe997 100644 --- a/crates/nu-cli/tests/commands/mod.rs +++ b/crates/nu-cli/tests/commands/mod.rs @@ -1,6 +1,5 @@ mod alias; mod append; -mod average; mod cal; mod calc; mod cd;