From 87abfee268927385d727c6df8e93923db9eed8be Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Mon, 31 Jul 2023 05:34:12 -0700 Subject: [PATCH] Merged overloaded commands (#9860) - fixes #9807 # Description This pull request merges all overloaded dfr commands into one command: eager: dfr first -> eager/first.rs dfr last -> eager/last.rs dfr into-nu -> eager/to_nu.rs (merged) lazy: dfr min -> expressions/expressions_macro.rs lazy_expressions_macro dfr max -> expressions/expressions_macro.rs lazy_expressions_macro dfr sum -> expressions/expressions_macro.rs lazy_expressions_macro dfr mean -> expressions/expressions_macro.rs lazy_expressions_macro dfr std -> expressions/expressions_macro.rs lazy_expressions_macro dfr var -> expressions/expressions_macro.rs lazy_expressions_macro series: dfr n-unique -> series/n_unique.rs dfr is-not-null -> series/masks/is_not_null.rs dfr is-null -> series/masks/is_null.rs # User-Facing Changes No user facing changes --------- Co-authored-by: Jack Wright --- .../src/dataframe/eager/first.rs | 58 +- .../src/dataframe/eager/last.rs | 81 ++- .../expressions/expressions_macro.rs | 584 ++++++++++++------ .../src/dataframe/expressions/mod.rs | 5 - .../src/dataframe/lazy/macro_commands.rs | 134 ---- .../src/dataframe/lazy/mod.rs | 6 - .../src/dataframe/series/masks/is_not_null.rs | 91 ++- .../src/dataframe/series/masks/is_null.rs | 91 ++- .../src/dataframe/series/n_unique.rs | 81 ++- .../src/dataframe/test_dataframe.rs | 93 +-- 10 files changed, 731 insertions(+), 493 deletions(-) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs index 839aaa752..6e88c111a 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs @@ -1,4 +1,4 @@ -use super::super::values::{Column, NuDataFrame}; +use super::super::values::{Column, NuDataFrame, NuExpression}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, @@ -15,7 +15,7 @@ impl Command for FirstDF { } fn usage(&self) -> &str { - "Show only the first number of rows." + "Show only the first number of rows or create a first expression" } fn signature(&self) -> Signature { @@ -25,10 +25,16 @@ impl Command for FirstDF { SyntaxShape::Int, "starting from the front, the number of rows to return", ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } @@ -64,6 +70,11 @@ impl Command for FirstDF { .into_value(Span::test_data()), ), }, + Example { + description: "Creates a first expression from a column", + example: "dfr col a | dfr first", + result: None, + }, ] } @@ -74,8 +85,19 @@ impl Command for FirstDF { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().first().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } } } @@ -97,11 +119,25 @@ fn command( #[cfg(test)] mod test { - use super::super::super::test_dataframe::test_dataframe; + use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; #[test] - fn test_examples() { - test_dataframe(vec![Box::new(FirstDF {})]) + fn test_examples_dataframe() { + let mut engine_state = build_test_engine_state(vec![Box::new(FirstDF {})]); + test_dataframe_example(&mut engine_state, &FirstDF.examples()[0]); + test_dataframe_example(&mut engine_state, &FirstDF.examples()[1]); + } + + #[test] + fn test_examples_expression() { + let mut engine_state = build_test_engine_state(vec![ + Box::new(FirstDF {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &FirstDF.examples()[2]); } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs index fe8a2b5e0..b9db26b0d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs @@ -1,4 +1,4 @@ -use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame}; +use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame, NuExpression}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, @@ -21,26 +21,39 @@ impl Command for LastDF { fn signature(&self) -> Signature { Signature::build(self.name()) .optional("rows", SyntaxShape::Int, "Number of rows for tail") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Create new dataframe with last rows", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Create new dataframe with last rows", + example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a last expression from a column", + example: "dfr col a | dfr last", + result: None, + }, + ] } fn run( @@ -50,8 +63,19 @@ impl Command for LastDF { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().last().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } } } @@ -73,11 +97,24 @@ fn command( #[cfg(test)] mod test { - use super::super::super::test_dataframe::test_dataframe; + use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LastDF {})]) + fn test_examples_dataframe() { + let mut engine_state = build_test_engine_state(vec![Box::new(LastDF {})]); + test_dataframe_example(&mut engine_state, &LastDF.examples()[0]); + } + + #[test] + fn test_examples_expression() { + let mut engine_state = build_test_engine_state(vec![ + Box::new(LastDF {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &LastDF.examples()[1]); } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs index 6ab69b27b..11a73fe20 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs @@ -1,7 +1,7 @@ /// Definition of multiple Expression commands using a macro rule /// All of these expressions have an identical body and only require /// to have a change in the name, description and expression function -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -134,6 +134,186 @@ macro_rules! expr_command { }; } +// The structs defined in this file are structs that form part of other commands +// since they share a similar name +macro_rules! lazy_expr_command { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { + #[derive(Clone)] + pub struct $command; + + impl Command for $command { + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let lazy = NuLazyFrame::try_from_value(value)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func()); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().$func().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } + } + } + + #[cfg(test)] + mod $test { + use super::super::super::test_dataframe::{ + build_test_engine_state, test_dataframe_example, + }; + use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + + #[test] + fn test_examples_dataframe() { + // the first example should be a for the dataframe case + let example = &$command.examples()[0]; + let mut engine_state = build_test_engine_state(vec![Box::new($command {})]); + test_dataframe_example(&mut engine_state, &example) + } + + #[test] + fn test_examples_expressions() { + // the second example should be a for the dataframe case + let example = &$command.examples()[1]; + let mut engine_state = build_test_engine_state(vec![ + Box::new($command {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &example) + } + } + }; + + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => { + #[derive(Clone)] + pub struct $command; + + impl Command for $command { + fn name(&self) -> &str { + $name + } + + fn usage(&self) -> &str { + $desc + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + $examples + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let lazy = NuLazyFrame::try_from_value(value)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddof)); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().$func($ddof).into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } + } + } + + #[cfg(test)] + mod $test { + use super::super::super::test_dataframe::{ + build_test_engine_state, test_dataframe_example, + }; + use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + + #[test] + fn test_examples_dataframe() { + // the first example should be a for the dataframe case + let example = &$command.examples()[0]; + let mut engine_state = build_test_engine_state(vec![Box::new($command {})]); + test_dataframe_example(&mut engine_state, &example) + } + + #[test] + fn test_examples_expressions() { + // the second example should be a for the dataframe case + let example = &$command.examples()[1]; + let mut engine_state = build_test_engine_state(vec![ + Box::new($command {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &example) + } + } + }; +} + // ExprList command // Expands to a command definition for a list expression expr_command!( @@ -209,81 +389,6 @@ expr_command!( test_count ); -// ExprFirst command -// Expands to a command definition for a count expression -expr_command!( - ExprFirst, - "dfr first", - "creates a first expression", - vec![Example { - description: "Creates a first expression from a column", - example: "dfr col a | dfr first", - result: None, - },], - first, - test_first -); - -// ExprLast command -// Expands to a command definition for a count expression -expr_command!( - ExprLast, - "dfr last", - "creates a last expression", - vec![Example { - description: "Creates a last expression from a column", - example: "dfr col a | dfr last", - result: None, - },], - last, - test_last -); - -// ExprNUnique command -// Expands to a command definition for a n-unique expression -expr_command!( - ExprNUnique, - "dfr n-unique", - "creates a n-unique expression", - vec![Example { - description: "Creates a is n-unique expression from a column", - example: "dfr col a | dfr n-unique", - result: None, - },], - n_unique, - test_nunique -); - -// ExprIsNotNull command -// Expands to a command definition for a n-unique expression -expr_command!( - ExprIsNotNull, - "dfr is-not-null", - "creates a is not null expression", - vec![Example { - description: "Creates a is not null expression from a column", - example: "dfr col a | dfr is-not-null", - result: None, - },], - is_not_null, - test_is_not_null -); - -// ExprIsNull command -// Expands to a command definition for a n-unique expression -expr_command!( - ExprIsNull, - "dfr is-null", - "creates a is null expression", - vec![Example { - description: "Creates a is null expression from a column", - example: "dfr col a | dfr is-null", - result: None, - },], - is_null, - test_is_null -); - // ExprNot command // Expands to a command definition for a not expression expr_command!( @@ -301,124 +406,180 @@ expr_command!( // ExprMax command // Expands to a command definition for max aggregation -expr_command!( +lazy_expr_command!( ExprMax, "dfr max", - "Creates a max expression", - vec![Example { - description: "Max aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] + "Creates a max expression or aggregates columns to their max value", + vec![ + Example { + description: "Max value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(6)],), + Column::new("b".to_string(), vec![Value::test_int(4)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Max aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr max)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), Value::test_int(1)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], max, test_max ); // ExprMin command // Expands to a command definition for min aggregation -expr_command!( +lazy_expr_command!( ExprMin, "dfr min", - "Creates a min expression", - vec![Example { - description: "Min aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] + "Creates a min expression or aggregates columns to their min value", + vec![ + Example { + description: "Min value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(1)],), + Column::new("b".to_string(), vec![Value::test_int(1)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Min aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr min)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(1)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], min, test_min ); // ExprSum command // Expands to a command definition for sum aggregation -expr_command!( +lazy_expr_command!( ExprSum, "dfr sum", - "Creates a sum expression for an aggregation", - vec![Example { - description: "Sum aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] + "Creates a sum expression for an aggregation or aggregates columns to their sum value", + vec![ + Example { + description: "Sums all columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(11)],), + Column::new("b".to_string(), vec![Value::test_int(7)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Sum aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr sum)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(6), Value::test_int(1)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(6), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], sum, test_sum ); // ExprMean command // Expands to a command definition for mean aggregation -expr_command!( +lazy_expr_command!( ExprMean, "dfr mean", - "Creates a mean expression for an aggregation", - vec![Example { - description: "Mean aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] + "Creates a mean expression for an aggregation or aggregates columns to their mean value", + vec![ + Example { + description: "Mean value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Mean aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr mean)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(3.0), Value::test_float(1.0)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], mean, test_mean ); @@ -456,64 +617,93 @@ expr_command!( // ExprStd command // Expands to a command definition for std aggregation -expr_command!( +lazy_expr_command!( ExprStd, "dfr std", - "Creates a std expression for an aggregation", - vec![Example { - description: "Std aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + "Creates a std expression for an aggregation of std value from columns in a dataframe", + vec![ + Example { + description: "Std value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(2.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Std aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr std)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), Value::test_float(0.0)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], std, test_std, - 0 + 1 ); // ExprVar command // Expands to a command definition for var aggregation -expr_command!( +lazy_expr_command!( ExprVar, "dfr var", "Create a var expression for an aggregation", - vec![Example { - description: "Var aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + vec![ + Example { + description: + "Var value from columns in a dataframe or aggregates columns to their var value", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Var aggregation for a group-by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] | dfr into-df | dfr group-by a | dfr agg (dfr col b | dfr var)"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), Value::test_float(0.0)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], var, test_var, - 0 + 1 ); diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs index 941b3e508..ab098803d 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs @@ -50,12 +50,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) { ExprFlatten, ExprExplode, ExprCount, - ExprFirst, - ExprLast, - ExprNUnique, ExprIsIn, - ExprIsNotNull, - ExprIsNull, ExprNot, ExprMax, ExprMin, diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs index 1765861c9..70aab39ff 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs @@ -157,94 +157,6 @@ lazy_command!( test_cache ); -// LazyMax command -// Expands to a command definition for max aggregation -lazy_command!( - LazyMax, - "dfr max", - "Aggregates columns to their max value", - vec![Example { - description: "Max value from columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(6)],), - Column::new("b".to_string(), vec![Value::test_int(4)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - max, - test_max -); - -// LazyMin command -// Expands to a command definition for min aggregation -lazy_command!( - LazyMin, - "dfr min", - "Aggregates columns to their min value", - vec![Example { - description: "Min value from columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)],), - Column::new("b".to_string(), vec![Value::test_int(1)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - min, - test_min -); - -// LazySum command -// Expands to a command definition for sum aggregation -lazy_command!( - LazySum, - "dfr sum", - "Aggregates columns to their sum value", - vec![Example { - description: "Sums all columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(11)],), - Column::new("b".to_string(), vec![Value::test_int(7)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - sum, - test_sum -); - -// LazyMean command -// Expands to a command definition for mean aggregation -lazy_command!( - LazyMean, - "dfr mean", - "Aggregates columns to their mean value", - vec![Example { - description: "Mean value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(2.0)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - mean, - test_mean -); - // LazyMedian command // Expands to a command definition for median aggregation lazy_command!( @@ -266,49 +178,3 @@ lazy_command!( median, test_median ); - -// LazyStd command -// Expands to a command definition for std aggregation -lazy_command!( - LazyStd, - "dfr std", - "Aggregates columns to their std value", - vec![Example { - description: "Std value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(2.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - std, - test_std, - 1 -); - -// LazyVar command -// Expands to a command definition for var aggregation -lazy_command!( - LazyVar, - "dfr var", - "Aggregates columns to their var value", - vec![Example { - description: "Var value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - var, - test_var, - 1 -); diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs index 713c53b0b..3788e6511 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs @@ -49,13 +49,7 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) { LazyFilter, LazyJoin, LazyQuantile, - LazyMax, - LazyMin, - LazySum, - LazyMean, LazyMedian, - LazyStd, - LazyVar, LazyReverse, LazySelect, LazySortBy, diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs index 652d9cccd..e7fdcec35 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs @@ -1,4 +1,4 @@ -use super::super::super::values::{Column, NuDataFrame}; +use super::super::super::values::{Column, NuDataFrame, NuExpression}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -20,33 +20,46 @@ impl Command for IsNotNull { fn signature(&self) -> Signature { Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Create mask where values are not null", - example: r#"let s = ([5 6 0 8] | dfr into-df); + vec![ + Example { + description: "Create mask where values are not null", + example: r#"let s = ([5 6 0 8] | dfr into-df); let res = ($s / $s); $res | dfr is-not-null"#, - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_not_null".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(true), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "is_not_null".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(true), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is not null expression from a column", + example: "dfr col a | dfr is-not-null", + result: None, + }, + ] } fn run( @@ -56,8 +69,19 @@ impl Command for IsNotNull { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().is_not_null().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } } } @@ -76,11 +100,24 @@ fn command( #[cfg(test)] mod test { - use super::super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example}; #[test] - fn test_examples() { - test_dataframe(vec![Box::new(IsNotNull {})]) + fn test_examples_dataframe() { + let mut engine_state = build_test_engine_state(vec![Box::new(IsNotNull {})]); + test_dataframe_example(&mut engine_state, &IsNotNull.examples()[0]); + } + + #[test] + fn test_examples_expression() { + let mut engine_state = build_test_engine_state(vec![ + Box::new(IsNotNull {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &IsNotNull.examples()[1]); } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs index 4f3f5962f..77e32dfa5 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs @@ -1,4 +1,4 @@ -use super::super::super::values::{Column, NuDataFrame}; +use super::super::super::values::{Column, NuDataFrame, NuExpression}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -20,33 +20,46 @@ impl Command for IsNull { fn signature(&self) -> Signature { Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Create mask where values are null", - example: r#"let s = ([5 6 0 8] | dfr into-df); + vec![ + Example { + description: "Create mask where values are null", + example: r#"let s = ([5 6 0 8] | dfr into-df); let res = ($s / $s); $res | dfr is-null"#, - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "is_null".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "is_null".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(false), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is null expression from a column", + example: "dfr col a | dfr is-null", + result: None, + }, + ] } fn run( @@ -56,8 +69,19 @@ impl Command for IsNull { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().is_null().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } } } @@ -76,11 +100,24 @@ fn command( #[cfg(test)] mod test { - use super::super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example}; #[test] - fn test_examples() { - test_dataframe(vec![Box::new(IsNull {})]) + fn test_examples_dataframe() { + let mut engine_state = build_test_engine_state(vec![Box::new(IsNull {})]); + test_dataframe_example(&mut engine_state, &IsNull.examples()[0]); + } + + #[test] + fn test_examples_expression() { + let mut engine_state = build_test_engine_state(vec![ + Box::new(IsNull {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &IsNull.examples()[1]); } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs index 657790c5a..ddb11d82e 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs @@ -1,4 +1,4 @@ -use super::super::values::{Column, NuDataFrame}; +use super::super::values::{Column, NuDataFrame, NuExpression}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -19,26 +19,39 @@ impl Command for NUnique { fn signature(&self) -> Signature { Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Counts unique values", - example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "count_unique".to_string(), - vec![Value::test_int(4)], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Counts unique values", + example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "count_unique".to_string(), + vec![Value::test_int(4)], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is n-unique expression from a column", + example: "dfr col a | dfr n-unique", + result: None, + }, + ] } fn run( @@ -48,8 +61,19 @@ impl Command for NUnique { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) + let value = input.into_value(call.head); + if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + command(engine_state, stack, call, df) + } else { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr.into_polars().n_unique().into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } } } @@ -77,11 +101,24 @@ fn command( #[cfg(test)] mod test { - use super::super::super::test_dataframe::test_dataframe; + use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; #[test] - fn test_examples() { - test_dataframe(vec![Box::new(NUnique {})]) + fn test_examples_dataframe() { + let mut engine_state = build_test_engine_state(vec![Box::new(NUnique {})]); + test_dataframe_example(&mut engine_state, &NUnique.examples()[0]); + } + + #[test] + fn test_examples_expression() { + let mut engine_state = build_test_engine_state(vec![ + Box::new(NUnique {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]); + test_dataframe_example(&mut engine_state, &NUnique.examples()[1]); } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs index f6c4cc7bf..d030b074c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs @@ -2,7 +2,7 @@ use nu_engine::eval_block; use nu_parser::parse; use nu_protocol::{ engine::{Command, EngineState, Stack, StateWorkingSet}, - PipelineData, Span, + Example, PipelineData, Span, }; use super::eager::ToDataFrame; @@ -17,6 +17,14 @@ pub fn test_dataframe(cmds: Vec>) { // The first element in the cmds vector must be the one tested let examples = cmds[0].examples(); + let mut engine_state = build_test_engine_state(cmds.clone()); + + for example in examples { + test_dataframe_example(&mut engine_state, &example); + } +} + +pub fn build_test_engine_state(cmds: Vec>) -> Box { let mut engine_state = Box::new(EngineState::new()); let delta = { @@ -41,54 +49,55 @@ pub fn test_dataframe(cmds: Vec>) { .merge_delta(delta) .expect("Error merging delta"); - for example in examples { - // Skip tests that don't have results to compare to - if example.result.is_none() { - continue; + engine_state +} + +pub fn test_dataframe_example(engine_state: &mut Box, example: &Example) { + // Skip tests that don't have results to compare to + if example.result.is_none() { + return; + } + + let start = std::time::Instant::now(); + + let (block, delta) = { + let mut working_set = StateWorkingSet::new(&engine_state); + let output = parse(&mut working_set, None, example.example.as_bytes(), false); + + if let Some(err) = working_set.parse_errors.first() { + panic!("test parse error in `{}`: {:?}", example.example, err) } - let start = std::time::Instant::now(); - let (block, delta) = { - let mut working_set = StateWorkingSet::new(&engine_state); - let output = parse(&mut working_set, None, example.example.as_bytes(), false); + (output, working_set.render()) + }; - if let Some(err) = working_set.parse_errors.first() { - panic!("test parse error in `{}`: {:?}", example.example, err) - } + engine_state + .merge_delta(delta) + .expect("Error merging delta"); - (output, working_set.render()) - }; + let mut stack = Stack::new(); - engine_state - .merge_delta(delta) - .expect("Error merging delta"); + let result = eval_block( + &engine_state, + &mut stack, + &block, + PipelineData::empty(), + true, + true, + ) + .unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err)) + .into_value(Span::test_data()); - let mut stack = Stack::new(); + println!("input: {}", example.example); + println!("result: {result:?}"); + println!("done: {:?}", start.elapsed()); - let result = eval_block( - &engine_state, - &mut stack, - &block, - PipelineData::empty(), - true, - true, - ) - .unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err)) - .into_value(Span::test_data()); - - println!("input: {}", example.example); - println!("result: {result:?}"); - println!("done: {:?}", start.elapsed()); - - // Note. Value implements PartialEq for Bool, Int, Float, String and Block - // If the command you are testing requires to compare another case, then - // you need to define its equality in the Value struct - if let Some(expected) = example.result { - if result != expected { - panic!( - "the example result is different to expected value: {result:?} != {expected:?}" - ) - } + // Note. Value implements PartialEq for Bool, Int, Float, String and Block + // If the command you are testing requires to compare another case, then + // you need to define its equality in the Value struct + if let Some(expected) = example.result.clone() { + if result != expected { + panic!("the example result is different to expected value: {result:?} != {expected:?}") } } }