From ceaa0f93755bdf70148fc8c9aabbd0318c82c94c Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Mon, 14 Apr 2025 11:59:48 -0400 Subject: [PATCH] `polars`: add new command `polars over` (#15551) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Introducing a basic implementation of the polars expression for window functions: `over` (https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.over.html). Note that this PR only implements the default values for the sorting and `mapping_strategy` parameters. Implementations for other values for these parameters may be added in a future PR, as the demand arises. ```nushell # Compute expression over an aggregation window > [[a b]; [x 2] [x 4] [y 6] [y 4]] | polars into-lazy | polars select a (polars col b | polars cumulative sum | polars over a | polars as cum_b) | polars collect ╭───┬───┬───────╮ │ # │ a │ cum_b │ ├───┼───┼───────┤ │ 0 │ x │ 2 │ │ 1 │ x │ 6 │ │ 2 │ y │ 6 │ │ 3 │ y │ 10 │ ╰───┴───┴───────╯ # Compute expression over an aggregation window where partitions are defined by expressions > [[a b]; [x 2] [X 4] [Y 6] [y 4]] | polars into-lazy | polars select a (polars col b | polars cumulative sum | polars over (polars col a | polars lowercase) | polars as cum_b) | polars collect ╭───┬───┬───────╮ │ # │ a │ cum_b │ ├───┼───┼───────┤ │ 0 │ x │ 2 │ │ 1 │ X │ 6 │ │ 2 │ Y │ 6 │ │ 3 │ y │ 10 │ ╰───┴───┴───────╯ ```   # User-Facing Changes No breaking changes. This PR seeks to add a new command only. # Tests + Formatting Example tests are included. # After Submitting --- .../src/dataframe/command/aggregation/mod.rs | 3 + .../src/dataframe/command/aggregation/over.rs | 119 ++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/aggregation/over.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/aggregation/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/aggregation/mod.rs index f86b267df1..e00cb79fe1 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/aggregation/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/aggregation/mod.rs @@ -10,6 +10,7 @@ mod median; mod min; mod n_null; mod n_unique; +mod over; mod quantile; mod rolling; mod std; @@ -30,6 +31,7 @@ use mean::ExprMean; use min::ExprMin; pub use n_null::NNull; pub use n_unique::NUnique; +pub use over::Over; pub use rolling::Rolling; use std::ExprStd; pub use sum::ExprSum; @@ -52,6 +54,7 @@ pub(crate) fn aggregation_commands() -> Vec &str { + "polars over" + } + + fn description(&self) -> &str { + "Compute expressions over a window group defined by partition expressions." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "partition by expressions", + SyntaxShape::Any, + "Expression(s) that define the partition window", + ) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Compute expression over an aggregation window", + example: r#"[[a b]; [x 2] [x 4] [y 6] [y 4]] + | polars into-lazy + | polars select a (polars col b | polars cumulative sum | polars over a | polars as cum_b) + | polars collect"#, + result: Some( + NuDataFrame::from( + df!( + "a" => &["x", "x", "y", "y"], + "cum_b" => &[2, 6, 6, 10] + ) + .expect("should not fail"), + ) + .into_value(Span::test_data()), + ), + }, + Example { + description: "Compute expression over an aggregation window where partitions are defined by expressions", + example: r#"[[a b]; [x 2] [X 4] [Y 6] [y 4]] + | polars into-lazy + | polars select a (polars col b | polars cumulative sum | polars over (polars col a | polars lowercase) | polars as cum_b) + | polars collect"#, + result: Some( + NuDataFrame::from( + df!( + "a" => &["x", "X", "Y", "y"], + "cum_b" => &[2, 6, 6, 10] + ) + .expect("should not fail"), + ) + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let vals: Vec = call.rest(0)?; + let expr_value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, expr_value)?; + + let input_value = input.into_value(call.head)?; + + match PolarsPluginObject::try_from_value(plugin, &input_value)? { + PolarsPluginObject::NuExpression(expr) => { + let expr: NuExpression = expr + .into_polars() + .over_with_options(expressions, None, Default::default()) + .into(); + expr.to_pipeline_data(plugin, engine, call.head) + } + _ => Err(cant_convert_err( + &input_value, + &[PolarsPluginType::NuExpression], + )), + } + .map_err(LabeledError::from) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&Over) + } +}