From 2dc5c19b714279f403895b210bb10155e2bb333c Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Wed, 16 Apr 2025 17:18:48 -0400 Subject: [PATCH] feat(polars): loosen constraints on accepted expressions in `polars group-by` (#15583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR lifts the constraint that expressions passed to the `polars group-by` command must be of type `Expr::Column`. The command now accepts most `Expr` types, in line with what the underlying polars crate allows; only aggregation and window expressions are still rejected. This change makes it possible to group by more complex expressions. In the example below, we group by whether the day of the month in column `a` is even or odd. While we can reach the same result by creating and grouping by a new column in two separate steps, integrating these steps in a single group-by allows for better delegation to the polars optimizer. ```nushell # Group by an expression and perform an aggregation > [[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]] | polars into-lazy | polars group-by (polars col a | polars get-day | $in mod 2) | polars agg [ (polars col b | polars min | polars as "b_min") (polars col b | polars max | polars as "b_max") (polars col b | polars sum | polars as "b_sum") ] | polars collect | polars sort-by a ╭───┬───┬───────┬───────┬───────╮ │ # │ a │ b_min │ b_max │ b_sum │ ├───┼───┼───────┼───────┼───────┤ │ 0 │ 0 │ 2 │ 4 │ 6 │ │ 1 │ 1 │ 1 │ 3 │ 4 │ ╰───┴───┴───────┴───────┴───────╯ ``` # User-Facing Changes No breaking changes. The user is empowered to use more complex expressions in `polars group-by`. # Tests + Formatting An example is added to `polars group-by`. 
# After Submitting --- .../dataframe/command/aggregation/groupby.rs | 61 +++++++++++++------ 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/aggregation/groupby.rs b/crates/nu_plugin_polars/src/dataframe/command/aggregation/groupby.rs index 24372e47aa..8a98864655 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/aggregation/groupby.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/aggregation/groupby.rs @@ -39,9 +39,10 @@ impl PluginCommand for ToLazyGroupBy { } fn examples(&self) -> Vec { - vec![Example { - description: "Group by and perform an aggregation", - example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] + vec![ + Example { + description: "Group by and perform an aggregation", + example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] | polars into-lazy | polars group-by a | polars agg [ @@ -51,19 +52,45 @@ impl PluginCommand for ToLazyGroupBy { ] | polars collect | polars sort-by a"#, - result: Some( - NuDataFrame::from( - df!( - "a" => &[1i64, 2], - "b_min" => &[2i64, 4], - "b_max" => &[4i64, 6], - "b_sum" => &[6i64, 10], + result: Some( + NuDataFrame::from( + df!( + "a" => &[1i64, 2], + "b_min" => &[2i64, 4], + "b_max" => &[4i64, 6], + "b_sum" => &[6i64, 10], + ) + .expect("should not fail"), ) - .expect("should not fail"), - ) - .into_value(Span::test_data()), - ), - }] + .into_value(Span::test_data()), + ), + }, + Example { + description: "Group by an expression and perform an aggregation", + example: r#"[[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]] + | polars into-lazy + | polars group-by (polars col a | polars get-day | $in mod 2) + | polars agg [ + (polars col b | polars min | polars as "b_min") + (polars col b | polars max | polars as "b_max") + (polars col b | polars sum | polars as "b_sum") + ] + | polars collect + | polars sort-by a"#, + result: Some( + NuDataFrame::from( + df!( + "a" => &[0i64, 1], + "b_min" => &[2i64, 1], + "b_max" => &[4i64, 3], + "b_sum" => 
&[6i64, 4], + ) + .expect("should not fail"), + ) + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -79,11 +106,11 @@ impl PluginCommand for ToLazyGroupBy { if expressions .iter() - .any(|expr| !matches!(expr, Expr::Column(..))) + .any(|expr| matches!(expr, Expr::Agg(..) | Expr::Window { .. })) { let value: Value = call.req(0)?; Err(ShellError::IncompatibleParametersSingle { - msg: "Expected only Col expressions".into(), + msg: "Cannot group by an aggregation or window expression".into(), span: value.span(), })?; }