feat(polars): loosen constraints on accepted expressions in polars group-by (#15583)

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR lifts the constraint that expressions passed to the
`polars group-by` command must be of type `Expr::Column`. The command now
accepts most `Expr` types, in line with what the underlying polars crate
allows; only aggregation and window expressions are still rejected. This
change makes it possible to group by more complex expressions.

In the example below, we group by whether the day in column `a` is even
or odd. While we can reach the same result by creating a new column and
then grouping by it in two separate steps, combining these steps into a
single group-by delegates more of the work to the polars optimizer.

```nushell
#  Group by an expression and perform an aggregation
  > [[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
    | polars into-lazy
    | polars group-by (polars col a | polars get-day | $in mod 2)
    | polars agg [
        (polars col b | polars min | polars as "b_min")
        (polars col b | polars max | polars as "b_max")
        (polars col b | polars sum | polars as "b_sum")
     ]
    | polars collect
    | polars sort-by a
  ╭───┬───┬───────┬───────┬───────╮
  │ # │ a │ b_min │ b_max │ b_sum │
  ├───┼───┼───────┼───────┼───────┤
  │ 0 │ 0 │     2 │     4 │     6 │
  │ 1 │ 1 │     1 │     3 │     4 │
  ╰───┴───┴───────┴───────┴───────╯
```  

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. Users can now use more complex expressions in
`polars group-by`.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
An example is added to `polars group-by`.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-04-16 17:18:48 -04:00 committed by GitHub
parent 669b44ad7d
commit 2dc5c19b71
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -39,9 +39,10 @@ impl PluginCommand for ToLazyGroupBy {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Group by and perform an aggregation", Example {
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy | polars into-lazy
| polars group-by a | polars group-by a
| polars agg [ | polars agg [
@ -51,19 +52,45 @@ impl PluginCommand for ToLazyGroupBy {
] ]
| polars collect | polars collect
| polars sort-by a"#, | polars sort-by a"#,
result: Some( result: Some(
NuDataFrame::from( NuDataFrame::from(
df!( df!(
"a" => &[1i64, 2], "a" => &[1i64, 2],
"b_min" => &[2i64, 4], "b_min" => &[2i64, 4],
"b_max" => &[4i64, 6], "b_max" => &[4i64, 6],
"b_sum" => &[6i64, 10], "b_sum" => &[6i64, 10],
)
.expect("should not fail"),
) )
.expect("should not fail"), .into_value(Span::test_data()),
) ),
.into_value(Span::test_data()), },
), Example {
}] description: "Group by an expression and perform an aggregation",
example: r#"[[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
| polars into-lazy
| polars group-by (polars col a | polars get-day | $in mod 2)
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect
| polars sort-by a"#,
result: Some(
NuDataFrame::from(
df!(
"a" => &[0i64, 1],
"b_min" => &[2i64, 1],
"b_max" => &[4i64, 3],
"b_sum" => &[6i64, 4],
)
.expect("should not fail"),
)
.into_value(Span::test_data()),
),
},
]
} }
fn run( fn run(
@ -79,11 +106,11 @@ impl PluginCommand for ToLazyGroupBy {
if expressions if expressions
.iter() .iter()
.any(|expr| !matches!(expr, Expr::Column(..))) .any(|expr| matches!(expr, Expr::Agg(..) | Expr::Window { .. }))
{ {
let value: Value = call.req(0)?; let value: Value = call.req(0)?;
Err(ShellError::IncompatibleParametersSingle { Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only Col expressions".into(), msg: "Cannot group by an aggregation or window expression".into(),
span: value.span(), span: value.span(),
})?; })?;
} }