forked from extern/nushell
feat(polars): loosen constraints on accepted expressions in polars group-by (#15583)
# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.
Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR lifts the constraint that expressions passed to the `polars group-by`
command must be of type `Expr::Column`. Most other `Expr` types are now
accepted, in line with what the underlying polars crate allows; only
aggregation and window expressions are still rejected. This change enables
grouping by more complex expressions.
In the example below, we group by whether the day of the month in column `a`
is even or odd. While we can reach the same result by first creating a new
column and then grouping by it in a separate step, combining both steps in a
single group-by allows for better delegation to the polars optimizer.
```nushell
# Group by an expression and perform an aggregation
> [[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
| polars into-lazy
| polars group-by (polars col a | polars get-day | $in mod 2)
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect
| polars sort-by a
╭───┬───┬───────┬───────┬───────╮
│ # │ a │ b_min │ b_max │ b_sum │
├───┼───┼───────┼───────┼───────┤
│ 0 │ 0 │ 2 │ 4 │ 6 │
│ 1 │ 1 │ 1 │ 3 │ 4 │
╰───┴───┴───────┴───────┴───────╯
```
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. Users can now pass more complex expressions to
`polars group-by`; aggregation and window expressions are still rejected.
# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands:
- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library
> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
An example is added to `polars group-by`.
# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
@@ -39,9 +39,10 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
@@ -51,19 +52,45 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
]
|
||||
| polars collect
|
||||
| polars sort-by a"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &[1i64, 2],
|
||||
"b_min" => &[2i64, 4],
|
||||
"b_max" => &[4i64, 6],
|
||||
"b_sum" => &[6i64, 10],
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &[1i64, 2],
|
||||
"b_min" => &[2i64, 4],
|
||||
"b_max" => &[4i64, 6],
|
||||
"b_sum" => &[6i64, 10],
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Group by an expression and perform an aggregation",
|
||||
example: r#"[[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by (polars col a | polars get-day | $in mod 2)
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect
|
||||
| polars sort-by a"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &[0i64, 1],
|
||||
"b_min" => &[2i64, 1],
|
||||
"b_max" => &[4i64, 3],
|
||||
"b_sum" => &[6i64, 4],
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@@ -79,11 +106,11 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
.any(|expr| matches!(expr, Expr::Agg(..) | Expr::Window { .. }))
|
||||
{
|
||||
let value: Value = call.req(0)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only Col expressions".into(),
|
||||
msg: "Cannot group by an aggregation or window expression".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user