mirror of
https://github.com/nushell/nushell.git
synced 2025-05-01 08:34:26 +02:00
feat(polars): loosen constraints on accepted expressions in polars group-by
(#15583)
# Description

<!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. -->

This PR lifts the constraint that expressions passed to the `polars group-by` command must be of type `Expr::Column`. The command now accepts most `Expr` types, which is what the underlying polars crate allows. This change makes it possible to group by more complex expressions.

In the example below, we group by even or odd days of column `a`. While we can reach the same result by creating and grouping by a new column in two separate steps, integrating these steps in a single group-by allows for better delegation to the polars optimizer.

```nushell
# Group by an expression and perform an aggregation
> [[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
    | polars into-lazy
    | polars group-by (polars col a | polars get-day | $in mod 2)
    | polars agg [
        (polars col b | polars min | polars as "b_min")
        (polars col b | polars max | polars as "b_max")
        (polars col b | polars sum | polars as "b_sum")
    ]
    | polars collect
    | polars sort-by a
╭───┬───┬───────┬───────┬───────╮
│ # │ a │ b_min │ b_max │ b_sum │
├───┼───┼───────┼───────┼───────┤
│ 0 │ 0 │     2 │     4 │     6 │
│ 1 │ 1 │     1 │     3 │     4 │
╰───┴───┴───────┴───────┴───────╯
```

# User-Facing Changes

<!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. -->

No breaking changes. The user is empowered to use more complex expressions in `polars group-by`.

# Tests + Formatting

<!-- Don't forget to add tests that cover your changes.
Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

An example is added to `polars group-by`.

# After Submitting

<!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
669b44ad7d
commit
2dc5c19b71
@ -39,7 +39,8 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
@ -63,7 +64,33 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
},
|
||||
Example {
|
||||
description: "Group by an expression and perform an aggregation",
|
||||
example: r#"[[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by (polars col a | polars get-day | $in mod 2)
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect
|
||||
| polars sort-by a"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &[0i64, 1],
|
||||
"b_min" => &[2i64, 1],
|
||||
"b_max" => &[4i64, 3],
|
||||
"b_sum" => &[6i64, 4],
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -79,11 +106,11 @@ impl PluginCommand for ToLazyGroupBy {
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
.any(|expr| matches!(expr, Expr::Agg(..) | Expr::Window { .. }))
|
||||
{
|
||||
let value: Value = call.req(0)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only Col expressions".into(),
|
||||
msg: "Cannot group by an aggregation or window expression".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user