mirror of
https://github.com/nushell/nushell.git
synced 2025-05-01 08:34:26 +02:00
feat(polars): loosen constraints on accepted expressions in polars group-by
(#15583)
# Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> This PR lifts the constraint that expressions passed to the `polars group-by` command must be of type `Expr::Column`. The command now accepts most `Expr` types, which is what the underlying polars crate allows; only aggregation and window expressions (`Expr::Agg`, `Expr::Window`) are still rejected. This change enables grouping by more complex expressions. In the example below, we group by whether the day of column `a` is even or odd. While we can reach the same result by creating and grouping by a new column in two separate steps, integrating these steps in a single group-by allows for better delegation to the polars optimizer. ```nushell # Group by an expression and perform an aggregation > [[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]] | polars into-lazy | polars group-by (polars col a | polars get-day | $in mod 2) | polars agg [ (polars col b | polars min | polars as "b_min") (polars col b | polars max | polars as "b_max") (polars col b | polars sum | polars as "b_sum") ] | polars collect | polars sort-by a ╭───┬───┬───────┬───────┬───────╮ │ # │ a │ b_min │ b_max │ b_sum │ ├───┼───┼───────┼───────┼───────┤ │ 0 │ 0 │ 2 │ 4 │ 6 │ │ 1 │ 1 │ 1 │ 3 │ 4 │ ╰───┴───┴───────┴───────┴───────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> No breaking changes. The user is empowered to use more complex expressions in `polars group-by`. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> An example is added to `polars group-by`. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
669b44ad7d
commit
2dc5c19b71
@ -39,9 +39,10 @@ impl PluginCommand for ToLazyGroupBy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![Example {
|
vec![
|
||||||
description: "Group by and perform an aggregation",
|
Example {
|
||||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
description: "Group by and perform an aggregation",
|
||||||
|
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||||
| polars into-lazy
|
| polars into-lazy
|
||||||
| polars group-by a
|
| polars group-by a
|
||||||
| polars agg [
|
| polars agg [
|
||||||
@ -51,19 +52,45 @@ impl PluginCommand for ToLazyGroupBy {
|
|||||||
]
|
]
|
||||||
| polars collect
|
| polars collect
|
||||||
| polars sort-by a"#,
|
| polars sort-by a"#,
|
||||||
result: Some(
|
result: Some(
|
||||||
NuDataFrame::from(
|
NuDataFrame::from(
|
||||||
df!(
|
df!(
|
||||||
"a" => &[1i64, 2],
|
"a" => &[1i64, 2],
|
||||||
"b_min" => &[2i64, 4],
|
"b_min" => &[2i64, 4],
|
||||||
"b_max" => &[4i64, 6],
|
"b_max" => &[4i64, 6],
|
||||||
"b_sum" => &[6i64, 10],
|
"b_sum" => &[6i64, 10],
|
||||||
|
)
|
||||||
|
.expect("should not fail"),
|
||||||
)
|
)
|
||||||
.expect("should not fail"),
|
.into_value(Span::test_data()),
|
||||||
)
|
),
|
||||||
.into_value(Span::test_data()),
|
},
|
||||||
),
|
Example {
|
||||||
}]
|
description: "Group by an expression and perform an aggregation",
|
||||||
|
example: r#"[[a b]; [2025-04-01 1] [2025-04-02 2] [2025-04-03 3] [2025-04-04 4]]
|
||||||
|
| polars into-lazy
|
||||||
|
| polars group-by (polars col a | polars get-day | $in mod 2)
|
||||||
|
| polars agg [
|
||||||
|
(polars col b | polars min | polars as "b_min")
|
||||||
|
(polars col b | polars max | polars as "b_max")
|
||||||
|
(polars col b | polars sum | polars as "b_sum")
|
||||||
|
]
|
||||||
|
| polars collect
|
||||||
|
| polars sort-by a"#,
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::from(
|
||||||
|
df!(
|
||||||
|
"a" => &[0i64, 1],
|
||||||
|
"b_min" => &[2i64, 1],
|
||||||
|
"b_max" => &[4i64, 3],
|
||||||
|
"b_sum" => &[6i64, 4],
|
||||||
|
)
|
||||||
|
.expect("should not fail"),
|
||||||
|
)
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(
|
fn run(
|
||||||
@ -79,11 +106,11 @@ impl PluginCommand for ToLazyGroupBy {
|
|||||||
|
|
||||||
if expressions
|
if expressions
|
||||||
.iter()
|
.iter()
|
||||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
.any(|expr| matches!(expr, Expr::Agg(..) | Expr::Window { .. }))
|
||||||
{
|
{
|
||||||
let value: Value = call.req(0)?;
|
let value: Value = call.req(0)?;
|
||||||
Err(ShellError::IncompatibleParametersSingle {
|
Err(ShellError::IncompatibleParametersSingle {
|
||||||
msg: "Expected only Col expressions".into(),
|
msg: "Cannot group by an aggregation or window expression".into(),
|
||||||
span: value.span(),
|
span: value.span(),
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user