polars: extend NuExpression::extract_exprs to handle records (#15553)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR seeks to simplify the syntax for commands that handle a list of
expressions (e.g., `select`, `with-column`, and `agg`) by enabling the
user to replace a list of expressions each aliased with `polars as` to a
single record where the key is the alias for the value. See below for
examples in several contexts.

```nushell
#  Select a column from a dataframe using a record
  > [[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select {c: ((polars col a) * 2)}
  ╭───┬────╮
  │ # │ c  │
  ├───┼────┤
  │ 0 │ 12 │
  │ 1 │  8 │
  │ 2 │  4 │
  ╰───┴────╯

#  Select a column from a dataframe using a mix of expressions and record of expressions
  > [[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a b {c: ((polars col a) * 2)}
  ╭───┬───┬───┬────╮
  │ # │ a │ b │ c  │
  ├───┼───┼───┼────┤
  │ 0 │ 6 │ 2 │ 12 │
  │ 1 │ 4 │ 2 │  8 │
  │ 2 │ 2 │ 2 │  4 │
  ╰───┴───┴───┴────╯

#  Add series to the dataframe using a record
  > [[a b]; [1 2] [3 4]]
    | polars into-lazy
    | polars with-column {
        c: ((polars col a) * 2)
        d: ((polars col a) * 3)
      }
    | polars collect
  ╭───┬───┬───┬───┬───╮
  │ # │ a │ b │ c │ d │
  ├───┼───┼───┼───┼───┤
  │ 0 │ 1 │ 2 │ 2 │ 3 │
  │ 1 │ 3 │ 4 │ 6 │ 9 │
  ╰───┴───┴───┴───┴───╯

#  Group by and perform an aggregation using a record
  > [[a b]; [1 2] [1 4] [2 6] [2 4]]
                | polars into-lazy
                | polars group-by a
                | polars agg {
                    b_min: (polars col b | polars min)
                    b_max: (polars col b | polars max)
                    b_sum: (polars col b | polars sum)
                 }
                | polars collect
                | polars sort-by a
  ╭───┬───┬───────┬───────┬───────╮
  │ # │ a │ b_min │ b_max │ b_sum │
  ├───┼───┼───────┼───────┼───────┤
  │ 0 │ 1 │     2 │     4 │     6 │
  │ 1 │ 2 │     4 │     6 │    10 │
  ╰───┴───┴───────┴───────┴───────╯

```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. Users now can use a mix of lists of expressions and
records of expressions where previously only lists of expressions were
accepted (e.g., in `select`, `with-column`, and `agg`).

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
Example tests were added to `select`, `with-column`, and `agg`.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-04-14 19:56:52 -04:00 committed by GitHub
parent d601abaee0
commit 4e307480e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 170 additions and 44 deletions

View File

@ -40,9 +40,10 @@ impl PluginCommand for LazyAggregate {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
@ -52,32 +53,71 @@ impl PluginCommand for LazyAggregate {
]
| polars collect
| polars sort-by a"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation using a record",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg {
b_min: (polars col b | polars min)
b_max: (polars col b | polars max)
b_sum: (polars col b | polars sum)
}
| polars collect
| polars sort-by a"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(

View File

@ -37,21 +37,62 @@ impl PluginCommand for LazySelect {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
vec![
Example {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Select a column from a dataframe using a record",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select {c: ((polars col a) * 2)}",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"c".to_string(),
vec![Value::test_int(12), Value::test_int(8), Value::test_int(4)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Select a column from a dataframe using a mix of expressions and record of expressions",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a b {c: ((polars col a) * 2)}",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)]),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2)]),
Column::new(
"c".to_string(),
vec![Value::test_int(12), Value::test_int(8), Value::test_int(4)])
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(

View File

@ -76,6 +76,41 @@ impl PluginCommand for WithColumn {
((polars col a) * 2 | polars as "c")
((polars col a) * 3 | polars as "d")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Add series to the dataframe using a record",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-lazy
| polars with-column {
c: ((polars col a) * 2)
d: ((polars col a) * 3)
}
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(

View File

@ -126,6 +126,16 @@ impl ExtractedExpr {
Value::Custom { .. } => NuExpression::try_from_value(plugin, &value)
.map(NuExpression::into_polars)
.map(ExtractedExpr::Single),
Value::Record { val, .. } => val
.iter()
.map(|(key, value)| {
NuExpression::try_from_value(plugin, value)
.map(NuExpression::into_polars)
.map(|expr| expr.alias(key))
.map(ExtractedExpr::Single)
})
.collect::<Result<Vec<ExtractedExpr>, ShellError>>()
.map(ExtractedExpr::List),
Value::List { vals, .. } => vals
.into_iter()
.map(|x| Self::extract_exprs(plugin, x))