feat(polars): expand polars unique to accept expression inputs (#15771)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
`polars unique` currently operates only on entire dataframes. This PR
extends the command so that it can also be applied to expressions. See
the examples below:

```nushell
  Returns unique values in a subset of lazyframe columns
  > [[a]; [2] [1] [2]]
    | polars into-lazy
    | polars select (polars col a | polars unique)
    | polars collect
  ╭───┬───╮
  │ # │ a │
  ├───┼───┤
  │ 0 │ 1 │
  │ 1 │ 2 │
  ╰───┴───╯

  Returns unique values in a subset of lazyframe columns
  > [[a]; [2] [1] [2]]
    | polars into-lazy
    | polars select (polars col a | polars unique --maintain-order)
    | polars collect
  ╭───┬───╮
  │ # │ a │
  ├───┼───┤
  │ 0 │ 2 │
  │ 1 │ 1 │
  ╰───┴───╯
```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. Users can now also use `polars unique` in an
expression context.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->
Example tests have been added to `polars unique`.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-05-17 12:26:26 -04:00 committed by GitHub
parent 58a8f30a25
commit 457f162fd9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4,7 +4,9 @@ use crate::{
utils::{extract_sm_strs, extract_strings},
values::NuLazyFrame,
},
values::{CustomValueSupport, PolarsPluginObject, PolarsPluginType, cant_convert_err},
values::{
CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, cant_convert_err,
},
};
use crate::values::{Column, NuDataFrame};
@ -48,10 +50,16 @@ impl PluginCommand for Unique {
"Keep the same order as the original DataFrame (lazy df)",
Some('k'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.input_output_types(vec![
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
])
.category(Category::Custom("dataframe or lazyframe".into()))
}
@ -123,9 +131,40 @@ impl PluginCommand for Unique {
),
},
Example {
description: "Creates a is unique expression from a column",
example: "col a | unique",
result: None,
description: "Returns unique values in a subset of lazyframe columns",
example: r#"[[a]; [2] [1] [2]]
| polars into-lazy
| polars select (polars col a | polars unique)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Returns unique values in a subset of lazyframe columns",
example: r#"[[a]; [2] [1] [2]]
| polars into-lazy
| polars select (polars col a | polars unique --maintain-order)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(1)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -142,11 +181,21 @@ impl PluginCommand for Unique {
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => {
let maintain = call.has_flag("maintain-order")?;
let res: NuExpression = if maintain {
expr.into_polars().unique_stable().into()
} else {
expr.into_polars().unique().into()
};
res.to_pipeline_data(plugin, engine, call.head)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyGroupBy,
PolarsPluginType::NuExpression,
],
)),
}