mirror of
https://github.com/nushell/nushell.git
synced 2025-05-19 17:30:45 +02:00
feat(polars): expand polars unique
to allow expression inputs (#15771)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> `polars unique` currently only operates on entire dataframes. This PR seeks to expand this command to handle expressions as well. See examples: ```nushell Returns unique values in a subset of lazyframe columns > [[a]; [2] [1] [2]] | polars into-lazy | polars select (polars col a | polars unique) | polars collect ╭───┬───╮ │ # │ a │ ├───┼───┤ │ 0 │ 1 │ │ 1 │ 2 │ ╰───┴───╯ Returns unique values in a subset of lazyframe columns, keeping their original order > [[a]; [2] [1] [2]] | polars into-lazy | polars select (polars col a | polars unique --maintain-order) | polars collect ╭───┬───╮ │ # │ a │ ├───┼───┤ │ 0 │ 2 │ │ 1 │ 1 │ ╰───┴───╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> No breaking changes. Users have the added option to use `polars unique` in an expression context. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Example tests have been added to `polars unique` # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
58a8f30a25
commit
457f162fd9
@ -4,7 +4,9 @@ use crate::{
|
|||||||
utils::{extract_sm_strs, extract_strings},
|
utils::{extract_sm_strs, extract_strings},
|
||||||
values::NuLazyFrame,
|
values::NuLazyFrame,
|
||||||
},
|
},
|
||||||
values::{CustomValueSupport, PolarsPluginObject, PolarsPluginType, cant_convert_err},
|
values::{
|
||||||
|
CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, cant_convert_err,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::values::{Column, NuDataFrame};
|
use crate::values::{Column, NuDataFrame};
|
||||||
@ -48,10 +50,16 @@ impl PluginCommand for Unique {
|
|||||||
"Keep the same order as the original DataFrame (lazy df)",
|
"Keep the same order as the original DataFrame (lazy df)",
|
||||||
Some('k'),
|
Some('k'),
|
||||||
)
|
)
|
||||||
.input_output_type(
|
.input_output_types(vec![
|
||||||
Type::Custom("dataframe".into()),
|
(
|
||||||
Type::Custom("dataframe".into()),
|
Type::Custom("dataframe".into()),
|
||||||
)
|
Type::Custom("dataframe".into()),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Type::Custom("expression".into()),
|
||||||
|
Type::Custom("expression".into()),
|
||||||
|
),
|
||||||
|
])
|
||||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -123,9 +131,40 @@ impl PluginCommand for Unique {
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
Example {
|
Example {
|
||||||
description: "Creates a is unique expression from a column",
|
description: "Returns unique values in a subset of lazyframe columns",
|
||||||
example: "col a | unique",
|
example: r#"[[a]; [2] [1] [2]]
|
||||||
result: None,
|
| polars into-lazy
|
||||||
|
| polars select (polars col a | polars unique)
|
||||||
|
| polars collect"#,
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::try_from_columns(
|
||||||
|
vec![Column::new(
|
||||||
|
"a".to_string(),
|
||||||
|
vec![Value::test_int(1), Value::test_int(2)],
|
||||||
|
)],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.expect("simple df for test should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Returns unique values in a subset of lazyframe columns",
|
||||||
|
example: r#"[[a]; [2] [1] [2]]
|
||||||
|
| polars into-lazy
|
||||||
|
| polars select (polars col a | polars unique --maintain-order)
|
||||||
|
| polars collect"#,
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::try_from_columns(
|
||||||
|
vec![Column::new(
|
||||||
|
"a".to_string(),
|
||||||
|
vec![Value::test_int(2), Value::test_int(1)],
|
||||||
|
)],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.expect("simple df for test should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -142,11 +181,21 @@ impl PluginCommand for Unique {
|
|||||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
||||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||||
|
PolarsPluginObject::NuExpression(expr) => {
|
||||||
|
let maintain = call.has_flag("maintain-order")?;
|
||||||
|
let res: NuExpression = if maintain {
|
||||||
|
expr.into_polars().unique_stable().into()
|
||||||
|
} else {
|
||||||
|
expr.into_polars().unique().into()
|
||||||
|
};
|
||||||
|
res.to_pipeline_data(plugin, engine, call.head)
|
||||||
|
}
|
||||||
_ => Err(cant_convert_err(
|
_ => Err(cant_convert_err(
|
||||||
&value,
|
&value,
|
||||||
&[
|
&[
|
||||||
PolarsPluginType::NuDataFrame,
|
PolarsPluginType::NuDataFrame,
|
||||||
PolarsPluginType::NuLazyGroupBy,
|
PolarsPluginType::NuLazyGroupBy,
|
||||||
|
PolarsPluginType::NuExpression,
|
||||||
],
|
],
|
||||||
)),
|
)),
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user