From 457f162fd9bcf0da67b098ba7538f6ed01cf4dc9 Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Sat, 17 May 2025 12:26:26 -0400 Subject: [PATCH] feat(polars): expand `polars unique` to allow expressions inputs (#15771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description `polars unique` currently only operates on entire dataframes. This PR seeks to expand this command to handle expressions as well. See examples: ```nushell Returns unique values in a subset of lazyframe columns > [[a]; [2] [1] [2]] | polars into-lazy | polars select (polars col a | polars unique) | polars collect ╭───┬───╮ │ # │ a │ ├───┼───┤ │ 0 │ 1 │ │ 1 │ 2 │ ╰───┴───╯ Returns unique values in a subset of lazyframe columns, preserving the original order > [[a]; [2] [1] [2]] | polars into-lazy | polars select (polars col a | polars unique --maintain-order) | polars collect ╭───┬───╮ │ # │ a │ ├───┼───┤ │ 0 │ 2 │ │ 1 │ 1 │ ╰───┴───╯ ``` # User-Facing Changes No breaking changes. Users have the added option to use `polars unique` in an expression context. 
# Tests + Formatting Example tests have been added to `polars unique` # After Submitting --- .../src/dataframe/command/data/unique.rs | 65 ++++++++++++++++--- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/unique.rs b/crates/nu_plugin_polars/src/dataframe/command/data/unique.rs index 566741741e..97f3ea30e4 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/unique.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/unique.rs @@ -4,7 +4,9 @@ use crate::{ utils::{extract_sm_strs, extract_strings}, values::NuLazyFrame, }, - values::{CustomValueSupport, PolarsPluginObject, PolarsPluginType, cant_convert_err}, + values::{ + CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, cant_convert_err, + }, }; use crate::values::{Column, NuDataFrame}; @@ -48,10 +50,16 @@ impl PluginCommand for Unique { "Keep the same order as the original DataFrame (lazy df)", Some('k'), ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) + .input_output_types(vec![ + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ]) .category(Category::Custom("dataframe or lazyframe".into())) } @@ -123,9 +131,40 @@ impl PluginCommand for Unique { ), }, Example { - description: "Creates a is unique expression from a column", - example: "col a | unique", - result: None, + description: "Returns unique values in a subset of lazyframe columns", + example: r#"[[a]; [2] [1] [2]] + | polars into-lazy + | polars select (polars col a | polars unique) + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Returns unique 
values in a subset of lazyframe columns", + example: r#"[[a]; [2] [1] [2]] + | polars into-lazy + | polars select (polars col a | polars unique --maintain-order) + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(2), Value::test_int(1)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, ] } @@ -142,11 +181,21 @@ impl PluginCommand for Unique { match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), + PolarsPluginObject::NuExpression(expr) => { + let maintain = call.has_flag("maintain-order")?; + let res: NuExpression = if maintain { + expr.into_polars().unique_stable().into() + } else { + expr.into_polars().unique().into() + }; + res.to_pipeline_data(plugin, engine, call.head) + } _ => Err(cant_convert_err( &value, &[ PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyGroupBy, + PolarsPluginType::NuExpression, ], )), }