From 8b9f02246f5d78b366e7cc2a905be0b38a39a0a2 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Fri, 30 May 2025 10:56:44 -0700 Subject: [PATCH] Allow `polars first` to be used with `polars group-by` (#15855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Provides functionality similar to https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.dataframe.group_by.GroupBy.first.html by allowing polars first to be used with a group by ``` > ❯ : [[a b c d]; [1 0.5 true Apple] [2 0.5 true Orange] [2 4 true Apple] [3 10 false Apple] [4 13 false Banana] [5 14 true Banana]] | polars into-df -s {a: u8, b: f32, c: bool, d: str} | polars group-by d | polars first | polars collect ╭───┬────────┬───┬───────┬───────╮ │ # │ d │ a │ b │ c │ ├───┼────────┼───┼───────┼───────┤ │ 0 │ Apple │ 1 │ 0.50 │ true │ │ 1 │ Banana │ 4 │ 13.00 │ false │ │ 2 │ Orange │ 2 │ 0.50 │ true │ ╰───┴────────┴───┴───────┴───────╯ ``` Additionally, I am setting the POLARS_ALLOW_EXTENSION to true to avoid panicking with operations using the dtype object. The conversion will fall back to object when the type cannot be determined, so this could be a common case. 
# User-Facing Changes - `polars first` can now be used with `polars group-by` --------- Co-authored-by: Jack Wright --- .../src/dataframe/command/data/first.rs | 42 ++++++++++++++++++- crates/nu_plugin_polars/src/main.rs | 7 ++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/crates/nu_plugin_polars/src/dataframe/command/data/first.rs b/crates/nu_plugin_polars/src/dataframe/command/data/first.rs index d09f415ccf..a63fa5595f 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/data/first.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/data/first.rs @@ -1,6 +1,6 @@ use crate::{ PolarsPlugin, - values::{Column, CustomValueSupport, NuLazyFrame, PolarsPluginObject}, + values::{Column, CustomValueSupport, NuLazyFrame, NuLazyGroupBy, PolarsPluginObject}, }; use crate::values::{NuDataFrame, NuExpression}; @@ -9,6 +9,7 @@ use nu_protocol::{ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use polars::df; #[derive(Clone)] pub struct FirstDF; @@ -87,6 +88,24 @@ impl PluginCommand for FirstDF { example: "polars col a | polars first", result: None, }, + Example { + description: "Aggregate the first values in the group.", + example: "[[a b c d]; [1 0.5 true Apple] [2 0.5 true Orange] [2 4 true Apple] [3 10 false Apple] [4 13 false Banana] [5 14 true Banana]] | polars into-df -s {a: u8, b: f32, c: bool, d: str} | polars group-by d | polars first | polars sort-by [a] | polars collect", + result: Some( + NuDataFrame::new( + false, + df!( + "d" => &["Apple", "Orange", "Banana"], + "a" => &[1, 2, 4], + "b" => &[0.5, 0.5, 13.0], + "c" => &[true, true, false], + + ) + .expect("dataframe creation should succeed"), + ) + .into_value(Span::test_data()), + ), + }, ] } @@ -106,6 +125,9 @@ impl PluginCommand for FirstDF { PolarsPluginObject::NuLazyFrame(lazy) => { command_lazy(plugin, engine, call, lazy).map_err(|e| e.into()) } + PolarsPluginObject::NuLazyGroupBy(groupby) => { + command_groupby(plugin, 
engine, call, groupby).map_err(|e| e.into()) + } _ => { let expr = NuExpression::try_from_value(plugin, &value)?; let expr: NuExpression = expr.into_polars().first().into(); @@ -146,6 +168,20 @@ fn command_lazy( res.to_pipeline_data(plugin, engine, call.head) } +fn command_groupby( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + groupby: NuLazyGroupBy, +) -> Result { + let rows: Option = call.opt(0)?; + let rows = rows.unwrap_or(1); + let res = groupby.to_polars().head(Some(rows)); + let res: NuLazyFrame = res.into(); + + res.to_pipeline_data(plugin, engine, call.head) +} + #[cfg(test)] mod test { use super::*; @@ -153,6 +189,10 @@ mod test { #[test] fn test_examples() -> Result<(), ShellError> { + // // Extensions are required for the group-by functionality to work + // unsafe { + // std::env::set_var("POLARS_ALLOW_EXTENSION", "true"); + // } test_polars_plugin_command(&FirstDF) } } diff --git a/crates/nu_plugin_polars/src/main.rs b/crates/nu_plugin_polars/src/main.rs index a63bab32ba..02b2d256bf 100644 --- a/crates/nu_plugin_polars/src/main.rs +++ b/crates/nu_plugin_polars/src/main.rs @@ -4,6 +4,13 @@ use nu_plugin_polars::PolarsPlugin; fn main() { env_logger::init(); + // Set config options via environment variable + unsafe { + // Extensions are required for certain things like aggregates with object dtypes to work + // correctly. It is disabled by default because of unsafe code. + // See https://docs.rs/polars/latest/polars/#user-guide for details + std::env::set_var("POLARS_ALLOW_EXTENSION", "true"); + } match PolarsPlugin::new() { Ok(ref plugin) => serve_plugin(plugin, MsgPackSerializer {}), Err(e) => {