From 966cebec3496d8c3b7b87c21288521cfd8d34534 Mon Sep 17 00:00:00 2001 From: Matthias Meschede Date: Wed, 12 Mar 2025 16:25:03 +0100 Subject: [PATCH] Adds polars list-contains command (#15304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds the `polars list-contains` command. It works like this: ``` ~/Projects/nushell/nushell> let df = [[a]; [[a,b,c]] [[b,c,d]] [[c,d,f]]] | polars into-df -s {a: list<str>}; ~/Projects/nushell/nushell> $df | polars with-column [(polars col a | polars list-contains (polars lit a) | polars as b)] | polars collect ╭───┬───────────┬───────╮ │ # │ a │ b │ ├───┼───────────┼───────┤ │ 0 │ ╭───┬───╮ │ true │ │ │ │ 0 │ a │ │ │ │ │ │ 1 │ b │ │ │ │ │ │ 2 │ c │ │ │ │ │ ╰───┴───╯ │ │ │ 1 │ ╭───┬───╮ │ false │ │ │ │ 0 │ b │ │ │ │ │ │ 1 │ c │ │ │ │ │ │ 2 │ d │ │ │ │ │ ╰───┴───╯ │ │ │ 2 │ ╭───┬───╮ │ false │ │ │ │ 0 │ c │ │ │ │ │ │ 1 │ d │ │ │ │ │ │ 2 │ f │ │ │ │ │ ╰───┴───╯ │ │ ╰───┴───────────┴───────╯ ``` or ``` ~/Projects/nushell/nushell> let df = [[a, b]; [[a,b,c], a] [[b,c,d], f] [[c,d,f], f]] | polars into-df -s {a: list<str>, b: str} ~/Projects/nushell/nushell> $df | polars with-column [(polars col a | polars list-contains b | polars as c)] | polars collect ╭───┬───────────┬───┬───────╮ │ # │ a │ b │ c │ ├───┼───────────┼───┼───────┤ │ 0 │ ╭───┬───╮ │ a │ true │ │ │ │ 0 │ a │ │ │ │ │ │ │ 1 │ b │ │ │ │ │ │ │ 2 │ c │ │ │ │ │ │ ╰───┴───╯ │ │ │ │ 1 │ ╭───┬───╮ │ f │ false │ │ │ │ 0 │ b │ │ │ │ │ │ │ 1 │ c │ │ │ │ │ │ │ 2 │ d │ │ │ │ │ │ ╰───┴───╯ │ │ │ │ 2 │ ╭───┬───╮ │ f │ true │ │ │ │ 0 │ c │ │ │ │ │ │ │ 1 │ d │ │ │ │ │ │ │ 2 │ f │ │ │ │ │ │ ╰───┴───╯ │ │ │ ╰───┴───────────┴───┴───────╯ ``` or ``` ~/Projects/nushell/nushell> let df = [[a, b]; [[1,2,3], 4] [[2,4,1], 2] [[2,1,6], 3]] | polars into-df -s {a: list<i64>, b: i64} ~/Projects/nushell/nushell> $df | polars with-column [(polars col a | polars list-contains ((polars col b) * 2) | polars as c)] | polars collect 
╭───┬───────────┬───┬───────╮ │ # │ a │ b │ c │ ├───┼───────────┼───┼───────┤ │ 0 │ ╭───┬───╮ │ 4 │ false │ │ │ │ 0 │ 1 │ │ │ │ │ │ │ 1 │ 2 │ │ │ │ │ │ │ 2 │ 3 │ │ │ │ │ │ ╰───┴───╯ │ │ │ │ 1 │ ╭───┬───╮ │ 2 │ true │ │ │ │ 0 │ 2 │ │ │ │ │ │ │ 1 │ 4 │ │ │ │ │ │ │ 2 │ 1 │ │ │ │ │ │ ╰───┴───╯ │ │ │ │ 2 │ ╭───┬───╮ │ 3 │ true │ │ │ │ 0 │ 2 │ │ │ │ │ │ │ 1 │ 1 │ │ │ │ │ │ │ 2 │ 6 │ │ │ │ │ │ ╰───┴───╯ │ │ │ ╰───┴───────────┴───┴───────╯ ``` Let me know what you think. I'm a bit surprised that a list by default seems to get converted to "object" when doing `into-df` which is why I added the extra `-s` flag every time to explicitly force it into a list. --- .../src/dataframe/command/list/contains.rs | 168 ++++++++++++++++++ .../src/dataframe/command/list/mod.rs | 10 ++ .../src/dataframe/command/mod.rs | 1 + crates/nu_plugin_polars/src/lib.rs | 3 +- 4 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/list/contains.rs create mode 100644 crates/nu_plugin_polars/src/dataframe/command/list/mod.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/list/contains.rs b/crates/nu_plugin_polars/src/dataframe/command/list/contains.rs new file mode 100644 index 0000000000..16902541c8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/command/list/contains.rs @@ -0,0 +1,168 @@ +use crate::{ + values::{ + cant_convert_err, CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, + }, + PolarsPlugin, +}; + +use super::super::super::values::{Column, NuDataFrame}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +/// Plugin command `polars list-contains`: checks if an element is contained in a list. +#[derive(Clone)] +pub struct ListContains; + +impl PluginCommand for ListContains { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars list-contains" + } + + fn description(&self) -> &str { + "Checks if an element is 
contained in a list." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "element", + SyntaxShape::Any, + "Element to search for in the list", + ) + .input_output_types(vec![( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + )]) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec<Example> { + vec![ + Example { + description: "Returns boolean indicating if a literal element was found in a list column", + example: "let df = [[a]; [[a,b,c]] [[b,c,d]] [[c,d,f]]] | polars into-df -s {a: list<str>}; + let df2 = $df | polars with-column [(polars col a | polars list-contains (polars lit a) | polars as b)] | polars collect; + $df2.b", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(false), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Returns boolean indicating if an element from another column was found in a list column", + example: "let df = [[a, b]; [[a,b,c], a] [[b,c,d], f] [[c,d,f], f]] | polars into-df -s {a: list<str>, b: str}; + let df2 = $df | polars with-column [(polars col a | polars list-contains b | polars as c)] | polars collect; + $df2.c", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "c".to_string(), + vec![ + Value::test_bool(true), + Value::test_bool(false), + Value::test_bool(true), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Returns boolean indicating if an element from another expression was found in a list column", + example: "let df = [[a, b]; [[1,2,3], 4] [[2,4,1], 2] [[2,1,6], 3]] | polars into-df -s {a: list<i64>, b: i64}; + let df2 = $df | polars with-column [(polars col a | polars list-contains ((polars col b) * 2) | polars as c)] | polars 
collect; + $df2.c", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "c".to_string(), + vec![ + Value::test_bool(false), + Value::test_bool(true), + Value::test_bool(true), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + } + ] + } + + /// Reads the pipeline input as a value and, when it is a polars expression, hands it to + /// `command_expr`; any other polars plugin object yields a cannot-convert error. + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result<PipelineData, LabeledError> { + let value = input.into_value(call.head)?; + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr), + _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])), + } + .map_err(LabeledError::from) + } +} + +/// Builds `expr.list().contains(element)` from the command's single positional argument. +/// +/// # Errors +/// Returns `ShellError::GenericError` unless the argument yields exactly one polars expression. +fn command_expr( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, +) -> Result<PipelineData, ShellError> { + let element = call.req(0)?; + let expressions = NuExpression::extract_exprs(plugin, element)?; + let single_expression = match expressions.as_slice() { + [single] => single.clone(), + _ => { + return Err(ShellError::GenericError { + error: "Expected a single polars expression".into(), + msg: "Requires a single polars expression or column name as argument".into(), + span: Some(call.head), + help: None, + inner: vec![], + }) + } + }; + let res: NuExpression = expr.into_polars().list().contains(single_expression).into(); + res.to_pipeline_data(plugin, engine, call.head) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ListContains) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/command/list/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/list/mod.rs new file mode 100644 index 0000000000..ea90505a9a --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/command/list/mod.rs @@ -0,0 +1,10 @@ +mod contains; + +use crate::PolarsPlugin; 
+use nu_plugin::PluginCommand; + +pub use contains::ListContains; + +pub(crate) fn list_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> { + vec![Box::new(ListContains)] // all commands of this module; extend when adding one +} diff --git a/crates/nu_plugin_polars/src/dataframe/command/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/mod.rs index 50c0d522a1..c439411b75 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/mod.rs @@ -5,5 +5,6 @@ pub mod data; pub mod datetime; pub mod index; pub mod integer; +pub mod list; pub mod string; pub mod stub; diff --git a/crates/nu_plugin_polars/src/lib.rs b/crates/nu_plugin_polars/src/lib.rs index d9c7e6d392..f497f59567 100644 --- a/crates/nu_plugin_polars/src/lib.rs +++ b/crates/nu_plugin_polars/src/lib.rs @@ -8,7 +8,7 @@ pub use cache::{Cache, Cacheable}; use command::{ aggregation::aggregation_commands, boolean::boolean_commands, core::core_commands, data::data_commands, datetime::datetime_commands, index::index_commands, - integer::integer_commands, string::string_commands, stub::PolarsCmd, + integer::integer_commands, list::list_commands, string::string_commands, stub::PolarsCmd, }; use log::debug; use nu_plugin::{EngineInterface, Plugin, PluginCommand}; @@ -93,6 +93,7 @@ impl Plugin for PolarsPlugin { commands.append(&mut index_commands()); commands.append(&mut integer_commands()); commands.append(&mut string_commands()); + commands.append(&mut list_commands()); commands.append(&mut cache_commands()); commands