From e7c5f834609f26c1cbb7bfa0ff8a3282458347c6 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Thu, 5 Sep 2024 09:26:09 -0700 Subject: [PATCH] Added expression support for `polars str-lengths` (#13782) # Description Allows `polars str-lengths` to be used as an expression: Screenshot 2024-09-04 at 13 57 45 # User-Facing Changes - `polars str-lengths` can be used as an expression. - char length is now the default. Use the --bytes flag to get bytes length. --- .../dataframe/series/string/str_lengths.rs | 114 ++++++++++++++---- 1 file changed, 90 insertions(+), 24 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs b/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs index 417d3c0b90..78d8030b06 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/string/str_lengths.rs @@ -1,4 +1,9 @@ -use crate::{values::CustomValueSupport, PolarsPlugin}; +use crate::{ + values::{ + cant_convert_err, CustomValueSupport, NuExpression, PolarsPluginObject, PolarsPluginType, + }, + PolarsPlugin, +}; use super::super::super::values::{Column, NuDataFrame}; @@ -24,29 +29,57 @@ impl PluginCommand for StrLengths { fn signature(&self) -> Signature { Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), + .switch( + "bytes", + "Get the length in bytes instead of chars.", + Some('b'), ) + .input_output_types(vec![ + ( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ), + ( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ), + ]) .category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Returns string lengths", - example: "[a ab abc] | polars into-df | polars str-lengths", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Returns string lengths for a column", + example: "[[a]; [a] [ab] [abc]] | polars into-df | polars select (polars col a | polars str-lengths) | polars collect", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Returns string lengths", + example: "[a ab abc] | polars into-df | polars str-lengths", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -56,17 +89,46 @@ impl PluginCommand for StrLengths { call: &EvaluatedCall, input: PipelineData, ) -> Result { - command(plugin, engine, call, input).map_err(LabeledError::from) + let value = input.into_value(call.head)?; + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df), + PolarsPluginObject::NuLazyFrame(lazy) => { + command_df(plugin, engine, call, lazy.collect(call.head)?) + } + PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr), + _ => Err(cant_convert_err( + &value, + &[ + PolarsPluginType::NuDataFrame, + PolarsPluginType::NuLazyFrame, + PolarsPluginType::NuExpression, + ], + )), + } + .map_err(LabeledError::from) } } -fn command( +fn command_expr( plugin: &PolarsPlugin, engine: &EngineInterface, call: &EvaluatedCall, - input: PipelineData, + expr: NuExpression, +) -> Result { + let res: NuExpression = if call.has_flag("bytes")? { + expr.into_polars().str().len_bytes().into() + } else { + expr.into_polars().str().len_chars().into() + }; + res.to_pipeline_data(plugin, engine, call.head) +} + +fn command_df( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + df: NuDataFrame, ) -> Result { - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let series = df.as_series(call.head)?; let chunked = series.str().map_err(|e| ShellError::GenericError { @@ -77,7 +139,11 @@ fn command( inner: vec![], })?; - let res = chunked.as_ref().str_len_bytes().into_series(); + let res = if call.has_flag("bytes")? { + chunked.as_ref().str_len_bytes().into_series() + } else { + chunked.as_ref().str_len_chars().into_series() + }; let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?; df.to_pipeline_data(plugin, engine, call.head)