From a39e94de8a8ad4cd889e322ce9f05aab344fd155 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 28 Aug 2024 05:54:31 -0700 Subject: [PATCH] Added polars commands for converting string columns to integer and decimal columns (#13711) # Description Introduces two new polars commands for converting string columns to decimal and integer columns: Screenshot 2024-08-27 at 15 32 28 Screenshot 2024-08-27 at 15 33 46 # User-Facing Changes - Addition of the `polars integer` command - Addition of the `polars decimal` command --- crates/nu_plugin_polars/Cargo.toml | 6 +- .../src/dataframe/lazy/select.rs | 2 +- .../src/dataframe/series/mod.rs | 2 + .../src/dataframe/series/string/mod.rs | 4 + .../src/dataframe/series/string/to_decimal.rs | 111 ++++++++++++++++++ .../src/dataframe/series/string/to_integer.rs | 97 +++++++++++++++ 6 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 crates/nu_plugin_polars/src/dataframe/series/string/to_decimal.rs create mode 100644 crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index 70c901c4ce..b333ae16ae 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -53,6 +53,7 @@ features = [ "dtype-categorical", "dtype-datetime", "dtype-struct", + "dtype-decimal", "dtype-i8", "dtype-i16", "dtype-u8", @@ -70,7 +71,8 @@ features = [ "serde", "serde-lazy", "strings", - "streaming", + "string_to_integer", + "streaming", "to_dummies", ] optional = false @@ -82,4 +84,4 @@ nu-engine = { path = "../nu-engine", version = "0.97.2" } nu-parser = { path = "../nu-parser", version = "0.97.2" } nu-command = { path = "../nu-command", version = "0.97.2" } nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.97.2" } -tempfile.workspace = true \ No newline at end of file +tempfile.workspace = true diff --git 
a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs index 9417427545..ac6c2683d7 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs @@ -67,7 +67,7 @@ impl PluginCommand for LazySelect { let pipeline_value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions)); + let lazy: NuLazyFrame = lazy.to_polars().select(&expressions).into(); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/series/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/mod.rs index 94f28b0801..0c5128f966 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/mod.rs @@ -77,6 +77,8 @@ pub(crate) fn series_commands() -> Vec &str { + "polars decimal" + } + + fn description(&self) -> &str { + "Converts a string column into a decimal column" + } + + fn search_terms(&self) -> Vec<&str> { + vec!["expression", "decimal", "float"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "infer_length", + SyntaxShape::Int, + "Number of values to inspect when inferring the decimal precision and scale", + ) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to decimal", + example: "[[a b]; [1, '2.4']] | polars into-df | polars select (polars col b | polars decimal 2) | polars collect", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![ + Value::test_float(2.40), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn
run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head)?; + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr), + _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, +) -> Result { + let infer_length: usize = call.req(0)?; + let res: NuExpression = expr + .into_polars() + .str() + .to_decimal(infer_length) + // since there isn't a good way to support actual large decimal types + // in nushell, just cast it to an f64. + .cast(DataType::Float64) + .into(); + res.to_pipeline_data(plugin, engine, call.head) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToDecimal) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs b/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs new file mode 100644 index 0000000000..9d1df4aa00 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs @@ -0,0 +1,97 @@ +use crate::{ + values::{ + cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, + PolarsPluginObject, PolarsPluginType, + }, + PolarsPlugin, +}; + +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, +}; +use polars::prelude::lit; + +#[derive(Clone)] +pub struct ToInteger; + +impl PluginCommand for ToInteger { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars integer" + } + + fn description(&self) -> &str { + "Converts a string column into 
an integer column" + } + + fn search_terms(&self) -> Vec<&str> { + vec!["expression", "integer", "float"] + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_type( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + ) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to integer", + example: "[[a b]; [1, '2']] | polars into-df | polars select (polars col b | polars integer) | polars collect", + result: Some( + NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![ + Value::test_int(2), + ], + )], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head)?; + match PolarsPluginObject::try_from_value(plugin, &value)? { + PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr), + _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])), + } + .map_err(LabeledError::from) + } +} + +fn command( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + expr: NuExpression, +) -> Result { + let res: NuExpression = expr.into_polars().str().to_integer(lit(10), false).into(); + res.to_pipeline_data(plugin, engine, call.head) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&ToInteger) + } +}