From a39e94de8a8ad4cd889e322ce9f05aab344fd155 Mon Sep 17 00:00:00 2001
From: Jack Wright <56345+ayax79@users.noreply.github.com>
Date: Wed, 28 Aug 2024 05:54:31 -0700
Subject: [PATCH] Added polars commands for converting string columns to
integer and decimal columns (#13711)
# Description
Introduces two new polars commands, `polars decimal` and `polars integer`,
for converting string columns to decimal and integer columns.
# User-Facing Changes
- Addition of the `polars integer` command
- Addition of the `polars decimal` command
---
crates/nu_plugin_polars/Cargo.toml | 6 +-
.../src/dataframe/lazy/select.rs | 2 +-
.../src/dataframe/series/mod.rs | 2 +
.../src/dataframe/series/string/mod.rs | 4 +
.../src/dataframe/series/string/to_decimal.rs | 111 ++++++++++++++++++
.../src/dataframe/series/string/to_integer.rs | 97 +++++++++++++++
6 files changed, 219 insertions(+), 3 deletions(-)
create mode 100644 crates/nu_plugin_polars/src/dataframe/series/string/to_decimal.rs
create mode 100644 crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs
diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml
index 70c901c4ce..b333ae16ae 100644
--- a/crates/nu_plugin_polars/Cargo.toml
+++ b/crates/nu_plugin_polars/Cargo.toml
@@ -53,6 +53,7 @@ features = [
"dtype-categorical",
"dtype-datetime",
"dtype-struct",
+ "dtype-decimal",
"dtype-i8",
"dtype-i16",
"dtype-u8",
@@ -70,7 +71,8 @@ features = [
"serde",
"serde-lazy",
"strings",
- "streaming",
+ "string_to_integer",
+ "streaming",
"to_dummies",
]
optional = false
@@ -82,4 +84,4 @@ nu-engine = { path = "../nu-engine", version = "0.97.2" }
nu-parser = { path = "../nu-parser", version = "0.97.2" }
nu-command = { path = "../nu-command", version = "0.97.2" }
nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.97.2" }
-tempfile.workspace = true
\ No newline at end of file
+tempfile.workspace = true
diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs
index 9417427545..ac6c2683d7 100644
--- a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs
+++ b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs
@@ -67,7 +67,7 @@ impl PluginCommand for LazySelect {
let pipeline_value = input.into_value(call.head)?;
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
- let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
+ let lazy: NuLazyFrame = lazy.to_polars().select(&expressions).into();
lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
}
diff --git a/crates/nu_plugin_polars/src/dataframe/series/mod.rs b/crates/nu_plugin_polars/src/dataframe/series/mod.rs
index 94f28b0801..0c5128f966 100644
--- a/crates/nu_plugin_polars/src/dataframe/series/mod.rs
+++ b/crates/nu_plugin_polars/src/dataframe/series/mod.rs
@@ -77,6 +77,8 @@ pub(crate) fn series_commands() -> Vec &str {
+ "polars decimal"
+ }
+
+    /// Returns the one-line summary of what this command does.
+    fn description(&self) -> &str {
+        const SUMMARY: &str = "Converts a string column into a decimal column";
+        SUMMARY
+    }
+
+    /// Returns the keywords associated with this command.
+    fn search_terms(&self) -> Vec<&str> {
+        ["expression", "decimal", "float"].to_vec()
+    }
+
+    /// Builds the call signature for this command.
+    ///
+    /// The command takes one required positional integer, `infer_length`, and maps an
+    /// `expression` custom value to an `expression` custom value. It is filed under
+    /// the custom `dataframe` category.
+    fn signature(&self) -> Signature {
+        Signature::build(self.name())
+            .required(
+                "infer_length",
+                SyntaxShape::Int,
+                // This value is forwarded unchanged to polars' `to_decimal(infer_length)`,
+                // where it is the count of values sampled to infer the decimal precision
+                // and scale - it is not a number of decimal places.
+                "Number of values used to infer the decimal precision and scale",
+            )
+            .input_output_type(
+                Type::Custom("expression".into()),
+                Type::Custom("expression".into()),
+            )
+            .category(Category::Custom("dataframe".into()))
+    }
+
+    /// Returns the usage examples for this command.
+    ///
+    /// There is a single example: column `b` holding the string `'2.4'` is selected
+    /// through `polars decimal 2`, and the expected result is a one-column dataframe
+    /// `b` containing the float `2.4`.
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Modifies strings to decimal",
+            example: "[[a b]; [1, '2.4']] | polars into-df | polars select (polars col b | polars decimal 2) | polars collect",
+            result: Some(
+                NuDataFrame::try_from_columns(
+                    vec![Column::new(
+                        "b".to_string(),
+                        vec![
+                            Value::test_float(2.40),
+                        ],
+                    )],
+                    None,
+                )
+                // Building a one-column frame from literal test values is an invariant
+                // of this example, so a failure here is treated as a bug.
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        }]
+    }
+
+    /// Entry point for the command.
+    ///
+    /// Collects the pipeline input into a single value and requires it to be a
+    /// polars expression, which is then handed to `command`. Any other plugin object
+    /// produces the error built by `cant_convert_err`.
+    ///
+    /// # Errors
+    ///
+    /// Every `ShellError` raised along the way is converted into a `LabeledError`.
+    fn run(
+        &self,
+        plugin: &Self::Plugin,
+        engine: &EngineInterface,
+        call: &EvaluatedCall,
+        input: PipelineData,
+    ) -> Result<PipelineData, LabeledError> {
+        let value = input.into_value(call.head)?;
+        match PolarsPluginObject::try_from_value(plugin, &value)? {
+            PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr),
+            _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
+        }
+        .map_err(LabeledError::from)
+    }
+}
+
+/// Builds the string-to-decimal expression and returns it as pipeline data.
+///
+/// Reads the required first positional argument as `infer_length`, applies polars'
+/// `str().to_decimal(infer_length)` to the incoming expression, casts the result to
+/// `Float64`, and wraps it back into a `NuExpression`.
+///
+/// # Errors
+///
+/// Returns a `ShellError` if the positional argument cannot be read as a `usize`
+/// or if converting the expression to pipeline data fails.
+fn command(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    expr: NuExpression,
+) -> Result<PipelineData, ShellError> {
+    let infer_length: usize = call.req(0)?;
+    let res: NuExpression = expr
+        .into_polars()
+        .str()
+        .to_decimal(infer_length)
+        // since there isn't a good way to support actual large decimal types
+        // in nushell, just cast it to an f64.
+        .cast(DataType::Float64)
+        .into();
+    res.to_pipeline_data(plugin, engine, call.head)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Passes `ToDecimal` to the crate's plugin-command test helper.
+    #[test]
+    fn test_examples() -> Result<(), ShellError> {
+        crate::test::test_polars_plugin_command(&ToDecimal)
+    }
+}
diff --git a/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs b/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs
new file mode 100644
index 0000000000..9d1df4aa00
--- /dev/null
+++ b/crates/nu_plugin_polars/src/dataframe/series/string/to_integer.rs
@@ -0,0 +1,97 @@
+use crate::{
+ values::{
+ cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression,
+ PolarsPluginObject, PolarsPluginType,
+ },
+ PolarsPlugin,
+};
+
+use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
+use nu_protocol::{
+ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
+};
+use polars::prelude::lit;
+
+/// Plugin command type for `polars integer`; it carries no state of its own.
+#[derive(Clone)]
+pub struct ToInteger;
+
+/// `PluginCommand` implementation for `polars integer`, which turns a string
+/// expression into an integer expression.
+impl PluginCommand for ToInteger {
+    type Plugin = PolarsPlugin;
+
+    /// Returns the command name as typed by the user.
+    fn name(&self) -> &str {
+        "polars integer"
+    }
+
+    /// Returns the one-line summary of what this command does.
+    fn description(&self) -> &str {
+        "Converts a string column into an integer column"
+    }
+
+    /// Returns the keywords associated with this command.
+    fn search_terms(&self) -> Vec<&str> {
+        // NOTE(review): "float" is an odd search term for an integer conversion -
+        // confirm it is intended.
+        vec!["expression", "integer", "float"]
+    }
+
+    /// Builds the call signature for this command.
+    ///
+    /// The command takes no arguments and maps an `expression` custom value to an
+    /// `expression` custom value. It is filed under the custom `dataframe` category.
+    fn signature(&self) -> Signature {
+        Signature::build(self.name())
+            .input_output_type(
+                Type::Custom("expression".into()),
+                Type::Custom("expression".into()),
+            )
+            .category(Category::Custom("dataframe".into()))
+    }
+
+    /// Returns the usage examples for this command.
+    ///
+    /// There is a single example: column `b` holding the string `'2'` is selected
+    /// through `polars integer`, and the expected result is a one-column dataframe
+    /// `b` containing the integer `2`.
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Modifies strings to integer",
+            example: "[[a b]; [1, '2']] | polars into-df | polars select (polars col b | polars integer) | polars collect",
+            result: Some(
+                NuDataFrame::try_from_columns(
+                    vec![Column::new(
+                        "b".to_string(),
+                        vec![
+                            Value::test_int(2),
+                        ],
+                    )],
+                    None,
+                )
+                // Building a one-column frame from literal test values is an invariant
+                // of this example, so a failure here is treated as a bug.
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        }]
+    }
+
+    /// Entry point for the command.
+    ///
+    /// Collects the pipeline input into a single value and requires it to be a
+    /// polars expression, which is then handed to `command`. Any other plugin object
+    /// produces the error built by `cant_convert_err`.
+    ///
+    /// # Errors
+    ///
+    /// Every `ShellError` raised along the way is converted into a `LabeledError`.
+    fn run(
+        &self,
+        plugin: &Self::Plugin,
+        engine: &EngineInterface,
+        call: &EvaluatedCall,
+        input: PipelineData,
+    ) -> Result<PipelineData, LabeledError> {
+        let value = input.into_value(call.head)?;
+        match PolarsPluginObject::try_from_value(plugin, &value)? {
+            PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr),
+            _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
+        }
+        .map_err(LabeledError::from)
+    }
+}
+
+/// Builds the string-to-integer expression and returns it as pipeline data.
+///
+/// Applies polars' `str().to_integer` to the incoming expression with a literal
+/// base of 10 and `strict` set to `false`, then wraps the result back into a
+/// `NuExpression`.
+///
+/// # Errors
+///
+/// Returns a `ShellError` if converting the expression to pipeline data fails.
+fn command(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    expr: NuExpression,
+) -> Result<PipelineData, ShellError> {
+    // NOTE(review): with `strict = false` polars is documented to produce nulls for
+    // values that fail to parse rather than raising an error - confirm that silent
+    // nulls are the desired behavior here.
+    let res: NuExpression = expr.into_polars().str().to_integer(lit(10), false).into();
+    res.to_pipeline_data(plugin, engine, call.head)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Passes `ToInteger` to the crate's plugin-command test helper.
+    #[test]
+    fn test_examples() -> Result<(), ShellError> {
+        crate::test::test_polars_plugin_command(&ToInteger)
+    }
+}