Added polars commands for converting string columns to integer and decimal columns (#13711)

# Description
Introduces two new polars commands for converting string columns to
decimal and integer columns:

<img width="740" alt="Screenshot 2024-08-27 at 15 32 28"
src="https://github.com/user-attachments/assets/f9573b6e-48f6-4bbf-8782-39ffb95eb934">

<img width="720" alt="Screenshot 2024-08-27 at 15 33 46"
src="https://github.com/user-attachments/assets/90a66bb5-fa78-4ed3-8b2b-ae05cddd2f3a">

# User-Facing Changes
- Addition of the `polars integer` command
- Addition of the `polars decimal` command
This commit is contained in:
Jack Wright 2024-08-28 05:54:31 -07:00 committed by GitHub
parent a88f46c6c9
commit a39e94de8a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 219 additions and 3 deletions

View File

@ -53,6 +53,7 @@ features = [
"dtype-categorical",
"dtype-datetime",
"dtype-struct",
"dtype-decimal",
"dtype-i8",
"dtype-i16",
"dtype-u8",
@ -70,7 +71,8 @@ features = [
"serde",
"serde-lazy",
"strings",
"streaming",
"string_to_integer",
"streaming",
"to_dummies",
]
optional = false
@ -82,4 +84,4 @@ nu-engine = { path = "../nu-engine", version = "0.97.2" }
nu-parser = { path = "../nu-parser", version = "0.97.2" }
nu-command = { path = "../nu-command", version = "0.97.2" }
nu-plugin-test-support = { path = "../nu-plugin-test-support", version = "0.97.2" }
tempfile.workspace = true
tempfile.workspace = true

View File

@ -67,7 +67,7 @@ impl PluginCommand for LazySelect {
let pipeline_value = input.into_value(call.head)?;
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
let lazy: NuLazyFrame = lazy.to_polars().select(&expressions).into();
lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
}

View File

@ -77,6 +77,8 @@ pub(crate) fn series_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlug
Box::new(StrLengths),
Box::new(StrSlice),
Box::new(StrFTime),
Box::new(ToDecimal),
Box::new(ToInteger),
Box::new(ToLowerCase),
Box::new(ToUpperCase),
Box::new(Unique),

View File

@ -5,6 +5,8 @@ mod replace_all;
mod str_lengths;
mod str_slice;
mod strftime;
mod to_decimal;
mod to_integer;
mod to_lowercase;
mod to_uppercase;
@ -15,5 +17,7 @@ pub use replace_all::ReplaceAll;
pub use str_lengths::StrLengths;
pub use str_slice::StrSlice;
pub use strftime::StrFTime;
pub use to_decimal::ToDecimal;
pub use to_integer::ToInteger;
pub use to_lowercase::ToLowerCase;
pub use to_uppercase::ToUpperCase;

View File

@ -0,0 +1,111 @@
use crate::{
values::{
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression,
PolarsPluginObject, PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::DataType;
/// Plugin command `polars decimal`.
///
/// Operates on a polars expression and yields a new expression that parses a
/// string column as decimal values.
#[derive(Clone)]
pub struct ToDecimal;

impl PluginCommand for ToDecimal {
    type Plugin = PolarsPlugin;

    /// Name the command is invoked with.
    fn name(&self) -> &str {
        "polars decimal"
    }

    /// One-line help text shown for the command.
    fn description(&self) -> &str {
        "Converts a string column into a decimal column"
    }

    /// Extra keywords under which the command can be discovered.
    fn search_terms(&self) -> Vec<&str> {
        vec!["expression", "decimal", "float"]
    }

    /// Declares one required integer positional (`infer_length`) and an
    /// expression -> expression input/output type.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "infer_length",
                SyntaxShape::Int,
                // The value is forwarded to polars' `str().to_decimal(..)`, where it
                // bounds how many values are sampled to infer precision and scale;
                // it is not a count of decimal places.
                "Number of values to inspect when inferring the decimal precision and scale",
            )
            .input_output_type(
                Type::Custom("expression".into()),
                Type::Custom("expression".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// A single example: column `b` holding the string `'2.4'` is selected
    /// through `polars decimal 2`, and the expected frame holds the float 2.40.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Modifies strings to decimal",
            example: "[[a b]; [1, '2.4']] | polars into-df | polars select (polars col b | polars decimal 2) | polars collect",
            result: Some(
                NuDataFrame::try_from_columns(
                    vec![Column::new(
                        "b".to_string(),
                        vec![
                            Value::test_float(2.40),
                        ],
                    )],
                    None,
                )
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    /// Entry point: accepts only a `NuExpression` as input and delegates to
    /// `command`; any other plugin object produces a "can't convert" error.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, LabeledError> {
        let value = input.into_value(call.head)?;
        match PolarsPluginObject::try_from_value(plugin, &value)? {
            PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr),
            _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
        }
        .map_err(LabeledError::from)
    }
}
/// Builds the expression behind `polars decimal`.
///
/// Reads the required first positional argument, applies the polars
/// string-to-decimal conversion to `expr`, casts the result to `Float64`,
/// and hands the new expression back as pipeline data.
///
/// Returns an error if the positional argument cannot be read as a `usize`
/// or if converting the expression to pipeline data fails.
fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    expr: NuExpression,
) -> Result<PipelineData, ShellError> {
    let infer_len: usize = call.req(0)?;

    let as_decimal = expr.into_polars().str().to_decimal(infer_len);

    // Nushell has no good way to represent actual large decimal types,
    // so the decimal result is simply cast to an f64.
    let as_float = as_decimal.cast(DataType::Float64);

    let wrapped: NuExpression = as_float.into();
    wrapped.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Exercises `ToDecimal` through `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let cmd = ToDecimal;
        test_polars_plugin_command(&cmd)
    }
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression,
PolarsPluginObject, PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::lit;
/// Plugin command `polars integer`.
///
/// Operates on a polars expression and yields a new expression that parses a
/// string column as integers.
#[derive(Clone)]
pub struct ToInteger;

impl PluginCommand for ToInteger {
    type Plugin = PolarsPlugin;

    /// Name the command is invoked with.
    fn name(&self) -> &str {
        "polars integer"
    }

    /// One-line help text shown for the command.
    fn description(&self) -> &str {
        "Converts a string column into an integer column"
    }

    /// Extra keywords under which the command can be discovered.
    fn search_terms(&self) -> Vec<&str> {
        vec!["expression", "integer", "float"]
    }

    /// Takes no arguments; input and output are both expressions.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .input_output_type(
                Type::Custom("expression".into()),
                Type::Custom("expression".into()),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// A single example: column `b` holding the string `'2'` is selected
    /// through `polars integer`, and the expected frame holds the int 2.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Modifies strings to integer",
            example: "[[a b]; [1, '2']] | polars into-df | polars select (polars col b | polars integer) | polars collect",
            result: Some(
                NuDataFrame::try_from_columns(
                    vec![Column::new(
                        "b".to_string(),
                        vec![
                            Value::test_int(2),
                        ],
                    )],
                    None,
                )
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    /// Entry point: accepts only a `NuExpression` as input and delegates to
    /// `command`; any other plugin object produces a "can't convert" error.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, LabeledError> {
        let value = input.into_value(call.head)?;
        match PolarsPluginObject::try_from_value(plugin, &value)? {
            PolarsPluginObject::NuExpression(expr) => command(plugin, engine, call, expr),
            _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
        }
        .map_err(LabeledError::from)
    }
}
/// Builds the expression behind `polars integer`.
///
/// Applies the polars string-to-integer conversion to `expr`, passing a
/// literal `10` and `false` as its two arguments, and returns the new
/// expression as pipeline data.
/// NOTE(review): per the polars API these are presumably the numeric base
/// and the `strict` flag — confirm against the polars documentation.
fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    expr: NuExpression,
) -> Result<PipelineData, ShellError> {
    let strings = expr.into_polars().str();
    let as_integer = strings.to_integer(lit(10), false);

    let wrapped: NuExpression = as_integer.into();
    wrapped.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Exercises `ToInteger` through `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let cmd = ToInteger;
        test_polars_plugin_command(&cmd)
    }
}