nushell/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs
Jack Wright 68adc4657f
Polars lazy refactor (#12669)
This moves to predominantly supporting only lazy dataframes for most
operations. It removes a lot of the type conversion between lazy and
eager dataframes based on what was inputted into the command.

For the most part the changes will mean:
* You will need to run `polars collect` after performing operations
* The into-lazy command has been removed as it is redundant.
* When opening files a lazy frame will be outputted by default if the
reader supports lazy frames

A list of individual command changes can be found
[here](https://hackmd.io/@nucore/Bk-3V-hW0)

---------

Co-authored-by: Ian Manske <ian.manske@pm.me>
2024-05-06 23:19:11 +00:00

161 lines
5.3 KiB
Rust

use crate::{
dataframe::values::{str_to_dtype, NuExpression, NuLazyFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use polars::prelude::*;
#[derive(Clone)]
pub struct CastDF;
impl PluginCommand for CastDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars cast"
}
fn usage(&self) -> &str {
"Cast a column to a different dtype."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.required(
"dtype",
SyntaxShape::String,
"The dtype to cast the column to",
)
.optional(
"column",
SyntaxShape::String,
"The column to cast. Required when used with a dataframe.",
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Cast a column in a dataframe to a different dtype",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a lazy dataframe to a different dtype",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a expression to a different dtype",
example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#,
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let (dtype, column_nm) = df_args(call)?;
command_lazy(plugin, engine, call, column_nm, dtype, lazy)
}
PolarsPluginObject::NuDataFrame(df) => {
let (dtype, column_nm) = df_args(call)?;
command_lazy(plugin, engine, call, column_nm, dtype, df.lazy())
}
PolarsPluginObject::NuExpression(expr) => {
let dtype: String = call.req(0)?;
let dtype = str_to_dtype(&dtype, call.head)?;
let expr: NuExpression = expr.into_polars().cast(dtype).into();
expr.to_pipeline_data(plugin, engine, call.head)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> {
let dtype = dtype_arg(call)?;
let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter {
param_name: "column_name".into(),
span: call.head,
})?;
Ok((dtype, column_nm))
}
fn dtype_arg(call: &EvaluatedCall) -> Result<DataType, ShellError> {
let dtype: String = call.req(0)?;
str_to_dtype(&dtype, call.head)
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let column = col(&column_nm).cast(dtype);
let lazy = lazy.to_polars().with_columns(&[column]);
let lazy = NuLazyFrame::new(lazy);
lazy.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&CastDF)
}
}