mirror of
https://github.com/nushell/nushell.git
synced 2025-04-02 12:19:48 +02:00
This change moves to predominantly supporting only lazy dataframes for most operations. It removes much of the type conversion between lazy and eager dataframes that was previously performed depending on what was passed into the command. For the most part, the changes mean:

* You will need to run `polars collect` after performing operations.
* The `into-lazy` command has been removed because it is redundant.
* When opening files, a lazy frame is output by default if the reader supports lazy frames.

A list of individual command changes can be found [here](https://hackmd.io/@nucore/Bk-3V-hW0).

---------

Co-authored-by: Ian Manske <ian.manske@pm.me>
161 lines
5.3 KiB
Rust
161 lines
5.3 KiB
Rust
use crate::{
|
|
dataframe::values::{str_to_dtype, NuExpression, NuLazyFrame},
|
|
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
|
|
PolarsPlugin,
|
|
};
|
|
|
|
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
|
use nu_protocol::{
|
|
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
|
|
SyntaxShape, Type, Value,
|
|
};
|
|
use polars::prelude::*;
|
|
|
|
/// Plugin command registered as `polars cast`.
///
/// Casts a column (or an expression) to a different dtype; the behaviour
/// lives in this type's `PluginCommand` implementation.
#[derive(Clone)]
pub struct CastDF;
|
|
|
|
impl PluginCommand for CastDF {
|
|
type Plugin = PolarsPlugin;
|
|
|
|
fn name(&self) -> &str {
|
|
"polars cast"
|
|
}
|
|
|
|
fn usage(&self) -> &str {
|
|
"Cast a column to a different dtype."
|
|
}
|
|
|
|
fn signature(&self) -> Signature {
|
|
Signature::build(self.name())
|
|
.input_output_types(vec![
|
|
(
|
|
Type::Custom("expression".into()),
|
|
Type::Custom("expression".into()),
|
|
),
|
|
(
|
|
Type::Custom("dataframe".into()),
|
|
Type::Custom("dataframe".into()),
|
|
),
|
|
])
|
|
.required(
|
|
"dtype",
|
|
SyntaxShape::String,
|
|
"The dtype to cast the column to",
|
|
)
|
|
.optional(
|
|
"column",
|
|
SyntaxShape::String,
|
|
"The column to cast. Required when used with a dataframe.",
|
|
)
|
|
.category(Category::Custom("dataframe".into()))
|
|
}
|
|
|
|
fn examples(&self) -> Vec<Example> {
|
|
vec![
|
|
Example {
|
|
description: "Cast a column in a dataframe to a different dtype",
|
|
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
|
|
result: Some(Value::record(
|
|
record! {
|
|
"a" => Value::string("u8", Span::test_data()),
|
|
"b" => Value::string("i64", Span::test_data()),
|
|
},
|
|
Span::test_data(),
|
|
)),
|
|
},
|
|
Example {
|
|
description: "Cast a column in a lazy dataframe to a different dtype",
|
|
example:
|
|
"[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
|
|
result: Some(Value::record(
|
|
record! {
|
|
"a" => Value::string("u8", Span::test_data()),
|
|
"b" => Value::string("i64", Span::test_data()),
|
|
},
|
|
Span::test_data(),
|
|
)),
|
|
},
|
|
Example {
|
|
description: "Cast a column in a expression to a different dtype",
|
|
example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#,
|
|
result: None,
|
|
},
|
|
]
|
|
}
|
|
|
|
fn run(
|
|
&self,
|
|
plugin: &Self::Plugin,
|
|
engine: &EngineInterface,
|
|
call: &EvaluatedCall,
|
|
input: PipelineData,
|
|
) -> Result<PipelineData, LabeledError> {
|
|
let value = input.into_value(call.head);
|
|
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
|
PolarsPluginObject::NuLazyFrame(lazy) => {
|
|
let (dtype, column_nm) = df_args(call)?;
|
|
command_lazy(plugin, engine, call, column_nm, dtype, lazy)
|
|
}
|
|
PolarsPluginObject::NuDataFrame(df) => {
|
|
let (dtype, column_nm) = df_args(call)?;
|
|
command_lazy(plugin, engine, call, column_nm, dtype, df.lazy())
|
|
}
|
|
PolarsPluginObject::NuExpression(expr) => {
|
|
let dtype: String = call.req(0)?;
|
|
let dtype = str_to_dtype(&dtype, call.head)?;
|
|
let expr: NuExpression = expr.into_polars().cast(dtype).into();
|
|
expr.to_pipeline_data(plugin, engine, call.head)
|
|
}
|
|
_ => Err(cant_convert_err(
|
|
&value,
|
|
&[
|
|
PolarsPluginType::NuDataFrame,
|
|
PolarsPluginType::NuLazyFrame,
|
|
PolarsPluginType::NuExpression,
|
|
],
|
|
)),
|
|
}
|
|
.map_err(LabeledError::from)
|
|
}
|
|
}
|
|
|
|
fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> {
|
|
let dtype = dtype_arg(call)?;
|
|
let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter {
|
|
param_name: "column_name".into(),
|
|
span: call.head,
|
|
})?;
|
|
Ok((dtype, column_nm))
|
|
}
|
|
|
|
fn dtype_arg(call: &EvaluatedCall) -> Result<DataType, ShellError> {
|
|
let dtype: String = call.req(0)?;
|
|
str_to_dtype(&dtype, call.head)
|
|
}
|
|
|
|
fn command_lazy(
|
|
plugin: &PolarsPlugin,
|
|
engine: &EngineInterface,
|
|
call: &EvaluatedCall,
|
|
column_nm: String,
|
|
dtype: DataType,
|
|
lazy: NuLazyFrame,
|
|
) -> Result<PipelineData, ShellError> {
|
|
let column = col(&column_nm).cast(dtype);
|
|
let lazy = lazy.to_polars().with_columns(&[column]);
|
|
let lazy = NuLazyFrame::new(lazy);
|
|
lazy.to_pipeline_data(plugin, engine, call.head)
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use crate::test::test_polars_plugin_command;

    use super::*;

    /// Runs the command through the shared plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&CastDF)
    }
}
|