From 68adc4657f9c57bb7090df3984a5d2931f8e7358 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Mon, 6 May 2024 16:19:11 -0700 Subject: [PATCH] Polars lazy refactor (#12669) This moves to predominantly supporting only lazy dataframes for most operations. It removes a lot of the type conversion between lazy and eager dataframes based on what was inputted into the command. For the most part the changes will mean: * You will need to run `polars collect` after performing operations * The into-lazy command has been removed as it is redundant. * When opening files a lazy frame will be outputted by default if the reader supports lazy frames A list of individual command changes can be found [here](https://hackmd.io/@nucore/Bk-3V-hW0) --------- Co-authored-by: Ian Manske --- Cargo.lock | 1 + crates/nu_plugin_polars/Cargo.toml | 1 + .../src/dataframe/eager/filter_with.rs | 162 --------- .../src/dataframe/eager/mod.rs | 36 -- .../src/dataframe/eager/open.rs | 343 ++++++------------ .../src/dataframe/eager/query_df.rs | 2 +- .../src/dataframe/eager/sample.rs | 2 +- .../src/dataframe/eager/summary.rs | 2 +- .../src/dataframe/eager/take.rs | 2 +- .../src/dataframe/eager/with_column.rs | 196 ---------- .../expressions/expressions_macro.rs | 2 - .../src/dataframe/expressions/is_in.rs | 3 +- .../src/dataframe/expressions/otherwise.rs | 2 +- .../src/dataframe/expressions/when.rs | 2 +- .../src/dataframe/lazy/aggregate.rs | 4 +- .../src/dataframe/{eager => lazy}/cast.rs | 48 +-- .../src/dataframe/lazy/collect.rs | 8 +- .../src/dataframe/{eager => lazy}/drop.rs | 47 +-- .../{eager => lazy}/drop_duplicates.rs | 25 +- .../dataframe/{eager => lazy}/drop_nulls.rs | 32 +- .../src/dataframe/lazy/fetch.rs | 4 +- .../src/dataframe/lazy/fill_null.rs | 4 +- .../src/dataframe/lazy/filter.rs | 7 +- .../src/dataframe/lazy/filter_with.rs | 97 +++++ .../src/dataframe/{eager => lazy}/first.rs | 17 +- .../src/dataframe/{eager => lazy}/get.rs | 27 +- .../src/dataframe/lazy/groupby.rs | 4 +- .../src/dataframe/lazy/join.rs | 9 +- .../src/dataframe/{eager => lazy}/last.rs | 14 +- .../src/dataframe/lazy/macro_commands.rs | 2 +- .../src/dataframe/lazy/median.rs | 2 +- .../src/dataframe/{eager => lazy}/melt.rs | 132 ++----- .../src/dataframe/lazy/mod.rs | 27 +- .../src/dataframe/lazy/quantile.rs | 1 - .../src/dataframe/{eager => lazy}/rename.rs | 50 +-- .../src/dataframe/lazy/select.rs | 2 +- .../src/dataframe/{eager => lazy}/slice.rs | 16 +- .../src/dataframe/lazy/sort_by_expr.rs | 5 +- .../src/dataframe/lazy/to_lazy.rs | 90 ----- .../src/dataframe/lazy/with_column.rs | 114 ++++++ .../src/dataframe/series/shift.rs | 33 +- .../src/dataframe/series/unique.rs | 43 +-- .../src/dataframe/values/mod.rs | 14 +- .../values/nu_dataframe/conversion.rs | 2 +- .../src/dataframe/values/nu_dataframe/mod.rs | 14 +- .../values/nu_dataframe/operations.rs | 4 +- .../values/nu_expression/custom_value.rs | 49 +-- .../src/dataframe/values/nu_lazyframe/mod.rs | 12 +- .../dataframe/values/nu_lazygroupby/mod.rs | 4 +- .../src/dataframe/values/utils.rs | 2 - 50 files changed, 511 insertions(+), 1210 deletions(-) delete mode 100644 crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs delete mode 100644 crates/nu_plugin_polars/src/dataframe/eager/with_column.rs rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/cast.rs (79%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/drop.rs (62%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/drop_duplicates.rs (82%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/drop_nulls.rs (84%) create mode 100644 crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/first.rs (91%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/get.rs (77%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/last.rs (90%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/melt.rs (56%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/rename.rs (77%) rename crates/nu_plugin_polars/src/dataframe/{eager => lazy}/slice.rs (87%) delete mode 100644 crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs create mode 100644 crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs diff --git a/Cargo.lock b/Cargo.lock index 29647b87ab..6dcf38f7cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3419,6 +3419,7 @@ dependencies = [ "polars", "polars-arrow", "polars-io", + "polars-lazy", "polars-ops", "polars-plan", "polars-utils", diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index 38bf02a668..99c95f33e4 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -34,6 +34,7 @@ polars-arrow = { version = "0.39"} polars-ops = { version = "0.39"} polars-plan = { version = "0.39", features = ["regex"]} polars-utils = { version = "0.39"} +polars-lazy = { version = "0.39"} typetag = "0.2" uuid = { version = "1.7", features = ["v4", "serde"] } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs b/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs deleted file mode 100644 index 03475c2bea..0000000000 --- a/crates/nu_plugin_polars/src/dataframe/eager/filter_with.rs +++ /dev/null @@ -1,162 +0,0 @@ -use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; -use nu_protocol::{ - Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, - Value, -}; -use polars::prelude::LazyFrame; - -use crate::{ - dataframe::values::{NuExpression, NuLazyFrame}, - values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, - PolarsPlugin, -}; - -use super::super::values::{Column, NuDataFrame}; - -#[derive(Clone)] -pub struct FilterWith; - -impl PluginCommand for FilterWith { - type Plugin = PolarsPlugin; - - fn name(&self) -> &str { - "polars filter-with" - } - - fn usage(&self) -> &str { - "Filters dataframe using a mask or expression as reference." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "mask or expression", - SyntaxShape::Any, - "boolean mask used to filter data", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Filter dataframe using a bool mask", - example: r#"let mask = ([true false] | polars into-df); - [[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Filter dataframe using an expression", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - plugin: &Self::Plugin, - engine: &EngineInterface, - call: &EvaluatedCall, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head); - match PolarsPluginObject::try_from_value(plugin, &value)? { - PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), - PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), - _ => Err(cant_convert_err( - &value, - &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame], - )), - } - .map_err(LabeledError::from) - } -} - -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - df: NuDataFrame, -) -> Result { - let mask_value: Value = call.req(0)?; - let mask_span = mask_value.span(); - - if NuExpression::can_downcast(&mask_value) { - let expression = NuExpression::try_from_value(plugin, &mask_value)?; - let lazy = df.lazy(); - let lazy = lazy.apply_with_expr(expression, LazyFrame::filter); - - lazy.to_pipeline_data(plugin, engine, call.head) - } else { - let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)? - .as_series(mask_span)?; - let mask = mask.bool().map_err(|e| ShellError::GenericError { - error: "Error casting to bool".into(), - msg: e.to_string(), - span: Some(mask_span), - help: Some("Perhaps you want to use a series with booleans as mask".into()), - inner: vec![], - })?; - - let polars_df = df - .as_ref() - .filter(mask) - .map_err(|e| ShellError::GenericError { - error: "Error filtering dataframe".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The only allowed column types for dummies are String or Int".into()), - inner: vec![], - })?; - let df = NuDataFrame::new(df.from_lazy, polars_df); - df.to_pipeline_data(plugin, engine, call.head) - } -} - -fn command_lazy( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - lazy: NuLazyFrame, -) -> Result { - let expr: Value = call.req(0)?; - let expr = NuExpression::try_from_value(plugin, &expr)?; - let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); - lazy.to_pipeline_data(plugin, engine, call.head) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::test::test_polars_plugin_command; - - #[test] - fn test_examples() -> Result<(), ShellError> { - test_polars_plugin_command(&FilterWith) - } -} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/mod.rs b/crates/nu_plugin_polars/src/dataframe/eager/mod.rs index dc50ba7cd2..28c99f1fed 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/mod.rs @@ -1,22 +1,11 @@ mod append; -mod cast; mod columns; -mod drop; -mod drop_duplicates; -mod drop_nulls; mod dummies; -mod filter_with; -mod first; -mod get; -mod last; -mod melt; mod open; mod query_df; -mod rename; mod sample; mod schema; mod shape; -mod slice; mod sql_context; mod sql_expr; mod summary; @@ -28,30 +17,18 @@ mod to_df; mod to_json_lines; mod to_nu; mod to_parquet; -mod with_column; use crate::PolarsPlugin; pub use self::open::OpenDataFrame; pub use append::AppendDF; -pub use cast::CastDF; pub use columns::ColumnsDF; -pub use drop::DropDF; -pub use drop_duplicates::DropDuplicates; -pub use drop_nulls::DropNulls; pub use dummies::Dummies; -pub use filter_with::FilterWith; -pub use first::FirstDF; -pub use get::GetDF; -pub use last::LastDF; -pub use melt::MeltDF; use nu_plugin::PluginCommand; pub use query_df::QueryDf; -pub use rename::RenameDF; pub use sample::SampleDF; pub use schema::SchemaCmd; pub use shape::ShapeDF; -pub use slice::SliceDF; pub use sql_context::SQLContext; pub use summary::Summary; pub use take::TakeDF; @@ -62,28 +39,16 @@ pub use to_df::ToDataFrame; pub use to_json_lines::ToJsonLines; pub use to_nu::ToNu; pub use to_parquet::ToParquet; -pub use with_column::WithColumn; pub(crate) fn eager_commands() -> Vec>> { vec![ Box::new(AppendDF), - Box::new(CastDF), Box::new(ColumnsDF), - Box::new(DropDF), - Box::new(DropDuplicates), - Box::new(DropNulls), Box::new(Dummies), - Box::new(FilterWith), - Box::new(GetDF), Box::new(OpenDataFrame), - Box::new(MeltDF), Box::new(Summary), - Box::new(FirstDF), - Box::new(LastDF), - Box::new(RenameDF), Box::new(SampleDF), Box::new(ShapeDF), - Box::new(SliceDF), Box::new(SchemaCmd), Box::new(TakeDF), Box::new(ToNu), @@ -94,6 +59,5 @@ pub(crate) fn eager_commands() -> Vec Result { - if call.has_flag("lazy")? { - let file: String = call.req(0)?; - let args = ScanArgsParquet { - n_rows: None, - cache: true, - parallel: ParallelStrategy::Auto, - rechunk: false, - row_index: None, - low_memory: false, - cloud_options: None, - use_statistics: false, - hive_options: HiveOptions::default(), - }; + let args = ScanArgsParquet { + n_rows: None, + cache: true, + parallel: ParallelStrategy::Auto, + rechunk: false, + row_index: None, + low_memory: false, + cloud_options: None, + use_statistics: false, + hive_options: HiveOptions::default(), + }; - let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); + let maybe_columns: Option> = call + .get_flag::>("columns")? + .map(|cols| cols.iter().map(|s| col(s)).collect()); - df.cache_and_to_value(plugin, engine, call.head) - } else { - let columns: Option> = call.get_flag("columns")?; - - let r = File::open(file_path).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file_span), + let mut polars_df = + LazyFrame::scan_parquet(file_path, args).map_err(|e| ShellError::GenericError { + error: "Parquet reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), help: None, inner: vec![], })?; - let reader = ParquetReader::new(r); - let reader = match columns { - None => reader, - Some(columns) => reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.cache_and_to_value(plugin, engine, call.head) + if let Some(columns) = maybe_columns { + polars_df = polars_df.select(columns); } + + let df: NuLazyFrame = polars_df.into(); + df.cache_and_to_value(plugin, engine, call.head) } fn from_avro( @@ -262,60 +238,36 @@ fn from_ipc( engine: &nu_plugin::EngineInterface, call: &nu_plugin::EvaluatedCall, file_path: &Path, - file_span: Span, + _file_span: Span, ) -> Result { - if call.has_flag("lazy")? { - let file: String = call.req(0)?; - let args = ScanArgsIpc { - n_rows: None, - cache: true, - rechunk: false, - row_index: None, - memory_map: true, - cloud_options: None, - }; + let args = ScanArgsIpc { + n_rows: None, + cache: true, + rechunk: false, + row_index: None, + memory_map: true, + cloud_options: None, + }; - let df: NuLazyFrame = LazyFrame::scan_ipc(file, args) - .map_err(|e| ShellError::GenericError { - error: "IPC reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); + let maybe_columns: Option> = call + .get_flag::>("columns")? + .map(|cols| cols.iter().map(|s| col(s)).collect()); - df.cache_and_to_value(plugin, engine, call.head) - } else { - let columns: Option> = call.get_flag("columns")?; - - let r = File::open(file_path).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file_span), + let mut polars_df = + LazyFrame::scan_ipc(file_path, args).map_err(|e| ShellError::GenericError { + error: "IPC reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), help: None, inner: vec![], })?; - let reader = IpcReader::new(r); - let reader = match columns { - None => reader, - Some(columns) => reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "IPC reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.cache_and_to_value(plugin, engine, call.head) + if let Some(columns) = maybe_columns { + polars_df = polars_df.select(columns); } + + let df: NuLazyFrame = polars_df.into(); + df.cache_and_to_value(plugin, engine, call.head) } fn from_json( @@ -364,32 +316,21 @@ fn from_jsonl( engine: &nu_plugin::EngineInterface, call: &nu_plugin::EvaluatedCall, file_path: &Path, - file_span: Span, + _file_span: Span, ) -> Result { let infer_schema: Option = call.get_flag("infer-schema")?; let maybe_schema = call .get_flag("schema")? .map(|schema| NuSchema::try_from(&schema)) .transpose()?; - let file = File::open(file_path).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file_span), - help: None, - inner: vec![], - })?; - let buf_reader = BufReader::new(file); - let reader = JsonReader::new(buf_reader) - .with_json_format(JsonFormat::JsonLines) - .infer_schema_len(infer_schema); + let maybe_columns: Option> = call + .get_flag::>("columns")? + .map(|cols| cols.iter().map(|s| col(s)).collect()); - let reader = match maybe_schema { - Some(schema) => reader.with_schema(schema.into()), - None => reader, - }; - - let df: NuDataFrame = reader + let mut polars_df = LazyJsonLineReader::new(file_path) + .with_infer_schema_length(infer_schema) + .with_schema(maybe_schema.map(|s| s.into())) .finish() .map_err(|e| ShellError::GenericError { error: "Json lines reader error".into(), @@ -397,9 +338,13 @@ fn from_jsonl( span: Some(call.head), help: None, inner: vec![], - })? - .into(); + })?; + if let Some(columns) = maybe_columns { + polars_df = polars_df.select(columns); + } + + let df: NuLazyFrame = polars_df.into(); df.cache_and_to_value(plugin, engine, call.head) } @@ -408,137 +353,73 @@ fn from_csv( engine: &nu_plugin::EngineInterface, call: &nu_plugin::EvaluatedCall, file_path: &Path, - file_span: Span, + _file_span: Span, ) -> Result { let delimiter: Option> = call.get_flag("delimiter")?; let no_header: bool = call.has_flag("no-header")?; let infer_schema: Option = call.get_flag("infer-schema")?; let skip_rows: Option = call.get_flag("skip-rows")?; - let columns: Option> = call.get_flag("columns")?; + let maybe_columns: Option> = call + .get_flag::>("columns")? + .map(|cols| cols.iter().map(|s| col(s)).collect()); let maybe_schema = call .get_flag("schema")? .map(|schema| NuSchema::try_from(&schema)) .transpose()?; - if call.has_flag("lazy")? { - let csv_reader = LazyCsvReader::new(file_path); + let csv_reader = LazyCsvReader::new(file_path); - let csv_reader = match delimiter { - None => csv_reader, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one character".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - csv_reader.with_separator(delimiter) - } + let csv_reader = match delimiter { + None => csv_reader, + Some(d) => { + if d.item.len() != 1 { + return Err(ShellError::GenericError { + error: "Incorrect delimiter".into(), + msg: "Delimiter has to be one character".into(), + span: Some(d.span), + help: None, + inner: vec![], + }); + } else { + let delimiter = match d.item.chars().next() { + Some(d) => d as u8, + None => unreachable!(), + }; + csv_reader.with_separator(delimiter) } - }; + } + }; - let csv_reader = csv_reader.has_header(!no_header); + let csv_reader = csv_reader.has_header(!no_header); - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; + let csv_reader = match maybe_schema { + Some(schema) => csv_reader.with_schema(Some(schema.into())), + None => csv_reader, + }; - let csv_reader = match infer_schema { - None => csv_reader, - Some(r) => csv_reader.with_infer_schema_length(Some(r)), - }; + let csv_reader = match infer_schema { + None => csv_reader, + Some(r) => csv_reader.with_infer_schema_length(Some(r)), + }; - let csv_reader = match skip_rows { - None => csv_reader, - Some(r) => csv_reader.with_skip_rows(r), - }; + let csv_reader = match skip_rows { + None => csv_reader, + Some(r) => csv_reader.with_skip_rows(r), + }; - let df: NuLazyFrame = csv_reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); + let mut polars_df = csv_reader.finish().map_err(|e| ShellError::GenericError { + error: "CSV reader error".into(), + msg: format!("{e:?}"), + span: Some(call.head), + help: None, + inner: vec![], + })?; - df.cache_and_to_value(plugin, engine, call.head) - } else { - let csv_reader = CsvReader::from_path(file_path) - .map_err(|e| ShellError::GenericError { - error: "Error creating CSV reader".into(), - msg: e.to_string(), - span: Some(file_span), - help: None, - inner: vec![], - })? - .with_encoding(CsvEncoding::LossyUtf8); - - let csv_reader = match delimiter { - None => csv_reader, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one character".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - csv_reader.with_separator(delimiter) - } - } - }; - - let csv_reader = csv_reader.has_header(!no_header); - - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; - - let csv_reader = match infer_schema { - None => csv_reader, - Some(r) => csv_reader.infer_schema(Some(r)), - }; - - let csv_reader = match skip_rows { - None => csv_reader, - Some(r) => csv_reader.with_skip_rows(r), - }; - - let csv_reader = match columns { - None => csv_reader, - Some(columns) => csv_reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = csv_reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.cache_and_to_value(plugin, engine, call.head) + if let Some(columns) = maybe_columns { + polars_df = polars_df.select(columns); } + + let df: NuLazyFrame = polars_df.into(); + df.cache_and_to_value(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs b/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs index a09da57250..aacfe5e3df 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/query_df.rs @@ -91,7 +91,7 @@ fn command( help: None, inner: vec![], })?; - let lazy = NuLazyFrame::new(!df.from_lazy, df_sql); + let lazy = NuLazyFrame::new(df_sql); lazy.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/sample.rs b/crates/nu_plugin_polars/src/dataframe/eager/sample.rs index 59f9ad6eee..64f7608c5c 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/sample.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/sample.rs @@ -147,7 +147,7 @@ fn command( inner: vec![], }), }; - let df = NuDataFrame::new(false, df?); + let df = NuDataFrame::new(df?); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs index c8723a92bf..8e5151837d 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs @@ -272,7 +272,7 @@ fn command( inner: vec![], })?; - let df = NuDataFrame::new(df.from_lazy, polars_df); + let df = NuDataFrame::new(polars_df); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/take.rs b/crates/nu_plugin_polars/src/dataframe/eager/take.rs index 28b22095a1..6cbcd519e0 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/take.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/take.rs @@ -143,7 +143,7 @@ fn command( inner: vec![], })?; - let df = NuDataFrame::new(df.from_lazy, polars_df); + let df = NuDataFrame::new(polars_df); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs b/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs deleted file mode 100644 index 6973389729..0000000000 --- a/crates/nu_plugin_polars/src/dataframe/eager/with_column.rs +++ /dev/null @@ -1,196 +0,0 @@ -use super::super::values::{Column, NuDataFrame}; -use crate::{ - dataframe::values::{NuExpression, NuLazyFrame}, - values::{CustomValueSupport, PolarsPluginObject}, - PolarsPlugin, -}; -use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; -use nu_protocol::{ - Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, - Value, -}; - -#[derive(Clone)] -pub struct WithColumn; - -impl PluginCommand for WithColumn { - type Plugin = PolarsPlugin; - - fn name(&self) -> &str { - "polars with-column" - } - - fn usage(&self) -> &str { - "Adds a series to the dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named("name", SyntaxShape::String, "new column name", Some('n')) - .rest( - "series or expressions", - SyntaxShape::Any, - "series to be added or expressions used to define the new columns", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Adds a series to the dataframe", - example: r#"[[a b]; [1 2] [3 4]] - | polars into-df - | polars with-column ([5 6] | polars into-df) --name c"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Adds a series to the dataframe", - example: r#"[[a b]; [1 2] [3 4]] - | polars into-lazy - | polars with-column [ - ((polars col a) * 2 | polars as "c") - ((polars col a) * 3 | polars as "d") - ] - | polars collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(2), Value::test_int(6)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(3), Value::test_int(9)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - plugin: &Self::Plugin, - engine: &EngineInterface, - call: &EvaluatedCall, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head); - match PolarsPluginObject::try_from_value(plugin, &value)? { - PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), - PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), - _ => Err(ShellError::CantConvert { - to_type: "lazy or eager dataframe".into(), - from_type: value.get_type().to_string(), - span: value.span(), - help: None, - }), - } - .map_err(LabeledError::from) - } -} - -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - df: NuDataFrame, -) -> Result { - let new_column: Value = call.req(0)?; - let column_span = new_column.span(); - - if NuExpression::can_downcast(&new_column) { - let vals: Vec = call.rest(0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(plugin, value)?; - let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions)); - let df = lazy.collect(call.head)?; - df.to_pipeline_data(plugin, engine, call.head) - } else { - let mut other = NuDataFrame::try_from_value_coerce(plugin, &new_column, call.head)? - .as_series(column_span)?; - - let name = match call.get_flag::("name")? { - Some(name) => name, - None => other.name().to_string(), - }; - - let series = other.rename(&name).clone(); - - let mut polars_df = df.to_polars(); - polars_df - .with_column(series) - .map_err(|e| ShellError::GenericError { - error: "Error adding column to dataframe".into(), - msg: e.to_string(), - span: Some(column_span), - help: None, - inner: vec![], - })?; - - let df = NuDataFrame::new(df.from_lazy, polars_df); - df.to_pipeline_data(plugin, engine, call.head) - } -} - -fn command_lazy( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - lazy: NuLazyFrame, -) -> Result { - let vals: Vec = call.rest(0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(plugin, value)?; - let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into(); - lazy.to_pipeline_data(plugin, engine, call.head) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::test::test_polars_plugin_command; - - #[test] - fn test_examples() -> Result<(), ShellError> { - test_polars_plugin_command(&WithColumn) - } -} diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs index 1a566f6989..577524123c 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs @@ -164,7 +164,6 @@ macro_rules! lazy_expr_command { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; let lazy = NuLazyFrame::new( - lazy.from_eager, lazy.to_polars() .$func() .map_err(|e| ShellError::GenericError { @@ -245,7 +244,6 @@ macro_rules! lazy_expr_command { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; let lazy = NuLazyFrame::new( - lazy.from_eager, lazy.to_polars() .$func($ddof) .map_err(|e| ShellError::GenericError { diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs index 1f270837bb..ed4b567983 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs @@ -181,8 +181,7 @@ fn command_df( res.rename("is_in"); - let mut new_df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; - new_df.from_lazy = df.from_lazy; + let new_df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; new_df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs index c887789268..2bdbfefb35 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs @@ -48,7 +48,7 @@ impl PluginCommand for ExprOtherwise { Example { description: "Create a new column for the dataframe", example: r#"[[a b]; [6 2] [1 4] [4 1]] - | polars into-lazy + | polars into-df | polars with-column ( polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c ) diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/when.rs b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs index 1639ed44be..158b2ac757 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/when.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs @@ -57,7 +57,7 @@ impl PluginCommand for ExprWhen { Example { description: "Create a new column for the dataframe", example: r#"[[a b]; [6 2] [1 4] [4 1]] - | polars into-lazy + | polars into-df | polars with-column ( polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c ) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs index 8fa717954f..d5b824ed9a 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs @@ -80,7 +80,7 @@ impl PluginCommand for LazyAggregate { Example { description: "Group by and perform an aggregation", example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | polars into-lazy + | polars into-df | polars group-by a | polars agg [ (polars col b | polars min | polars as "b_min") @@ -147,7 +147,7 @@ impl PluginCommand for LazyAggregate { } let polars = group_by.to_polars(); - let lazy = NuLazyFrame::new(false, polars.agg(&expressions)); + let lazy = NuLazyFrame::new(polars.agg(&expressions)); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/cast.rs b/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs similarity index 79% rename from crates/nu_plugin_polars/src/dataframe/eager/cast.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/cast.rs index 074e162201..559ca27658 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/cast.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs @@ -4,7 +4,6 @@ use crate::{ PolarsPlugin, }; -use super::super::values::NuDataFrame; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, @@ -67,7 +66,7 @@ impl PluginCommand for CastDF { Example { description: "Cast a column in a lazy dataframe to a different dtype", example: - "[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema", + "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema", result: Some(Value::record( record! { "a" => Value::string("u8", Span::test_data()), @@ -99,7 +98,7 @@ impl PluginCommand for CastDF { } PolarsPluginObject::NuDataFrame(df) => { let (dtype, column_nm) = df_args(call)?; - command_eager(plugin, engine, call, column_nm, dtype, df) + command_lazy(plugin, engine, call, column_nm, dtype, df.lazy()) } PolarsPluginObject::NuExpression(expr) => { let dtype: String = call.req(0)?; @@ -144,51 +143,10 @@ fn command_lazy( ) -> Result { let column = col(&column_nm).cast(dtype); let lazy = lazy.to_polars().with_columns(&[column]); - let lazy = NuLazyFrame::new(false, lazy); + let lazy = NuLazyFrame::new(lazy); lazy.to_pipeline_data(plugin, engine, call.head) } -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - column_nm: String, - dtype: DataType, - nu_df: NuDataFrame, -) -> Result { - let mut df = (*nu_df.df).clone(); - let column = df - .column(&column_nm) - .map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let _ = df - .with_column(casted) - .map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let df = NuDataFrame::new(false, df); - df.to_pipeline_data(plugin, engine, call.head) -} - #[cfg(test)] mod test { diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs index 47f91f1d71..db62426e83 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs @@ -32,8 +32,8 @@ impl PluginCommand for LazyCollect { fn examples(&self) -> Vec { vec![Example { - description: "drop duplicates", - example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect", + description: "collect a lazy dataframe", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars select [(polars col a) (polars col b)] | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -64,9 +64,7 @@ impl PluginCommand for LazyCollect { let value = input.into_value(call.head); match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuLazyFrame(lazy) => { - let mut eager = lazy.collect(call.head)?; - // We don't want this converted back to a lazy frame - eager.from_lazy = true; + let eager = lazy.collect(call.head)?; Ok(PipelineData::Value( eager .cache(plugin, engine, call.head)? diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop.rs b/crates/nu_plugin_polars/src/dataframe/lazy/drop.rs similarity index 62% rename from crates/nu_plugin_polars/src/dataframe/eager/drop.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/drop.rs index 812a77b048..6a0bd5b58a 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/drop.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/drop.rs @@ -4,7 +4,7 @@ use nu_protocol::{ Value, }; -use crate::values::CustomValueSupport; +use crate::values::{CustomValueSupport, NuLazyFrame}; use crate::PolarsPlugin; use super::super::values::utils::convert_columns; @@ -37,7 +37,7 @@ impl PluginCommand for DropDF { fn examples(&self) -> Vec { vec![Example { description: "drop column a", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a | polars collect", result: Some( NuDataFrame::try_from_columns( vec![Column::new( @@ -70,46 +70,11 @@ fn command( input: PipelineData, ) -> Result { let columns: Vec = call.rest(0)?; - let (col_string, col_span) = convert_columns(columns, call.head)?; + let (col_string, _col_span) = convert_columns(columns, call.head)?; - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; - - let new_df = col_string - .first() - .ok_or_else(|| ShellError::GenericError { - error: "Empty names list".into(), - msg: "No column names were found".into(), - span: Some(col_span), - help: None, - inner: vec![], - }) - .and_then(|col| { - df.as_ref() - .drop(&col.item) - .map_err(|e| ShellError::GenericError { - error: "Error dropping column".into(), - msg: e.to_string(), - span: Some(col.span), - help: None, - inner: vec![], - }) - })?; - - // If there are more columns in the drop selection list, these - // are added from the resulting dataframe - let polars_df = col_string.iter().skip(1).try_fold(new_df, |new_df, col| { - new_df - .drop(&col.item) - .map_err(|e| ShellError::GenericError { - error: "Error dropping column".into(), - msg: e.to_string(), - span: Some(col.span), - help: None, - inner: vec![], - }) - })?; - - let final_df = NuDataFrame::new(df.from_lazy, polars_df); + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let polars_df = df.to_polars().drop(col_string.iter().map(|s| &s.item)); + let final_df = NuLazyFrame::new(polars_df); final_df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs b/crates/nu_plugin_polars/src/dataframe/lazy/drop_duplicates.rs similarity index 82% rename from crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/drop_duplicates.rs index 3eb6311637..f3cf569a3b 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/drop_duplicates.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/drop_duplicates.rs @@ -5,11 +5,11 @@ use nu_protocol::{ }; use polars::prelude::UniqueKeepStrategy; -use crate::values::CustomValueSupport; +use crate::values::{CustomValueSupport, NuDataFrame}; use crate::PolarsPlugin; use super::super::values::utils::convert_columns_string; -use super::super::values::{Column, NuDataFrame}; +use super::super::values::{Column, NuLazyFrame}; #[derive(Clone)] pub struct DropDuplicates; @@ -48,7 +48,7 @@ impl PluginCommand for DropDuplicates { fn examples(&self) -> Vec { vec![Example { description: "drop duplicates", - example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates", + example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -87,7 +87,7 @@ fn command( input: PipelineData, ) -> Result { let columns: Option> = call.opt(0)?; - let (subset, col_span) = match columns { + let (subset, _col_span) = match columns { Some(cols) => { let (agg_string, col_span) = convert_columns_string(cols, call.head)?; (Some(agg_string), col_span) @@ -95,9 +95,7 @@ fn command( None => (None, call.head), }; - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; - - let subset_slice = subset.as_ref().map(|cols| &cols[..]); + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let keep_strategy = if call.has_flag("last")? { UniqueKeepStrategy::Last @@ -105,18 +103,9 @@ fn command( UniqueKeepStrategy::First }; - let polars_df = df - .as_ref() - .unique(subset_slice, keep_strategy, None) - .map_err(|e| ShellError::GenericError { - error: "Error dropping duplicates".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; + let polars_df = df.to_polars().unique(subset, keep_strategy); - let df = NuDataFrame::new(df.from_lazy, polars_df); + let df = NuLazyFrame::new(polars_df); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs b/crates/nu_plugin_polars/src/dataframe/lazy/drop_nulls.rs similarity index 84% rename from crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/drop_nulls.rs index 4d1b41e2fa..ba03429359 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/drop_nulls.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/drop_nulls.rs @@ -4,11 +4,13 @@ use nu_protocol::{ Value, }; -use crate::values::CustomValueSupport; +use polars_lazy::dsl::col; + +use crate::values::{CustomValueSupport, NuDataFrame}; use crate::PolarsPlugin; use super::super::values::utils::convert_columns_string; -use super::super::values::{Column, NuDataFrame}; +use super::super::values::{Column, NuLazyFrame}; #[derive(Clone)] pub struct DropNulls; @@ -43,8 +45,7 @@ impl PluginCommand for DropNulls { Example { description: "drop null values in dataframe", example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df); - let res = ($df.b / $df.b); - let a = ($df | polars with-column $res --name res); + let a = ($df | polars with-column [((polars col b) / (polars col b) | polars as res)]); $a | polars drop-nulls"#, result: Some( NuDataFrame::try_from_columns( @@ -109,31 +110,20 @@ fn command( call: &EvaluatedCall, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; - + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let columns: Option> = call.opt(0)?; - let (subset, col_span) = match columns { + let (subset, _col_span) = match columns { Some(cols) => { let (agg_string, col_span) = convert_columns_string(cols, call.head)?; - (Some(agg_string), col_span) + let agg_expr = agg_string.iter().map(|s| col(s)).collect(); + (Some(agg_expr), col_span) } None => (None, call.head), }; - let subset_slice = subset.as_ref().map(|cols| &cols[..]); - - let polars_df = df - .as_ref() - .drop_nulls(subset_slice) - .map_err(|e| ShellError::GenericError { - error: "Error dropping nulls".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - let df = NuDataFrame::new(df.from_lazy, polars_df); + let polars_df = df.to_polars().drop_nulls(subset); + let df = NuLazyFrame::new(polars_df); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs index b142826326..49d917a393 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs @@ -70,7 +70,7 @@ impl PluginCommand for LazyFetch { let value = input.into_value(call.head); let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; - let mut eager: NuDataFrame = lazy + let eager: NuDataFrame = lazy .to_polars() .fetch(rows as usize) .map_err(|e| ShellError::GenericError { @@ -82,8 +82,6 @@ impl PluginCommand for LazyFetch { })? .into(); - // mark this as not from lazy so it doesn't get converted back to a lazy frame - eager.from_lazy = false; eager .to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs index e68a6829f0..64e6fd0d3f 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs @@ -40,7 +40,7 @@ impl PluginCommand for LazyFillNull { fn examples(&self) -> Vec { vec![Example { description: "Fills the null values by 0", - example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0", + example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0 | polars collect", result: Some( NuDataFrame::try_from_columns( vec![Column::new( @@ -96,7 +96,7 @@ fn cmd_lazy( fill: Value, ) -> Result { let expr = NuExpression::try_from_value(plugin, &fill)?.into_polars(); - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr)); + let lazy = NuLazyFrame::new(lazy.to_polars().fill_null(expr)); lazy.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs index 246976c2ee..6adabb967a 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs @@ -42,7 +42,7 @@ impl PluginCommand for LazyFilter { vec![Example { description: "Filter dataframe using an expression", example: - "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)", + "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4) | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -85,10 +85,7 @@ fn command( lazy: NuLazyFrame, filter_expr: NuExpression, ) -> Result { - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.to_polars().filter(filter_expr.into_polars()), - ); + let lazy = NuLazyFrame::new(lazy.to_polars().filter(filter_expr.into_polars())); lazy.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs b/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs new file mode 100644 index 0000000000..cd23a3b370 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs @@ -0,0 +1,97 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; +use polars::prelude::LazyFrame; + +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame}, + values::CustomValueSupport, + PolarsPlugin, +}; + +use super::super::values::{Column, NuDataFrame}; + +#[derive(Clone)] +pub struct FilterWith; + +impl PluginCommand for FilterWith { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars filter-with" + } + + fn usage(&self) -> &str { + "Filters dataframe using an expression." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "filter expression", + SyntaxShape::Any, + "filter expression used to filter dataframe", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Filter dataframe using an expression", + example: + "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)", + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) + } +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let expr: Value = call.req(0)?; + let expr = NuExpression::try_from_value(plugin, &expr)?; + let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); + lazy.to_pipeline_data(plugin, engine, call.head) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&FilterWith) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/eager/first.rs b/crates/nu_plugin_polars/src/dataframe/lazy/first.rs similarity index 91% rename from crates/nu_plugin_polars/src/dataframe/eager/first.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/first.rs index 69f80e42c0..7f32dbf71d 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/first.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/first.rs @@ -48,7 +48,7 @@ impl PluginCommand for FirstDF { vec![ Example { description: "Return the first row of a dataframe", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -63,7 +63,7 @@ impl PluginCommand for FirstDF { }, Example { description: "Return the first two rows of a dataframe", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2 | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -98,13 +98,12 @@ impl PluginCommand for FirstDF { input: PipelineData, ) -> Result { let value = input.into_value(call.head); - if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; - command(plugin, engine, call, df).map_err(|e| e.into()) + if NuLazyFrame::can_downcast(&value) || NuDataFrame::can_downcast(&value) { + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command(plugin, engine, call, lazy).map_err(LabeledError::from) } else { let expr = NuExpression::try_from_value(plugin, &value)?; let expr: NuExpression = expr.into_polars().first().into(); - expr.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } @@ -115,13 +114,13 @@ fn command( plugin: &PolarsPlugin, engine: &EngineInterface, call: &EvaluatedCall, - df: NuDataFrame, + df: NuLazyFrame, ) -> Result { let rows: Option = call.opt(0)?; let rows = rows.unwrap_or(1); - let res = df.as_ref().head(Some(rows)); - let res = NuDataFrame::new(false, res); + let res = df.to_polars().slice(0, rows as u32); + let res: NuLazyFrame = res.into(); res.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/get.rs b/crates/nu_plugin_polars/src/dataframe/lazy/get.rs similarity index 77% rename from crates/nu_plugin_polars/src/dataframe/eager/get.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/get.rs index 34ba98154f..a56978f917 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/get.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/get.rs @@ -5,10 +5,13 @@ use nu_protocol::{ }; use crate::{ - dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin, + dataframe::values::utils::convert_columns_string, + values::{CustomValueSupport, NuDataFrame}, + PolarsPlugin, }; -use super::super::values::{Column, NuDataFrame}; +use super::super::values::{Column, NuLazyFrame}; +use polars::prelude::{col, Expr}; #[derive(Clone)] pub struct GetDF; @@ -37,7 +40,7 @@ impl PluginCommand for GetDF { fn examples(&self) -> Vec { vec![Example { description: "Returns the selected column", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a | polars collect", result: Some( NuDataFrame::try_from_columns( vec![Column::new( @@ -70,21 +73,13 @@ fn command( input: PipelineData, ) -> Result { let columns: Vec = call.rest(0)?; - let (col_string, col_span) = convert_columns_string(columns, call.head)?; + let (col_string, _col_span) = convert_columns_string(columns, call.head)?; + let col_expr: Vec = col_string.iter().map(|s| col(s)).collect(); - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; - let df = df - .as_ref() - .select(col_string) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - let df = NuDataFrame::new(false, df); + let df = df.to_polars().select(col_expr); + let df = NuLazyFrame::new(df); df.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs index 576d11e446..2bc7f578c8 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs @@ -79,7 +79,7 @@ impl PluginCommand for ToLazyGroupBy { Example { description: "Group by and perform an aggregation", example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | polars into-lazy + | polars into-df | polars group-by a | polars agg [ (polars col b | polars min | polars as "b_min") @@ -152,7 +152,7 @@ fn command( expressions: Vec, ) -> Result { let group_by = lazy.to_polars().group_by(expressions); - let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?); + let group_by = NuLazyGroupBy::new(group_by, lazy.schema()?); group_by.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs index f7c5f18584..feea8cf308 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs @@ -54,8 +54,8 @@ impl PluginCommand for LazyJoin { vec![ Example { description: "Join two lazy dataframes", - example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); + example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-df); $df_a | polars join $df_b a foo | polars collect"#, result: Some( NuDataFrame::try_from_columns( @@ -115,7 +115,7 @@ impl PluginCommand for LazyJoin { Example { description: "Join one eager dataframe with a lazy dataframe", example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-df); $df_a | polars join $df_b a foo"#, result: Some( NuDataFrame::try_from_columns( @@ -230,7 +230,6 @@ impl PluginCommand for LazyJoin { let value = input.into_value(call.head); let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; - let from_eager = lazy.from_eager; let lazy = lazy.to_polars(); let lazy = lazy @@ -243,7 +242,7 @@ impl PluginCommand for LazyJoin { .suffix(suffix) .finish(); - let lazy = NuLazyFrame::new(from_eager, lazy); + let lazy = NuLazyFrame::new(lazy); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/last.rs b/crates/nu_plugin_polars/src/dataframe/lazy/last.rs similarity index 90% rename from crates/nu_plugin_polars/src/dataframe/eager/last.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/last.rs index 0da0832d9b..44095ac44f 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/last.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/last.rs @@ -3,7 +3,7 @@ use crate::{ PolarsPlugin, }; -use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression}; +use super::super::values::{NuDataFrame, NuExpression}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, @@ -44,7 +44,7 @@ impl PluginCommand for LastDF { vec![ Example { description: "Create new dataframe with last rows", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -74,7 +74,7 @@ impl PluginCommand for LastDF { ) -> Result { let value = input.into_value(call.head); if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; + let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command(plugin, engine, call, df).map_err(|e| e.into()) } else { let expr = NuExpression::try_from_value(plugin, &value)?; @@ -90,13 +90,13 @@ fn command( plugin: &PolarsPlugin, engine: &EngineInterface, call: &EvaluatedCall, - df: NuDataFrame, + df: NuLazyFrame, ) -> Result { let rows: Option = call.opt(0)?; - let rows = rows.unwrap_or(DEFAULT_ROWS); + let rows = rows.unwrap_or(1); - let res = df.as_ref().tail(Some(rows)); - let res = NuDataFrame::new(false, res); + let res = df.to_polars().tail(rows as u32); + let res = NuLazyFrame::new(res); res.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs b/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs index 1727946f6a..447c806ea3 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs @@ -46,7 +46,7 @@ macro_rules! lazy_command { ) -> Result { let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head) .map_err(LabeledError::from)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func()); + let lazy = NuLazyFrame::new(lazy.to_polars().$func()); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs index c3cfdba099..abd55c77c1 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs @@ -126,7 +126,7 @@ fn command( span: None, inner: vec![], })?; - let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy); + let lazy = NuLazyFrame::new(polars_lazy); lazy.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/melt.rs b/crates/nu_plugin_polars/src/dataframe/lazy/melt.rs similarity index 56% rename from crates/nu_plugin_polars/src/dataframe/eager/melt.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/melt.rs index b69389ed24..3f0b77e2f4 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/melt.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/melt.rs @@ -1,11 +1,14 @@ use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ - Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, - SyntaxShape, Type, Value, + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, }; +use polars::frame::explode::MeltArgs; use crate::{ - dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin, + dataframe::values::utils::convert_columns_string, + values::{CustomValueSupport, NuLazyFrame}, + PolarsPlugin, }; use super::super::values::{Column, NuDataFrame}; @@ -50,6 +53,11 @@ impl PluginCommand for MeltDF { "optional name for value column", Some('l'), ) + .switch( + "streamable", + "Use polar's streaming engine. Results will not have a stable ordering.", + Some('s'), + ) .input_output_type( Type::Custom("dataframe".into()), Type::Custom("dataframe".into()), @@ -61,7 +69,7 @@ impl PluginCommand for MeltDF { vec![Example { description: "melt dataframe", example: - "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]", + "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d] | polars collect", result: Some( NuDataFrame::try_from_columns(vec![ Column::new( @@ -135,111 +143,31 @@ fn command( let id_col: Vec = call.get_flag("columns")?.expect("required value"); let val_col: Vec = call.get_flag("values")?.expect("required value"); - let value_name: Option> = call.get_flag("value-name")?; - let variable_name: Option> = call.get_flag("variable-name")?; + let value_name = call.get_flag("value-name")?.map(|v: String| v.into()); + let variable_name = call.get_flag("variable-name")?.map(|v: String| v.into()); + let streamable = call.has_flag("streamable")?; - let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; - let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; + let (id_vars, _id_col_span) = convert_columns_string(id_col, call.head)?; + let id_vars = id_vars.into_iter().map(Into::into).collect(); + let (value_vars, _val_col_span) = convert_columns_string(val_col, call.head)?; + let value_vars = value_vars.into_iter().map(Into::into).collect(); - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let polars_df = df.to_polars(); - check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; - check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; + let args = MeltArgs { + id_vars, + value_vars, + variable_name, + value_name, + streamable, + }; - let mut res = df - .as_ref() - .melt(&id_col_string, &val_col_string) - .map_err(|e| ShellError::GenericError { - error: "Error calculating melt".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - if let Some(name) = &variable_name { - res.rename("variable", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - - if let Some(name) = &value_name { - res.rename("value", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - - let res = NuDataFrame::new(false, res); + let res = polars_df.melt(args); + let res = NuLazyFrame::new(res); res.to_pipeline_data(plugin, engine, call.head) } -fn check_column_datatypes>( - df: &polars::prelude::DataFrame, - cols: &[T], - col_span: Span, -) -> Result<(), ShellError> { - if cols.is_empty() { - return Err(ShellError::GenericError { - error: "Merge error".into(), - msg: "empty column list".into(), - span: Some(col_span), - help: None, - inner: vec![], - }); - } - - // Checking if they are same type - if cols.len() > 1 { - for w in cols.windows(2) { - let l_series = df - .column(w[0].as_ref()) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - - let r_series = df - .column(w[1].as_ref()) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - - if l_series.dtype() != r_series.dtype() { - return Err(ShellError::GenericError { - error: "Merge error".into(), - msg: "found different column types in list".into(), - span: Some(col_span), - help: Some(format!( - "datatypes {} and {} are incompatible", - l_series.dtype(), - r_series.dtype() - )), - inner: vec![], - }); - } - } - } - - Ok(()) -} - #[cfg(test)] mod test { use crate::test::test_polars_plugin_command; diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs b/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs index e70143e6ce..1048fcc4c0 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/mod.rs @@ -1,19 +1,30 @@ mod aggregate; +mod cast; mod collect; +mod drop; +mod drop_duplicates; +mod drop_nulls; mod explode; mod fetch; mod fill_nan; mod fill_null; mod filter; +mod filter_with; +mod first; mod flatten; +mod get; pub mod groupby; mod join; +mod last; mod macro_commands; mod median; +mod melt; mod quantile; +mod rename; mod select; +mod slice; mod sort_by_expr; -mod to_lazy; +mod with_column; use nu_plugin::PluginCommand; @@ -29,13 +40,20 @@ pub(crate) use crate::dataframe::lazy::macro_commands::*; use crate::dataframe::lazy::quantile::LazyQuantile; pub(crate) use crate::dataframe::lazy::select::LazySelect; use crate::dataframe::lazy::sort_by_expr::LazySortBy; -pub use crate::dataframe::lazy::to_lazy::ToLazyFrame; use crate::PolarsPlugin; pub use explode::LazyExplode; pub use flatten::LazyFlatten; pub(crate) fn lazy_commands() -> Vec>> { vec![ + Box::new(cast::CastDF), + Box::new(drop::DropDF), + Box::new(drop_duplicates::DropDuplicates), + Box::new(drop_nulls::DropNulls), + Box::new(filter_with::FilterWith), + Box::new(first::FirstDF), + Box::new(get::GetDF), + Box::new(last::LastDF), Box::new(LazyAggregate), Box::new(LazyCache), Box::new(LazyCollect), @@ -47,11 +65,14 @@ pub(crate) fn lazy_commands() -> Vec Result { let lazy = NuLazyFrame::new( - lazy.from_eager, lazy.to_polars() .quantile(lit(quantile), QuantileInterpolOptions::default()) .map_err(|e| ShellError::GenericError { diff --git a/crates/nu_plugin_polars/src/dataframe/eager/rename.rs b/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs similarity index 77% rename from crates/nu_plugin_polars/src/dataframe/eager/rename.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/rename.rs index 5722dfde26..b678824584 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/rename.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs @@ -6,7 +6,7 @@ use nu_protocol::{ use crate::{ dataframe::{utils::extract_strings, values::NuLazyFrame}, - values::{CustomValueSupport, PolarsPluginObject}, + values::CustomValueSupport, PolarsPlugin, }; @@ -49,7 +49,7 @@ impl PluginCommand for RenameDF { vec![ Example { description: "Renames a series", - example: "[5 6 7 8] | polars into-df | polars rename '0' new_name", + example: "[5 6 7 8] | polars into-df | polars rename '0' new_name | polars collect", result: Some( NuDataFrame::try_from_columns( vec![Column::new( @@ -69,7 +69,7 @@ impl PluginCommand for RenameDF { }, Example { description: "Renames a dataframe column", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -91,7 +91,7 @@ impl PluginCommand for RenameDF { Example { description: "Renames two dataframe columns", example: - "[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]", + "[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new] | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -121,49 +121,11 @@ impl PluginCommand for RenameDF { input: PipelineData, ) -> Result { let value = input.into_value(call.head); - match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? { - PolarsPluginObject::NuDataFrame(df) => { - command_eager(plugin, engine, call, df).map_err(LabeledError::from) - } - PolarsPluginObject::NuLazyFrame(lazy) => { - command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) - } - _ => Err(LabeledError::new(format!("Unsupported type: {value:?}")) - .with_label("Unsupported Type", call.head)), - } + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } } -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - df: NuDataFrame, -) -> Result { - let columns: Value = call.req(0)?; - let columns = extract_strings(columns)?; - - let new_names: Value = call.req(1)?; - let new_names = extract_strings(new_names)?; - - let mut polars_df = df.to_polars(); - - for (from, to) in columns.iter().zip(new_names.iter()) { - polars_df - .rename(from, to) - .map_err(|e| ShellError::GenericError { - error: "Error renaming".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - } - - let df = NuDataFrame::new(false, polars_df); - df.to_pipeline_data(plugin, engine, call.head) -} - fn command_lazy( plugin: &PolarsPlugin, engine: &EngineInterface, diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs index b2cfc6f945..e49aa8e654 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs @@ -67,7 +67,7 @@ impl PluginCommand for LazySelect { let pipeline_value = input.into_value(call.head); let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions)); + let lazy = NuLazyFrame::new(lazy.to_polars().select(&expressions)); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/slice.rs b/crates/nu_plugin_polars/src/dataframe/lazy/slice.rs similarity index 87% rename from crates/nu_plugin_polars/src/dataframe/eager/slice.rs rename to crates/nu_plugin_polars/src/dataframe/lazy/slice.rs index c7ebaff4d7..664339d242 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/slice.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/slice.rs @@ -4,7 +4,11 @@ use nu_protocol::{ Value, }; -use crate::{dataframe::values::Column, values::CustomValueSupport, PolarsPlugin}; +use crate::{ + dataframe::values::Column, + values::{CustomValueSupport, NuLazyFrame}, + PolarsPlugin, +}; use super::super::values::NuDataFrame; @@ -36,7 +40,7 @@ impl PluginCommand for SliceDF { fn examples(&self) -> Vec { vec![Example { description: "Create new dataframe from a slice of the rows", - example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1", + example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1 | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -69,12 +73,12 @@ fn command( input: PipelineData, ) -> Result { let offset: i64 = call.req(0)?; - let size: usize = call.req(1)?; + let size: i64 = call.req(1)?; - let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; + let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?; - let res = df.as_ref().slice(offset, size); - let res = NuDataFrame::new(false, res); + let res = df.to_polars().slice(offset, size as u32); + let res = NuLazyFrame::new(res); res.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs index 655e23e089..4a975afe97 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs @@ -147,10 +147,7 @@ impl PluginCommand for LazySortBy { let pipeline_value = input.into_value(call.head); let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.to_polars().sort_by_exprs(&expressions, sort_options), - ); + let lazy = NuLazyFrame::new(lazy.to_polars().sort_by_exprs(&expressions, sort_options)); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs b/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs deleted file mode 100644 index c4e013ac6a..0000000000 --- a/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin}; - -use super::super::values::{NuDataFrame, NuLazyFrame}; - -use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; -use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type}; - -#[derive(Clone)] -pub struct ToLazyFrame; - -impl PluginCommand for ToLazyFrame { - type Plugin = PolarsPlugin; - - fn name(&self) -> &str { - "polars into-lazy" - } - - fn usage(&self) -> &str { - "Converts a dataframe into a lazy dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "schema", - SyntaxShape::Record(vec![]), - r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, - Some('s'), - ) - .input_output_type(Type::Any, Type::Custom("dataframe".into())) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Takes a table and creates a lazyframe", - example: "[[a b];[1 2] [3 4]] | polars into-lazy", - result: None, - }, - Example { - description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema", - example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema", - result: None - }, - ] - } - - fn run( - &self, - plugin: &Self::Plugin, - engine: &EngineInterface, - call: &EvaluatedCall, - input: PipelineData, - ) -> Result { - let maybe_schema = call - .get_flag("schema")? - .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - - let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?; - let mut lazy = NuLazyFrame::from_dataframe(df); - // We don't want this converted back to an eager dataframe at some point - lazy.from_eager = false; - Ok(PipelineData::Value( - lazy.cache(plugin, engine, call.head)?.into_value(call.head), - None, - )) - } -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use nu_plugin_test_support::PluginTest; - use nu_protocol::{ShellError, Span}; - - use super::*; - - #[test] - fn test_to_lazy() -> Result<(), ShellError> { - let plugin: Arc = PolarsPlugin::new_test_mode().into(); - let mut plugin_test = PluginTest::new("polars", Arc::clone(&plugin))?; - let pipeline_data = plugin_test.eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?; - let value = pipeline_data.into_value(Span::test_data()); - let df = NuLazyFrame::try_from_value(&plugin, &value)?; - assert!(!df.from_eager); - Ok(()) - } -} diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs b/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs new file mode 100644 index 0000000000..e8092231d8 --- /dev/null +++ b/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs @@ -0,0 +1,114 @@ +use super::super::values::{Column, NuDataFrame}; +use crate::{ + dataframe::values::{NuExpression, NuLazyFrame}, + values::CustomValueSupport, + PolarsPlugin, +}; +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{ + Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, + Value, +}; + +#[derive(Clone)] +pub struct WithColumn; + +impl PluginCommand for WithColumn { + type Plugin = PolarsPlugin; + + fn name(&self) -> &str { + "polars with-column" + } + + fn usage(&self) -> &str { + "Adds a series to the dataframe." + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .rest( + "series or expressions", + SyntaxShape::Any, + "series to be added or expressions used to define the new columns", + ) + .input_output_type( + Type::Custom("dataframe".into()), + Type::Custom("dataframe".into()), + ) + .category(Category::Custom("dataframe or lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Adds a series to the dataframe", + example: r#"[[a b]; [1 2] [3 4]] + | polars into-df + | polars with-column [ + ((polars col a) * 2 | polars as "c") + ((polars col a) * 3 | polars as "d") + ] + | polars collect"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(2), Value::test_int(6)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(3), Value::test_int(9)], + ), + ], + None, + ) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + plugin: &Self::Plugin, + engine: &EngineInterface, + call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + let value = input.into_value(call.head); + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) + } +} + +fn command_lazy( + plugin: &PolarsPlugin, + engine: &EngineInterface, + call: &EvaluatedCall, + lazy: NuLazyFrame, +) -> Result { + let vals: Vec = call.rest(0)?; + let value = Value::list(vals, call.head); + let expressions = NuExpression::extract_exprs(plugin, value)?; + let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into(); + lazy.to_pipeline_data(plugin, engine, call.head) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test::test_polars_plugin_command; + + #[test] + fn test_examples() -> Result<(), ShellError> { + test_polars_plugin_command(&WithColumn) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/series/shift.rs b/crates/nu_plugin_polars/src/dataframe/series/shift.rs index 64e8fe5b62..556b3361c1 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/shift.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/shift.rs @@ -1,6 +1,6 @@ use crate::{ dataframe::values::{NuExpression, NuLazyFrame}, - values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, + values::CustomValueSupport, PolarsPlugin, }; @@ -63,8 +63,7 @@ impl PluginCommand for Shift { }, Example { description: "Shifts the values by a given period, fill absent values with 0", - example: - "[1 2 2 3 3] | polars into-lazy | polars shift 2 --fill 0 | polars collect", + example: "[1 2 2 3 3] | polars into-df | polars shift 2 --fill 0 | polars collect", result: Some( NuDataFrame::try_from_columns( vec![Column::new( @@ -94,35 +93,11 @@ impl PluginCommand for Shift { input: PipelineData, ) -> Result { let value = input.into_value(call.head); - - match PolarsPluginObject::try_from_value(plugin, &value)? { - PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), - PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), - _ => Err(cant_convert_err( - &value, - &[ - PolarsPluginType::NuDataFrame, - PolarsPluginType::NuLazyGroupBy, - ], - )), - } - .map_err(LabeledError::from) + let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } } -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - df: NuDataFrame, -) -> Result { - let period: i64 = call.req(0)?; - let series = df.as_series(call.head)?.shift(period); - - let df = NuDataFrame::try_from_series_vec(vec![series], call.head)?; - df.to_pipeline_data(plugin, engine, call.head) -} - fn command_lazy( plugin: &PolarsPlugin, engine: &EngineInterface, diff --git a/crates/nu_plugin_polars/src/dataframe/series/unique.rs b/crates/nu_plugin_polars/src/dataframe/series/unique.rs index 8feec32c11..47efd880a6 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/unique.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/unique.rs @@ -1,6 +1,6 @@ use crate::{ dataframe::{utils::extract_strings, values::NuLazyFrame}, - values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, + values::CustomValueSupport, PolarsPlugin, }; @@ -11,7 +11,7 @@ use nu_protocol::{ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use polars::prelude::{IntoSeries, UniqueKeepStrategy}; +use polars::prelude::UniqueKeepStrategy; #[derive(Clone)] pub struct Unique; @@ -68,7 +68,7 @@ impl PluginCommand for Unique { }, Example { description: "Returns unique values in a subset of lazyframe columns", - example: "[[a b c]; [1 2 1] [2 2 2] [3 2 1]] | polars into-lazy | polars unique --subset [b c] | polars collect", + example: "[[a b c]; [1 2 1] [2 2 2] [3 2 1]] | polars into-df | polars unique --subset [b c] | polars collect", result: Some( NuDataFrame::try_from_columns( vec![ @@ -94,7 +94,7 @@ impl PluginCommand for Unique { Example { description: "Returns unique values in a subset of lazyframe columns", example: r#"[[a b c]; [1 2 1] [2 2 2] [3 2 1]] - | polars into-lazy + | polars into-df | polars unique --subset [b c] --last | polars collect"#, result: Some( @@ -135,42 +135,11 @@ impl PluginCommand for Unique { input: PipelineData, ) -> Result { let value = input.into_value(call.head); - - match PolarsPluginObject::try_from_value(plugin, &value)? { - PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df), - PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy), - _ => Err(cant_convert_err( - &value, - &[ - PolarsPluginType::NuDataFrame, - PolarsPluginType::NuLazyGroupBy, - ], - )), - } - .map_err(LabeledError::from) + let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?; + command_lazy(plugin, engine, call, df).map_err(LabeledError::from) } } -fn command_eager( - plugin: &PolarsPlugin, - engine: &EngineInterface, - call: &EvaluatedCall, - df: NuDataFrame, -) -> Result { - let series = df.as_series(call.head)?; - - let res = series.unique().map_err(|e| ShellError::GenericError { - error: "Error calculating unique values".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; - df.to_pipeline_data(plugin, engine, call.head) -} - fn command_lazy( plugin: &PolarsPlugin, engine: &EngineInterface, diff --git a/crates/nu_plugin_polars/src/dataframe/values/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/mod.rs index e0cc3edecb..a43c8f2412 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/mod.rs @@ -323,19 +323,7 @@ pub trait CustomValueSupport: Cacheable { engine: &EngineInterface, span: Span, ) -> Result { - match self.to_cache_value()? { - // if it was from a lazy value, make it lazy again - PolarsPluginObject::NuDataFrame(df) if df.from_lazy => { - let df = df.lazy(); - Ok(df.cache(plugin, engine, span)?.into_value(span)) - } - // if it was from an eager value, make it eager again - PolarsPluginObject::NuLazyFrame(lf) if lf.from_eager => { - let lf = lf.collect(span)?; - Ok(lf.cache(plugin, engine, span)?.into_value(span)) - } - _ => Ok(self.cache(plugin, engine, span)?.into_value(span)), - } + Ok(self.cache(plugin, engine, span)?.into_value(span)) } /// Caches the object, converts it to a it's CustomValue counterpart diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index f7941bf41d..7000cf10c4 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -472,7 +472,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result, - pub from_lazy: bool, } impl AsRef for NuDataFrame { @@ -115,17 +114,16 @@ impl AsRef for NuDataFrame { impl From for NuDataFrame { fn from(df: DataFrame) -> Self { - Self::new(false, df) + Self::new(df) } } impl NuDataFrame { - pub fn new(from_lazy: bool, df: DataFrame) -> Self { + pub fn new(df: DataFrame) -> Self { let id = Uuid::new_v4(); Self { id, df: Arc::new(df), - from_lazy, } } @@ -134,12 +132,12 @@ impl NuDataFrame { } pub fn lazy(&self) -> NuLazyFrame { - NuLazyFrame::new(true, self.to_polars().lazy()) + NuLazyFrame::new(self.to_polars().lazy()) } pub fn try_from_series(series: Series, span: Span) -> Result { match DataFrame::new(vec![series]) { - Ok(dataframe) => Ok(NuDataFrame::new(false, dataframe)), + Ok(dataframe) => Ok(NuDataFrame::new(dataframe)), Err(e) => Err(ShellError::GenericError { error: "Error creating dataframe".into(), msg: e.to_string(), @@ -202,7 +200,7 @@ impl NuDataFrame { inner: vec![], })?; - Ok(Self::new(false, dataframe)) + Ok(Self::new(dataframe)) } pub fn try_from_columns( @@ -276,7 +274,7 @@ impl NuDataFrame { inner: vec![], })?; - Ok(Self::new(false, df)) + Ok(Self::new(df)) } pub fn is_series(&self) -> bool { diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs index ecdcf73595..42d803b0e4 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs @@ -147,7 +147,7 @@ impl NuDataFrame { inner: vec![], })?; - Ok(NuDataFrame::new(false, df_new)) + Ok(NuDataFrame::new(df_new)) } Axis::Column => { if self.df.width() != other.df.width() { @@ -205,7 +205,7 @@ impl NuDataFrame { inner: vec![], })?; - Ok(NuDataFrame::new(false, df_new)) + Ok(NuDataFrame::new(df_new)) } } } diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs index 2aae66c36f..a9371f03f2 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/custom_value.rs @@ -63,45 +63,16 @@ fn compute_with_value( op: Span, right: &Value, ) -> Result { - let rhs_span = right.span(); - match right { - Value::Custom { val: rhs, .. } => { - let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { - ShellError::DowncastNotPossible { - msg: "Unable to create expression".into(), - span: rhs_span, - } - })?; - - match rhs.as_ref() { - polars::prelude::Expr::Literal(..) => with_operator( - (plugin, engine), - operator, - left, - rhs, - lhs_span, - right.span(), - op, - ), - _ => Err(ShellError::TypeMismatch { - err_message: "Only literal expressions or number".into(), - span: right.span(), - }), - } - } - _ => { - let rhs = NuExpression::try_from_value(plugin, right)?; - with_operator( - (plugin, engine), - operator, - left, - &rhs, - lhs_span, - right.span(), - op, - ) - } - } + let rhs = NuExpression::try_from_value(plugin, right)?; + with_operator( + (plugin, engine), + operator, + left, + &rhs, + lhs_span, + right.span(), + op, + ) } fn with_operator( diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs index 0f8d80c4b9..f3c969b03d 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs @@ -21,7 +21,6 @@ pub use custom_value::NuLazyFrameCustomValue; pub struct NuLazyFrame { pub id: Uuid, pub lazy: Arc, - pub from_eager: bool, } impl fmt::Debug for NuLazyFrame { @@ -32,22 +31,21 @@ impl fmt::Debug for NuLazyFrame { impl From for NuLazyFrame { fn from(lazy_frame: LazyFrame) -> Self { - NuLazyFrame::new(false, lazy_frame) + NuLazyFrame::new(lazy_frame) } } impl NuLazyFrame { - pub fn new(from_eager: bool, lazy: LazyFrame) -> Self { + pub fn new(lazy: LazyFrame) -> Self { Self { id: Uuid::new_v4(), lazy: Arc::new(lazy), - from_eager, } } pub fn from_dataframe(df: NuDataFrame) -> Self { let lazy = df.as_ref().clone().lazy(); - NuLazyFrame::new(true, lazy) + NuLazyFrame::new(lazy) } pub fn to_polars(&self) -> LazyFrame { @@ -64,7 +62,7 @@ impl NuLazyFrame { help: None, inner: vec![], }) - .map(|df| NuDataFrame::new(true, df)) + .map(NuDataFrame::new) } pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self @@ -74,7 +72,7 @@ impl NuLazyFrame { let df = self.to_polars(); let expr = expr.into_polars(); let new_frame = f(df, expr); - Self::new(self.from_eager, new_frame) + Self::new(new_frame) } pub fn schema(&self) -> Result { diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs index 8540d13c6f..fe5bc329e7 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazygroupby/mod.rs @@ -20,7 +20,6 @@ pub struct NuLazyGroupBy { pub id: Uuid, pub group_by: Arc, pub schema: NuSchema, - pub from_eager: bool, } impl fmt::Debug for NuLazyGroupBy { @@ -30,11 +29,10 @@ impl fmt::Debug for NuLazyGroupBy { } impl NuLazyGroupBy { - pub fn new(group_by: LazyGroupBy, from_eager: bool, schema: NuSchema) -> Self { + pub fn new(group_by: LazyGroupBy, schema: NuSchema) -> Self { NuLazyGroupBy { id: Uuid::new_v4(), group_by: Arc::new(group_by), - from_eager, schema, } } diff --git a/crates/nu_plugin_polars/src/dataframe/values/utils.rs b/crates/nu_plugin_polars/src/dataframe/values/utils.rs index 17e641cadc..f77870114b 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/utils.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/utils.rs @@ -48,8 +48,6 @@ pub(crate) fn convert_columns( // Converts a Vec to a Vec with a Span marking the whole // location of the columns for error referencing -// todo - fix -#[allow(dead_code)] pub(crate) fn convert_columns_string( columns: Vec, span: Span,