Polars lazy refactor (#12669)

This moves to predominantly supporting only lazy dataframes for most
operations. It removes a lot of the type conversion between lazy and
eager dataframes based on what was inputted into the command.

For the most part the changes will mean:
* You will need to run `polars collect` after performing operations
* The into-lazy command has been removed as it is redundant.
* When opening files a lazy frame will be outputted by default if the
reader supports lazy frames

A list of individual command changes can be found
[here](https://hackmd.io/@nucore/Bk-3V-hW0)

---------

Co-authored-by: Ian Manske <ian.manske@pm.me>
This commit is contained in:
Jack Wright 2024-05-06 16:19:11 -07:00 committed by GitHub
parent 97fc190cc5
commit 68adc4657f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 511 additions and 1210 deletions

1
Cargo.lock generated
View File

@ -3419,6 +3419,7 @@ dependencies = [
"polars", "polars",
"polars-arrow", "polars-arrow",
"polars-io", "polars-io",
"polars-lazy",
"polars-ops", "polars-ops",
"polars-plan", "polars-plan",
"polars-utils", "polars-utils",

View File

@ -34,6 +34,7 @@ polars-arrow = { version = "0.39"}
polars-ops = { version = "0.39"} polars-ops = { version = "0.39"}
polars-plan = { version = "0.39", features = ["regex"]} polars-plan = { version = "0.39", features = ["regex"]}
polars-utils = { version = "0.39"} polars-utils = { version = "0.39"}
polars-lazy = { version = "0.39"}
typetag = "0.2" typetag = "0.2"
uuid = { version = "1.7", features = ["v4", "serde"] } uuid = { version = "1.7", features = ["v4", "serde"] }

View File

@ -1,162 +0,0 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::LazyFrame;
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct FilterWith;
impl PluginCommand for FilterWith {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter-with"
}
fn usage(&self) -> &str {
"Filters dataframe using a mask or expression as reference."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"mask or expression",
SyntaxShape::Any,
"boolean mask used to filter data",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([true false] | polars into-df);
[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
)),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mask_value: Value = call.req(0)?;
let mask_span = mask_value.span();
if NuExpression::can_downcast(&mask_value) {
let expression = NuExpression::try_from_value(plugin, &mask_value)?;
let lazy = df.lazy();
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
lazy.to_pipeline_data(plugin, engine, call.head)
} else {
let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)?
.as_series(mask_span)?;
let mask = mask.bool().map_err(|e| ShellError::GenericError {
error: "Error casting to bool".into(),
msg: e.to_string(),
span: Some(mask_span),
help: Some("Perhaps you want to use a series with booleans as mask".into()),
inner: vec![],
})?;
let polars_df = df
.as_ref()
.filter(mask)
.map_err(|e| ShellError::GenericError {
error: "Error filtering dataframe".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The only allowed column types for dummies are String or Int".into()),
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
df.to_pipeline_data(plugin, engine, call.head)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let expr: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &expr)?;
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
lazy.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FilterWith)
}
}

View File

@ -1,22 +1,11 @@
mod append; mod append;
mod cast;
mod columns; mod columns;
mod drop;
mod drop_duplicates;
mod drop_nulls;
mod dummies; mod dummies;
mod filter_with;
mod first;
mod get;
mod last;
mod melt;
mod open; mod open;
mod query_df; mod query_df;
mod rename;
mod sample; mod sample;
mod schema; mod schema;
mod shape; mod shape;
mod slice;
mod sql_context; mod sql_context;
mod sql_expr; mod sql_expr;
mod summary; mod summary;
@ -28,30 +17,18 @@ mod to_df;
mod to_json_lines; mod to_json_lines;
mod to_nu; mod to_nu;
mod to_parquet; mod to_parquet;
mod with_column;
use crate::PolarsPlugin; use crate::PolarsPlugin;
pub use self::open::OpenDataFrame; pub use self::open::OpenDataFrame;
pub use append::AppendDF; pub use append::AppendDF;
pub use cast::CastDF;
pub use columns::ColumnsDF; pub use columns::ColumnsDF;
pub use drop::DropDF;
pub use drop_duplicates::DropDuplicates;
pub use drop_nulls::DropNulls;
pub use dummies::Dummies; pub use dummies::Dummies;
pub use filter_with::FilterWith;
pub use first::FirstDF;
pub use get::GetDF;
pub use last::LastDF;
pub use melt::MeltDF;
use nu_plugin::PluginCommand; use nu_plugin::PluginCommand;
pub use query_df::QueryDf; pub use query_df::QueryDf;
pub use rename::RenameDF;
pub use sample::SampleDF; pub use sample::SampleDF;
pub use schema::SchemaCmd; pub use schema::SchemaCmd;
pub use shape::ShapeDF; pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sql_context::SQLContext; pub use sql_context::SQLContext;
pub use summary::Summary; pub use summary::Summary;
pub use take::TakeDF; pub use take::TakeDF;
@ -62,28 +39,16 @@ pub use to_df::ToDataFrame;
pub use to_json_lines::ToJsonLines; pub use to_json_lines::ToJsonLines;
pub use to_nu::ToNu; pub use to_nu::ToNu;
pub use to_parquet::ToParquet; pub use to_parquet::ToParquet;
pub use with_column::WithColumn;
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> { pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![ vec![
Box::new(AppendDF), Box::new(AppendDF),
Box::new(CastDF),
Box::new(ColumnsDF), Box::new(ColumnsDF),
Box::new(DropDF),
Box::new(DropDuplicates),
Box::new(DropNulls),
Box::new(Dummies), Box::new(Dummies),
Box::new(FilterWith),
Box::new(GetDF),
Box::new(OpenDataFrame), Box::new(OpenDataFrame),
Box::new(MeltDF),
Box::new(Summary), Box::new(Summary),
Box::new(FirstDF),
Box::new(LastDF),
Box::new(RenameDF),
Box::new(SampleDF), Box::new(SampleDF),
Box::new(ShapeDF), Box::new(ShapeDF),
Box::new(SliceDF),
Box::new(SchemaCmd), Box::new(SchemaCmd),
Box::new(TakeDF), Box::new(TakeDF),
Box::new(ToNu), Box::new(ToNu),
@ -94,6 +59,5 @@ pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugi
Box::new(ToJsonLines), Box::new(ToJsonLines),
Box::new(ToParquet), Box::new(ToParquet),
Box::new(QueryDf), Box::new(QueryDf),
Box::new(WithColumn),
] ]
} }

View File

@ -4,6 +4,7 @@ use crate::{
PolarsPlugin, PolarsPlugin,
}; };
use nu_path::expand_path_with; use nu_path::expand_path_with;
use polars_lazy::frame::LazyJsonLineReader;
use super::super::values::NuDataFrame; use super::super::values::NuDataFrame;
use nu_plugin::PluginCommand; use nu_plugin::PluginCommand;
@ -19,8 +20,8 @@ use std::{
}; };
use polars::prelude::{ use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, col, Expr, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame, ScanArgsIpc,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, ScanArgsParquet, SerReader,
}; };
use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions}; use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions};
@ -46,7 +47,6 @@ impl PluginCommand for OpenDataFrame {
SyntaxShape::Filepath, SyntaxShape::Filepath,
"file path to load values from", "file path to load values from",
) )
.switch("lazy", "creates a lazy dataframe", Some('l'))
.named( .named(
"type", "type",
SyntaxShape::String, SyntaxShape::String,
@ -161,63 +161,39 @@ fn from_parquet(
engine: &nu_plugin::EngineInterface, engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall, call: &nu_plugin::EvaluatedCall,
file_path: &Path, file_path: &Path,
file_span: Span, _file_span: Span,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
if call.has_flag("lazy")? { let args = ScanArgsParquet {
let file: String = call.req(0)?; n_rows: None,
let args = ScanArgsParquet { cache: true,
n_rows: None, parallel: ParallelStrategy::Auto,
cache: true, rechunk: false,
parallel: ParallelStrategy::Auto, row_index: None,
rechunk: false, low_memory: false,
row_index: None, cloud_options: None,
low_memory: false, use_statistics: false,
cloud_options: None, hive_options: HiveOptions::default(),
use_statistics: false, };
hive_options: HiveOptions::default(),
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) let maybe_columns: Option<Vec<Expr>> = call
.map_err(|e| ShellError::GenericError { .get_flag::<Vec<String>>("columns")?
error: "Parquet reader error".into(), .map(|cols| cols.iter().map(|s| col(s)).collect());
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head) let mut polars_df =
} else { LazyFrame::scan_parquet(file_path, args).map_err(|e| ShellError::GenericError {
let columns: Option<Vec<String>> = call.get_flag("columns")?; error: "Parquet reader error".into(),
msg: format!("{e:?}"),
let r = File::open(file_path).map_err(|e| ShellError::GenericError { span: Some(call.head),
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file_span),
help: None, help: None,
inner: vec![], inner: vec![],
})?; })?;
let reader = ParquetReader::new(r);
let reader = match columns { if let Some(columns) = maybe_columns {
None => reader, polars_df = polars_df.select(columns);
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head)
} }
let df: NuLazyFrame = polars_df.into();
df.cache_and_to_value(plugin, engine, call.head)
} }
fn from_avro( fn from_avro(
@ -262,60 +238,36 @@ fn from_ipc(
engine: &nu_plugin::EngineInterface, engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall, call: &nu_plugin::EvaluatedCall,
file_path: &Path, file_path: &Path,
file_span: Span, _file_span: Span,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
if call.has_flag("lazy")? { let args = ScanArgsIpc {
let file: String = call.req(0)?; n_rows: None,
let args = ScanArgsIpc { cache: true,
n_rows: None, rechunk: false,
cache: true, row_index: None,
rechunk: false, memory_map: true,
row_index: None, cloud_options: None,
memory_map: true, };
cloud_options: None,
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args) let maybe_columns: Option<Vec<Expr>> = call
.map_err(|e| ShellError::GenericError { .get_flag::<Vec<String>>("columns")?
error: "IPC reader error".into(), .map(|cols| cols.iter().map(|s| col(s)).collect());
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head) let mut polars_df =
} else { LazyFrame::scan_ipc(file_path, args).map_err(|e| ShellError::GenericError {
let columns: Option<Vec<String>> = call.get_flag("columns")?; error: "IPC reader error".into(),
msg: format!("{e:?}"),
let r = File::open(file_path).map_err(|e| ShellError::GenericError { span: Some(call.head),
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file_span),
help: None, help: None,
inner: vec![], inner: vec![],
})?; })?;
let reader = IpcReader::new(r);
let reader = match columns { if let Some(columns) = maybe_columns {
None => reader, polars_df = polars_df.select(columns);
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head)
} }
let df: NuLazyFrame = polars_df.into();
df.cache_and_to_value(plugin, engine, call.head)
} }
fn from_json( fn from_json(
@ -364,32 +316,21 @@ fn from_jsonl(
engine: &nu_plugin::EngineInterface, engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall, call: &nu_plugin::EvaluatedCall,
file_path: &Path, file_path: &Path,
file_span: Span, _file_span: Span,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let infer_schema: Option<usize> = call.get_flag("infer-schema")?; let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let maybe_schema = call let maybe_schema = call
.get_flag("schema")? .get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema)) .map(|schema| NuSchema::try_from(&schema))
.transpose()?; .transpose()?;
let file = File::open(file_path).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file_span),
help: None,
inner: vec![],
})?;
let buf_reader = BufReader::new(file); let maybe_columns: Option<Vec<Expr>> = call
let reader = JsonReader::new(buf_reader) .get_flag::<Vec<String>>("columns")?
.with_json_format(JsonFormat::JsonLines) .map(|cols| cols.iter().map(|s| col(s)).collect());
.infer_schema_len(infer_schema);
let reader = match maybe_schema { let mut polars_df = LazyJsonLineReader::new(file_path)
Some(schema) => reader.with_schema(schema.into()), .with_infer_schema_length(infer_schema)
None => reader, .with_schema(maybe_schema.map(|s| s.into()))
};
let df: NuDataFrame = reader
.finish() .finish()
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "Json lines reader error".into(), error: "Json lines reader error".into(),
@ -397,9 +338,13 @@ fn from_jsonl(
span: Some(call.head), span: Some(call.head),
help: None, help: None,
inner: vec![], inner: vec![],
})? })?;
.into();
if let Some(columns) = maybe_columns {
polars_df = polars_df.select(columns);
}
let df: NuLazyFrame = polars_df.into();
df.cache_and_to_value(plugin, engine, call.head) df.cache_and_to_value(plugin, engine, call.head)
} }
@ -408,137 +353,73 @@ fn from_csv(
engine: &nu_plugin::EngineInterface, engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall, call: &nu_plugin::EvaluatedCall,
file_path: &Path, file_path: &Path,
file_span: Span, _file_span: Span,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?; let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?; let no_header: bool = call.has_flag("no-header")?;
let infer_schema: Option<usize> = call.get_flag("infer-schema")?; let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let skip_rows: Option<usize> = call.get_flag("skip-rows")?; let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag("columns")?; let maybe_columns: Option<Vec<Expr>> = call
.get_flag::<Vec<String>>("columns")?
.map(|cols| cols.iter().map(|s| col(s)).collect());
let maybe_schema = call let maybe_schema = call
.get_flag("schema")? .get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema)) .map(|schema| NuSchema::try_from(&schema))
.transpose()?; .transpose()?;
if call.has_flag("lazy")? { let csv_reader = LazyCsvReader::new(file_path);
let csv_reader = LazyCsvReader::new(file_path);
let csv_reader = match delimiter { let csv_reader = match delimiter {
None => csv_reader, None => csv_reader,
Some(d) => { Some(d) => {
if d.item.len() != 1 { if d.item.len() != 1 {
return Err(ShellError::GenericError { return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(), error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(), msg: "Delimiter has to be one character".into(),
span: Some(d.span), span: Some(d.span),
help: None, help: None,
inner: vec![], inner: vec![],
}); });
} else { } else {
let delimiter = match d.item.chars().next() { let delimiter = match d.item.chars().next() {
Some(d) => d as u8, Some(d) => d as u8,
None => unreachable!(), None => unreachable!(),
}; };
csv_reader.with_separator(delimiter) csv_reader.with_separator(delimiter)
}
} }
}; }
};
let csv_reader = csv_reader.has_header(!no_header); let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema { let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())), Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader, None => csv_reader,
}; };
let csv_reader = match infer_schema { let csv_reader = match infer_schema {
None => csv_reader, None => csv_reader,
Some(r) => csv_reader.with_infer_schema_length(Some(r)), Some(r) => csv_reader.with_infer_schema_length(Some(r)),
}; };
let csv_reader = match skip_rows { let csv_reader = match skip_rows {
None => csv_reader, None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r), Some(r) => csv_reader.with_skip_rows(r),
}; };
let df: NuLazyFrame = csv_reader let mut polars_df = csv_reader.finish().map_err(|e| ShellError::GenericError {
.finish() error: "CSV reader error".into(),
.map_err(|e| ShellError::GenericError { msg: format!("{e:?}"),
error: "Parquet reader error".into(), span: Some(call.head),
msg: format!("{e:?}"), help: None,
span: Some(call.head), inner: vec![],
help: None, })?;
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head) if let Some(columns) = maybe_columns {
} else { polars_df = polars_df.select(columns);
let csv_reader = CsvReader::from_path(file_path)
.map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(),
msg: e.to_string(),
span: Some(file_span),
help: None,
inner: vec![],
})?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let csv_reader = match columns {
None => csv_reader,
Some(columns) => csv_reader.with_columns(Some(columns)),
};
let df: NuDataFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
df.cache_and_to_value(plugin, engine, call.head)
} }
let df: NuLazyFrame = polars_df.into();
df.cache_and_to_value(plugin, engine, call.head)
} }

View File

@ -91,7 +91,7 @@ fn command(
help: None, help: None,
inner: vec![], inner: vec![],
})?; })?;
let lazy = NuLazyFrame::new(!df.from_lazy, df_sql); let lazy = NuLazyFrame::new(df_sql);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -147,7 +147,7 @@ fn command(
inner: vec![], inner: vec![],
}), }),
}; };
let df = NuDataFrame::new(false, df?); let df = NuDataFrame::new(df?);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -272,7 +272,7 @@ fn command(
inner: vec![], inner: vec![],
})?; })?;
let df = NuDataFrame::new(df.from_lazy, polars_df); let df = NuDataFrame::new(polars_df);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -143,7 +143,7 @@ fn command(
inner: vec![], inner: vec![],
})?; })?;
let df = NuDataFrame::new(df.from_lazy, polars_df); let df = NuDataFrame::new(polars_df);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -1,196 +0,0 @@
use super::super::values::{Column, NuDataFrame};
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{CustomValueSupport, PolarsPluginObject},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct WithColumn;
impl PluginCommand for WithColumn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars with-column"
}
fn usage(&self) -> &str {
"Adds a series to the dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named("name", SyntaxShape::String, "new column name", Some('n'))
.rest(
"series or expressions",
SyntaxShape::Any,
"series to be added or expressions used to define the new columns",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-df
| polars with-column ([5 6] | polars into-df) --name c"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(5), Value::test_int(6)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-lazy
| polars with-column [
((polars col a) * 2 | polars as "c")
((polars col a) * 3 | polars as "d")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(ShellError::CantConvert {
to_type: "lazy or eager dataframe".into(),
from_type: value.get_type().to_string(),
span: value.span(),
help: None,
}),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let new_column: Value = call.req(0)?;
let column_span = new_column.span();
if NuExpression::can_downcast(&new_column) {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions));
let df = lazy.collect(call.head)?;
df.to_pipeline_data(plugin, engine, call.head)
} else {
let mut other = NuDataFrame::try_from_value_coerce(plugin, &new_column, call.head)?
.as_series(column_span)?;
let name = match call.get_flag::<String>("name")? {
Some(name) => name,
None => other.name().to_string(),
};
let series = other.rename(&name).clone();
let mut polars_df = df.to_polars();
polars_df
.with_column(series)
.map_err(|e| ShellError::GenericError {
error: "Error adding column to dataframe".into(),
msg: e.to_string(),
span: Some(column_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
df.to_pipeline_data(plugin, engine, call.head)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into();
lazy.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&WithColumn)
}
}

View File

@ -164,7 +164,6 @@ macro_rules! lazy_expr_command {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?; .map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars() lazy.to_polars()
.$func() .$func()
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
@ -245,7 +244,6 @@ macro_rules! lazy_expr_command {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?; .map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars() lazy.to_polars()
.$func($ddof) .$func($ddof)
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {

View File

@ -181,8 +181,7 @@ fn command_df(
res.rename("is_in"); res.rename("is_in");
let mut new_df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?; let new_df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
new_df.from_lazy = df.from_lazy;
new_df.to_pipeline_data(plugin, engine, call.head) new_df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -48,7 +48,7 @@ impl PluginCommand for ExprOtherwise {
Example { Example {
description: "Create a new column for the dataframe", description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]] example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy | polars into-df
| polars with-column ( | polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
) )

View File

@ -57,7 +57,7 @@ impl PluginCommand for ExprWhen {
Example { Example {
description: "Create a new column for the dataframe", description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]] example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy | polars into-df
| polars with-column ( | polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
) )

View File

@ -80,7 +80,7 @@ impl PluginCommand for LazyAggregate {
Example { Example {
description: "Group by and perform an aggregation", description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy | polars into-df
| polars group-by a | polars group-by a
| polars agg [ | polars agg [
(polars col b | polars min | polars as "b_min") (polars col b | polars min | polars as "b_min")
@ -147,7 +147,7 @@ impl PluginCommand for LazyAggregate {
} }
let polars = group_by.to_polars(); let polars = group_by.to_polars();
let lazy = NuLazyFrame::new(false, polars.agg(&expressions)); let lazy = NuLazyFrame::new(polars.agg(&expressions));
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }

View File

@ -4,7 +4,6 @@ use crate::{
PolarsPlugin, PolarsPlugin,
}; };
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
@ -67,7 +66,7 @@ impl PluginCommand for CastDF {
Example { Example {
description: "Cast a column in a lazy dataframe to a different dtype", description: "Cast a column in a lazy dataframe to a different dtype",
example: example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema", "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
result: Some(Value::record( result: Some(Value::record(
record! { record! {
"a" => Value::string("u8", Span::test_data()), "a" => Value::string("u8", Span::test_data()),
@ -99,7 +98,7 @@ impl PluginCommand for CastDF {
} }
PolarsPluginObject::NuDataFrame(df) => { PolarsPluginObject::NuDataFrame(df) => {
let (dtype, column_nm) = df_args(call)?; let (dtype, column_nm) = df_args(call)?;
command_eager(plugin, engine, call, column_nm, dtype, df) command_lazy(plugin, engine, call, column_nm, dtype, df.lazy())
} }
PolarsPluginObject::NuExpression(expr) => { PolarsPluginObject::NuExpression(expr) => {
let dtype: String = call.req(0)?; let dtype: String = call.req(0)?;
@ -144,51 +143,10 @@ fn command_lazy(
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let column = col(&column_nm).cast(dtype); let column = col(&column_nm).cast(dtype);
let lazy = lazy.to_polars().with_columns(&[column]); let lazy = lazy.to_polars().with_columns(&[column]);
let lazy = NuLazyFrame::new(false, lazy); let lazy = NuLazyFrame::new(lazy);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
nu_df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut df = (*nu_df.df).clone();
let column = df
.column(&column_nm)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let _ = df
.with_column(casted)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
df.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {

View File

@ -32,8 +32,8 @@ impl PluginCommand for LazyCollect {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "drop duplicates", description: "collect a lazy dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars select [(polars col a) (polars col b)] | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -64,9 +64,7 @@ impl PluginCommand for LazyCollect {
let value = input.into_value(call.head); let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? { match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => { PolarsPluginObject::NuLazyFrame(lazy) => {
let mut eager = lazy.collect(call.head)?; let eager = lazy.collect(call.head)?;
// We don't want this converted back to a lazy frame
eager.from_lazy = true;
Ok(PipelineData::Value( Ok(PipelineData::Value(
eager eager
.cache(plugin, engine, call.head)? .cache(plugin, engine, call.head)?

View File

@ -4,7 +4,7 @@ use nu_protocol::{
Value, Value,
}; };
use crate::values::CustomValueSupport; use crate::values::{CustomValueSupport, NuLazyFrame};
use crate::PolarsPlugin; use crate::PolarsPlugin;
use super::super::values::utils::convert_columns; use super::super::values::utils::convert_columns;
@ -37,7 +37,7 @@ impl PluginCommand for DropDF {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "drop column a", description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![Column::new( vec![Column::new(
@ -70,46 +70,11 @@ fn command(
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?; let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns(columns, call.head)?; let (col_string, _col_span) = convert_columns(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df = df.to_polars().drop(col_string.iter().map(|s| &s.item));
let new_df = col_string let final_df = NuLazyFrame::new(polars_df);
.first()
.ok_or_else(|| ShellError::GenericError {
error: "Empty names list".into(),
msg: "No column names were found".into(),
span: Some(col_span),
help: None,
inner: vec![],
})
.and_then(|col| {
df.as_ref()
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
// If there are more columns in the drop selection list, these
// are added from the resulting dataframe
let polars_df = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
new_df
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
let final_df = NuDataFrame::new(df.from_lazy, polars_df);
final_df.to_pipeline_data(plugin, engine, call.head) final_df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -5,11 +5,11 @@ use nu_protocol::{
}; };
use polars::prelude::UniqueKeepStrategy; use polars::prelude::UniqueKeepStrategy;
use crate::values::CustomValueSupport; use crate::values::{CustomValueSupport, NuDataFrame};
use crate::PolarsPlugin; use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string; use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuLazyFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct DropDuplicates; pub struct DropDuplicates;
@ -48,7 +48,7 @@ impl PluginCommand for DropDuplicates {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "drop duplicates", description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates", example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -87,7 +87,7 @@ fn command(
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let columns: Option<Vec<Value>> = call.opt(0)?; let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns { let (subset, _col_span) = match columns {
Some(cols) => { Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?; let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span) (Some(agg_string), col_span)
@ -95,9 +95,7 @@ fn command(
None => (None, call.head), None => (None, call.head),
}; };
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let keep_strategy = if call.has_flag("last")? { let keep_strategy = if call.has_flag("last")? {
UniqueKeepStrategy::Last UniqueKeepStrategy::Last
@ -105,18 +103,9 @@ fn command(
UniqueKeepStrategy::First UniqueKeepStrategy::First
}; };
let polars_df = df let polars_df = df.to_polars().unique(subset, keep_strategy);
.as_ref()
.unique(subset_slice, keep_strategy, None)
.map_err(|e| ShellError::GenericError {
error: "Error dropping duplicates".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df); let df = NuLazyFrame::new(polars_df);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -4,11 +4,13 @@ use nu_protocol::{
Value, Value,
}; };
use crate::values::CustomValueSupport; use polars_lazy::dsl::col;
use crate::values::{CustomValueSupport, NuDataFrame};
use crate::PolarsPlugin; use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string; use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuLazyFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct DropNulls; pub struct DropNulls;
@ -43,8 +45,7 @@ impl PluginCommand for DropNulls {
Example { Example {
description: "drop null values in dataframe", description: "drop null values in dataframe",
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df); example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df);
let res = ($df.b / $df.b); let a = ($df | polars with-column [((polars col b) / (polars col b) | polars as res)]);
let a = ($df | polars with-column $res --name res);
$a | polars drop-nulls"#, $a | polars drop-nulls"#,
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
@ -109,31 +110,20 @@ fn command(
call: &EvaluatedCall, call: &EvaluatedCall,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let columns: Option<Vec<Value>> = call.opt(0)?; let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns { let (subset, _col_span) = match columns {
Some(cols) => { Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?; let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span) let agg_expr = agg_string.iter().map(|s| col(s)).collect();
(Some(agg_expr), col_span)
} }
None => (None, call.head), None => (None, call.head),
}; };
let subset_slice = subset.as_ref().map(|cols| &cols[..]); let polars_df = df.to_polars().drop_nulls(subset);
let df = NuLazyFrame::new(polars_df);
let polars_df = df
.as_ref()
.drop_nulls(subset_slice)
.map_err(|e| ShellError::GenericError {
error: "Error dropping nulls".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -70,7 +70,7 @@ impl PluginCommand for LazyFetch {
let value = input.into_value(call.head); let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let mut eager: NuDataFrame = lazy let eager: NuDataFrame = lazy
.to_polars() .to_polars()
.fetch(rows as usize) .fetch(rows as usize)
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
@ -82,8 +82,6 @@ impl PluginCommand for LazyFetch {
})? })?
.into(); .into();
// mark this as not from lazy so it doesn't get converted back to a lazy frame
eager.from_lazy = false;
eager eager
.to_pipeline_data(plugin, engine, call.head) .to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)

View File

@ -40,7 +40,7 @@ impl PluginCommand for LazyFillNull {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "Fills the null values by 0", description: "Fills the null values by 0",
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0", example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0 | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![Column::new( vec![Column::new(
@ -96,7 +96,7 @@ fn cmd_lazy(
fill: Value, fill: Value,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &fill)?.into_polars(); let expr = NuExpression::try_from_value(plugin, &fill)?.into_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr)); let lazy = NuLazyFrame::new(lazy.to_polars().fill_null(expr));
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -42,7 +42,7 @@ impl PluginCommand for LazyFilter {
vec![Example { vec![Example {
description: "Filter dataframe using an expression", description: "Filter dataframe using an expression",
example: example:
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)", "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4) | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -85,10 +85,7 @@ fn command(
lazy: NuLazyFrame, lazy: NuLazyFrame,
filter_expr: NuExpression, filter_expr: NuExpression,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(lazy.to_polars().filter(filter_expr.into_polars()));
lazy.from_eager,
lazy.to_polars().filter(filter_expr.into_polars()),
);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -0,0 +1,97 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::LazyFrame;
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::CustomValueSupport,
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct FilterWith;
impl PluginCommand for FilterWith {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter-with"
}
fn usage(&self) -> &str {
"Filters dataframe using an expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"filter expression used to filter dataframe",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let expr: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &expr)?;
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
lazy.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FilterWith)
}
}

View File

@ -48,7 +48,7 @@ impl PluginCommand for FirstDF {
vec![ vec![
Example { Example {
description: "Return the first row of a dataframe", description: "Return the first row of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -63,7 +63,7 @@ impl PluginCommand for FirstDF {
}, },
Example { Example {
description: "Return the first two rows of a dataframe", description: "Return the first two rows of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2 | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -98,13 +98,12 @@ impl PluginCommand for FirstDF {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head); let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { if NuLazyFrame::can_downcast(&value) || NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into()) command(plugin, engine, call, lazy).map_err(LabeledError::from)
} else { } else {
let expr = NuExpression::try_from_value(plugin, &value)?; let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.into_polars().first().into(); let expr: NuExpression = expr.into_polars().first().into();
expr.to_pipeline_data(plugin, engine, call.head) expr.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }
@ -115,13 +114,13 @@ fn command(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
df: NuDataFrame, df: NuLazyFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?; let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(1); let rows = rows.unwrap_or(1);
let res = df.as_ref().head(Some(rows)); let res = df.to_polars().slice(0, rows as u32);
let res = NuDataFrame::new(false, res); let res: NuLazyFrame = res.into();
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -5,10 +5,13 @@ use nu_protocol::{
}; };
use crate::{ use crate::{
dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin, dataframe::values::utils::convert_columns_string,
values::{CustomValueSupport, NuDataFrame},
PolarsPlugin,
}; };
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuLazyFrame};
use polars::prelude::{col, Expr};
#[derive(Clone)] #[derive(Clone)]
pub struct GetDF; pub struct GetDF;
@ -37,7 +40,7 @@ impl PluginCommand for GetDF {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "Returns the selected column", description: "Returns the selected column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![Column::new( vec![Column::new(
@ -70,21 +73,13 @@ fn command(
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?; let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns_string(columns, call.head)?; let (col_string, _col_span) = convert_columns_string(columns, call.head)?;
let col_expr: Vec<Expr> = col_string.iter().map(|s| col(s)).collect();
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let df = df let df = df.to_polars().select(col_expr);
.as_ref() let df = NuLazyFrame::new(df);
.select(col_string)
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
df.to_pipeline_data(plugin, engine, call.head) df.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -79,7 +79,7 @@ impl PluginCommand for ToLazyGroupBy {
Example { Example {
description: "Group by and perform an aggregation", description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy | polars into-df
| polars group-by a | polars group-by a
| polars agg [ | polars agg [
(polars col b | polars min | polars as "b_min") (polars col b | polars min | polars as "b_min")
@ -152,7 +152,7 @@ fn command(
expressions: Vec<Expr>, expressions: Vec<Expr>,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let group_by = lazy.to_polars().group_by(expressions); let group_by = lazy.to_polars().group_by(expressions);
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?); let group_by = NuLazyGroupBy::new(group_by, lazy.schema()?);
group_by.to_pipeline_data(plugin, engine, call.head) group_by.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -54,8 +54,8 @@ impl PluginCommand for LazyJoin {
vec![ vec![
Example { Example {
description: "Join two lazy dataframes", description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy); example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-df);
$df_a | polars join $df_b a foo | polars collect"#, $df_a | polars join $df_b a foo | polars collect"#,
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
@ -115,7 +115,7 @@ impl PluginCommand for LazyJoin {
Example { Example {
description: "Join one eager dataframe with a lazy dataframe", description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df); example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy); let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-df);
$df_a | polars join $df_b a foo"#, $df_a | polars join $df_b a foo"#,
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
@ -230,7 +230,6 @@ impl PluginCommand for LazyJoin {
let value = input.into_value(call.head); let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let from_eager = lazy.from_eager;
let lazy = lazy.to_polars(); let lazy = lazy.to_polars();
let lazy = lazy let lazy = lazy
@ -243,7 +242,7 @@ impl PluginCommand for LazyJoin {
.suffix(suffix) .suffix(suffix)
.finish(); .finish();
let lazy = NuLazyFrame::new(from_eager, lazy); let lazy = NuLazyFrame::new(lazy);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }

View File

@ -3,7 +3,7 @@ use crate::{
PolarsPlugin, PolarsPlugin,
}; };
use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression}; use super::super::values::{NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
@ -44,7 +44,7 @@ impl PluginCommand for LastDF {
vec![ vec![
Example { Example {
description: "Create new dataframe with last rows", description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -74,7 +74,7 @@ impl PluginCommand for LastDF {
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head); let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value_coerce(plugin, &value, call.head)?; let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into()) command(plugin, engine, call, df).map_err(|e| e.into())
} else { } else {
let expr = NuExpression::try_from_value(plugin, &value)?; let expr = NuExpression::try_from_value(plugin, &value)?;
@ -90,13 +90,13 @@ fn command(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,
call: &EvaluatedCall, call: &EvaluatedCall,
df: NuDataFrame, df: NuLazyFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?; let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(DEFAULT_ROWS); let rows = rows.unwrap_or(1);
let res = df.as_ref().tail(Some(rows)); let res = df.to_polars().tail(rows as u32);
let res = NuDataFrame::new(false, res); let res = NuLazyFrame::new(res);
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -46,7 +46,7 @@ macro_rules! lazy_command {
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head) let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?; .map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func()); let lazy = NuLazyFrame::new(lazy.to_polars().$func());
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }

View File

@ -126,7 +126,7 @@ fn command(
span: None, span: None,
inner: vec![], inner: vec![],
})?; })?;
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy); let lazy = NuLazyFrame::new(polars_lazy);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -1,11 +1,14 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
SyntaxShape, Type, Value, Value,
}; };
use polars::frame::explode::MeltArgs;
use crate::{ use crate::{
dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin, dataframe::values::utils::convert_columns_string,
values::{CustomValueSupport, NuLazyFrame},
PolarsPlugin,
}; };
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
@ -50,6 +53,11 @@ impl PluginCommand for MeltDF {
"optional name for value column", "optional name for value column",
Some('l'), Some('l'),
) )
.switch(
"streamable",
"Use polar's streaming engine. Results will not have a stable ordering.",
Some('s'),
)
.input_output_type( .input_output_type(
Type::Custom("dataframe".into()), Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()), Type::Custom("dataframe".into()),
@ -61,7 +69,7 @@ impl PluginCommand for MeltDF {
vec![Example { vec![Example {
description: "melt dataframe", description: "melt dataframe",
example: example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]", "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d] | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
@ -135,111 +143,31 @@ fn command(
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value"); let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value"); let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?; let value_name = call.get_flag("value-name")?.map(|v: String| v.into());
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?; let variable_name = call.get_flag("variable-name")?.map(|v: String| v.into());
let streamable = call.has_flag("streamable")?;
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; let (id_vars, _id_col_span) = convert_columns_string(id_col, call.head)?;
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; let id_vars = id_vars.into_iter().map(Into::into).collect();
let (value_vars, _val_col_span) = convert_columns_string(val_col, call.head)?;
let value_vars = value_vars.into_iter().map(Into::into).collect();
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df = df.to_polars();
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; let args = MeltArgs {
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; id_vars,
value_vars,
variable_name,
value_name,
streamable,
};
let mut res = df let res = polars_df.melt(args);
.as_ref() let res = NuLazyFrame::new(res);
.melt(&id_col_string, &val_col_string)
.map_err(|e| ShellError::GenericError {
error: "Error calculating melt".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
if let Some(name) = &variable_name {
res.rename("variable", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
if let Some(name) = &value_name {
res.rename("value", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
let res = NuDataFrame::new(false, res);
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }
fn check_column_datatypes<T: AsRef<str>>(
df: &polars::prelude::DataFrame,
cols: &[T],
col_span: Span,
) -> Result<(), ShellError> {
if cols.is_empty() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "empty column list".into(),
span: Some(col_span),
help: None,
inner: vec![],
});
}
// Checking if they are same type
if cols.len() > 1 {
for w in cols.windows(2) {
let l_series = df
.column(w[0].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let r_series = df
.column(w[1].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
if l_series.dtype() != r_series.dtype() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "found different column types in list".into(),
span: Some(col_span),
help: Some(format!(
"datatypes {} and {} are incompatible",
l_series.dtype(),
r_series.dtype()
)),
inner: vec![],
});
}
}
}
Ok(())
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::test::test_polars_plugin_command; use crate::test::test_polars_plugin_command;

View File

@ -1,19 +1,30 @@
mod aggregate; mod aggregate;
mod cast;
mod collect; mod collect;
mod drop;
mod drop_duplicates;
mod drop_nulls;
mod explode; mod explode;
mod fetch; mod fetch;
mod fill_nan; mod fill_nan;
mod fill_null; mod fill_null;
mod filter; mod filter;
mod filter_with;
mod first;
mod flatten; mod flatten;
mod get;
pub mod groupby; pub mod groupby;
mod join; mod join;
mod last;
mod macro_commands; mod macro_commands;
mod median; mod median;
mod melt;
mod quantile; mod quantile;
mod rename;
mod select; mod select;
mod slice;
mod sort_by_expr; mod sort_by_expr;
mod to_lazy; mod with_column;
use nu_plugin::PluginCommand; use nu_plugin::PluginCommand;
@ -29,13 +40,20 @@ pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::quantile::LazyQuantile; use crate::dataframe::lazy::quantile::LazyQuantile;
pub(crate) use crate::dataframe::lazy::select::LazySelect; pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy; use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
use crate::PolarsPlugin; use crate::PolarsPlugin;
pub use explode::LazyExplode; pub use explode::LazyExplode;
pub use flatten::LazyFlatten; pub use flatten::LazyFlatten;
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> { pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![ vec![
Box::new(cast::CastDF),
Box::new(drop::DropDF),
Box::new(drop_duplicates::DropDuplicates),
Box::new(drop_nulls::DropNulls),
Box::new(filter_with::FilterWith),
Box::new(first::FirstDF),
Box::new(get::GetDF),
Box::new(last::LastDF),
Box::new(LazyAggregate), Box::new(LazyAggregate),
Box::new(LazyCache), Box::new(LazyCache),
Box::new(LazyCollect), Box::new(LazyCollect),
@ -47,11 +65,14 @@ pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(LazyFlatten), Box::new(LazyFlatten),
Box::new(LazyJoin), Box::new(LazyJoin),
Box::new(median::LazyMedian), Box::new(median::LazyMedian),
Box::new(melt::MeltDF),
Box::new(LazyReverse), Box::new(LazyReverse),
Box::new(LazySelect), Box::new(LazySelect),
Box::new(LazySortBy), Box::new(LazySortBy),
Box::new(LazyQuantile), Box::new(LazyQuantile),
Box::new(ToLazyFrame), Box::new(rename::RenameDF),
Box::new(slice::SliceDF),
Box::new(ToLazyGroupBy), Box::new(ToLazyGroupBy),
Box::new(with_column::WithColumn),
] ]
} }

View File

@ -132,7 +132,6 @@ fn command(
quantile: f64, quantile: f64,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars() lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default()) .quantile(lit(quantile), QuantileInterpolOptions::default())
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {

View File

@ -6,7 +6,7 @@ use nu_protocol::{
use crate::{ use crate::{
dataframe::{utils::extract_strings, values::NuLazyFrame}, dataframe::{utils::extract_strings, values::NuLazyFrame},
values::{CustomValueSupport, PolarsPluginObject}, values::CustomValueSupport,
PolarsPlugin, PolarsPlugin,
}; };
@ -49,7 +49,7 @@ impl PluginCommand for RenameDF {
vec![ vec![
Example { Example {
description: "Renames a series", description: "Renames a series",
example: "[5 6 7 8] | polars into-df | polars rename '0' new_name", example: "[5 6 7 8] | polars into-df | polars rename '0' new_name | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![Column::new( vec![Column::new(
@ -69,7 +69,7 @@ impl PluginCommand for RenameDF {
}, },
Example { Example {
description: "Renames a dataframe column", description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -91,7 +91,7 @@ impl PluginCommand for RenameDF {
Example { Example {
description: "Renames two dataframe columns", description: "Renames two dataframe columns",
example: example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]", "[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new] | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -121,49 +121,11 @@ impl PluginCommand for RenameDF {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head); let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
PolarsPluginObject::NuDataFrame(df) => { command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
command_eager(plugin, engine, call, df).map_err(LabeledError::from)
}
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
}
_ => Err(LabeledError::new(format!("Unsupported type: {value:?}"))
.with_label("Unsupported Type", call.head)),
}
} }
} }
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let columns: Value = call.req(0)?;
let columns = extract_strings(columns)?;
let new_names: Value = call.req(1)?;
let new_names = extract_strings(new_names)?;
let mut polars_df = df.to_polars();
for (from, to) in columns.iter().zip(new_names.iter()) {
polars_df
.rename(from, to)
.map_err(|e| ShellError::GenericError {
error: "Error renaming".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
}
let df = NuDataFrame::new(false, polars_df);
df.to_pipeline_data(plugin, engine, call.head)
}
fn command_lazy( fn command_lazy(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,

View File

@ -67,7 +67,7 @@ impl PluginCommand for LazySelect {
let pipeline_value = input.into_value(call.head); let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions)); let lazy = NuLazyFrame::new(lazy.to_polars().select(&expressions));
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }

View File

@ -4,7 +4,11 @@ use nu_protocol::{
Value, Value,
}; };
use crate::{dataframe::values::Column, values::CustomValueSupport, PolarsPlugin}; use crate::{
dataframe::values::Column,
values::{CustomValueSupport, NuLazyFrame},
PolarsPlugin,
};
use super::super::values::NuDataFrame; use super::super::values::NuDataFrame;
@ -36,7 +40,7 @@ impl PluginCommand for SliceDF {
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "Create new dataframe from a slice of the rows", description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1", example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1 | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -69,12 +73,12 @@ fn command(
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let offset: i64 = call.req(0)?; let offset: i64 = call.req(0)?;
let size: usize = call.req(1)?; let size: i64 = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let res = df.as_ref().slice(offset, size); let res = df.to_polars().slice(offset, size as u32);
let res = NuDataFrame::new(false, res); let res = NuLazyFrame::new(res);
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -147,10 +147,7 @@ impl PluginCommand for LazySortBy {
let pipeline_value = input.into_value(call.head); let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(lazy.to_polars().sort_by_exprs(&expressions, sort_options));
lazy.from_eager,
lazy.to_polars().sort_by_exprs(&expressions, sort_options),
);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} }

View File

@ -1,90 +0,0 @@
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
#[derive(Clone)]
pub struct ToLazyFrame;
impl PluginCommand for ToLazyFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-lazy"
}
fn usage(&self) -> &str {
"Converts a dataframe into a lazy dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a table and creates a lazyframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
},
Example {
description: "Takes a table, creates a lazyframe, assigns column 'b' type str, displays the schema",
example: "[[a b];[1 2] [3 4]] | polars into-lazy --schema {b: str} | polars schema",
result: None
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let mut lazy = NuLazyFrame::from_dataframe(df);
// We don't want this converted back to an eager dataframe at some point
lazy.from_eager = false;
Ok(PipelineData::Value(
lazy.cache(plugin, engine, call.head)?.into_value(call.head),
None,
))
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use nu_plugin_test_support::PluginTest;
use nu_protocol::{ShellError, Span};
use super::*;
#[test]
fn test_to_lazy() -> Result<(), ShellError> {
let plugin: Arc<PolarsPlugin> = PolarsPlugin::new_test_mode().into();
let mut plugin_test = PluginTest::new("polars", Arc::clone(&plugin))?;
let pipeline_data = plugin_test.eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?;
let value = pipeline_data.into_value(Span::test_data());
let df = NuLazyFrame::try_from_value(&plugin, &value)?;
assert!(!df.from_eager);
Ok(())
}
}

View File

@ -0,0 +1,114 @@
use super::super::values::{Column, NuDataFrame};
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::CustomValueSupport,
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct WithColumn;
impl PluginCommand for WithColumn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars with-column"
}
fn usage(&self) -> &str {
"Adds a series to the dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"series or expressions",
SyntaxShape::Any,
"series to be added or expressions used to define the new columns",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-df
| polars with-column [
((polars col a) * 2 | polars as "c")
((polars col a) * 3 | polars as "d")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into();
lazy.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&WithColumn)
}
}

View File

@ -1,6 +1,6 @@
use crate::{ use crate::{
dataframe::values::{NuExpression, NuLazyFrame}, dataframe::values::{NuExpression, NuLazyFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, values::CustomValueSupport,
PolarsPlugin, PolarsPlugin,
}; };
@ -63,8 +63,7 @@ impl PluginCommand for Shift {
}, },
Example { Example {
description: "Shifts the values by a given period, fill absent values with 0", description: "Shifts the values by a given period, fill absent values with 0",
example: example: "[1 2 2 3 3] | polars into-df | polars shift 2 --fill 0 | polars collect",
"[1 2 2 3 3] | polars into-lazy | polars shift 2 --fill 0 | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![Column::new( vec![Column::new(
@ -94,35 +93,11 @@ impl PluginCommand for Shift {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head); let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
match PolarsPluginObject::try_from_value(plugin, &value)? { command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyGroupBy,
],
)),
}
.map_err(LabeledError::from)
} }
} }
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let period: i64 = call.req(0)?;
let series = df.as_series(call.head)?.shift(period);
let df = NuDataFrame::try_from_series_vec(vec![series], call.head)?;
df.to_pipeline_data(plugin, engine, call.head)
}
fn command_lazy( fn command_lazy(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,

View File

@ -1,6 +1,6 @@
use crate::{ use crate::{
dataframe::{utils::extract_strings, values::NuLazyFrame}, dataframe::{utils::extract_strings, values::NuLazyFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType}, values::CustomValueSupport,
PolarsPlugin, PolarsPlugin,
}; };
@ -11,7 +11,7 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value, Value,
}; };
use polars::prelude::{IntoSeries, UniqueKeepStrategy}; use polars::prelude::UniqueKeepStrategy;
#[derive(Clone)] #[derive(Clone)]
pub struct Unique; pub struct Unique;
@ -68,7 +68,7 @@ impl PluginCommand for Unique {
}, },
Example { Example {
description: "Returns unique values in a subset of lazyframe columns", description: "Returns unique values in a subset of lazyframe columns",
example: "[[a b c]; [1 2 1] [2 2 2] [3 2 1]] | polars into-lazy | polars unique --subset [b c] | polars collect", example: "[[a b c]; [1 2 1] [2 2 2] [3 2 1]] | polars into-df | polars unique --subset [b c] | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
@ -94,7 +94,7 @@ impl PluginCommand for Unique {
Example { Example {
description: "Returns unique values in a subset of lazyframe columns", description: "Returns unique values in a subset of lazyframe columns",
example: r#"[[a b c]; [1 2 1] [2 2 2] [3 2 1]] example: r#"[[a b c]; [1 2 1] [2 2 2] [3 2 1]]
| polars into-lazy | polars into-df
| polars unique --subset [b c] --last | polars unique --subset [b c] --last
| polars collect"#, | polars collect"#,
result: Some( result: Some(
@ -135,42 +135,11 @@ impl PluginCommand for Unique {
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, LabeledError> { ) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head); let value = input.into_value(call.head);
let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
match PolarsPluginObject::try_from_value(plugin, &value)? { command_lazy(plugin, engine, call, df).map_err(LabeledError::from)
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyGroupBy,
],
)),
}
.map_err(LabeledError::from)
} }
} }
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let series = df.as_series(call.head)?;
let res = series.unique().map_err(|e| ShellError::GenericError {
error: "Error calculating unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The str-slice command can only be used with string columns".into()),
inner: vec![],
})?;
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
df.to_pipeline_data(plugin, engine, call.head)
}
fn command_lazy( fn command_lazy(
plugin: &PolarsPlugin, plugin: &PolarsPlugin,
engine: &EngineInterface, engine: &EngineInterface,

View File

@ -323,19 +323,7 @@ pub trait CustomValueSupport: Cacheable {
engine: &EngineInterface, engine: &EngineInterface,
span: Span, span: Span,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
match self.to_cache_value()? { Ok(self.cache(plugin, engine, span)?.into_value(span))
// if it was from a lazy value, make it lazy again
PolarsPluginObject::NuDataFrame(df) if df.from_lazy => {
let df = df.lazy();
Ok(df.cache(plugin, engine, span)?.into_value(span))
}
// if it was from an eager value, make it eager again
PolarsPluginObject::NuLazyFrame(lf) if lf.from_eager => {
let lf = lf.collect(span)?;
Ok(lf.cache(plugin, engine, span)?.into_value(span))
}
_ => Ok(self.cache(plugin, engine, span)?.into_value(span)),
}
} }
/// Caches the object, converts it to a it's CustomValue counterpart /// Caches the object, converts it to a it's CustomValue counterpart

View File

@ -472,7 +472,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
} }
DataFrame::new(df_series) DataFrame::new(df_series)
.map(|df| NuDataFrame::new(false, df)) .map(NuDataFrame::new)
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "Error creating dataframe".into(), error: "Error creating dataframe".into(),
msg: e.to_string(), msg: e.to_string(),

View File

@ -104,7 +104,6 @@ impl PolarsObject for DataFrameValue {
pub struct NuDataFrame { pub struct NuDataFrame {
pub id: Uuid, pub id: Uuid,
pub df: Arc<DataFrame>, pub df: Arc<DataFrame>,
pub from_lazy: bool,
} }
impl AsRef<DataFrame> for NuDataFrame { impl AsRef<DataFrame> for NuDataFrame {
@ -115,17 +114,16 @@ impl AsRef<DataFrame> for NuDataFrame {
impl From<DataFrame> for NuDataFrame { impl From<DataFrame> for NuDataFrame {
fn from(df: DataFrame) -> Self { fn from(df: DataFrame) -> Self {
Self::new(false, df) Self::new(df)
} }
} }
impl NuDataFrame { impl NuDataFrame {
pub fn new(from_lazy: bool, df: DataFrame) -> Self { pub fn new(df: DataFrame) -> Self {
let id = Uuid::new_v4(); let id = Uuid::new_v4();
Self { Self {
id, id,
df: Arc::new(df), df: Arc::new(df),
from_lazy,
} }
} }
@ -134,12 +132,12 @@ impl NuDataFrame {
} }
pub fn lazy(&self) -> NuLazyFrame { pub fn lazy(&self) -> NuLazyFrame {
NuLazyFrame::new(true, self.to_polars().lazy()) NuLazyFrame::new(self.to_polars().lazy())
} }
pub fn try_from_series(series: Series, span: Span) -> Result<Self, ShellError> { pub fn try_from_series(series: Series, span: Span) -> Result<Self, ShellError> {
match DataFrame::new(vec![series]) { match DataFrame::new(vec![series]) {
Ok(dataframe) => Ok(NuDataFrame::new(false, dataframe)), Ok(dataframe) => Ok(NuDataFrame::new(dataframe)),
Err(e) => Err(ShellError::GenericError { Err(e) => Err(ShellError::GenericError {
error: "Error creating dataframe".into(), error: "Error creating dataframe".into(),
msg: e.to_string(), msg: e.to_string(),
@ -202,7 +200,7 @@ impl NuDataFrame {
inner: vec![], inner: vec![],
})?; })?;
Ok(Self::new(false, dataframe)) Ok(Self::new(dataframe))
} }
pub fn try_from_columns( pub fn try_from_columns(
@ -276,7 +274,7 @@ impl NuDataFrame {
inner: vec![], inner: vec![],
})?; })?;
Ok(Self::new(false, df)) Ok(Self::new(df))
} }
pub fn is_series(&self) -> bool { pub fn is_series(&self) -> bool {

View File

@ -147,7 +147,7 @@ impl NuDataFrame {
inner: vec![], inner: vec![],
})?; })?;
Ok(NuDataFrame::new(false, df_new)) Ok(NuDataFrame::new(df_new))
} }
Axis::Column => { Axis::Column => {
if self.df.width() != other.df.width() { if self.df.width() != other.df.width() {
@ -205,7 +205,7 @@ impl NuDataFrame {
inner: vec![], inner: vec![],
})?; })?;
Ok(NuDataFrame::new(false, df_new)) Ok(NuDataFrame::new(df_new))
} }
} }
} }

View File

@ -63,45 +63,16 @@ fn compute_with_value(
op: Span, op: Span,
right: &Value, right: &Value,
) -> Result<Value, ShellError> { ) -> Result<Value, ShellError> {
let rhs_span = right.span(); let rhs = NuExpression::try_from_value(plugin, right)?;
match right { with_operator(
Value::Custom { val: rhs, .. } => { (plugin, engine),
let rhs = rhs.as_any().downcast_ref::<NuExpression>().ok_or_else(|| { operator,
ShellError::DowncastNotPossible { left,
msg: "Unable to create expression".into(), &rhs,
span: rhs_span, lhs_span,
} right.span(),
})?; op,
)
match rhs.as_ref() {
polars::prelude::Expr::Literal(..) => with_operator(
(plugin, engine),
operator,
left,
rhs,
lhs_span,
right.span(),
op,
),
_ => Err(ShellError::TypeMismatch {
err_message: "Only literal expressions or number".into(),
span: right.span(),
}),
}
}
_ => {
let rhs = NuExpression::try_from_value(plugin, right)?;
with_operator(
(plugin, engine),
operator,
left,
&rhs,
lhs_span,
right.span(),
op,
)
}
}
} }
fn with_operator( fn with_operator(

View File

@ -21,7 +21,6 @@ pub use custom_value::NuLazyFrameCustomValue;
pub struct NuLazyFrame { pub struct NuLazyFrame {
pub id: Uuid, pub id: Uuid,
pub lazy: Arc<LazyFrame>, pub lazy: Arc<LazyFrame>,
pub from_eager: bool,
} }
impl fmt::Debug for NuLazyFrame { impl fmt::Debug for NuLazyFrame {
@ -32,22 +31,21 @@ impl fmt::Debug for NuLazyFrame {
impl From<LazyFrame> for NuLazyFrame { impl From<LazyFrame> for NuLazyFrame {
fn from(lazy_frame: LazyFrame) -> Self { fn from(lazy_frame: LazyFrame) -> Self {
NuLazyFrame::new(false, lazy_frame) NuLazyFrame::new(lazy_frame)
} }
} }
impl NuLazyFrame { impl NuLazyFrame {
pub fn new(from_eager: bool, lazy: LazyFrame) -> Self { pub fn new(lazy: LazyFrame) -> Self {
Self { Self {
id: Uuid::new_v4(), id: Uuid::new_v4(),
lazy: Arc::new(lazy), lazy: Arc::new(lazy),
from_eager,
} }
} }
pub fn from_dataframe(df: NuDataFrame) -> Self { pub fn from_dataframe(df: NuDataFrame) -> Self {
let lazy = df.as_ref().clone().lazy(); let lazy = df.as_ref().clone().lazy();
NuLazyFrame::new(true, lazy) NuLazyFrame::new(lazy)
} }
pub fn to_polars(&self) -> LazyFrame { pub fn to_polars(&self) -> LazyFrame {
@ -64,7 +62,7 @@ impl NuLazyFrame {
help: None, help: None,
inner: vec![], inner: vec![],
}) })
.map(|df| NuDataFrame::new(true, df)) .map(NuDataFrame::new)
} }
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self
@ -74,7 +72,7 @@ impl NuLazyFrame {
let df = self.to_polars(); let df = self.to_polars();
let expr = expr.into_polars(); let expr = expr.into_polars();
let new_frame = f(df, expr); let new_frame = f(df, expr);
Self::new(self.from_eager, new_frame) Self::new(new_frame)
} }
pub fn schema(&self) -> Result<NuSchema, ShellError> { pub fn schema(&self) -> Result<NuSchema, ShellError> {

View File

@ -20,7 +20,6 @@ pub struct NuLazyGroupBy {
pub id: Uuid, pub id: Uuid,
pub group_by: Arc<LazyGroupBy>, pub group_by: Arc<LazyGroupBy>,
pub schema: NuSchema, pub schema: NuSchema,
pub from_eager: bool,
} }
impl fmt::Debug for NuLazyGroupBy { impl fmt::Debug for NuLazyGroupBy {
@ -30,11 +29,10 @@ impl fmt::Debug for NuLazyGroupBy {
} }
impl NuLazyGroupBy { impl NuLazyGroupBy {
pub fn new(group_by: LazyGroupBy, from_eager: bool, schema: NuSchema) -> Self { pub fn new(group_by: LazyGroupBy, schema: NuSchema) -> Self {
NuLazyGroupBy { NuLazyGroupBy {
id: Uuid::new_v4(), id: Uuid::new_v4(),
group_by: Arc::new(group_by), group_by: Arc::new(group_by),
from_eager,
schema, schema,
} }
} }

View File

@ -48,8 +48,6 @@ pub(crate) fn convert_columns(
// Converts a Vec<Value> to a Vec<String> with a Span marking the whole // Converts a Vec<Value> to a Vec<String> with a Span marking the whole
// location of the columns for error referencing // location of the columns for error referencing
// todo - fix
#[allow(dead_code)]
pub(crate) fn convert_columns_string( pub(crate) fn convert_columns_string(
columns: Vec<Value>, columns: Vec<Value>,
span: Span, span: Span,