Move dataframes support to a plugin (#12220)

WIP

This PR covers migration crates/nu-cmd-dataframes to a new plugin
./crates/nu_plugin_polars

## TODO List

Other:
- [X] Fix examples
- [x] Fix Plugin Test Harness
- [X] Move Cache to Mutex<BTreeMap>
- [X] Logic for disabling/enabling plugin GC based off whether items are
cached.
- [x] NuExpression custom values
- [X] Optimize caching (don't cache every object creation). 
- [x] Fix dataframe operations (in NuDataFrameCustomValue::operations)
- [x] Added plugin_debug! macro that for checking an env variable
POLARS_PLUGIN_DEBUG

Fix duplicated commands:
- [x] There are two polars median commands, one for lazy and one for
expr.. there should only be one that works for both. I temporarily
called on polars expr-median (inside expressions_macros.rs)
- [x] polars quantile (lazy, and expr). the expr one is temporarily
expr-median
- [x] polars is-in (renamed one series-is-in)

Commands:
- [x] AppendDF
- [x] CastDF
- [X] ColumnsDF
- [x] DataTypes
- [x] Summary
- [x] DropDF
- [x] DropDuplicates
- [x] DropNulls
- [x] Dummies
- [x] FilterWith
- [X] FirstDF
- [x] GetDF
- [x] LastDF
- [X] ListDF
- [x] MeltDF
- [X] OpenDataFrame
- [x] QueryDf
- [x] RenameDF
- [x] SampleDF
- [x] SchemaDF
- [x] ShapeDF
- [x] SliceDF
- [x] TakeDF
- [X] ToArrow
- [x] ToAvro
- [X] ToCSV
- [X] ToDataFrame
- [X] ToNu
- [x] ToParquet
- [x] ToJsonLines
- [x] WithColumn
- [x] ExprAlias
- [x] ExprArgWhere
- [x] ExprCol
- [x] ExprConcatStr
- [x] ExprCount
- [x] ExprLit
- [x] ExprWhen
- [x] ExprOtherwise
- [x] ExprQuantile
- [x] ExprList
- [x] ExprAggGroups
- [x] ExprCount
- [x] ExprIsIn
- [x] ExprNot
- [x] ExprMax
- [x] ExprMin
- [x] ExprSum
- [x] ExprMean
- [x] ExprMedian
- [x] ExprStd
- [x] ExprVar
- [x] ExprDatePart
- [X] LazyAggregate
- [x] LazyCache
- [X] LazyCollect
- [x] LazyFetch
- [x] LazyFillNA
- [x] LazyFillNull
- [x] LazyFilter
- [x] LazyJoin
- [x] LazyQuantile
- [x] LazyMedian
- [x] LazyReverse
- [x] LazySelect
- [x] LazySortBy
- [x] ToLazyFrame
- [x] ToLazyGroupBy
- [x] LazyExplode
- [x] LazyFlatten
- [x] AllFalse
- [x] AllTrue
- [x] ArgMax
- [x] ArgMin
- [x] ArgSort
- [x] ArgTrue
- [x] ArgUnique
- [x] AsDate
- [x] AsDateTime
- [x] Concatenate
- [x] Contains
- [x] Cumulative
- [x] GetDay
- [x] GetHour
- [x] GetMinute
- [x] GetMonth
- [x] GetNanosecond
- [x] GetOrdinal
- [x] GetSecond
- [x] GetWeek
- [x] GetWeekDay
- [x] GetYear
- [x] IsDuplicated
- [x] IsIn
- [x] IsNotNull
- [x] IsNull
- [x] IsUnique
- [x] NNull
- [x] NUnique
- [x] NotSeries
- [x] Replace
- [x] ReplaceAll
- [x] Rolling
- [x] SetSeries
- [x] SetWithIndex
- [x] Shift
- [x] StrLengths
- [x] StrSlice
- [x] StrFTime
- [x] ToLowerCase
- [x] ToUpperCase
- [x] Unique
- [x] ValueCount

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright
2024-04-09 17:31:43 -07:00
committed by GitHub
parent cbbccaa722
commit efc1cfa939
137 changed files with 21181 additions and 379 deletions

View File

@ -0,0 +1,210 @@
use crate::{
dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::{datatypes::DataType, prelude::Expr};
#[derive(Clone)]
pub struct LazyAggregate;
impl PluginCommand for LazyAggregate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars agg"
}
fn usage(&self) -> &str {
"Performs a series of aggregations from a group-by."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the aggregations to be applied",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?;
for expr in expressions.iter() {
if let Some(name) = get_col_name(expr) {
let dtype = group_by.schema.schema.get(name.as_str());
if matches!(dtype, Some(DataType::Object(..))) {
return Err(ShellError::GenericError {
error: "Object type column not supported for aggregation".into(),
msg: format!("Column '{name}' is type Object"),
span: Some(call.head),
help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
inner: vec![],
}).map_err(|e| e.into());
}
}
}
let polars = group_by.to_polars();
let lazy = NuLazyFrame::new(false, polars.agg(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
fn get_col_name(expr: &Expr) -> Option<String> {
match expr {
Expr::Column(column) => Some(column.to_string()),
Expr::Agg(agg) => match agg {
polars::prelude::AggExpr::Min { input: e, .. }
| polars::prelude::AggExpr::Max { input: e, .. }
| polars::prelude::AggExpr::Median(e)
| polars::prelude::AggExpr::NUnique(e)
| polars::prelude::AggExpr::First(e)
| polars::prelude::AggExpr::Last(e)
| polars::prelude::AggExpr::Mean(e)
| polars::prelude::AggExpr::Implode(e)
| polars::prelude::AggExpr::Count(e, _)
| polars::prelude::AggExpr::Sum(e)
| polars::prelude::AggExpr::AggGroups(e)
| polars::prelude::AggExpr::Std(e, _)
| polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()),
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
},
Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Gather { expr, .. }
| Expr::SortBy { expr, .. }
| Expr::Exclude(expr, _)
| Expr::Alias(expr, _)
| Expr::KeepName(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
Expr::Ternary { .. }
| Expr::AnonymousFunction { .. }
| Expr::Function { .. }
| Expr::Columns(_)
| Expr::DtypeColumn(_)
| Expr::Literal(_)
| Expr::BinaryExpr { .. }
| Expr::Window { .. }
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Len
| Expr::Nth(_)
| Expr::SubPlan(_, _)
| Expr::Selector(_) => None,
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyAggregate)
}
}

View File

@ -0,0 +1,98 @@
use crate::{
dataframe::values::{Column, NuDataFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
Cacheable, PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
#[derive(Clone)]
pub struct LazyCollect;
impl PluginCommand for LazyCollect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars collect"
}
fn usage(&self) -> &str {
"Collect lazy dataframe into eager dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let eager = lazy.collect(call.head)?;
Ok(PipelineData::Value(
eager.cache(plugin, engine)?.into_value(call.head),
None,
))
}
PolarsPluginObject::NuDataFrame(df) => {
// just return the dataframe, add to cache again to be safe
Ok(PipelineData::Value(
df.cache(plugin, engine)?.into_value(call.head),
None,
))
}
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame],
)),
}
.map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyCollect)
}
}

View File

@ -0,0 +1,175 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyExplode;
impl PluginCommand for LazyExplode {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars explode"
}
fn usage(&self) -> &str {
"Explodes a dataframe or creates a explode expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to explode, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Explode the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and explode the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
pub(crate) fn explode(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
let lazy = df.lazy();
explode_lazy(plugin, engine, call, lazy)
}
PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr),
_ => Err(ShellError::CantConvert {
to_type: "dataframe or expression".into(),
from_type: value.get_type().to_string(),
span: call.head,
help: None,
}),
}
}
pub(crate) fn explode_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let columns = call
.positional
.iter()
.map(|e| e.as_str().map(|s| s.to_string()))
.collect::<Result<Vec<String>, ShellError>>()?;
let exploded = lazy
.to_polars()
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
let lazy = NuLazyFrame::from(exploded);
to_pipeline_data(plugin, engine, call.head, lazy)
}
pub(crate) fn explode_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let expr: NuExpression = expr.to_polars().explode().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyExplode)
}
}

View File

@ -0,0 +1,100 @@
use crate::dataframe::values::{Column, NuDataFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFetch;
impl PluginCommand for LazyFetch {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fetch"
}
fn usage(&self) -> &str {
"Collects the lazyframe to the selected rows."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"rows",
SyntaxShape::Int,
"number of rows to be fetched from lazyframe",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let rows: i64 = call.req(0)?;
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let mut eager: NuDataFrame = lazy
.to_polars()
.fetch(rows as usize)
.map_err(|e| ShellError::GenericError {
error: "Error fetching rows".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
// mark this as not from lazy so it doesn't get converted back to a lazy frame
eager.from_lazy = false;
to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFetch)
}
}

View File

@ -0,0 +1,189 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNA;
impl PluginCommand for LazyFillNA {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-nan"
}
fn usage(&self) -> &str {
"Replaces NaN values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the NAN values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Fills the NaN values with 0",
example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(0),
Value::test_int(3),
Value::test_int(0),
],
)],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Fills the NaN values of a whole dataframe",
example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_float(0.2), Value::test_float(0.1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(0)],
),
],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
cmd_df(plugin, engine, call, df, fill, value.span())
}
PolarsPluginObject::NuLazyFrame(lazy) => cmd_df(
plugin,
engine,
call,
lazy.collect(value.span())?,
fill,
value.span(),
),
PolarsPluginObject::NuExpression(expr) => {
Ok(cmd_expr(plugin, engine, call, expr, fill)?)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn cmd_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
frame: NuDataFrame,
fill: Value,
val_span: Span,
) -> Result<PipelineData, ShellError> {
let columns = frame.columns(val_span)?;
let dataframe = columns
.into_iter()
.map(|column| {
let column_name = column.name().to_string();
let values = column
.into_iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if val.is_nan() {
fill.clone()
} else {
value
}
}
Value::List { vals, .. } => {
NuDataFrame::fill_list_nan(vals, span, fill.clone())
}
_ => value,
}
})
.collect::<Vec<Value>>();
Column::new(column_name, values)
})
.collect::<Vec<Column>>();
let df = NuDataFrame::try_from_columns(dataframe, None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_nan(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNA)
}
}

View File

@ -0,0 +1,127 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNull;
impl PluginCommand for LazyFillNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-null"
}
fn usage(&self) -> &str {
"Replaces NULL values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fills the null values by 0",
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill),
PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill),
PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn cmd_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
fill: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr));
to_pipeline_data(plugin, engine, call.head, lazy)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_null(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNull)
}
}

View File

@ -0,0 +1,104 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFilter;
impl PluginCommand for LazyFilter {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter"
}
fn usage(&self) -> &str {
"Filter dataframe based in expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"Expression that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example:
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr_value: Value = call.req(0)?;
let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
filter_expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars().filter(filter_expr.to_polars()),
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFilter)
}
}

View File

@ -0,0 +1,125 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use crate::{
dataframe::values::{Column, NuDataFrame},
values::CustomValueSupport,
PolarsPlugin,
};
use super::explode::explode;
#[derive(Clone)]
pub struct LazyFlatten;
impl PluginCommand for LazyFlatten {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars flatten"
}
fn usage(&self) -> &str {
"An alias for polars explode."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to flatten, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Flatten the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and flatten the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyFlatten)
}
}

View File

@ -0,0 +1,168 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct ToLazyGroupBy;
impl PluginCommand for ToLazyGroupBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars group-by"
}
fn usage(&self) -> &str {
"Creates a group-by object that can be used for other aggregations."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the lazy group-by",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(0)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only Col expressions".into(),
span: value.span(),
})?;
}
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
expressions: Vec<Expr>,
) -> Result<PipelineData, ShellError> {
let group_by = lazy.to_polars().group_by(expressions);
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
to_pipeline_data(plugin, engine, call.head, group_by)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ToLazyGroupBy)
}
}

View File

@ -0,0 +1,260 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{Expr, JoinType};
#[derive(Clone)]
pub struct LazyJoin;
impl PluginCommand for LazyJoin {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars join"
}
fn usage(&self) -> &str {
"Joins a lazy frame with other lazy frame."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "LazyFrame to join with")
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
.switch(
"inner",
"inner joining between lazyframes (default)",
Some('i'),
)
.switch("left", "left join between lazyframes", Some('l'))
.switch("outer", "outer join between lazyframes", Some('o'))
.switch("cross", "cross join between lazyframes", Some('c'))
.named(
"suffix",
SyntaxShape::String,
"Suffix to use on columns with same name",
Some('s'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo | polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let left = call.has_flag("left")?;
let outer = call.has_flag("outer")?;
let cross = call.has_flag("cross")?;
let how = if left {
JoinType::Left
} else if outer {
JoinType::Outer { coalesce: true }
} else if cross {
JoinType::Cross
} else {
JoinType::Inner
};
let other: Value = call.req(0)?;
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
let other = other.to_polars();
let left_on: Value = call.req(1)?;
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
let right_on: Value = call.req(2)?;
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
if left_on.len() != right_on.len() {
let right_on: Value = call.req(2)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "The right column list has a different size to the left column list".into(),
span: right_on.span(),
})?;
}
// Checking that both list of expressions are made out of col expressions or strings
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
let value: Value = call.req(*index)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only a string, col expressions or list of strings".into(),
span: value.span(),
})?;
}
}
let suffix: Option<String> = call.get_flag("suffix")?;
let suffix = suffix.unwrap_or_else(|| "_x".into());
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let from_eager = lazy.from_eager;
let lazy = lazy.to_polars();
let lazy = lazy
.join_builder()
.with(other)
.left_on(left_on)
.right_on(right_on)
.how(how)
.force_parallel(true)
.suffix(suffix)
.finish();
let lazy = NuLazyFrame::new(from_eager, lazy);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyJoin)
}
}

View File

@ -0,0 +1,225 @@
/// Definition of multiple lazyframe commands using a macro rule
/// All of these commands have an identical body and only require
/// to have a change in the name, description and function
use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
macro_rules! lazy_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build($name)
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
.plugin_examples($examples)
}
fn run(
&self,
_plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// LazyReverse command
// Expands to a command definition for reverse
lazy_command!(
LazyReverse,
"polars reverse",
"Reverses the LazyFrame",
vec![Example {
description: "Reverses the dataframe.",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
reverse,
test_reverse
);
// LazyCache command
// Expands to a command definition for cache
lazy_command!(
LazyCache,
"polars cache",
"Caches operations in a new LazyFrame.",
vec![Example {
description: "Caches the result into a new LazyFrame",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache",
result: None,
}],
cache,
test_cache
);

View File

@ -0,0 +1,143 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct LazyMedian;
impl PluginCommand for LazyMedian {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars median"
}
fn usage(&self) -> &str {
"Median value from columns in a dataframe or creates expression for an aggregation"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Median aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars median)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()),
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().median().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let polars_lazy = lazy
.to_polars()
.median()
.map_err(|e| ShellError::GenericError {
error: format!("Error in median operation: {e}"),
msg: "".into(),
help: None,
span: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyMedian)
}
}

View File

@ -0,0 +1,57 @@
mod aggregate;
mod collect;
mod explode;
mod fetch;
mod fill_nan;
mod fill_null;
mod filter;
mod flatten;
pub mod groupby;
mod join;
mod macro_commands;
mod median;
mod quantile;
mod select;
mod sort_by_expr;
mod to_lazy;
use nu_plugin::PluginCommand;
pub use crate::dataframe::lazy::aggregate::LazyAggregate;
pub use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_nan::LazyFillNA;
pub use crate::dataframe::lazy::fill_null::LazyFillNull;
use crate::dataframe::lazy::filter::LazyFilter;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::join::LazyJoin;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::quantile::LazyQuantile;
pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
use crate::PolarsPlugin;
pub use explode::LazyExplode;
pub use flatten::LazyFlatten;
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(LazyAggregate),
Box::new(LazyCache),
Box::new(LazyCollect),
Box::new(LazyExplode),
Box::new(LazyFetch),
Box::new(LazyFillNA),
Box::new(LazyFillNull),
Box::new(LazyFilter),
Box::new(LazyFlatten),
Box::new(LazyJoin),
Box::new(median::LazyMedian),
Box::new(LazyReverse),
Box::new(LazySelect),
Box::new(LazySortBy),
Box::new(LazyQuantile),
Box::new(ToLazyFrame),
Box::new(ToLazyGroupBy),
]
}

View File

@ -0,0 +1,160 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{lit, QuantileInterpolOptions};
#[derive(Clone)]
pub struct LazyQuantile;
impl PluginCommand for LazyQuantile {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars quantile"
}
fn usage(&self) -> &str {
"Aggregates the columns to the selected quantile."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"quantile",
SyntaxShape::Number,
"quantile value for quantile operation",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Quantile aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars quantile 0.5)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
let quantile: f64 = call.req(0)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
command(plugin, engine, call, df.lazy(), quantile)
}
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr
.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
quantile: f64,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})?,
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyQuantile)
}
}

View File

@ -0,0 +1,85 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct LazySelect;
impl PluginCommand for LazySelect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars select"
}
fn usage(&self) -> &str {
"Selects columns from lazyframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"select expressions",
SyntaxShape::Any,
"Expression(s) that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazySelect)
}
}

View File

@ -0,0 +1,160 @@
use super::super::values::NuLazyFrame;
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazySortBy;
impl PluginCommand for LazySortBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars sort-by"
}
fn usage(&self) -> &str {
"Sorts a lazy dataframe based on expression(s)."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"sort expression",
SyntaxShape::Any,
"sort expression for the dataframe",
)
.named(
"reverse",
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
"Reverse sorting. Default is false",
Some('r'),
)
.switch(
"nulls-last",
"nulls are shown last in the dataframe",
Some('n'),
)
.switch("maintain-order", "Maintains order during sort", Some('m'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sort dataframe by one column",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sort column using two columns",
example:
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(6),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(4),
Value::test_int(1),
Value::test_int(4),
Value::test_int(2),
],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let nulls_last = call.has_flag("nulls-last")?;
let maintain_order = call.has_flag("maintain-order")?;
let reverse: Option<Vec<bool>> = call.get_flag("reverse")?;
let reverse = match reverse {
Some(list) => {
if expressions.len() != list.len() {
let span = call
.get_flag::<Value>("reverse")?
.expect("already checked and it exists")
.span();
Err(ShellError::GenericError {
error: "Incorrect list size".into(),
msg: "Size doesn't match expression list".into(),
span: Some(span),
help: None,
inner: vec![],
})?
} else {
list
}
}
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazySortBy)
}
}

View File

@ -0,0 +1,61 @@
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
#[derive(Clone)]
pub struct ToLazyFrame;
impl PluginCommand for ToLazyFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-lazy"
}
fn usage(&self) -> &str {
"Converts a dataframe into a lazy dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a dictionary and creates a lazy dataframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
Ok(PipelineData::Value(
lazy.cache(plugin, engine)?.into_value(call.head),
None,
))
}
}