forked from extern/nushell
Move dataframes support to a plugin (#12220)
WIP This PR covers migration crates/nu-cmd-dataframes to a new plugin ./crates/nu_plugin_polars ## TODO List Other: - [X] Fix examples - [x] Fix Plugin Test Harness - [X] Move Cache to Mutex<BTreeMap> - [X] Logic for disabling/enabling plugin GC based off whether items are cached. - [x] NuExpression custom values - [X] Optimize caching (don't cache every object creation). - [x] Fix dataframe operations (in NuDataFrameCustomValue::operations) - [x] Added plugin_debug! macro that for checking an env variable POLARS_PLUGIN_DEBUG Fix duplicated commands: - [x] There are two polars median commands, one for lazy and one for expr.. there should only be one that works for both. I temporarily called on polars expr-median (inside expressions_macros.rs) - [x] polars quantile (lazy, and expr). the expr one is temporarily expr-median - [x] polars is-in (renamed one series-is-in) Commands: - [x] AppendDF - [x] CastDF - [X] ColumnsDF - [x] DataTypes - [x] Summary - [x] DropDF - [x] DropDuplicates - [x] DropNulls - [x] Dummies - [x] FilterWith - [X] FirstDF - [x] GetDF - [x] LastDF - [X] ListDF - [x] MeltDF - [X] OpenDataFrame - [x] QueryDf - [x] RenameDF - [x] SampleDF - [x] SchemaDF - [x] ShapeDF - [x] SliceDF - [x] TakeDF - [X] ToArrow - [x] ToAvro - [X] ToCSV - [X] ToDataFrame - [X] ToNu - [x] ToParquet - [x] ToJsonLines - [x] WithColumn - [x] ExprAlias - [x] ExprArgWhere - [x] ExprCol - [x] ExprConcatStr - [x] ExprCount - [x] ExprLit - [x] ExprWhen - [x] ExprOtherwise - [x] ExprQuantile - [x] ExprList - [x] ExprAggGroups - [x] ExprCount - [x] ExprIsIn - [x] ExprNot - [x] ExprMax - [x] ExprMin - [x] ExprSum - [x] ExprMean - [x] ExprMedian - [x] ExprStd - [x] ExprVar - [x] ExprDatePart - [X] LazyAggregate - [x] LazyCache - [X] LazyCollect - [x] LazyFetch - [x] LazyFillNA - [x] LazyFillNull - [x] LazyFilter - [x] LazyJoin - [x] LazyQuantile - [x] LazyMedian - [x] LazyReverse - [x] LazySelect - [x] LazySortBy - [x] ToLazyFrame - [x] ToLazyGroupBy - [x] LazyExplode - [x] 
LazyFlatten - [x] AllFalse - [x] AllTrue - [x] ArgMax - [x] ArgMin - [x] ArgSort - [x] ArgTrue - [x] ArgUnique - [x] AsDate - [x] AsDateTime - [x] Concatenate - [x] Contains - [x] Cumulative - [x] GetDay - [x] GetHour - [x] GetMinute - [x] GetMonth - [x] GetNanosecond - [x] GetOrdinal - [x] GetSecond - [x] GetWeek - [x] GetWeekDay - [x] GetYear - [x] IsDuplicated - [x] IsIn - [x] IsNotNull - [x] IsNull - [x] IsUnique - [x] NNull - [x] NUnique - [x] NotSeries - [x] Replace - [x] ReplaceAll - [x] Rolling - [x] SetSeries - [x] SetWithIndex - [x] Shift - [x] StrLengths - [x] StrSlice - [x] StrFTime - [x] ToLowerCase - [x] ToUpperCase - [x] Unique - [x] ValueCount --------- Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
210
crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs
Normal file
210
crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs
Normal file
@ -0,0 +1,210 @@
|
||||
use crate::{
|
||||
dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy},
|
||||
values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::{datatypes::DataType, prelude::Expr};
|
||||
|
||||
/// Unit type that implements the `polars agg` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyAggregate;
|
||||
|
||||
impl PluginCommand for LazyAggregate {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars agg"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs a series of aggregations from a group-by."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group-by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the aggregations to be applied",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, value)?;
|
||||
|
||||
let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
for expr in expressions.iter() {
|
||||
if let Some(name) = get_col_name(expr) {
|
||||
let dtype = group_by.schema.schema.get(name.as_str());
|
||||
|
||||
if matches!(dtype, Some(DataType::Object(..))) {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Object type column not supported for aggregation".into(),
|
||||
msg: format!("Column '{name}' is type Object"),
|
||||
span: Some(call.head),
|
||||
help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
|
||||
inner: vec![],
|
||||
}).map_err(|e| e.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let polars = group_by.to_polars();
|
||||
let lazy = NuLazyFrame::new(false, polars.agg(&expressions));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_col_name(expr: &Expr) -> Option<String> {
|
||||
match expr {
|
||||
Expr::Column(column) => Some(column.to_string()),
|
||||
Expr::Agg(agg) => match agg {
|
||||
polars::prelude::AggExpr::Min { input: e, .. }
|
||||
| polars::prelude::AggExpr::Max { input: e, .. }
|
||||
| polars::prelude::AggExpr::Median(e)
|
||||
| polars::prelude::AggExpr::NUnique(e)
|
||||
| polars::prelude::AggExpr::First(e)
|
||||
| polars::prelude::AggExpr::Last(e)
|
||||
| polars::prelude::AggExpr::Mean(e)
|
||||
| polars::prelude::AggExpr::Implode(e)
|
||||
| polars::prelude::AggExpr::Count(e, _)
|
||||
| polars::prelude::AggExpr::Sum(e)
|
||||
| polars::prelude::AggExpr::AggGroups(e)
|
||||
| polars::prelude::AggExpr::Std(e, _)
|
||||
| polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()),
|
||||
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
|
||||
},
|
||||
Expr::Filter { input: expr, .. }
|
||||
| Expr::Slice { input: expr, .. }
|
||||
| Expr::Cast { expr, .. }
|
||||
| Expr::Sort { expr, .. }
|
||||
| Expr::Gather { expr, .. }
|
||||
| Expr::SortBy { expr, .. }
|
||||
| Expr::Exclude(expr, _)
|
||||
| Expr::Alias(expr, _)
|
||||
| Expr::KeepName(expr)
|
||||
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
|
||||
Expr::Ternary { .. }
|
||||
| Expr::AnonymousFunction { .. }
|
||||
| Expr::Function { .. }
|
||||
| Expr::Columns(_)
|
||||
| Expr::DtypeColumn(_)
|
||||
| Expr::Literal(_)
|
||||
| Expr::BinaryExpr { .. }
|
||||
| Expr::Window { .. }
|
||||
| Expr::Wildcard
|
||||
| Expr::RenameAlias { .. }
|
||||
| Expr::Len
|
||||
| Expr::Nth(_)
|
||||
| Expr::SubPlan(_, _)
|
||||
| Expr::Selector(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyAggregate` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = LazyAggregate;
        test_polars_plugin_command(&command)
    }
}
|
98
crates/nu_plugin_polars/src/dataframe/lazy/collect.rs
Normal file
98
crates/nu_plugin_polars/src/dataframe/lazy/collect.rs
Normal file
@ -0,0 +1,98 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
|
||||
Cacheable, PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
|
||||
|
||||
/// Unit type that implements the `polars collect` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyCollect;
|
||||
|
||||
impl PluginCommand for LazyCollect {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars collect"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Collect lazy dataframe into eager dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
let eager = lazy.collect(call.head)?;
|
||||
Ok(PipelineData::Value(
|
||||
eager.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
// just return the dataframe, add to cache again to be safe
|
||||
Ok(PipelineData::Value(
|
||||
df.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyCollect` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), nu_protocol::ShellError> {
        let command = LazyCollect;
        test_polars_plugin_command(&command)
    }
}
|
175
crates/nu_plugin_polars/src/dataframe/lazy/explode.rs
Normal file
175
crates/nu_plugin_polars/src/dataframe/lazy/explode.rs
Normal file
@ -0,0 +1,175 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject};
|
||||
use crate::PolarsPlugin;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
/// Unit type that implements the `polars explode` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyExplode;
|
||||
|
||||
impl PluginCommand for LazyExplode {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars explode"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Explodes a dataframe or creates a explode expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to explode, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Explode the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and explode the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
explode(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn explode(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
let lazy = df.lazy();
|
||||
explode_lazy(plugin, engine, call, lazy)
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy),
|
||||
PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr),
|
||||
_ => Err(ShellError::CantConvert {
|
||||
to_type: "dataframe or expression".into(),
|
||||
from_type: value.get_type().to_string(),
|
||||
span: call.head,
|
||||
help: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn explode_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns = call
|
||||
.positional
|
||||
.iter()
|
||||
.map(|e| e.as_str().map(|s| s.to_string()))
|
||||
.collect::<Result<Vec<String>, ShellError>>()?;
|
||||
|
||||
let exploded = lazy
|
||||
.to_polars()
|
||||
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
|
||||
let lazy = NuLazyFrame::from(exploded);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
pub(crate) fn explode_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr: NuExpression = expr.to_polars().explode().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyExplode` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = LazyExplode;
        test_polars_plugin_command(&command)
    }
}
|
100
crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs
Normal file
100
crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
/// Unit type that implements the `polars fetch` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyFetch;
|
||||
|
||||
impl PluginCommand for LazyFetch {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fetch"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Collects the lazyframe to the selected rows."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"rows",
|
||||
SyntaxShape::Int,
|
||||
"number of rows to be fetched from lazyframe",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Fetch a rows from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let rows: i64 = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
||||
|
||||
let mut eager: NuDataFrame = lazy
|
||||
.to_polars()
|
||||
.fetch(rows as usize)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error fetching rows".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
.into();
|
||||
|
||||
// mark this as not from lazy so it doesn't get converted back to a lazy frame
|
||||
eager.from_lazy = false;
|
||||
to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyFetch` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = LazyFetch;
        test_polars_plugin_command(&command)
    }
}
|
189
crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs
Normal file
189
crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs
Normal file
@ -0,0 +1,189 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
/// Unit type that implements the `polars fill-nan` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyFillNA;
|
||||
|
||||
impl PluginCommand for LazyFillNA {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fill-nan"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NaN values with the given expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the NAN values",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Fills the NaN values with 0",
|
||||
example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(0),
|
||||
Value::test_int(3),
|
||||
Value::test_int(0),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Fills the NaN values of a whole dataframe",
|
||||
example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_float(0.2), Value::test_float(0.1)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("Df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let fill: Value = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
cmd_df(plugin, engine, call, df, fill, value.span())
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => cmd_df(
|
||||
plugin,
|
||||
engine,
|
||||
call,
|
||||
lazy.collect(value.span())?,
|
||||
fill,
|
||||
value.span(),
|
||||
),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
Ok(cmd_expr(plugin, engine, call, expr, fill)?)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_df(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
frame: NuDataFrame,
|
||||
fill: Value,
|
||||
val_span: Span,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns = frame.columns(val_span)?;
|
||||
let dataframe = columns
|
||||
.into_iter()
|
||||
.map(|column| {
|
||||
let column_name = column.name().to_string();
|
||||
let values = column
|
||||
.into_iter()
|
||||
.map(|value| {
|
||||
let span = value.span();
|
||||
match value {
|
||||
Value::Float { val, .. } => {
|
||||
if val.is_nan() {
|
||||
fill.clone()
|
||||
} else {
|
||||
value
|
||||
}
|
||||
}
|
||||
Value::List { vals, .. } => {
|
||||
NuDataFrame::fill_list_nan(vals, span, fill.clone())
|
||||
}
|
||||
_ => value,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
Column::new(column_name, values)
|
||||
})
|
||||
.collect::<Vec<Column>>();
|
||||
let df = NuDataFrame::try_from_columns(dataframe, None)?;
|
||||
to_pipeline_data(plugin, engine, call.head, df)
|
||||
}
|
||||
|
||||
fn cmd_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let expr: NuExpression = expr.to_polars().fill_nan(fill).into();
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyFillNA` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = LazyFillNA;
        test_polars_plugin_command(&command)
    }
}
|
127
crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs
Normal file
127
crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs
Normal file
@ -0,0 +1,127 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
/// Unit type that implements the `polars fill-null` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyFillNull;
|
||||
|
||||
impl PluginCommand for LazyFillNull {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars fill-null"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NULL values with the given expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the null values",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Fills the null values by 0",
|
||||
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let fill: Value = call.req(0)?;
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill),
|
||||
PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill),
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
fn cmd_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
expr: NuExpression,
|
||||
fill: Value,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
|
||||
let expr: NuExpression = expr.to_polars().fill_null(fill).into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the examples declared by `LazyFillNull` through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = LazyFillNull;
        test_polars_plugin_command(&command)
    }
}
|
104
crates/nu_plugin_polars/src/dataframe/lazy/filter.rs
Normal file
104
crates/nu_plugin_polars/src/dataframe/lazy/filter.rs
Normal file
@ -0,0 +1,104 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
/// Unit type that implements the `polars filter` plugin command.
#[derive(Clone)]
|
||||
pub struct LazyFilter;
|
||||
|
||||
impl PluginCommand for LazyFilter {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars filter"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filter dataframe based in expression."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"filter expression",
|
||||
SyntaxShape::Any,
|
||||
"Expression that define the column selection",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example:
|
||||
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let expr_value: Value = call.req(0)?;
|
||||
let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?;
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
filter_expr: NuExpression,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars().filter(filter_expr.to_polars()),
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Hands the `LazyFilter` command to `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&LazyFilter)?;
        Ok(())
    }
}
|
125
crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs
Normal file
125
crates/nu_plugin_polars/src/dataframe/lazy/flatten.rs
Normal file
@ -0,0 +1,125 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
values::CustomValueSupport,
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::explode::explode;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFlatten;
|
||||
|
||||
impl PluginCommand for LazyFlatten {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars flatten"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"An alias for polars explode."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"columns",
|
||||
SyntaxShape::String,
|
||||
"columns to flatten, only applicable for dataframes",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Flatten the specified dataframe",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"id".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(2),
|
||||
]),
|
||||
Column::new(
|
||||
"name".to_string(),
|
||||
vec![
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Mercy"),
|
||||
Value::test_string("Bob"),
|
||||
Value::test_string("Bob"),
|
||||
]),
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
)
|
||||
},
|
||||
Example {
|
||||
description: "Select a column and flatten the values",
|
||||
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"hobbies".to_string(),
|
||||
vec![
|
||||
Value::test_string("Cycling"),
|
||||
Value::test_string("Knitting"),
|
||||
Value::test_string("Skiing"),
|
||||
Value::test_string("Football"),
|
||||
]),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
explode(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Hands the `LazyFlatten` command to `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), nu_protocol::ShellError> {
        test_polars_plugin_command(&LazyFlatten)?;
        Ok(())
    }
}
|
168
crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs
Normal file
168
crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs
Normal file
@ -0,0 +1,168 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyGroupBy;
|
||||
|
||||
impl PluginCommand for ToLazyGroupBy {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars group-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a group-by object that can be used for other aggregations."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group-by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the lazy group-by",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| polars into-lazy
|
||||
| polars group-by a
|
||||
| polars agg [
|
||||
(polars col b | polars min | polars as "b_min")
|
||||
(polars col b | polars max | polars as "b_max")
|
||||
(polars col b | polars sum | polars as "b_sum")
|
||||
]
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
Column::new(
|
||||
"b_min".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b_max".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b_sum".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(10)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
{
|
||||
let value: Value = call.req(0)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only Col expressions".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
expressions: Vec<Expr>,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let group_by = lazy.to_polars().group_by(expressions);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
|
||||
to_pipeline_data(plugin, engine, call.head, group_by)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Hands the `ToLazyGroupBy` command to `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&ToLazyGroupBy)?;
        Ok(())
    }
}
|
260
crates/nu_plugin_polars/src/dataframe/lazy/join.rs
Normal file
260
crates/nu_plugin_polars/src/dataframe/lazy/join.rs
Normal file
@ -0,0 +1,260 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{Expr, JoinType};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyJoin;
|
||||
|
||||
impl PluginCommand for LazyJoin {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars join"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Joins a lazy frame with other lazy frame."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("other", SyntaxShape::Any, "LazyFrame to join with")
|
||||
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
|
||||
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
|
||||
.switch(
|
||||
"inner",
|
||||
"inner joining between lazyframes (default)",
|
||||
Some('i'),
|
||||
)
|
||||
.switch("left", "left join between lazyframes", Some('l'))
|
||||
.switch("outer", "outer join between lazyframes", Some('o'))
|
||||
.switch("cross", "cross join between lazyframes", Some('c'))
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"Suffix to use on columns with same name",
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Join two lazy dataframes",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
||||
$df_a | polars join $df_b a foo | polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"bar".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"ham".to_string(),
|
||||
vec![
|
||||
Value::test_string("let"),
|
||||
Value::test_string("var"),
|
||||
Value::test_string("let"),
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Join one eager dataframe with a lazy dataframe",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
|
||||
$df_a | polars join $df_b a foo"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(0),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"bar".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("c"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"ham".to_string(),
|
||||
vec![
|
||||
Value::test_string("let"),
|
||||
Value::test_string("var"),
|
||||
Value::test_string("let"),
|
||||
Value::test_string("let"),
|
||||
],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let left = call.has_flag("left")?;
|
||||
let outer = call.has_flag("outer")?;
|
||||
let cross = call.has_flag("cross")?;
|
||||
|
||||
let how = if left {
|
||||
JoinType::Left
|
||||
} else if outer {
|
||||
JoinType::Outer { coalesce: true }
|
||||
} else if cross {
|
||||
JoinType::Cross
|
||||
} else {
|
||||
JoinType::Inner
|
||||
};
|
||||
|
||||
let other: Value = call.req(0)?;
|
||||
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
|
||||
let other = other.to_polars();
|
||||
|
||||
let left_on: Value = call.req(1)?;
|
||||
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
|
||||
|
||||
let right_on: Value = call.req(2)?;
|
||||
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
|
||||
|
||||
if left_on.len() != right_on.len() {
|
||||
let right_on: Value = call.req(2)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "The right column list has a different size to the left column list".into(),
|
||||
span: right_on.span(),
|
||||
})?;
|
||||
}
|
||||
|
||||
// Checking that both list of expressions are made out of col expressions or strings
|
||||
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
|
||||
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
|
||||
let value: Value = call.req(*index)?;
|
||||
Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: "Expected only a string, col expressions or list of strings".into(),
|
||||
span: value.span(),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
let suffix: Option<String> = call.get_flag("suffix")?;
|
||||
let suffix = suffix.unwrap_or_else(|| "_x".into());
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
|
||||
let from_eager = lazy.from_eager;
|
||||
let lazy = lazy.to_polars();
|
||||
|
||||
let lazy = lazy
|
||||
.join_builder()
|
||||
.with(other)
|
||||
.left_on(left_on)
|
||||
.right_on(right_on)
|
||||
.how(how)
|
||||
.force_parallel(true)
|
||||
.suffix(suffix)
|
||||
.finish();
|
||||
|
||||
let lazy = NuLazyFrame::new(from_eager, lazy);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Hands the `LazyJoin` command to `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&LazyJoin)?;
        Ok(())
    }
}
|
225
crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs
Normal file
225
crates/nu_plugin_polars/src/dataframe/lazy/macro_commands.rs
Normal file
@ -0,0 +1,225 @@
|
||||
/// Definitions of several lazyframe commands generated by a single macro rule.
/// All of these commands share an identical body and differ only in
/// their name, description and the function they call.
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame};
|
||||
use crate::values::{to_pipeline_data, CustomValueSupport};
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
|
||||
|
||||
/// Generates a unit-struct plugin command that applies one `LazyFrame`
/// method to its input, together with a test module for its examples.
///
/// Common arguments: `$command` (struct name), `$name` (command name),
/// `$desc` (usage text), `$examples` (expression yielding `Vec<Example>`),
/// `$func` (method called on the polars lazy frame) and `$test` (name of the
/// generated test module).
///
/// Three forms are accepted:
/// - `..., $func, $test` — `$func()` returns the new lazy frame directly.
/// - `..., $func, $test, $ddot` — `$func($ddot)` is called with one argument.
/// - `..., $func?, $test` — `$func()` returns a `Result`; its error is
///   reported as a "Dataframe Error".
macro_rules! lazy_command {
    ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
        #[derive(Clone)]
        pub struct $command;

        impl PluginCommand for $command {
            type Plugin = PolarsPlugin;

            fn name(&self) -> &str {
                $name
            }

            fn usage(&self) -> &str {
                $desc
            }

            fn signature(&self) -> Signature {
                Signature::build(self.name())
                    .usage($desc)
                    .input_output_type(
                        Type::Custom("dataframe".into()),
                        Type::Custom("dataframe".into()),
                    )
                    .category(Category::Custom("lazyframe".into()))
            }

            fn examples(&self) -> Vec<Example> {
                $examples
            }

            fn run(
                &self,
                plugin: &Self::Plugin,
                engine: &EngineInterface,
                call: &EvaluatedCall,
                input: PipelineData,
            ) -> Result<PipelineData, LabeledError> {
                let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
                    .map_err(LabeledError::from)?;
                let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
                to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
            }
        }

        #[cfg(test)]
        mod $test {
            use super::*;
            use crate::test::test_polars_plugin_command;
            use nu_protocol::ShellError;

            #[test]
            fn test_examples() -> Result<(), ShellError> {
                test_polars_plugin_command(&$command)
            }
        }
    };

    ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => {
        #[derive(Clone)]
        pub struct $command;

        impl PluginCommand for $command {
            type Plugin = PolarsPlugin;

            // `name`, `usage` and `examples` mirror the other two forms so
            // that every generated command exposes the same set of methods.
            fn name(&self) -> &str {
                $name
            }

            fn usage(&self) -> &str {
                $desc
            }

            fn signature(&self) -> Signature {
                Signature::build(self.name())
                    .usage($desc)
                    .input_output_type(
                        Type::Custom("dataframe".into()),
                        Type::Custom("dataframe".into()),
                    )
                    .category(Category::Custom("lazyframe".into()))
            }

            fn examples(&self) -> Vec<Example> {
                $examples
            }

            fn run(
                &self,
                plugin: &Self::Plugin,
                engine: &EngineInterface,
                call: &EvaluatedCall,
                input: PipelineData,
            ) -> Result<PipelineData, LabeledError> {
                let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
                    .map_err(LabeledError::from)?;
                let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func($ddot));
                to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
            }
        }

        #[cfg(test)]
        mod $test {
            use super::*;
            use crate::test::test_polars_plugin_command;
            use nu_protocol::ShellError;

            #[test]
            fn test_examples() -> Result<(), ShellError> {
                test_polars_plugin_command(&$command)
            }
        }
    };

    ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => {
        #[derive(Clone)]
        pub struct $command;

        impl PluginCommand for $command {
            type Plugin = PolarsPlugin;

            fn name(&self) -> &str {
                $name
            }

            fn usage(&self) -> &str {
                $desc
            }

            fn signature(&self) -> Signature {
                Signature::build(self.name())
                    .input_output_type(
                        Type::Custom("dataframe".into()),
                        Type::Custom("dataframe".into()),
                    )
                    .category(Category::Custom("lazyframe".into()))
            }

            fn examples(&self) -> Vec<Example> {
                $examples
            }

            fn run(
                &self,
                plugin: &Self::Plugin,
                engine: &EngineInterface,
                call: &EvaluatedCall,
                input: PipelineData,
            ) -> Result<PipelineData, LabeledError> {
                let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
                    .map_err(LabeledError::from)?;

                let lazy = NuLazyFrame::new(
                    lazy.from_eager,
                    lazy.to_polars()
                        .$func()
                        // Full path: `ShellError` is not among this file's imports.
                        .map_err(|e| nu_protocol::ShellError::GenericError {
                            error: "Dataframe Error".into(),
                            msg: e.to_string(),
                            help: None,
                            span: None,
                            inner: vec![],
                        })
                        .map_err(LabeledError::from)?,
                );

                to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
            }
        }

        #[cfg(test)]
        mod $test {
            use super::*;
            use crate::test::test_polars_plugin_command;
            use nu_protocol::ShellError;

            #[test]
            fn test_examples() -> Result<(), ShellError> {
                test_polars_plugin_command(&$command)
            }
        }
    };
}
|
||||
|
||||
// LazyReverse command
|
||||
// Expands to a command definition for reverse
|
||||
lazy_command!(
|
||||
LazyReverse,
|
||||
"polars reverse",
|
||||
"Reverses the LazyFrame",
|
||||
vec![Example {
|
||||
description: "Reverses the dataframe.",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
|
||||
),
|
||||
],
|
||||
None
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},],
|
||||
reverse,
|
||||
test_reverse
|
||||
);
|
||||
|
||||
// LazyCache command
|
||||
// Expands to a command definition for cache
|
||||
lazy_command!(
|
||||
LazyCache,
|
||||
"polars cache",
|
||||
"Caches operations in a new LazyFrame.",
|
||||
vec![Example {
|
||||
description: "Caches the result into a new LazyFrame",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache",
|
||||
result: None,
|
||||
}],
|
||||
cache,
|
||||
test_cache
|
||||
);
|
143
crates/nu_plugin_polars/src/dataframe/lazy/median.rs
Normal file
143
crates/nu_plugin_polars/src/dataframe/lazy/median.rs
Normal file
@ -0,0 +1,143 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
#[derive(Clone)]
|
||||
pub struct LazyMedian;
|
||||
|
||||
impl PluginCommand for LazyMedian {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars median"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Median value from columns in a dataframe or creates expression for an aggregation"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Median aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars median)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(3.0), Value::test_float(1.0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Median value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr.to_polars().median().into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let polars_lazy = lazy
|
||||
.to_polars()
|
||||
.median()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: format!("Error in median operation: {e}"),
|
||||
msg: "".into(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Hands the `LazyMedian` command to `test_polars_plugin_command`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&LazyMedian)?;
        Ok(())
    }
}
|
57
crates/nu_plugin_polars/src/dataframe/lazy/mod.rs
Normal file
57
crates/nu_plugin_polars/src/dataframe/lazy/mod.rs
Normal file
@ -0,0 +1,57 @@
|
||||
mod aggregate;
|
||||
mod collect;
|
||||
mod explode;
|
||||
mod fetch;
|
||||
mod fill_nan;
|
||||
mod fill_null;
|
||||
mod filter;
|
||||
mod flatten;
|
||||
pub mod groupby;
|
||||
mod join;
|
||||
mod macro_commands;
|
||||
mod median;
|
||||
mod quantile;
|
||||
mod select;
|
||||
mod sort_by_expr;
|
||||
mod to_lazy;
|
||||
|
||||
use nu_plugin::PluginCommand;
|
||||
|
||||
pub use crate::dataframe::lazy::aggregate::LazyAggregate;
|
||||
pub use crate::dataframe::lazy::collect::LazyCollect;
|
||||
use crate::dataframe::lazy::fetch::LazyFetch;
|
||||
use crate::dataframe::lazy::fill_nan::LazyFillNA;
|
||||
pub use crate::dataframe::lazy::fill_null::LazyFillNull;
|
||||
use crate::dataframe::lazy::filter::LazyFilter;
|
||||
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
|
||||
use crate::dataframe::lazy::join::LazyJoin;
|
||||
pub(crate) use crate::dataframe::lazy::macro_commands::*;
|
||||
use crate::dataframe::lazy::quantile::LazyQuantile;
|
||||
pub(crate) use crate::dataframe::lazy::select::LazySelect;
|
||||
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
|
||||
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
|
||||
use crate::PolarsPlugin;
|
||||
pub use explode::LazyExplode;
|
||||
pub use flatten::LazyFlatten;
|
||||
|
||||
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
vec![
|
||||
Box::new(LazyAggregate),
|
||||
Box::new(LazyCache),
|
||||
Box::new(LazyCollect),
|
||||
Box::new(LazyExplode),
|
||||
Box::new(LazyFetch),
|
||||
Box::new(LazyFillNA),
|
||||
Box::new(LazyFillNull),
|
||||
Box::new(LazyFilter),
|
||||
Box::new(LazyFlatten),
|
||||
Box::new(LazyJoin),
|
||||
Box::new(median::LazyMedian),
|
||||
Box::new(LazyReverse),
|
||||
Box::new(LazySelect),
|
||||
Box::new(LazySortBy),
|
||||
Box::new(LazyQuantile),
|
||||
Box::new(ToLazyFrame),
|
||||
Box::new(ToLazyGroupBy),
|
||||
]
|
||||
}
|
160
crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs
Normal file
160
crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs
Normal file
@ -0,0 +1,160 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
|
||||
values::{
|
||||
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
|
||||
PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{lit, QuantileInterpolOptions};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyQuantile;
|
||||
|
||||
impl PluginCommand for LazyQuantile {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars quantile"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Aggregates the columns to the selected quantile."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"quantile",
|
||||
SyntaxShape::Number,
|
||||
"quantile value for quantile operation",
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "quantile value from columns in a dataframe",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
|
||||
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Quantile aggregation for a group-by",
|
||||
example: r#"[[a b]; [one 2] [one 4] [two 1]]
|
||||
| polars into-df
|
||||
| polars group-by a
|
||||
| polars agg (polars col b | polars quantile 0.5)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_string("one"), Value::test_string("two")],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_float(4.0), Value::test_float(1.0)],
|
||||
),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let value = input.into_value(call.head);
|
||||
let quantile: f64 = call.req(0)?;
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
command(plugin, engine, call, df.lazy(), quantile)
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr
|
||||
.to_polars()
|
||||
.quantile(lit(quantile), QuantileInterpolOptions::default())
|
||||
.into();
|
||||
to_pipeline_data(plugin, engine, call.head, expr)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
quantile: f64,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.quantile(lit(quantile), QuantileInterpolOptions::default())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Dataframe Error".into(),
|
||||
msg: e.to_string(),
|
||||
help: None,
|
||||
span: None,
|
||||
inner: vec![],
|
||||
})?,
|
||||
);
|
||||
|
||||
to_pipeline_data(plugin, engine, call.head, lazy)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::test::test_polars_plugin_command;

    use super::*;

    /// Runs the command's declared examples through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&LazyQuantile)
    }
}
|
85
crates/nu_plugin_polars/src/dataframe/lazy/select.rs
Normal file
85
crates/nu_plugin_polars/src/dataframe/lazy/select.rs
Normal file
@ -0,0 +1,85 @@
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
#[derive(Clone)]
|
||||
pub struct LazySelect;
|
||||
|
||||
impl PluginCommand for LazySelect {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars select"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Selects columns from lazyframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"select expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the column selection",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Select a column from the dataframe",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
|
||||
)],
|
||||
None,
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command;

    /// Runs the command's declared examples through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), nu_protocol::ShellError> {
        test_polars_plugin_command(&LazySelect)
    }
}
|
160
crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs
Normal file
160
crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs
Normal file
@ -0,0 +1,160 @@
|
||||
use super::super::values::NuLazyFrame;
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame, NuExpression},
|
||||
values::{to_pipeline_data, CustomValueSupport},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySortBy;
|
||||
|
||||
impl PluginCommand for LazySortBy {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars sort-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Sorts a lazy dataframe based on expression(s)."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"sort expression",
|
||||
SyntaxShape::Any,
|
||||
"sort expression for the dataframe",
|
||||
)
|
||||
.named(
|
||||
"reverse",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
|
||||
"Reverse sorting. Default is false",
|
||||
Some('r'),
|
||||
)
|
||||
.switch(
|
||||
"nulls-last",
|
||||
"nulls are shown last in the dataframe",
|
||||
Some('n'),
|
||||
)
|
||||
.switch("maintain-order", "Maintains order during sort", Some('m'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Sort dataframe by one column",
|
||||
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Sort column using two columns",
|
||||
example:
|
||||
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(1),
|
||||
Value::test_int(4),
|
||||
Value::test_int(2),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
let nulls_last = call.has_flag("nulls-last")?;
|
||||
let maintain_order = call.has_flag("maintain-order")?;
|
||||
|
||||
let reverse: Option<Vec<bool>> = call.get_flag("reverse")?;
|
||||
let reverse = match reverse {
|
||||
Some(list) => {
|
||||
if expressions.len() != list.len() {
|
||||
let span = call
|
||||
.get_flag::<Value>("reverse")?
|
||||
.expect("already checked and it exists")
|
||||
.span();
|
||||
Err(ShellError::GenericError {
|
||||
error: "Incorrect list size".into(),
|
||||
msg: "Size doesn't match expression list".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?
|
||||
} else {
|
||||
list
|
||||
}
|
||||
}
|
||||
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
|
||||
};
|
||||
|
||||
let pipeline_value = input.into_value(call.head);
|
||||
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.to_polars()
|
||||
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
|
||||
);
|
||||
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::test::test_polars_plugin_command;

    use super::*;

    /// Runs the command's declared examples through the plugin test helper.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        test_polars_plugin_command(&LazySortBy)
    }
}
|
61
crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs
Normal file
61
crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs
Normal file
@ -0,0 +1,61 @@
|
||||
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
|
||||
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyFrame;
|
||||
|
||||
impl PluginCommand for ToLazyFrame {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars into-lazy"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a dataframe into a lazy dataframe."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"schema",
|
||||
SyntaxShape::Record(vec![]),
|
||||
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
|
||||
Some('s'),
|
||||
)
|
||||
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a dictionary and creates a lazy dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
.transpose()?;
|
||||
|
||||
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
|
||||
let lazy = NuLazyFrame::from_dataframe(df);
|
||||
Ok(PipelineData::Value(
|
||||
lazy.cache(plugin, engine)?.into_value(call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user