Move dataframes support to a plugin (#12220)

WIP

This PR covers the migration of crates/nu-cmd-dataframes to a new plugin,
./crates/nu_plugin_polars.

## TODO List

Other:
- [x] Fix examples
- [x] Fix Plugin Test Harness
- [x] Move Cache to Mutex<BTreeMap>
- [x] Logic for disabling/enabling plugin GC based on whether items are
cached (see the sketch after this list).
- [x] NuExpression custom values
- [x] Optimize caching (don't cache every object creation).
- [x] Fix dataframe operations (in NuDataFrameCustomValue::operations)
- [x] Added a plugin_debug! macro that checks the env variable
POLARS_PLUGIN_DEBUG
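
A minimal sketch of the cache/GC interaction mentioned above (the cache lives behind a
`Mutex<BTreeMap>`, and plugin GC stays disabled while anything is cached). Illustrative
only: `CachedObject`, the method names, and the `uuid` key type are assumptions for the
sketch, not the PR's actual types; `EngineInterface::set_gc_disabled` is the nu-plugin
call used to toggle GC.

```rust
// Hedged sketch, not the PR's implementation: a shared cache behind a Mutex,
// with plugin GC disabled whenever the cache is non-empty.
use std::collections::BTreeMap;
use std::sync::Mutex;

use nu_plugin::EngineInterface;
use nu_protocol::ShellError;
use uuid::Uuid;

pub struct CachedObject; // placeholder for cached dataframes/lazy frames/expressions

#[derive(Default)]
pub struct Cache {
    inner: Mutex<BTreeMap<Uuid, CachedObject>>,
}

impl Cache {
    pub fn insert(
        &self,
        engine: &EngineInterface,
        key: Uuid,
        value: CachedObject,
    ) -> Result<(), ShellError> {
        let mut map = self.inner.lock().expect("cache mutex poisoned");
        // While anything is cached the plugin must stay alive, so disable GC.
        engine.set_gc_disabled(true)?;
        map.insert(key, value);
        Ok(())
    }

    pub fn remove(
        &self,
        engine: &EngineInterface,
        key: &Uuid,
    ) -> Result<Option<CachedObject>, ShellError> {
        let mut map = self.inner.lock().expect("cache mutex poisoned");
        let removed = map.remove(key);
        // Once the cache is empty, the plugin can be garbage collected again.
        if map.is_empty() {
            engine.set_gc_disabled(false)?;
        }
        Ok(removed)
    }
}
```

The plugin_debug! item boils down to gating debug output on an environment variable; a
sketch of that idea (the exact macro body is an assumption):

```rust
// Print to stderr only when POLARS_PLUGIN_DEBUG is set to a non-empty value.
#[macro_export]
macro_rules! plugin_debug {
    ($($arg:tt)*) => {{
        if std::env::var("POLARS_PLUGIN_DEBUG")
            .ok()
            .filter(|v| !v.is_empty())
            .is_some()
        {
            eprintln!($($arg)*);
        }
    }};
}
```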

Fix duplicated commands:
- [x] There are two polars median commands, one for lazy and one for
expr; there should only be one that works for both. I temporarily
called one polars expr-median (inside expressions_macros.rs)
- [x] polars quantile (lazy and expr); the expr one is temporarily
expr-median
- [x] polars is-in (renamed one to series-is-in)

Commands:
- [x] AppendDF
- [x] CastDF
- [X] ColumnsDF
- [x] DataTypes
- [x] Summary
- [x] DropDF
- [x] DropDuplicates
- [x] DropNulls
- [x] Dummies
- [x] FilterWith
- [X] FirstDF
- [x] GetDF
- [x] LastDF
- [X] ListDF
- [x] MeltDF
- [X] OpenDataFrame
- [x] QueryDf
- [x] RenameDF
- [x] SampleDF
- [x] SchemaDF
- [x] ShapeDF
- [x] SliceDF
- [x] TakeDF
- [X] ToArrow
- [x] ToAvro
- [X] ToCSV
- [X] ToDataFrame
- [X] ToNu
- [x] ToParquet
- [x] ToJsonLines
- [x] WithColumn
- [x] ExprAlias
- [x] ExprArgWhere
- [x] ExprCol
- [x] ExprConcatStr
- [x] ExprCount
- [x] ExprLit
- [x] ExprWhen
- [x] ExprOtherwise
- [x] ExprQuantile
- [x] ExprList
- [x] ExprAggGroups
- [x] ExprIsIn
- [x] ExprNot
- [x] ExprMax
- [x] ExprMin
- [x] ExprSum
- [x] ExprMean
- [x] ExprMedian
- [x] ExprStd
- [x] ExprVar
- [x] ExprDatePart
- [X] LazyAggregate
- [x] LazyCache
- [X] LazyCollect
- [x] LazyFetch
- [x] LazyFillNA
- [x] LazyFillNull
- [x] LazyFilter
- [x] LazyJoin
- [x] LazyQuantile
- [x] LazyMedian
- [x] LazyReverse
- [x] LazySelect
- [x] LazySortBy
- [x] ToLazyFrame
- [x] ToLazyGroupBy
- [x] LazyExplode
- [x] LazyFlatten
- [x] AllFalse
- [x] AllTrue
- [x] ArgMax
- [x] ArgMin
- [x] ArgSort
- [x] ArgTrue
- [x] ArgUnique
- [x] AsDate
- [x] AsDateTime
- [x] Concatenate
- [x] Contains
- [x] Cumulative
- [x] GetDay
- [x] GetHour
- [x] GetMinute
- [x] GetMonth
- [x] GetNanosecond
- [x] GetOrdinal
- [x] GetSecond
- [x] GetWeek
- [x] GetWeekDay
- [x] GetYear
- [x] IsDuplicated
- [x] IsIn
- [x] IsNotNull
- [x] IsNull
- [x] IsUnique
- [x] NNull
- [x] NUnique
- [x] NotSeries
- [x] Replace
- [x] ReplaceAll
- [x] Rolling
- [x] SetSeries
- [x] SetWithIndex
- [x] Shift
- [x] StrLengths
- [x] StrSlice
- [x] StrFTime
- [x] ToLowerCase
- [x] ToUpperCase
- [x] Unique
- [x] ValueCount

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>

Author: Jack Wright
Date: 2024-04-09 17:31:43 -07:00 (committed by GitHub)
Parent: cbbccaa722
Commit: efc1cfa939
137 changed files with 21181 additions and 379 deletions

@@ -0,0 +1,12 @@
# Dataframe

This dataframe directory holds all of the definitions of the dataframe data structures and commands.

There are three sections of commands:

* [eager](./eager)
* [series](./series)
* [values](./values)

For more details see the
[Nushell book section on dataframes](https://www.nushell.sh/book/dataframes.html)

@@ -0,0 +1,144 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
values::{to_pipeline_data, Axis, Column, CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
#[derive(Clone)]
pub struct AppendDF;
impl PluginCommand for AppendDF {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "other dataframe to append")
.switch("col", "append as new columns instead of rows", None)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
fn name(&self) -> &str {
"polars append"
}
fn usage(&self) -> &str {
"Appends a new dataframe."
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Appends a dataframe as new columns",
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df);
$a | polars append $a"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"a_x".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b_x".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Appends a dataframe merging at the end of columns",
example: r#"let a = ([[a b]; [1 2] [3 4]] | polars into-df); $a | polars append $a --col"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(3),
Value::test_int(1),
Value::test_int(3),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(2),
Value::test_int(4),
Value::test_int(2),
Value::test_int(4),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let other: Value = call.req(0)?;
let axis = if call.has_flag("col")? {
Axis::Column
} else {
Axis::Row
};
let df_other = NuDataFrame::try_from_value_coerce(plugin, &other, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = df.append_df(&df_other, axis, call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AppendDF)
}
}

@@ -0,0 +1,202 @@
use crate::{
dataframe::values::{str_to_dtype, to_pipeline_data, NuExpression, NuLazyFrame},
values::{cant_convert_err, PolarsPluginObject, PolarsPluginType},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use polars::prelude::*;
#[derive(Clone)]
pub struct CastDF;
impl PluginCommand for CastDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars cast"
}
fn usage(&self) -> &str {
"Cast a column to a different dtype."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.required(
"dtype",
SyntaxShape::String,
"The dtype to cast the column to",
)
.optional(
"column",
SyntaxShape::String,
"The column to cast. Required when used with a dataframe.",
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Cast a column in a dataframe to a different dtype",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a lazy dataframe to a different dtype",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars into-lazy | polars cast u8 a | polars schema",
result: Some(Value::record(
record! {
"a" => Value::string("u8", Span::test_data()),
"b" => Value::string("i64", Span::test_data()),
},
Span::test_data(),
)),
},
Example {
description: "Cast a column in a expression to a different dtype",
example: r#"[[a b]; [1 2] [1 4]] | polars into-df | polars group-by a | polars agg [ (polars col b | polars cast u8 | polars min | polars as "b_min") ] | polars schema"#,
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let (dtype, column_nm) = df_args(call)?;
command_lazy(plugin, engine, call, column_nm, dtype, lazy)
}
PolarsPluginObject::NuDataFrame(df) => {
let (dtype, column_nm) = df_args(call)?;
command_eager(plugin, engine, call, column_nm, dtype, df)
}
PolarsPluginObject::NuExpression(expr) => {
let dtype: String = call.req(0)?;
let dtype = str_to_dtype(&dtype, call.head)?;
let expr: NuExpression = expr.to_polars().cast(dtype).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn df_args(call: &EvaluatedCall) -> Result<(DataType, String), ShellError> {
let dtype = dtype_arg(call)?;
let column_nm: String = call.opt(1)?.ok_or(ShellError::MissingParameter {
param_name: "column_name".into(),
span: call.head,
})?;
Ok((dtype, column_nm))
}
fn dtype_arg(call: &EvaluatedCall) -> Result<DataType, ShellError> {
let dtype: String = call.req(0)?;
str_to_dtype(&dtype, call.head)
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let column = col(&column_nm).cast(dtype);
let lazy = lazy.to_polars().with_columns(&[column]);
let lazy = NuLazyFrame::new(false, lazy);
to_pipeline_data(plugin, engine, call.head, lazy)
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
column_nm: String,
dtype: DataType,
nu_df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut df = (*nu_df.df).clone();
let column = df
.column(&column_nm)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let _ = df
.with_column(casted)
.map_err(|e| ShellError::GenericError {
error: format!("{e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&CastDF)
}
}

@@ -0,0 +1,79 @@
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct ColumnsDF;
impl PluginCommand for ColumnsDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars columns"
}
fn usage(&self) -> &str {
"Show dataframe columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe columns",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars columns",
result: Some(Value::list(
vec![Value::test_string("a"), Value::test_string("b")],
Span::test_data(),
)),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| Value::string(*v, call.head))
.collect();
let names = Value::list(names, call.head);
Ok(PipelineData::Value(names, None))
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ColumnsDF)
}
}

@@ -0,0 +1,127 @@
use crate::values::to_pipeline_data;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::values::CustomValueSupport;
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropDF;
impl PluginCommand for DropDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop"
}
fn usage(&self) -> &str {
"Creates a new dataframe by dropping the selected columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest("rest", SyntaxShape::Any, "column names to be dropped")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars drop a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let new_df = col_string
.first()
.ok_or_else(|| ShellError::GenericError {
error: "Empty names list".into(),
msg: "No column names were found".into(),
span: Some(col_span),
help: None,
inner: vec![],
})
.and_then(|col| {
df.as_ref()
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
// If there are more columns in the drop selection list, these
// are added from the resulting dataframe
let polars_df = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
new_df
.drop(&col.item)
.map_err(|e| ShellError::GenericError {
error: "Error dropping column".into(),
msg: e.to_string(),
span: Some(col.span),
help: None,
inner: vec![],
})
})?;
let final_df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, final_df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropDF)
}
}

@@ -0,0 +1,133 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::UniqueKeepStrategy;
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropDuplicates;
impl PluginCommand for DropDuplicates {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop-duplicates"
}
fn usage(&self) -> &str {
"Drops duplicate values in dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table(vec![]),
"subset of columns to drop duplicates",
)
.switch("maintain", "maintain order", Some('m'))
.switch(
"last",
"keeps last duplicate value (by default keeps first)",
Some('l'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | polars into-df | polars drop-duplicates",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(3), Value::test_int(1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span)
}
None => (None, call.head),
};
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let keep_strategy = if call.has_flag("last")? {
UniqueKeepStrategy::Last
} else {
UniqueKeepStrategy::First
};
let polars_df = df
.as_ref()
.unique(subset_slice, keep_strategy, None)
.map_err(|e| ShellError::GenericError {
error: "Error dropping duplicates".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropDuplicates)
}
}

@@ -0,0 +1,149 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct DropNulls;
impl PluginCommand for DropNulls {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars drop-nulls"
}
fn usage(&self) -> &str {
"Drops null values in dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"subset",
SyntaxShape::Table(vec![]),
"subset of columns to drop nulls",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "drop null values in dataframe",
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | polars into-df);
let res = ($df.b / $df.b);
let a = ($df | polars with-column $res --name res);
$a | polars drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
Column::new(
"res".to_string(),
vec![Value::test_int(1), Value::test_int(1)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "drop null values in dataframe",
example: r#"let s = ([1 2 0 0 3 4] | polars into-df);
($s / $s) | polars drop-nulls"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"div_0_0".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(1),
Value::test_int(1),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let columns: Option<Vec<Value>> = call.opt(0)?;
let (subset, col_span) = match columns {
Some(cols) => {
let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
(Some(agg_string), col_span)
}
None => (None, call.head),
};
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
let polars_df = df
.as_ref()
.drop_nulls(subset_slice)
.map_err(|e| ShellError::GenericError {
error: "Error dropping nulls".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DropNulls)
}
}

@@ -0,0 +1,111 @@
use crate::PolarsPlugin;
use super::super::values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct DataTypes;
impl PluginCommand for DataTypes {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars dtypes"
}
fn usage(&self) -> &str {
"Show dataframe data types."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dtypes",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"column".to_string(),
vec![Value::test_string("a"), Value::test_string("b")],
),
Column::new(
"dtype".to_string(),
vec![Value::test_string("i64"), Value::test_string("i64")],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut dtypes: Vec<Value> = Vec::new();
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| {
let dtype = df
.as_ref()
.column(v)
.expect("using name from list of names from dataframe")
.dtype();
let dtype_str = dtype.to_string();
dtypes.push(Value::string(dtype_str, call.head));
Value::string(*v, call.head)
})
.collect();
let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes);
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&DataTypes)
}
}

@@ -0,0 +1,119 @@
use super::super::values::NuDataFrame;
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{prelude::*, series::Series};
#[derive(Clone)]
pub struct Dummies;
impl PluginCommand for Dummies {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars dummies"
}
fn usage(&self) -> &str {
"Creates a new dataframe with dummy variables."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("drop-first", "Drop first row", Some('d'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create new dataframe with dummy variables from a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars dummies",
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("a_1", &[1_u8, 0]),
Series::new("a_3", &[0_u8, 1]),
Series::new("b_2", &[1_u8, 0]),
Series::new("b_4", &[0_u8, 1]),
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create new dataframe with dummy variables from a series",
example: "[1 2 2 3 3] | polars into-df | polars dummies",
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("0_1", &[1_u8, 0, 0, 0, 0]),
Series::new("0_2", &[0_u8, 1, 1, 0, 0]),
Series::new("0_3", &[0_u8, 0, 0, 1, 1]),
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let drop_first: bool = call.has_flag("drop-first")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df =
df.as_ref()
.to_dummies(None, drop_first)
.map_err(|e| ShellError::GenericError {
error: "Error calculating dummies".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The only allowed column types for dummies are String or Int".into()),
inner: vec![],
})?;
let df: NuDataFrame = polars_df.into();
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Dummies)
}
}

@@ -0,0 +1,165 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::LazyFrame;
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct FilterWith;
impl PluginCommand for FilterWith {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter-with"
}
fn usage(&self) -> &str {
"Filters dataframe using a mask or expression as reference."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"mask or expression",
SyntaxShape::Any,
"boolean mask used to filter data",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([true false] | polars into-df);
[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with $mask"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars filter-with ((polars col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
)),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mask_value: Value = call.req(0)?;
let mask_span = mask_value.span();
if NuExpression::can_downcast(&mask_value) {
let expression = NuExpression::try_from_value(plugin, &mask_value)?;
let lazy = df.lazy();
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
to_pipeline_data(plugin, engine, call.head, lazy)
} else {
let mask = NuDataFrame::try_from_value_coerce(plugin, &mask_value, mask_span)?
.as_series(mask_span)?;
let mask = mask.bool().map_err(|e| ShellError::GenericError {
error: "Error casting to bool".into(),
msg: e.to_string(),
span: Some(mask_span),
help: Some("Perhaps you want to use a series with booleans as mask".into()),
inner: vec![],
})?;
let polars_df = df
.as_ref()
.filter(mask)
.map_err(|e| ShellError::GenericError {
error: "Error filtering dataframe".into(),
msg: e.to_string(),
span: Some(call.head),
help: Some("The only allowed column types for dummies are String or Int".into()),
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let expr: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &expr)?;
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FilterWith)
}
}

@@ -0,0 +1,137 @@
use crate::{
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct FirstDF;
impl PluginCommand for FirstDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars first"
}
fn usage(&self) -> &str {
"Show only the first number of rows or create a first expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional(
"rows",
SyntaxShape::Int,
"starting from the front, the number of rows to return",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Return the first row of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Return the first two rows of a dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars first 2",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a first expression from a column",
example: "polars col a | polars first",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().first().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(1);
let res = df.as_ref().head(Some(rows));
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&FirstDF)
}
}

@@ -0,0 +1,103 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::values::utils::convert_columns_string,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct GetDF;
impl PluginCommand for GetDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get"
}
fn usage(&self) -> &str {
"Creates dataframe with the selected columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns the selected column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars get a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let columns: Vec<Value> = call.rest(0)?;
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = df
.as_ref()
.select(col_string)
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(false, df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetDF)
}
}

@@ -0,0 +1,112 @@
use crate::{
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{utils::DEFAULT_ROWS, NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LastDF;
impl PluginCommand for LastDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars last"
}
fn usage(&self) -> &str {
"Creates new dataframe with tail rows or creates a last expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars last 1",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a last expression from a column",
example: "polars col a | polars last",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(plugin, &value)?;
command(plugin, engine, call, df).map_err(|e| e.into())
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().last().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.opt(0)?;
let rows = rows.unwrap_or(DEFAULT_ROWS);
let res = df.as_ref().tail(Some(rows));
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LastDF)
}
}

@@ -0,0 +1,96 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, IntoPipelineData, LabeledError, PipelineData, Signature, Value,
};
use crate::{values::PolarsPluginObject, PolarsPlugin};
#[derive(Clone)]
pub struct ListDF;
impl PluginCommand for ListDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars ls"
}
fn usage(&self) -> &str {
"Lists stored dataframes."
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a new dataframe and shows it in the dataframe list",
example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df);
polars ls"#,
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals = plugin.cache.process_entries(|(key, value)| match value {
PolarsPluginObject::NuDataFrame(df) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::int(df.as_ref().width() as i64, call.head),
"rows" => Value::int(df.as_ref().height() as i64, call.head),
"type" => Value::string("NuDataFrame", call.head),
},
call.head,
))),
PolarsPluginObject::NuLazyFrame(lf) => {
let lf = lf.clone().collect(call.head)?;
Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::int(lf.as_ref().width() as i64, call.head),
"rows" => Value::int(lf.as_ref().height() as i64, call.head),
"type" => Value::string("NuLazyFrame", call.head),
},
call.head,
)))
}
PolarsPluginObject::NuExpression(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuExpression", call.head),
},
call.head,
))),
PolarsPluginObject::NuLazyGroupBy(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuLazyGroupBy", call.head),
},
call.head,
))),
PolarsPluginObject::NuWhen(_) => Ok(Some(Value::record(
record! {
"key" => Value::string(key.to_string(), call.head),
"columns" => Value::nothing(call.head),
"rows" => Value::nothing(call.head),
"type" => Value::string("NuWhen", call.head),
},
call.head,
))),
})?;
let vals = vals.into_iter().flatten().collect();
let list = Value::list(vals, call.head);
Ok(list.into_pipeline_data())
}
}

View File

@ -0,0 +1,255 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use crate::{
dataframe::values::utils::convert_columns_string,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct MeltDF;
impl PluginCommand for MeltDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars melt"
}
fn usage(&self) -> &str {
"Unpivot a DataFrame from wide to long format."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required_named(
"columns",
SyntaxShape::Table(vec![]),
"column names for melting",
Some('c'),
)
.required_named(
"values",
SyntaxShape::Table(vec![]),
"column names used as value columns",
Some('v'),
)
.named(
"variable-name",
SyntaxShape::String,
"optional name for variable column",
Some('r'),
)
.named(
"value-name",
SyntaxShape::String,
"optional name for value column",
Some('l'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "melt dataframe",
example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"b".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(4),
Value::test_int(5),
Value::test_int(6),
Value::test_int(4),
Value::test_int(5),
Value::test_int(6),
],
),
Column::new(
"variable".to_string(),
vec![
Value::test_string("a"),
Value::test_string("a"),
Value::test_string("a"),
Value::test_string("d"),
Value::test_string("d"),
Value::test_string("d"),
],
),
Column::new(
"value".to_string(),
vec![
Value::test_string("x"),
Value::test_string("y"),
Value::test_string("z"),
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
let mut res = df
.as_ref()
.melt(&id_col_string, &val_col_string)
.map_err(|e| ShellError::GenericError {
error: "Error calculating melt".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
if let Some(name) = &variable_name {
res.rename("variable", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
if let Some(name) = &value_name {
res.rename("value", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
fn check_column_datatypes<T: AsRef<str>>(
df: &polars::prelude::DataFrame,
cols: &[T],
col_span: Span,
) -> Result<(), ShellError> {
if cols.is_empty() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "empty column list".into(),
span: Some(col_span),
help: None,
inner: vec![],
});
}
// Checking if they are same type
if cols.len() > 1 {
for w in cols.windows(2) {
let l_series = df
.column(w[0].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
let r_series = df
.column(w[1].as_ref())
.map_err(|e| ShellError::GenericError {
error: "Error selecting columns".into(),
msg: e.to_string(),
span: Some(col_span),
help: None,
inner: vec![],
})?;
if l_series.dtype() != r_series.dtype() {
return Err(ShellError::GenericError {
error: "Merge error".into(),
msg: "found different column types in list".into(),
span: Some(col_span),
help: Some(format!(
"datatypes {} and {} are incompatible",
l_series.dtype(),
r_series.dtype()
)),
inner: vec![],
});
}
}
}
Ok(())
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&MeltDF)
}
}

@@ -0,0 +1,105 @@
mod append;
mod cast;
mod columns;
mod drop;
mod drop_duplicates;
mod drop_nulls;
mod dtypes;
mod dummies;
mod filter_with;
mod first;
mod get;
mod last;
mod list;
mod melt;
mod open;
mod query_df;
mod rename;
mod sample;
mod schema;
mod shape;
mod slice;
mod sql_context;
mod sql_expr;
mod summary;
mod take;
mod to_arrow;
mod to_avro;
mod to_csv;
mod to_df;
mod to_json_lines;
mod to_nu;
mod to_parquet;
mod with_column;
use crate::PolarsPlugin;
pub use self::open::OpenDataFrame;
pub use append::AppendDF;
pub use cast::CastDF;
pub use columns::ColumnsDF;
pub use drop::DropDF;
pub use drop_duplicates::DropDuplicates;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use dummies::Dummies;
pub use filter_with::FilterWith;
pub use first::FirstDF;
pub use get::GetDF;
pub use last::LastDF;
pub use list::ListDF;
pub use melt::MeltDF;
use nu_plugin::PluginCommand;
pub use query_df::QueryDf;
pub use rename::RenameDF;
pub use sample::SampleDF;
pub use schema::SchemaCmd;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sql_context::SQLContext;
pub use summary::Summary;
pub use take::TakeDF;
pub use to_arrow::ToArrow;
pub use to_avro::ToAvro;
pub use to_csv::ToCSV;
pub use to_df::ToDataFrame;
pub use to_json_lines::ToJsonLines;
pub use to_nu::ToNu;
pub use to_parquet::ToParquet;
pub use with_column::WithColumn;
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(AppendDF),
Box::new(CastDF),
Box::new(ColumnsDF),
Box::new(DataTypes),
Box::new(DropDF),
Box::new(DropDuplicates),
Box::new(DropNulls),
Box::new(Dummies),
Box::new(FilterWith),
Box::new(GetDF),
Box::new(OpenDataFrame),
Box::new(MeltDF),
Box::new(Summary),
Box::new(FirstDF),
Box::new(LastDF),
Box::new(ListDF),
Box::new(RenameDF),
Box::new(SampleDF),
Box::new(ShapeDF),
Box::new(SliceDF),
Box::new(SchemaCmd),
Box::new(TakeDF),
Box::new(ToNu),
Box::new(ToArrow),
Box::new(ToAvro),
Box::new(ToDataFrame),
Box::new(ToCSV),
Box::new(ToJsonLines),
Box::new(ToParquet),
Box::new(QueryDf),
Box::new(WithColumn),
]
}

@@ -0,0 +1,531 @@
use crate::{
dataframe::values::NuSchema,
values::{cache_and_to_value, NuLazyFrame},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::PluginCommand;
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use std::{fs::File, io::BufReader, path::PathBuf};
use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
};
use polars_io::{avro::AvroReader, prelude::ParallelStrategy};
#[derive(Clone)]
pub struct OpenDataFrame;
impl PluginCommand for OpenDataFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars open"
}
fn usage(&self) -> &str {
"Opens CSV, JSON, JSON lines, arrow, avro, or parquet file to create dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"file",
SyntaxShape::Filepath,
"file path to load values from",
)
.switch("lazy", "creates a lazy dataframe", Some('l'))
.named(
"type",
SyntaxShape::String,
"File type: csv, tsv, json, parquet, arrow, avro. If omitted, derive from file extension",
Some('t'),
)
.named(
"delimiter",
SyntaxShape::String,
"file delimiter character. CSV file",
Some('d'),
)
.switch(
"no-header",
"Indicates if file doesn't have header. CSV file",
None,
)
.named(
"infer-schema",
SyntaxShape::Number,
"Number of rows to infer the schema of the file. CSV file",
None,
)
.named(
"skip-rows",
SyntaxShape::Number,
"Number of rows to skip from file. CSV file",
None,
)
.named(
"columns",
SyntaxShape::List(Box::new(SyntaxShape::String)),
"Columns to be selected from csv file. CSV and Parquet file",
None,
)
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s')
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a file name and creates a dataframe",
example: "polars open test.csv",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
_input: nu_protocol::PipelineData,
) -> Result<nu_protocol::PipelineData, LabeledError> {
command(plugin, engine, call).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<PipelineData, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let type_option: Option<Spanned<String>> = call.get_flag("type")?;
let type_id = match &type_option {
Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)),
None => file.item.extension().map(|e| {
(
e.to_string_lossy().into_owned(),
"Invalid extension",
file.span,
)
}),
};
match type_id {
Some((e, msg, blamed)) => match e.as_str() {
"csv" | "tsv" => from_csv(plugin, engine, call),
"parquet" | "parq" => from_parquet(plugin, engine, call),
"ipc" | "arrow" => from_ipc(plugin, engine, call),
"json" => from_json(plugin, engine, call),
"jsonl" => from_jsonl(plugin, engine, call),
"avro" => from_avro(plugin, engine, call),
_ => Err(ShellError::FileNotFoundCustom {
msg: format!(
"{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json, jsonl, avro"
),
span: blamed,
}),
},
None => Err(ShellError::FileNotFoundCustom {
msg: "File without extension".into(),
span: file.span,
}),
}
.map(|value| PipelineData::Value(value, None))
}
fn from_parquet(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let args = ScanArgsParquet {
n_rows: None,
cache: true,
parallel: ParallelStrategy::Auto,
rechunk: false,
row_index: None,
low_memory: false,
cloud_options: None,
use_statistics: false,
hive_partitioning: false,
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = ParquetReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}
fn from_avro(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = AvroReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Avro reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
fn from_ipc(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let args = ScanArgsIpc {
n_rows: None,
cache: true,
rechunk: false,
row_index: None,
memmap: true,
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let r = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let reader = IpcReader::new(r);
let reader = match columns {
None => reader,
Some(columns) => reader.with_columns(Some(columns)),
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "IPC reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}
fn from_json(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let file: Spanned<PathBuf> = call.req(0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
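// Reads newline-delimited JSON. `--infer-schema` bounds how many lines are
// used for schema inference, and an explicit `--schema`, when given, is
// applied to the reader.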
fn from_jsonl(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let file: Spanned<PathBuf> = call.req(0)?;
let file = File::open(&file.item).map_err(|e| ShellError::GenericError {
error: "Error opening file".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?;
let buf_reader = BufReader::new(file);
let reader = JsonReader::new(buf_reader)
.with_json_format(JsonFormat::JsonLines)
.infer_schema_len(infer_schema);
let reader = match maybe_schema {
Some(schema) => reader.with_schema(schema.into()),
None => reader,
};
let df: NuDataFrame = reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Json lines reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
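// Reads a CSV file. The `--lazy` path uses `LazyCsvReader`, the eager path
// `CsvReader` with lossy UTF-8 decoding; both honor the delimiter, header,
// schema, infer-schema, and skip-rows flags, while `--columns` selection is
// only applied on the eager path.
// Illustrative invocation (assuming this backs the plugin's dataframe open
// command): `polars open data.csv --delimiter ';' --infer-schema 100`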
fn from_csv(
plugin: &PolarsPlugin,
engine: &nu_plugin::EngineInterface,
call: &nu_plugin::EvaluatedCall,
) -> Result<Value, ShellError> {
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?;
let infer_schema: Option<usize> = call.get_flag("infer-schema")?;
let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag("columns")?;
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
if call.has_flag("lazy")? {
let file: String = call.req(0)?;
let csv_reader = LazyCsvReader::new(file);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.with_infer_schema_length(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let df: NuLazyFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
} else {
let file: Spanned<PathBuf> = call.req(0)?;
let csv_reader = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(),
msg: e.to_string(),
span: Some(file.span),
help: None,
inner: vec![],
})?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let csv_reader = match columns {
None => csv_reader,
Some(columns) => csv_reader.with_columns(Some(columns)),
};
let df: NuDataFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "Parquet reader error".into(),
msg: format!("{e:?}"),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
cache_and_to_value(plugin, engine, call.head, df)
}
}


@ -0,0 +1,108 @@
use super::super::values::NuDataFrame;
use crate::dataframe::values::Column;
use crate::dataframe::{eager::SQLContext, values::NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
// attribution:
// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you.
// maybe we should just use the crate at some point but it's not published yet.
// https://github.com/pola-rs/polars/tree/master/polars-sql
#[derive(Clone)]
pub struct QueryDf;
impl PluginCommand for QueryDf {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars query"
}
fn usage(&self) -> &str {
"Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("sql", SyntaxShape::String, "sql query")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn search_terms(&self) -> Vec<&str> {
vec!["dataframe", "sql", "search"]
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Query dataframe using SQL",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars query 'select a from df'",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
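// Registers the input dataframe as the table `df` in a fresh `SQLContext`,
// executes the SQL query against it, and returns the result wrapped in a
// `NuLazyFrame`.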
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let sql_query: String = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut ctx = SQLContext::new();
ctx.register("df", &df.df);
let df_sql = ctx
.execute(&sql_query)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(!df.from_lazy, df_sql);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&QueryDf)
}
}


@ -0,0 +1,203 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::{utils::extract_strings, values::NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct RenameDF;
impl PluginCommand for RenameDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars rename"
}
fn usage(&self) -> &str {
"Rename a dataframe column."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"columns",
SyntaxShape::Any,
"Column(s) to be renamed. A string or list of strings",
)
.required(
"new names",
SyntaxShape::Any,
"New names for the selected column(s). A string or list of strings",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Renames a series",
example: "[5 6 7 8] | polars into-df | polars rename '0' new_name",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"new_name".to_string(),
vec![
Value::test_int(5),
Value::test_int(6),
Value::test_int(7),
Value::test_int(8),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars rename a a_new",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames two dataframe columns",
example:
"[[a b]; [1 2] [3 4]] | polars into-df | polars rename [a b] [a_new b_new]",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b_new".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value).map_err(LabeledError::from)? {
PolarsPluginObject::NuDataFrame(df) => {
command_eager(plugin, engine, call, df).map_err(LabeledError::from)
}
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from)
}
_ => Err(LabeledError::new(format!("Unsupported type: {value:?}"))
.with_label("Unsupported Type", call.head)),
}
}
}
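// Eager rename: renames each selected column in place on the underlying
// polars `DataFrame`, pairing the given column names with the new names.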
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let columns: Value = call.req(0)?;
let columns = extract_strings(columns)?;
let new_names: Value = call.req(1)?;
let new_names = extract_strings(new_names)?;
let mut polars_df = df.to_polars();
for (from, to) in columns.iter().zip(new_names.iter()) {
polars_df
.rename(from, to)
.map_err(|e| ShellError::GenericError {
error: "Error renaming".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
}
let df = NuDataFrame::new(false, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
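// Lazy rename: requires both name lists to have the same length, then defers
// the rename to the polars lazy frame.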
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let columns: Value = call.req(0)?;
let columns = extract_strings(columns)?;
let new_names: Value = call.req(1)?;
let new_names = extract_strings(new_names)?;
if columns.len() != new_names.len() {
let value: Value = call.req(1)?;
return Err(ShellError::IncompatibleParametersSingle {
msg: "New name list has different size to column list".into(),
span: value.span(),
});
}
let lazy = lazy.to_polars();
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&RenameDF)
}
}


@ -0,0 +1,138 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type,
};
use polars::prelude::NamedFrom;
use polars::series::Series;
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct SampleDF;
impl PluginCommand for SampleDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars sample"
}
fn usage(&self) -> &str {
"Create sample dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"n-rows",
SyntaxShape::Int,
"number of rows to be taken from dataframe",
Some('n'),
)
.named(
"fraction",
SyntaxShape::Number,
"fraction of dataframe to be taken",
Some('f'),
)
.named(
"seed",
SyntaxShape::Number,
"seed for the selection",
Some('s'),
)
.switch("replace", "sample with replace", Some('e'))
.switch("shuffle", "shuffle sample", Some('u'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sample rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars sample --n-rows 1",
result: None, // No expected value because sampling is random
},
Example {
description: "Shows sample row using fraction and replace",
example:
"[[a b]; [1 2] [3 4] [5 6]] | polars into-df | polars sample --fraction 0.5 --replace",
result: None, // No expected value because sampling is random
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
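// Exactly one of `--n-rows` or `--fraction` must be given; the sample is taken
// with polars `sample_n` / `sample_frac`, honoring `--replace`, `--shuffle`,
// and the optional `--seed`.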
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let rows: Option<Spanned<i64>> = call.get_flag("n-rows")?;
let fraction: Option<Spanned<f64>> = call.get_flag("fraction")?;
let seed: Option<u64> = call.get_flag::<i64>("seed")?.map(|val| val as u64);
let replace: bool = call.has_flag("replace")?;
let shuffle: bool = call.has_flag("shuffle")?;
let df = NuDataFrame::try_from_pipeline(plugin, input, call.head)?;
let df = match (rows, fraction) {
(Some(rows), None) => df
.as_ref()
.sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(rows.span),
help: None,
inner: vec![],
}),
(None, Some(frac)) => df
.as_ref()
.sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(frac.span),
help: None,
inner: vec![],
}),
(Some(_), Some(_)) => Err(ShellError::GenericError {
error: "Incompatible flags".into(),
msg: "Only one selection criterion allowed".into(),
span: Some(call.head),
help: None,
inner: vec![],
}),
(None, None) => Err(ShellError::GenericError {
error: "No selection".into(),
msg: "No selection criterion was found".into(),
span: Some(call.head),
help: Some("Perhaps you want to use the flag -n or -f".into()),
inner: vec![],
}),
};
let df = NuDataFrame::new(false, df?);
to_pipeline_data(plugin, engine, call.head, df)
}


@ -0,0 +1,133 @@
use crate::{values::PolarsPluginObject, PolarsPlugin};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct SchemaCmd;
impl PluginCommand for SchemaCmd {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars schema"
}
fn usage(&self) -> &str {
"Show schema for a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("datatype-list", "creates a lazy dataframe", Some('l'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe schema",
example: r#"[[a b]; [1 "foo"] [3 "bar"]] | polars into-df | polars schema"#,
result: Some(Value::record(
record! {
"a" => Value::string("i64", Span::test_data()),
"b" => Value::string("str", Span::test_data()),
},
Span::test_data(),
)),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
if call.has_flag("datatype-list")? {
Ok(PipelineData::Value(datatype_list(Span::unknown()), None))
} else {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
}
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
PolarsPluginObject::NuDataFrame(df) => {
let schema = df.schema();
let value: Value = schema.into();
Ok(PipelineData::Value(value, None))
}
PolarsPluginObject::NuLazyFrame(lazy) => {
let schema = lazy.schema()?;
let value: Value = schema.into();
Ok(PipelineData::Value(value, None))
}
_ => Err(ShellError::GenericError {
error: "Must be a dataframe or lazy dataframe".into(),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
}),
}
}
fn datatype_list(span: Span) -> Value {
let types: Vec<Value> = [
("null", ""),
("bool", ""),
("u8", ""),
("u16", ""),
("u32", ""),
("u64", ""),
("i8", ""),
("i16", ""),
("i32", ""),
("i64", ""),
("f32", ""),
("f64", ""),
("str", ""),
("binary", ""),
("date", ""),
("datetime<time_unit: (ms, us, ns) timezone (optional)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. Other Timezone examples: UTC, America/Los_Angeles."),
("duration<time_unit: (ms, us, ns)>", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."),
("time", ""),
("object", ""),
("unknown", ""),
("list<dtype>", ""),
]
.iter()
.map(|(dtype, note)| {
Value::record(record! {
"dtype" => Value::string(*dtype, span),
"note" => Value::string(*note, span),
},
span)
})
.collect();
Value::list(types, span)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SchemaCmd)
}
}


@ -0,0 +1,94 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ShapeDF;
impl PluginCommand for ShapeDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars shape"
}
fn usage(&self) -> &str {
"Shows column and row size for a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shows row and column shape",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars shape",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("rows".to_string(), vec![Value::test_int(2)]),
Column::new("columns".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let rows = Value::int(df.as_ref().height() as i64, call.head);
let cols = Value::int(df.as_ref().width() as i64, call.head);
let rows_col = Column::new("rows".to_string(), vec![rows]);
let cols_col = Column::new("columns".to_string(), vec![cols]);
let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ShapeDF)
}
}


@ -0,0 +1,95 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct SliceDF;
impl PluginCommand for SliceDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars slice"
}
fn usage(&self) -> &str {
"Creates new dataframe from a slice of rows."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("offset", SyntaxShape::Int, "start of slice")
.required("size", SyntaxShape::Int, "size of slice")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars slice 0 1",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let offset: i64 = call.req(0)?;
let size: usize = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let res = df.as_ref().slice(offset, size);
let res = NuDataFrame::new(false, res);
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SliceDF)
}
}


@ -0,0 +1,228 @@
use crate::dataframe::eager::sql_expr::parse_sql_expr;
use polars::error::{ErrString, PolarsError};
use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame};
use sqlparser::ast::{
Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor,
Value as SQLValue,
};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use std::collections::HashMap;
#[derive(Default)]
pub struct SQLContext {
table_map: HashMap<String, LazyFrame>,
dialect: GenericDialect,
}
impl SQLContext {
pub fn new() -> Self {
Self {
table_map: HashMap::new(),
dialect: GenericDialect,
}
}
pub fn register(&mut self, name: &str, df: &DataFrame) {
self.table_map.insert(name.to_owned(), df.clone().lazy());
}
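    // Translates a single SQL SELECT into a polars `LazyFrame`: resolve the
    // table, apply the WHERE filter, build the projection, then handle the
    // GROUP BY (by position number or expression) and aggregate if needed.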
fn execute_select(&self, select_stmt: &Select) -> Result<LazyFrame, PolarsError> {
        // Determine the dataframe involved in the query.
        // Implicit joins require more work in the query parser; explicit joins are preferred for now.
let tbl = select_stmt.from.first().ok_or_else(|| {
PolarsError::ComputeError(ErrString::from("No table found in select statement"))
})?;
let mut alias_map = HashMap::new();
let tbl_name = match &tbl.relation {
TableFactor::Table { name, alias, .. } => {
let tbl_name = name
.0
.first()
.ok_or_else(|| {
PolarsError::ComputeError(ErrString::from(
"No table found in select statement",
))
})?
.value
.to_string();
if self.table_map.contains_key(&tbl_name) {
if let Some(alias) = alias {
alias_map.insert(alias.name.value.clone(), tbl_name.to_owned());
};
tbl_name
} else {
return Err(PolarsError::ComputeError(
format!("Table name {tbl_name} was not found").into(),
));
}
}
            // Only bare tables, optionally with an alias, are supported for now
_ => return Err(PolarsError::ComputeError("Not implemented".into())),
};
let df = &self.table_map[&tbl_name];
let mut raw_projection_before_alias: HashMap<String, usize> = HashMap::new();
let mut contain_wildcard = false;
// Filter Expression
let df = match select_stmt.selection.as_ref() {
Some(expr) => {
let filter_expression = parse_sql_expr(expr)?;
df.clone().filter(filter_expression)
}
None => df.clone(),
};
// Column Projections
let projection = select_stmt
.projection
.iter()
.enumerate()
.map(|(i, select_item)| {
Ok(match select_item {
SelectItem::UnnamedExpr(expr) => {
let expr = parse_sql_expr(expr)?;
raw_projection_before_alias.insert(format!("{expr:?}"), i);
expr
}
SelectItem::ExprWithAlias { expr, alias } => {
let expr = parse_sql_expr(expr)?;
raw_projection_before_alias.insert(format!("{expr:?}"), i);
expr.alias(&alias.value)
}
SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => {
contain_wildcard = true;
col("*")
}
})
})
.collect::<Result<Vec<_>, PolarsError>>()?;
        // Check for a group-by clause.
        // Done after the projection since the group-by may refer to columns by position number.
let group_by = match &select_stmt.group_by {
GroupByExpr::All =>
Err(
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into())
)?,
GroupByExpr::Expressions(expressions) => expressions
}
.iter()
.map(
|e|match e {
SqlExpr::Value(SQLValue::Number(idx, _)) => {
let idx = match idx.parse::<usize>() {
Ok(0)| Err(_) => Err(
PolarsError::ComputeError(
format!("Group-By Error: Only positive number or expression are supported, got {idx}").into()
)),
Ok(idx) => Ok(idx)
}?;
Ok(projection[idx].clone())
}
SqlExpr::Value(_) => Err(
PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into())
),
_ => parse_sql_expr(e)
}
)
.collect::<Result<Vec<_>, PolarsError>>()?;
let df = if group_by.is_empty() {
df.select(projection)
} else {
            // Check the group-by and projection, accounting for differences between SQL and polars.
            // Return an error on a wildcard; it can't be processed here.
if contain_wildcard {
return Err(PolarsError::ComputeError(
"Group-By Error: Can't process wildcard in group-by".into(),
));
}
            // Polars group-by puts the group-by columns at the front by default.
            // Track the position of each group-by column and its position in the
            // final aggregate projection: check the schema for the group-by columns
            // and their projected columns, keeping the original index.
let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by
.iter()
.map(|expr| raw_projection_before_alias.get(&format!("{expr:?}")))
.enumerate()
.filter(|(_, proj_p)| proj_p.is_some())
.map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p)))
.unzip();
let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection
.iter()
.enumerate()
.filter(|(i, _)| !exclude_expr.contains(i))
.enumerate()
.map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len())))
.unzip();
let agg_df = df.group_by(group_by).agg(agg_projection);
let mut final_proj_pos = groupby_pos
.into_iter()
.chain(agg_proj_pos)
.collect::<Vec<_>>();
final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb));
let final_proj = final_proj_pos
.into_iter()
.map(|(_, shm_p)| {
col(agg_df
.clone()
// FIXME: had to do this mess to get get_index to work, not sure why. need help
.collect()
.unwrap_or_default()
.schema()
.get_at_index(shm_p)
.unwrap_or((&"".into(), &DataType::Null))
.0)
})
.collect::<Vec<_>>();
agg_df.select(final_proj)
};
Ok(df)
}
pub fn execute(&self, query: &str) -> Result<LazyFrame, PolarsError> {
let ast = Parser::parse_sql(&self.dialect, query)
.map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?;
if ast.len() != 1 {
Err(PolarsError::ComputeError(
"One and only one statement at a time please".into(),
))
} else {
let ast = ast
.first()
.ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?;
Ok(match ast {
Statement::Query(query) => {
let rs = match &*query.body {
SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?,
_ => {
return Err(PolarsError::ComputeError(
"INSERT, UPDATE is not supported for polars".into(),
))
}
};
match &query.limit {
Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => {
let nrow = nrow.parse().map_err(|err| {
PolarsError::ComputeError(
format!("Conversion Error: {err:?}").into(),
)
})?;
rs.limit(nrow)
}
None => rs,
_ => {
return Err(PolarsError::ComputeError(
"Only support number argument to LIMIT clause".into(),
))
}
}
}
_ => {
return Err(PolarsError::ComputeError(
format!("Statement type {ast:?} is not supported").into(),
))
}
})
}
}
}


@ -0,0 +1,200 @@
use polars::error::PolarsError;
use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit};
use sqlparser::ast::{
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType,
};
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
Ok(match data_type {
SQLDataType::Char(_)
| SQLDataType::Varchar(_)
| SQLDataType::Uuid
| SQLDataType::Clob(_)
| SQLDataType::Text
| SQLDataType::String(_) => DataType::String,
SQLDataType::Float(_) => DataType::Float32,
SQLDataType::Real => DataType::Float32,
SQLDataType::Double => DataType::Float64,
SQLDataType::TinyInt(_) => DataType::Int8,
SQLDataType::UnsignedTinyInt(_) => DataType::UInt8,
SQLDataType::SmallInt(_) => DataType::Int16,
SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
SQLDataType::Int(_) => DataType::Int32,
SQLDataType::UnsignedInt(_) => DataType::UInt32,
SQLDataType::BigInt(_) => DataType::Int64,
SQLDataType::UnsignedBigInt(_) => DataType::UInt64,
SQLDataType::Boolean => DataType::Boolean,
SQLDataType::Date => DataType::Date,
SQLDataType::Time(_, _) => DataType::Time,
SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None),
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
SQLDataType::Array(array_type_def) => match array_type_def {
ArrayElemTypeDef::AngleBracket(inner_type)
| ArrayElemTypeDef::SquareBracket(inner_type) => {
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
}
_ => {
return Err(PolarsError::ComputeError(
"SQL Datatype Array(None) was not supported in polars-sql yet!".into(),
))
}
},
_ => {
return Err(PolarsError::ComputeError(
format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn cast_(expr: Expr, data_type: &SQLDataType) -> Result<Expr> {
let polars_type = map_sql_polars_datatype(data_type)?;
Ok(expr.cast(polars_type))
}
fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result<Expr> {
Ok(match op {
SQLBinaryOperator::Plus => left + right,
SQLBinaryOperator::Minus => left - right,
SQLBinaryOperator::Multiply => left * right,
SQLBinaryOperator::Divide => left / right,
SQLBinaryOperator::Modulo => left % right,
SQLBinaryOperator::StringConcat => {
left.cast(DataType::String) + right.cast(DataType::String)
}
SQLBinaryOperator::Gt => left.gt(right),
SQLBinaryOperator::Lt => left.lt(right),
SQLBinaryOperator::GtEq => left.gt_eq(right),
SQLBinaryOperator::LtEq => left.lt_eq(right),
SQLBinaryOperator::Eq => left.eq(right),
SQLBinaryOperator::NotEq => left.eq(right).not(),
SQLBinaryOperator::And => left.and(right),
SQLBinaryOperator::Or => left.or(right),
SQLBinaryOperator::Xor => left.xor(right),
_ => {
return Err(PolarsError::ComputeError(
format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn literal_expr(value: &SqlValue) -> Result<Expr> {
Ok(match value {
SqlValue::Number(s, _) => {
// Check for existence of decimal separator dot
if s.contains('.') {
s.parse::<f64>().map(lit).map_err(|_| {
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
})
} else {
s.parse::<i64>().map(lit).map_err(|_| {
PolarsError::ComputeError(format!("Can't parse literal {s:?}").into())
})
}?
}
SqlValue::SingleQuotedString(s) => lit(s.clone()),
SqlValue::NationalStringLiteral(s) => lit(s.clone()),
SqlValue::HexStringLiteral(s) => lit(s.clone()),
SqlValue::DoubleQuotedString(s) => lit(s.clone()),
SqlValue::Boolean(b) => lit(*b),
SqlValue::Null => Expr::Literal(LiteralValue::Null),
_ => {
return Err(PolarsError::ComputeError(
format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(),
))
}
})
}
pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
Ok(match expr {
SqlExpr::Identifier(e) => col(&e.value),
SqlExpr::BinaryOp { left, op, right } => {
let left = parse_sql_expr(left)?;
let right = parse_sql_expr(right)?;
binary_op_(left, right, op)?
}
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
SqlExpr::Cast {
expr,
data_type,
format: _,
} => cast_(parse_sql_expr(expr)?, data_type)?,
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
SqlExpr::Value(value) => literal_expr(value)?,
_ => {
return Err(PolarsError::ComputeError(
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
))
}
})
}
fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result<Expr> {
Ok(match &window_type {
Some(wtype) => match wtype {
WindowType::WindowSpec(window_spec) => {
                // Handle a simple window specification: apply the PARTITION BY first.
let partition_by = window_spec
.partition_by
.iter()
.map(parse_sql_expr)
.collect::<Result<Vec<_>>>()?;
expr.over(partition_by)
                // ORDER BY and row ranges are not supported at the moment.
}
// TODO: make NamedWindow work
WindowType::NamedWindow(_named) => {
return Err(PolarsError::ComputeError(
format!("Expression: {expr:?} was not supported in polars-sql yet!").into(),
))
}
},
None => expr,
})
}
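// Maps a small set of SQL aggregate functions (sum, count, count distinct, and
// count(*)) onto polars expressions, applying any OVER window spec first.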
fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
use sqlparser::ast::{FunctionArg, FunctionArgExpr};
    // Function names usually have no namespace, so take the first segment of the name.
let function_name = sql_function.name.0[0].value.to_ascii_lowercase();
let args = sql_function
.args
.iter()
.map(|arg| match arg {
FunctionArg::Named { arg, .. } => arg,
FunctionArg::Unnamed(arg) => arg,
})
.collect::<Vec<_>>();
Ok(
match (
function_name.as_str(),
args.as_slice(),
sql_function.distinct,
) {
("sum", [FunctionArgExpr::Expr(expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum()
}
("count", [FunctionArgExpr::Expr(expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count()
}
("count", [FunctionArgExpr::Expr(expr)], true) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique()
}
// Special case for wildcard args to count function.
("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(),
_ => {
return Err(PolarsError::ComputeError(
format!(
"Function {function_name:?} with args {args:?} was not supported in polars-sql yet!"
)
.into(),
))
}
},
)
}


@ -0,0 +1,293 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::{
chunked_array::ChunkedArray,
prelude::{
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray,
QuantileInterpolOptions, Series, StringType,
},
};
#[derive(Clone)]
pub struct Summary;
impl PluginCommand for Summary {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars summary"
}
fn usage(&self) -> &str {
"For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.category(Category::Custom("dataframe".into()))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.named(
"quantiles",
SyntaxShape::Table(vec![]),
"provide optional quantiles",
Some('q'),
)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "list dataframe descriptives",
example: "[[a b]; [1 1] [1 1]] | polars into-df | polars summary",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"descriptor".to_string(),
vec![
Value::test_string("count"),
Value::test_string("sum"),
Value::test_string("mean"),
Value::test_string("median"),
Value::test_string("std"),
Value::test_string("min"),
Value::test_string("25%"),
Value::test_string("50%"),
Value::test_string("75%"),
Value::test_string("max"),
],
),
Column::new(
"a (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
Column::new(
"b (i64)".to_string(),
vec![
Value::test_float(2.0),
Value::test_float(2.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(0.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
Value::test_float(1.0),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
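// Builds the summary table: a `descriptor` column holding the statistic labels
// followed by one Float64 column per non-object input column containing count,
// sum, mean, median, std, min, the requested quantiles, and max.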
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let quantiles: Option<Vec<Value>> = call.get_flag("quantiles")?;
let quantiles = quantiles.map(|values| {
values
.iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if (&0.0..=&1.0).contains(&val) {
Ok(*val)
} else {
Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be between 0 and 1".into(),
span: Some(span),
help: None,
inner: vec![],
})
}
}
Value::Error { error, .. } => Err(*error.clone()),
_ => Err(ShellError::GenericError {
error: "Incorrect value for quantile".into(),
msg: "value should be a float".into(),
span: Some(span),
help: None,
inner: vec![],
}),
}
})
.collect::<Result<Vec<f64>, ShellError>>()
});
let quantiles = match quantiles {
Some(quantiles) => quantiles?,
None => vec![0.25, 0.50, 0.75],
};
let mut quantiles_labels = quantiles
.iter()
.map(|q| Some(format!("{}%", q * 100.0)))
.collect::<Vec<Option<String>>>();
let mut labels = vec![
Some("count".to_string()),
Some("sum".to_string()),
Some("mean".to_string()),
Some("median".to_string()),
Some("std".to_string()),
Some("min".to_string()),
];
labels.append(&mut quantiles_labels);
labels.push(Some("max".to_string()));
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let names = ChunkedArray::<StringType>::from_slice_options("descriptor", &labels).into_series();
let head = std::iter::once(names);
let tail = df
.as_ref()
.get_columns()
.iter()
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _)))
.map(|col| {
let count = col.len() as f64;
let sum = col.sum_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mean = match col.mean_as_series().get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
};
let median = match col.median_as_series() {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let std = match col.std_as_series(0) {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let min = col.min_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut quantiles = quantiles
.clone()
.into_iter()
.map(|q| {
col.quantile_as_series(q, QuantileInterpolOptions::default())
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
})
.collect::<Vec<Option<f64>>>();
let max = col.max_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
descriptors.append(&mut quantiles);
descriptors.push(max);
let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::from_slice_options(&name, &descriptors).into_series()
});
let res = head.chain(tail).collect::<Vec<Series>>();
let polars_df = DataFrame::new(res).map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Summary)
}
}


@ -0,0 +1,162 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::DataType;
use crate::{
dataframe::values::Column,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct TakeDF;
impl PluginCommand for TakeDF {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars take"
}
fn usage(&self) -> &str {
"Creates new dataframe using the given indices."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"indices",
SyntaxShape::Any,
"list of indices used to take data",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes selected rows from dataframe",
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | polars into-df);
let indices = ([0 2] | polars into-df);
$df | polars take $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(4), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
let indices = ([0 2] | polars into-df);
$series | polars take $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(4), Value::test_int(5)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
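// The indices are read as a dataframe, coerced to a single u32 series, and
// passed to polars `take`; series whose dtype is not u32/u64/i32/i64 are
// rejected.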
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let index_value: Value = call.req(0)?;
let index_span = index_value.span();
let index = NuDataFrame::try_from_value(plugin, &index_value)?.as_series(index_span)?;
let casted = match index.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => index
.cast(&DataType::UInt32)
.map_err(|e| ShellError::GenericError {
error: "Error casting index list".into(),
msg: e.to_string(),
span: Some(index_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "Series with incorrect type".into(),
span: Some(call.head),
help: Some("Consider using a Series with type int type".into()),
inner: vec![],
}),
}?;
let indices = casted.u32().map_err(|e| ShellError::GenericError {
error: "Error casting index list".into(),
msg: e.to_string(),
span: Some(index_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let polars_df = df
.to_polars()
.take(indices)
.map_err(|e| ShellError::GenericError {
error: "Error taking values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&TakeDF)
}
}


@ -0,0 +1,87 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{IpcWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToArrow;
impl PluginCommand for ToArrow {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-arrow"
}
fn usage(&self) -> &str {
"Saves dataframe to arrow file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to arrow file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-arrow test.arrow",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
IpcWriter::new(&mut file)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}


@ -0,0 +1,117 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars_io::avro::{AvroCompression, AvroWriter};
use polars_io::SerWriter;
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToAvro;
impl PluginCommand for ToAvro {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-avro"
}
fn usage(&self) -> &str {
"Saves dataframe to avro file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"compression",
SyntaxShape::String,
"use compression, supports deflate or snappy",
Some('c'),
)
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to avro file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-avro test.avro",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
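// Parses the optional `--compression` flag into an `AvroCompression`,
// accepting only "snappy" or "deflate".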
fn get_compression(call: &EvaluatedCall) -> Result<Option<AvroCompression>, ShellError> {
if let Some((compression, span)) = call
.get_flag_value("compression")
.map(|e| e.as_str().map(|s| (s.to_owned(), e.span())))
.transpose()?
{
match compression.as_ref() {
"snappy" => Ok(Some(AvroCompression::Snappy)),
"deflate" => Ok(Some(AvroCompression::Deflate)),
_ => Err(ShellError::IncorrectValue {
msg: "compression must be one of deflate or snappy".to_string(),
val_span: span,
call_span: span,
}),
}
} else {
Ok(None)
}
}
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let compression = get_compression(call)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
AvroWriter::new(file)
.with_compression(compression)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}


@ -0,0 +1,133 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{CsvWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToCSV;
impl PluginCommand for ToCSV {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-csv"
}
fn usage(&self) -> &str {
"Saves dataframe to CSV file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.named(
"delimiter",
SyntaxShape::String,
"file delimiter character",
Some('d'),
)
.switch("no-header", "Indicates if file doesn't have header", None)
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Saves dataframe to CSV file",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv",
result: None,
},
Example {
description: "Saves dataframe to CSV file using other delimiter",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv --delimiter '|'",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(|e| e.into())
}
}
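// Writes the dataframe with polars `CsvWriter`, honoring `--no-header` and an
// optional single-character `--delimiter`.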
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let delimiter: Option<Spanned<String>> = call.get_flag("delimiter")?;
let no_header: bool = call.has_flag("no-header")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let writer = CsvWriter::new(&mut file);
let writer = if no_header {
writer.include_header(false)
} else {
writer.include_header(true)
};
let mut writer = match delimiter {
None => writer,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one char".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
writer.with_separator(delimiter)
}
}
};
writer
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error writing to file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}


@ -0,0 +1,201 @@
use crate::{
dataframe::values::NuSchema,
values::{to_pipeline_data, Column, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::{
prelude::{AnyValue, DataType, Field, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct ToDataFrame;
impl PluginCommand for ToDataFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-df"
}
fn usage(&self) -> &str {
"Converts a list, table or record into a dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
),
Column::new(
"1".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"2".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list and creates a dataframe",
example: "[a b c] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Takes a list of booleans and creates a dataframe",
example: "[true true false] | polars into-df",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema",
example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| polars into-df -s {a: u8, b: {a: list<u64>}, c: list<str>}",
result: Some(
NuDataFrame::try_from_series_vec(vec![
Series::new("a", &[1u8]),
{
let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]);
let vals = vec![AnyValue::StructOwned(
Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1];
Series::from_any_values_and_dtype("b", &vals, &dtype, false)
.expect("Struct series should not fail")
},
{
let dtype = DataType::List(Box::new(DataType::String));
let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))];
Series::from_any_values_and_dtype("c", &vals, &dtype, false)
.expect("List series should not fail")
}
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Convert to a dataframe and provide a schema that adds a new column",
example: r#"[[a b]; [1 "foo"] [2 "bar"]] | polars into-df -s {a: u8, b:str, c:i64} | polars fill-null 3"#,
result: Some(NuDataFrame::try_from_series_vec(vec![
Series::new("a", [1u8, 2]),
Series::new("b", ["foo", "bar"]),
Series::new("c", [3i64, 3]),
], Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
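// An optional schema passed via `--schema`/`-s` is converted into a NuSchema and
// applied while the dataframe is built from the piped-in values.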
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema.clone())?;
to_pipeline_data(plugin, engine, call.head, df).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
use nu_protocol::ShellError;
#[test]
fn test_into_df() -> Result<(), ShellError> {
test_polars_plugin_command(&ToDataFrame)
}
}

View File

@ -0,0 +1,89 @@
use std::{fs::File, io::BufWriter, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::{JsonWriter, SerWriter};
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToJsonLines;
impl PluginCommand for ToJsonLines {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-jsonl"
}
fn usage(&self) -> &str {
"Saves dataframe to a JSON lines file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to JSON lines file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-jsonl test.jsonl",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
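// Writes the dataframe to the given path as JSON lines using polars' JsonWriter,
// returning a one-element list describing the saved file.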
fn command(
plugin: &PolarsPlugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let buf_writer = BufWriter::new(file);
JsonWriter::new(buf_writer)
.finish(&mut df.to_polars())
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,144 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use crate::{dataframe::values::NuExpression, values::CustomValueSupport, PolarsPlugin};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToNu;
impl PluginCommand for ToNu {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-nu"
}
fn usage(&self) -> &str {
"Converts a dataframe or an expression into into nushell value for access and exploration."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"rows",
SyntaxShape::Number,
"number of rows to be shown",
Some('n'),
)
.switch("tail", "shows tail rows", Some('t'))
.input_output_types(vec![
(Type::Custom("expression".into()), Type::Any),
(Type::Custom("dataframe".into()), Type::Table(vec![])),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
let rec_1 = Value::test_record(record! {
"index" => Value::test_int(0),
"a" => Value::test_int(1),
"b" => Value::test_int(2),
});
let rec_2 = Value::test_record(record! {
"index" => Value::test_int(1),
"a" => Value::test_int(3),
"b" => Value::test_int(4),
});
let rec_3 = Value::test_record(record! {
"index" => Value::test_int(2),
"a" => Value::test_int(3),
"b" => Value::test_int(4),
});
vec![
Example {
description: "Shows head rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars into-nu",
result: Some(Value::list(vec![rec_1, rec_2], Span::test_data())),
},
Example {
description: "Shows tail rows from dataframe",
example:
"[[a b]; [1 2] [5 6] [3 4]] | polars into-df | polars into-nu --tail --rows 1",
result: Some(Value::list(vec![rec_3], Span::test_data())),
},
Example {
description: "Convert a col expression into a nushell value",
example: "polars col a | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
})),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) {
dataframe_command(plugin, call, value)
} else {
expression_command(plugin, call, value)
}
.map_err(|e| e.into())
}
}
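// Renders a dataframe as a list of nushell records, honoring the `--rows` and
// `--tail` flags; without `--rows` the whole frame is returned.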
fn dataframe_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let rows: Option<usize> = call.get_flag("rows")?;
let tail: bool = call.has_flag("tail")?;
let df = NuDataFrame::try_from_value(plugin, &input)?;
let values = if tail {
df.tail(rows, call.head)?
} else {
// if rows is specified, return those rows, otherwise return everything
if rows.is_some() {
df.head(rows, call.head)?
} else {
df.head(Some(df.height()), call.head)?
}
};
let value = Value::list(values, call.head);
Ok(PipelineData::Value(value, None))
}
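// Renders an expression as its nushell value representation.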
fn expression_command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &input)?;
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ToNu)
}
}

View File

@ -0,0 +1,87 @@
use std::{fs::File, path::PathBuf};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
Type, Value,
};
use polars::prelude::ParquetWriter;
use crate::PolarsPlugin;
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct ToParquet;
impl PluginCommand for ToParquet {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars to-parquet"
}
fn usage(&self) -> &str {
"Saves dataframe to parquet file."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("file", SyntaxShape::Filepath, "file path to save dataframe")
.input_output_type(Type::Custom("dataframe".into()), Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to parquet file",
example: "[[a b]; [1 2] [3 4]] | polars into-df | polars to-parquet test.parquet",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
_engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, call, input).map_err(LabeledError::from)
}
}
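// Writes the dataframe to the given path with polars' ParquetWriter and returns a
// one-element list describing the saved file.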
fn command(
plugin: &PolarsPlugin,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let file_name: Spanned<PathBuf> = call.req(0)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError {
error: "Error with file name".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let mut polars_df = df.to_polars();
ParquetWriter::new(file)
.finish(&mut polars_df)
.map_err(|e| ShellError::GenericError {
error: "Error saving file".into(),
msg: e.to_string(),
span: Some(file_name.span),
help: None,
inner: vec![],
})?;
let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span);
Ok(PipelineData::Value(
Value::list(vec![file_value], call.head),
None,
))
}

View File

@ -0,0 +1,195 @@
use super::super::values::{Column, NuDataFrame};
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct WithColumn;
impl PluginCommand for WithColumn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars with-column"
}
fn usage(&self) -> &str {
"Adds a series to the dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named("name", SyntaxShape::String, "new column name", Some('n'))
.rest(
"series or expressions",
SyntaxShape::Any,
"series to be added or expressions used to define the new columns",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-df
| polars with-column ([5 6] | polars into-df) --name c"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(5), Value::test_int(6)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| polars into-lazy
| polars with-column [
((polars col a) * 2 | polars as "c")
((polars col a) * 3 | polars as "d")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
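// Dispatch on the concrete input: eager dataframes and lazy frames take different paths.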
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(ShellError::CantConvert {
to_type: "lazy or eager dataframe".into(),
from_type: value.get_type().to_string(),
span: value.span(),
help: None,
}),
}
.map_err(LabeledError::from)
}
}
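// Eager path: if the argument is an expression, route through a lazy frame and collect;
// otherwise append the argument as a series, renamed via `--name` when given.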
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let new_column: Value = call.req(0)?;
let column_span = new_column.span();
if NuExpression::can_downcast(&new_column) {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy = NuLazyFrame::new(true, df.lazy().to_polars().with_columns(&expressions));
let df = lazy.collect(call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
} else {
let mut other = NuDataFrame::try_from_value(plugin, &new_column)?.as_series(column_span)?;
let name = match call.get_flag::<String>("name")? {
Some(name) => name,
None => other.name().to_string(),
};
let series = other.rename(&name).clone();
let mut polars_df = df.to_polars();
polars_df
.with_column(series)
.map_err(|e| ShellError::GenericError {
error: "Error adding column to dataframe".into(),
msg: e.to_string(),
span: Some(column_span),
help: None,
inner: vec![],
})?;
let df = NuDataFrame::new(df.from_lazy, polars_df);
to_pipeline_data(plugin, engine, call.head, df)
}
}
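// Lazy path: apply the rest arguments as expressions through `with_columns` without collecting.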
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let lazy: NuLazyFrame = lazy.to_polars().with_columns(&expressions).into();
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&WithColumn)
}
}

View File

@ -0,0 +1,88 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::NuExpression;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprAlias;
impl PluginCommand for ExprAlias {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as"
}
fn usage(&self) -> &str {
"Creates an alias expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"Alias name",
SyntaxShape::String,
"Alias name for the expression",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates and alias expression",
example: "polars col a | polars as new_a | polars into-nu",
result: {
let record = Value::test_record(record! {
"expr" => Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
}),
"alias" => Value::test_string("new_a"),
});
Some(record)
},
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["aka", "abbr", "otherwise"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let alias: String = call.req(0)?;
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
let expr: NuExpression = expr.to_polars().alias(alias.as_str()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprAlias)
}
}

View File

@ -0,0 +1,79 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::arg_where;
#[derive(Clone)]
pub struct ExprArgWhere;
impl PluginCommand for ExprArgWhere {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-where"
}
fn usage(&self) -> &str {
"Creates an expression that returns the arguments where expression is true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("column name", SyntaxShape::Any, "Expression to evaluate")
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Return a dataframe where the value match the expression",
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
$df | polars select (polars arg-where ((polars col b) >= 2) | polars as b_arg)",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "match", "if"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = arg_where(expr.to_polars()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprArgWhere)
}
}

View File

@ -0,0 +1,70 @@
use crate::{dataframe::values::NuExpression, values::to_pipeline_data, PolarsPlugin};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
use polars::prelude::col;
#[derive(Clone)]
pub struct ExprCol;
impl PluginCommand for ExprCol {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars col"
}
fn usage(&self) -> &str {
"Creates a named column expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"column name",
SyntaxShape::String,
"Name of column to be used",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a named column expression and converts it to a nu object",
example: "polars col a | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("column"),
"value" => Value::test_string("a"),
})),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["create"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let name: String = call.req(0)?;
let expr: NuExpression = col(name.as_str()).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprCol)
}
}

View File

@ -0,0 +1,109 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::concat_str;
#[derive(Clone)]
pub struct ExprConcatStr;
impl PluginCommand for ExprConcatStr {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars concat-str"
}
fn usage(&self) -> &str {
"Creates a concat string expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"separator",
SyntaxShape::String,
"Separator used during the concatenation",
)
.required(
"concat expressions",
SyntaxShape::List(Box::new(SyntaxShape::Any)),
"Expression(s) that define the string concatenation",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a concat string expression",
example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | polars into-df);
$df | polars with-column ((polars concat-str "-" [(polars col a) (polars col b) ((polars col c) * 2)]) | polars as concat)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("three")],
),
Column::new(
"b".to_string(),
vec![Value::test_string("two"), Value::test_string("four")],
),
Column::new(
"c".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"concat".to_string(),
vec![
Value::test_string("one-two-2"),
Value::test_string("three-four-4"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["join", "connect", "update"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let separator: String = call.req(0)?;
let value: Value = call.req(1)?;
let expressions = NuExpression::extract_exprs(plugin, value)?;
let expr: NuExpression = concat_str(expressions, &separator, false).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprConcatStr)
}
}

View File

@ -0,0 +1,165 @@
use super::super::values::NuExpression;
use crate::{
dataframe::values::{Column, NuDataFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use chrono::{DateTime, FixedOffset};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::{
datatypes::{DataType, TimeUnit},
prelude::NamedFrom,
series::Series,
};
#[derive(Clone)]
pub struct ExprDatePart;
impl PluginCommand for ExprDatePart {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars datepart"
}
fn usage(&self) -> &str {
"Creates an expression for capturing the specified datepart in a column."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"Datepart name",
SyntaxShape::String,
"Part of the date to capture. Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
let dt = DateTime::<FixedOffset>::parse_from_str(
"2021-12-30T01:02:03.123456789 +0000",
"%Y-%m-%dT%H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test");
vec![
Example {
description: "Creates an expression to capture the year date part",
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates an expression to capture multiple date parts",
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
(polars col datetime | polars datepart month | polars as datetime_month ),
(polars col datetime | polars datepart day | polars as datetime_day ),
(polars col datetime | polars datepart hour | polars as datetime_hour ),
(polars col datetime | polars datepart minute | polars as datetime_minute ),
(polars col datetime | polars datepart second | polars as datetime_second ),
(polars col datetime | polars datepart nanosecond | polars as datetime_ns ) ]"#,
result: Some(
NuDataFrame::try_from_series_vec(
vec![
Series::new("datetime", &[dt.timestamp_nanos_opt()])
.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))
.expect("Error casting to datetime type"),
Series::new("datetime_year", &[2021_i64]), // i32 was coerced to i64
Series::new("datetime_month", &[12_i8]),
Series::new("datetime_day", &[30_i8]),
Series::new("datetime_hour", &[1_i8]),
Series::new("datetime_minute", &[2_i8]),
Series::new("datetime_second", &[3_i8]),
Series::new("datetime_ns", &[123456789_i64]), // i32 was coerced to i64
],
Span::test_data(),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec![
"year",
"month",
"week",
"weekday",
"quarter",
"day",
"hour",
"minute",
"second",
"millisecond",
"microsecond",
"nanosecond",
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let part: Spanned<String> = call.req(0)?;
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
let expr_dt = expr.to_polars().dt();
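// Map the requested datepart name onto the corresponding polars `dt` accessor.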
let expr: NuExpression = match part.item.as_str() {
"year" => expr_dt.year(),
"quarter" => expr_dt.quarter(),
"month" => expr_dt.month(),
"week" => expr_dt.week(),
"day" => expr_dt.day(),
"hour" => expr_dt.hour(),
"minute" => expr_dt.minute(),
"second" => expr_dt.second(),
"millisecond" => expr_dt.millisecond(),
"microsecond" => expr_dt.microsecond(),
"nanosecond" => expr_dt.nanosecond(),
_ => {
return Err(LabeledError::from(ShellError::UnsupportedInput {
msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item),
input: "value originates from here".to_string(),
msg_span: call.head,
input_span: part.span,
}))
}
}.into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ExprDatePart)
}
}

View File

@ -0,0 +1,645 @@
/// Definition of multiple Expression commands using a macro rule
/// All of these expressions have an identical body and only require
/// to have a change in the name, description and expression function
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
// The structs defined in this file are structs that form part of other commands
// since they share a similar name
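// A minimal sketch of what an invocation generates (illustrative only; `ExprCount`
// is defined further down in this file):
//
//   expr_command!(
//       ExprCount,
//       "polars count",
//       "Creates a count expression.",
//       vec![Example { description: "", example: "", result: None }],
//       count,
//       test_count
//   );
//
// expands to a `PluginCommand` whose `run` converts the piped-in NuExpression to a
// polars expression, calls `.count()` on it, and returns the result as pipeline data.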
macro_rules! expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
.map_err(LabeledError::from)?;
let expr: NuExpression = expr.to_polars().$func().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)
.map_err(LabeledError::from)?;
let expr: NuExpression = expr.to_polars().$func($ddof).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// The structs defined in this file are structs that form part of other commands
// since they share a similar name
macro_rules! lazy_expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
} else {
let expr =
NuExpression::try_from_value(plugin, &value).map_err(LabeledError::from)?;
let expr: NuExpression = expr.to_polars().$func().into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func($ddof)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
} else {
let expr = NuExpression::try_from_value(plugin, &value)?;
let expr: NuExpression = expr.to_polars().$func($ddof).into();
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// ExprList command
// Expands to a command definition for a list expression
expr_command!(
ExprList,
"polars implode",
"Aggregates a group to a Series.",
vec![Example {
description: "",
example: "",
result: None,
}],
implode,
test_implode
);
// ExprAggGroups command
// Expands to a command definition for a agg groups expression
expr_command!(
ExprAggGroups,
"polars agg-groups",
"Creates an agg_groups expression.",
vec![Example {
description: "",
example: "",
result: None,
}],
agg_groups,
test_groups
);
// ExprCount command
// Expands to a command definition for a count expression
expr_command!(
ExprCount,
"polars count",
"Creates a count expression.",
vec![Example {
description: "",
example: "",
result: None,
}],
count,
test_count
);
// ExprNot command
// Expands to a command definition for a not expression
expr_command!(
ExprNot,
"polars expr-not",
"Creates a not expression.",
vec![Example {
description: "Creates a not expression",
example: "(polars col a) > 2) | polars expr-not",
result: None,
},],
not,
test_not
);
// ExprMax command
// Expands to a command definition for max aggregation
lazy_expr_command!(
ExprMax,
"polars max",
"Creates a max expression or aggregates columns to their max value.",
vec![
Example {
description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars max",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Max aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars max)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
max,
test_max
);
// ExprMin command
// Expands to a command definition for min aggregation
lazy_expr_command!(
ExprMin,
"polars min",
"Creates a min expression or aggregates columns to their min value.",
vec![
Example {
description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars min",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Min aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars min)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
min,
test_min
);
// ExprSum command
// Expands to a command definition for sum aggregation
lazy_expr_command!(
ExprSum,
"polars sum",
"Creates a sum expression for an aggregation or aggregates columns to their sum value.",
vec![
Example {
description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sum",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sum aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars sum)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(6), Value::test_int(1)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
sum,
test_sum
);
// ExprMean command
// Expands to a command definition for mean aggregation
lazy_expr_command!(
ExprMean,
"polars mean",
"Creates a mean expression for an aggregation or aggregates columns to their mean value.",
vec![
Example {
description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars mean",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Mean aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars mean)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
mean,
test_mean
);
// ExprStd command
// Expands to a command definition for std aggregation
lazy_expr_command!(
ExprStd,
"polars std",
"Creates a std expression for an aggregation of std value from columns in a dataframe.",
vec![
Example {
description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars std",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Std aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars std)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
std,
test_std,
1
);
// ExprVar command
// Expands to a command definition for var aggregation
lazy_expr_command!(
ExprVar,
"polars var",
"Create a var expression for an aggregation.",
vec![
Example {
description:
"Var value from columns in a dataframe or aggregates columns to their var value",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars var",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Var aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars var)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
var,
test_var,
1
);

View File

@ -0,0 +1,200 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{is_in, lit, DataType, IntoSeries};
#[derive(Clone)]
pub struct ExprIsIn;
impl PluginCommand for ExprIsIn {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-in"
}
fn usage(&self) -> &str {
"Creates an is-in expression or checks to see if the elements are contained in the right series"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("list", SyntaxShape::Any, "List to check if values are in")
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Creates a is-in expression",
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
$df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_string("one"),
Value::test_string("two"),
Value::test_string("three"),
],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
),
Column::new(
"a_in".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks if elements from a series are contained in right series",
example: r#"let other = ([1 3 6] | polars into-df);
[5 6 6 6 8 8 8] | polars into-df | polars is-in $other"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["check", "contained", "is-contain", "match"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command_df(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
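// Expression path: build a series from the provided list and wrap `is_in` against a
// literal of that series; mixed-type lists are rejected.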
fn command_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let list: Vec<Value> = call.req(0)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?;
let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) {
return Err(ShellError::IncompatibleParametersSingle {
msg: "Cannot use a mixed list as argument".into(),
span: call.head,
});
}
let expr: NuExpression = expr.to_polars().is_in(lit(list)).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
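// Dataframe path: treat the input as a series and check membership against the series
// given as the first argument, returning a boolean `is_in` column.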
fn command_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let other_value: Value = call.req(0)?;
let other_span = other_value.span();
let other_df = NuDataFrame::try_from_value(plugin, &other_value)?;
let other = other_df.as_series(other_span)?;
let series = df.as_series(call.head)?;
let mut res = is_in(&series, &other)
.map_err(|e| ShellError::GenericError {
error: "Error finding in other".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_in");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ExprIsIn)
}
}

View File

@ -0,0 +1,73 @@
use crate::{
dataframe::values::NuExpression,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprLit;
impl PluginCommand for ExprLit {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars lit"
}
fn usage(&self) -> &str {
"Creates a literal expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"literal",
SyntaxShape::Any,
"literal to construct the expression",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Created a literal expression and converts it to a nu object",
example: "polars lit 2 | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("literal"),
"value" => Value::test_string("2"),
})),
}]
}
fn search_terms(&self) -> Vec<&str> {
vec!["string", "literal", "expression"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
_input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let literal: Value = call.req(0)?;
let expr = NuExpression::try_from_value(plugin, &literal)?;
to_pipeline_data(plugin, engine, call.head, expr).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprLit)
}
}

View File

@ -0,0 +1,48 @@
mod alias;
mod arg_where;
mod col;
mod concat_str;
mod datepart;
mod expressions_macro;
mod is_in;
mod lit;
mod otherwise;
mod when;
use nu_plugin::PluginCommand;
pub use crate::dataframe::expressions::alias::ExprAlias;
pub use crate::dataframe::expressions::arg_where::ExprArgWhere;
pub use crate::dataframe::expressions::col::ExprCol;
pub use crate::dataframe::expressions::concat_str::ExprConcatStr;
pub use crate::dataframe::expressions::datepart::ExprDatePart;
pub use crate::dataframe::expressions::expressions_macro::*;
pub use crate::dataframe::expressions::is_in::ExprIsIn;
pub use crate::dataframe::expressions::lit::ExprLit;
pub use crate::dataframe::expressions::otherwise::ExprOtherwise;
pub use crate::dataframe::expressions::when::ExprWhen;
use crate::PolarsPlugin;
pub(crate) fn expr_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(ExprAlias),
Box::new(ExprArgWhere),
Box::new(ExprAggGroups),
Box::new(ExprCol),
Box::new(ExprConcatStr),
Box::new(ExprCount),
Box::new(ExprDatePart),
Box::new(ExprIsIn),
Box::new(ExprList),
Box::new(ExprLit),
Box::new(ExprNot),
Box::new(ExprMax),
Box::new(ExprMin),
Box::new(ExprOtherwise),
Box::new(ExprSum),
Box::new(ExprMean),
Box::new(ExprStd),
Box::new(ExprVar),
Box::new(ExprWhen),
]
}

View File

@ -0,0 +1,122 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen, NuWhenType},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct ExprOtherwise;
impl PluginCommand for ExprOtherwise {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars otherwise"
}
fn usage(&self) -> &str {
"Completes a when expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"otherwise expression",
SyntaxShape::Any,
"expression to apply when no when predicate matches",
)
.input_output_type(Type::Any, Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4 | polars otherwise 5",
result: None,
},
Example {
description: "Create a when conditions",
example:
"polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6 | polars otherwise 0",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy
| polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
)
| polars with-column (
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "else"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let otherwise_predicate: Value = call.req(0)?;
let otherwise_predicate = NuExpression::try_from_value(plugin, &otherwise_predicate)?;
let value = input.into_value(call.head);
let complete: NuExpression = match NuWhen::try_from_value(plugin, &value)?.when_type {
NuWhenType::Then(then) => then.otherwise(otherwise_predicate.to_polars()).into(),
NuWhenType::ChainedThen(chained_when) => chained_when
.otherwise(otherwise_predicate.to_polars())
.into(),
};
to_pipeline_data(plugin, engine, call.head, complete).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprOtherwise)
}
}

View File

@ -0,0 +1,144 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen},
values::{to_pipeline_data, CustomValueSupport, NuWhenType},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::when;
#[derive(Clone)]
pub struct ExprWhen;
impl PluginCommand for ExprWhen {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars when"
}
fn usage(&self) -> &str {
"Creates and modifies a when expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"when expression",
SyntaxShape::Any,
"when expression used for matching",
)
.required(
"then expression",
SyntaxShape::Any,
"expression that will be applied when predicate is true",
)
.input_output_type(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4",
result: None,
},
Example {
description: "Create a when conditions",
example: "polars when ((polars col a) > 2) 4 | polars when ((polars col a) < 0) 6",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| polars into-lazy
| polars with-column (
polars when ((polars col a) > 2) 4 | polars otherwise 5 | polars as c
)
| polars with-column (
polars when ((polars col a) > 5) 10 | polars when ((polars col a) < 2) 6 | polars otherwise 0 | polars as d
)
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn search_terms(&self) -> Vec<&str> {
vec!["condition", "match", "if", "else"]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let when_predicate: Value = call.req(0)?;
let when_predicate = NuExpression::try_from_value(plugin, &when_predicate)?;
let then_predicate: Value = call.req(1)?;
let then_predicate = NuExpression::try_from_value(plugin, &then_predicate)?;
let value = input.into_value(call.head);
let when_then: NuWhen = match value {
Value::Nothing { .. } => when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
v => match NuWhen::try_from_value(plugin, &v)?.when_type {
NuWhenType::Then(when_then) => when_then
.when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
NuWhenType::ChainedThen(when_then_then) => when_then_then
.when(when_predicate.to_polars())
.then(then_predicate.to_polars())
.into(),
},
};
to_pipeline_data(plugin, engine, call.head, when_then).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&ExprWhen)
}
}

View File

@ -0,0 +1,210 @@
use crate::{
dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, Column, CustomValueSupport, NuDataFrame},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::{datatypes::DataType, prelude::Expr};
#[derive(Clone)]
pub struct LazyAggregate;
impl PluginCommand for LazyAggregate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars agg"
}
fn usage(&self) -> &str {
"Performs a series of aggregations from a group-by."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the aggregations to be applied",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(plugin, input, call.head)?;
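// Reject aggregations over Object-typed columns up front, since polars cannot aggregate them.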
for expr in expressions.iter() {
if let Some(name) = get_col_name(expr) {
let dtype = group_by.schema.schema.get(name.as_str());
if matches!(dtype, Some(DataType::Object(..))) {
return Err(ShellError::GenericError {
error: "Object type column not supported for aggregation".into(),
msg: format!("Column '{name}' is type Object"),
span: Some(call.head),
help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()),
inner: vec![],
}).map_err(|e| e.into());
}
}
}
let polars = group_by.to_polars();
let lazy = NuLazyFrame::new(false, polars.agg(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
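/// Walks an expression tree and returns the underlying column name, if any, so the
/// aggregation above can look up that column's dtype in the group-by schema.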
fn get_col_name(expr: &Expr) -> Option<String> {
match expr {
Expr::Column(column) => Some(column.to_string()),
Expr::Agg(agg) => match agg {
polars::prelude::AggExpr::Min { input: e, .. }
| polars::prelude::AggExpr::Max { input: e, .. }
| polars::prelude::AggExpr::Median(e)
| polars::prelude::AggExpr::NUnique(e)
| polars::prelude::AggExpr::First(e)
| polars::prelude::AggExpr::Last(e)
| polars::prelude::AggExpr::Mean(e)
| polars::prelude::AggExpr::Implode(e)
| polars::prelude::AggExpr::Count(e, _)
| polars::prelude::AggExpr::Sum(e)
| polars::prelude::AggExpr::AggGroups(e)
| polars::prelude::AggExpr::Std(e, _)
| polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()),
polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()),
},
Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Gather { expr, .. }
| Expr::SortBy { expr, .. }
| Expr::Exclude(expr, _)
| Expr::Alias(expr, _)
| Expr::KeepName(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()),
Expr::Ternary { .. }
| Expr::AnonymousFunction { .. }
| Expr::Function { .. }
| Expr::Columns(_)
| Expr::DtypeColumn(_)
| Expr::Literal(_)
| Expr::BinaryExpr { .. }
| Expr::Window { .. }
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Len
| Expr::Nth(_)
| Expr::SubPlan(_, _)
| Expr::Selector(_) => None,
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyAggregate)
}
}


@ -0,0 +1,98 @@
use crate::{
dataframe::values::{Column, NuDataFrame},
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
Cacheable, PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
#[derive(Clone)]
pub struct LazyCollect;
impl PluginCommand for LazyCollect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars collect"
}
fn usage(&self) -> &str {
"Collect lazy dataframe into eager dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | polars into-lazy | polars collect",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
let eager = lazy.collect(call.head)?;
Ok(PipelineData::Value(
eager.cache(plugin, engine)?.into_value(call.head),
None,
))
}
PolarsPluginObject::NuDataFrame(df) => {
// just return the dataframe, add to cache again to be safe
Ok(PipelineData::Value(
df.cache(plugin, engine)?.into_value(call.head),
None,
))
}
_ => Err(cant_convert_err(
&value,
&[PolarsPluginType::NuLazyFrame, PolarsPluginType::NuDataFrame],
)),
}
.map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyCollect)
}
}


@ -0,0 +1,175 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, PolarsPluginObject};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyExplode;
impl PluginCommand for LazyExplode {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars explode"
}
fn usage(&self) -> &str {
"Explodes a dataframe or creates a explode expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to explode, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Explode the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars explode hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and explode the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars explode)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
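/// Shared implementation for `polars explode` and `polars flatten`: dispatches on the
/// input type (dataframe, lazy frame, or expression) and delegates accordingly.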
pub(crate) fn explode(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
let lazy = df.lazy();
explode_lazy(plugin, engine, call, lazy)
}
PolarsPluginObject::NuLazyFrame(lazy) => explode_lazy(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => explode_expr(plugin, engine, call, expr),
_ => Err(ShellError::CantConvert {
to_type: "dataframe or expression".into(),
from_type: value.get_type().to_string(),
span: call.head,
help: None,
}),
}
}
pub(crate) fn explode_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let columns = call
.positional
.iter()
.map(|e| e.as_str().map(|s| s.to_string()))
.collect::<Result<Vec<String>, ShellError>>()?;
let exploded = lazy
.to_polars()
.explode(columns.iter().map(AsRef::as_ref).collect::<Vec<&str>>());
let lazy = NuLazyFrame::from(exploded);
to_pipeline_data(plugin, engine, call.head, lazy)
}
pub(crate) fn explode_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let expr: NuExpression = expr.to_polars().explode().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyExplode)
}
}


@ -0,0 +1,100 @@
use crate::dataframe::values::{Column, NuDataFrame};
use crate::values::{to_pipeline_data, CustomValueSupport, NuLazyFrame};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFetch;
impl PluginCommand for LazyFetch {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fetch"
}
fn usage(&self) -> &str {
"Collects the lazyframe to the selected rows."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"rows",
SyntaxShape::Int,
"number of rows to be fetched from lazyframe",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars fetch 2",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let rows: i64 = call.req(0)?;
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let mut eager: NuDataFrame = lazy
.to_polars()
.fetch(rows as usize)
.map_err(|e| ShellError::GenericError {
error: "Error fetching rows".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
// mark this as not from lazy so it doesn't get converted back to a lazy frame
eager.from_lazy = false;
to_pipeline_data(plugin, engine, call.head, eager).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFetch)
}
}


@ -0,0 +1,189 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNA;
impl PluginCommand for LazyFillNA {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-nan"
}
fn usage(&self) -> &str {
"Replaces NaN values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the NAN values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Fills the NaN values with 0",
example: "[1 2 NaN 3 NaN] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(0),
Value::test_int(3),
Value::test_int(0),
],
)],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Fills the NaN values of a whole dataframe",
example: "[[a b]; [0.2 1] [0.1 NaN]] | polars into-df | polars fill-nan 0",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_float(0.2), Value::test_float(0.1)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(0)],
),
],
None,
)
.expect("Df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
cmd_df(plugin, engine, call, df, fill, value.span())
}
PolarsPluginObject::NuLazyFrame(lazy) => cmd_df(
plugin,
engine,
call,
lazy.collect(value.span())?,
fill,
value.span(),
),
PolarsPluginObject::NuExpression(expr) => {
Ok(cmd_expr(plugin, engine, call, expr, fill)?)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
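// Eager path: rebuilds each column value by value, replacing NaN floats with the fill
// value and handling list values via NuDataFrame::fill_list_nan.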
fn cmd_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
frame: NuDataFrame,
fill: Value,
val_span: Span,
) -> Result<PipelineData, ShellError> {
let columns = frame.columns(val_span)?;
let dataframe = columns
.into_iter()
.map(|column| {
let column_name = column.name().to_string();
let values = column
.into_iter()
.map(|value| {
let span = value.span();
match value {
Value::Float { val, .. } => {
if val.is_nan() {
fill.clone()
} else {
value
}
}
Value::List { vals, .. } => {
NuDataFrame::fill_list_nan(vals, span, fill.clone())
}
_ => value,
}
})
.collect::<Vec<Value>>();
Column::new(column_name, values)
})
.collect::<Vec<Column>>();
let df = NuDataFrame::try_from_columns(dataframe, None)?;
to_pipeline_data(plugin, engine, call.head, df)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_nan(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNA)
}
}


@ -0,0 +1,127 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFillNull;
impl PluginCommand for LazyFillNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars fill-null"
}
fn usage(&self) -> &str {
"Replaces NULL values with the given expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Fills the null values by 0",
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars fill-null 0",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let fill: Value = call.req(0)?;
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill),
PolarsPluginObject::NuLazyFrame(lazy) => cmd_lazy(plugin, engine, call, lazy, fill),
PolarsPluginObject::NuExpression(expr) => cmd_expr(plugin, engine, call, expr, fill),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
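// Lazy path: converts the fill value into a polars expression and applies fill_null to the frame.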
fn cmd_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
fill: Value,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().fill_null(expr));
to_pipeline_data(plugin, engine, call.head, lazy)
}
fn cmd_expr(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
expr: NuExpression,
fill: Value,
) -> Result<PipelineData, ShellError> {
let fill = NuExpression::try_from_value(plugin, &fill)?.to_polars();
let expr: NuExpression = expr.to_polars().fill_null(fill).into();
to_pipeline_data(plugin, engine, call.head, expr)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFillNull)
}
}


@ -0,0 +1,104 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazyFilter;
impl PluginCommand for LazyFilter {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars filter"
}
fn usage(&self) -> &str {
"Filter dataframe based in expression."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"Expression that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example:
"[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars filter ((polars col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let expr_value: Value = call.req(0)?;
let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from)
}
}
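// Applies the filter expression to the lazy frame, preserving its from_eager flag.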
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
filter_expr: NuExpression,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars().filter(filter_expr.to_polars()),
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyFilter)
}
}


@ -0,0 +1,125 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
use crate::{
dataframe::values::{Column, NuDataFrame},
values::CustomValueSupport,
PolarsPlugin,
};
use super::explode::explode;
#[derive(Clone)]
pub struct LazyFlatten;
impl PluginCommand for LazyFlatten {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars flatten"
}
fn usage(&self) -> &str {
"An alias for polars explode."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"columns",
SyntaxShape::String,
"columns to flatten, only applicable for dataframes",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Flatten the specified dataframe",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars flatten hobbies | polars collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"id".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
]),
Column::new(
"name".to_string(),
vec![
Value::test_string("Mercy"),
Value::test_string("Mercy"),
Value::test_string("Bob"),
Value::test_string("Bob"),
]),
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
)
},
Example {
description: "Select a column and flatten the values",
example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | polars into-df | polars select (polars col hobbies | polars flatten)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"hobbies".to_string(),
vec![
Value::test_string("Cycling"),
Value::test_string("Knitting"),
Value::test_string("Skiing"),
Value::test_string("Football"),
]),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
explode(plugin, engine, call, input).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazyFlatten)
}
}


@ -0,0 +1,168 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct ToLazyGroupBy;
impl PluginCommand for ToLazyGroupBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars group-by"
}
fn usage(&self) -> &str {
"Creates a group-by object that can be used for other aggregations."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group-by expressions",
SyntaxShape::Any,
"Expression(s) that define the lazy group-by",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-df
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(0)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only Col expressions".into(),
span: value.span(),
})?;
}
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from)
}
}
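// Builds the polars group-by and carries the frame's schema along so that `polars agg`
// can validate column dtypes before aggregating.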
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
expressions: Vec<Expr>,
) -> Result<PipelineData, ShellError> {
let group_by = lazy.to_polars().group_by(expressions);
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
to_pipeline_data(plugin, engine, call.head, group_by)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ToLazyGroupBy)
}
}


@ -0,0 +1,260 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{Expr, JoinType};
#[derive(Clone)]
pub struct LazyJoin;
impl PluginCommand for LazyJoin {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars join"
}
fn usage(&self) -> &str {
"Joins a lazy frame with other lazy frame."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "LazyFrame to join with")
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
.switch(
"inner",
"inner joining between lazyframes (default)",
Some('i'),
)
.switch("left", "left join between lazyframes", Some('l'))
.switch("outer", "outer join between lazyframes", Some('o'))
.switch("cross", "cross join between lazyframes", Some('c'))
.named(
"suffix",
SyntaxShape::String,
"Suffix to use on columns with same name",
Some('s'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo | polars collect"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | polars into-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | polars into-lazy);
$df_a | polars join $df_b a foo"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let left = call.has_flag("left")?;
let outer = call.has_flag("outer")?;
let cross = call.has_flag("cross")?;
let how = if left {
JoinType::Left
} else if outer {
JoinType::Outer { coalesce: true }
} else if cross {
JoinType::Cross
} else {
JoinType::Inner
};
let other: Value = call.req(0)?;
let other = NuLazyFrame::try_from_value_coerce(plugin, &other)?;
let other = other.to_polars();
let left_on: Value = call.req(1)?;
let left_on = NuExpression::extract_exprs(plugin, left_on)?;
let right_on: Value = call.req(2)?;
let right_on = NuExpression::extract_exprs(plugin, right_on)?;
if left_on.len() != right_on.len() {
let right_on: Value = call.req(2)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "The right column list has a different size to the left column list".into(),
span: right_on.span(),
})?;
}
// Checking that both list of expressions are made out of col expressions or strings
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
let value: Value = call.req(*index)?;
Err(ShellError::IncompatibleParametersSingle {
msg: "Expected only a string, col expressions or list of strings".into(),
span: value.span(),
})?;
}
}
let suffix: Option<String> = call.get_flag("suffix")?;
let suffix = suffix.unwrap_or_else(|| "_x".into());
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?;
let from_eager = lazy.from_eager;
let lazy = lazy.to_polars();
let lazy = lazy
.join_builder()
.with(other)
.left_on(left_on)
.right_on(right_on)
.how(how)
.force_parallel(true)
.suffix(suffix)
.finish();
let lazy = NuLazyFrame::new(from_eager, lazy);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyJoin)
}
}


@ -0,0 +1,225 @@
/// Definition of multiple lazyframe commands using a macro rule
/// All of these commands have an identical body and only require
/// to have a change in the name, description and function
use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame};
use crate::values::{to_pipeline_data, CustomValueSupport};
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
macro_rules! lazy_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
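// Variant that passes an extra argument ($ddot) through to the polars method.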
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn signature(&self) -> Signature {
Signature::build($name)
.usage($desc)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
.plugin_examples($examples)
}
fn run(
&self,
_plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
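// Variant for polars methods that return a Result; maps the error into a ShellError.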
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => {
#[derive(Clone)]
pub struct $command;
impl PluginCommand for $command {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let lazy = NuLazyFrame::try_from_pipeline_coerce(plugin, input, call.head)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&$command)
}
}
};
}
// LazyReverse command
// Expands to a command definition for reverse
lazy_command!(
LazyReverse,
"polars reverse",
"Reverses the LazyFrame",
vec![Example {
description: "Reverses the dataframe.",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
),
],
None
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
reverse,
test_reverse
);
// LazyCache command
// Expands to a command definition for cache
lazy_command!(
LazyCache,
"polars cache",
"Caches operations in a new LazyFrame.",
vec![Example {
description: "Caches the result into a new LazyFrame",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars reverse | polars cache",
result: None,
}],
cache,
test_cache
);


@ -0,0 +1,143 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct LazyMedian;
impl PluginCommand for LazyMedian {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars median"
}
fn usage(&self) -> &str {
"Median value from columns in a dataframe or creates expression for an aggregation"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Median aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars median)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars median",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()),
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().median().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
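// Lazy-frame path: applies polars' median across all columns and wraps the result back up.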
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let polars_lazy = lazy
.to_polars()
.median()
.map_err(|e| ShellError::GenericError {
error: format!("Error in median operation: {e}"),
msg: "".into(),
help: None,
span: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyMedian)
}
}


@ -0,0 +1,57 @@
mod aggregate;
mod collect;
mod explode;
mod fetch;
mod fill_nan;
mod fill_null;
mod filter;
mod flatten;
pub mod groupby;
mod join;
mod macro_commands;
mod median;
mod quantile;
mod select;
mod sort_by_expr;
mod to_lazy;
use nu_plugin::PluginCommand;
pub use crate::dataframe::lazy::aggregate::LazyAggregate;
pub use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_nan::LazyFillNA;
pub use crate::dataframe::lazy::fill_null::LazyFillNull;
use crate::dataframe::lazy::filter::LazyFilter;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::join::LazyJoin;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::quantile::LazyQuantile;
pub(crate) use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
use crate::PolarsPlugin;
pub use explode::LazyExplode;
pub use flatten::LazyFlatten;
pub(crate) fn lazy_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(LazyAggregate),
Box::new(LazyCache),
Box::new(LazyCollect),
Box::new(LazyExplode),
Box::new(LazyFetch),
Box::new(LazyFillNA),
Box::new(LazyFillNull),
Box::new(LazyFilter),
Box::new(LazyFlatten),
Box::new(LazyJoin),
Box::new(median::LazyMedian),
Box::new(LazyReverse),
Box::new(LazySelect),
Box::new(LazySortBy),
Box::new(LazyQuantile),
Box::new(ToLazyFrame),
Box::new(ToLazyGroupBy),
]
}


@ -0,0 +1,160 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{lit, QuantileInterpolOptions};
#[derive(Clone)]
pub struct LazyQuantile;
impl PluginCommand for LazyQuantile {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars quantile"
}
fn usage(&self) -> &str {
"Aggregates the columns to the selected quantile."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"quantile",
SyntaxShape::Number,
"quantile value for quantile operation",
)
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars quantile 0.5",
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Quantile aggregation for a group-by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| polars into-df
| polars group-by a
| polars agg (polars col b | polars quantile 0.5)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
let quantile: f64 = call.req(0)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => {
command(plugin, engine, call, df.lazy(), quantile)
}
PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy, quantile),
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr
.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
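// Lazy-frame path: aggregates every column to the requested quantile using the default interpolation.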
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
quantile: f64,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})?,
);
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazyQuantile)
}
}


@ -0,0 +1,85 @@
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct LazySelect;
impl PluginCommand for LazySelect {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars select"
}
fn usage(&self) -> &str {
"Selects columns from lazyframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"select expressions",
SyntaxShape::Any,
"Expression(s) that define the column selection",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | polars into-df | polars select a",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().select(&expressions));
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use crate::test::test_polars_plugin_command;
use super::*;
#[test]
fn test_examples() -> Result<(), nu_protocol::ShellError> {
test_polars_plugin_command(&LazySelect)
}
}


@ -0,0 +1,160 @@
use super::super::values::NuLazyFrame;
use crate::{
dataframe::values::{Column, NuDataFrame, NuExpression},
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
#[derive(Clone)]
pub struct LazySortBy;
impl PluginCommand for LazySortBy {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars sort-by"
}
fn usage(&self) -> &str {
"Sorts a lazy dataframe based on expression(s)."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"sort expression",
SyntaxShape::Any,
"sort expression for the dataframe",
)
.named(
"reverse",
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
"Reverse sorting. Default is false",
Some('r'),
)
.switch(
"nulls-last",
"nulls are shown last in the dataframe",
Some('n'),
)
.switch("maintain-order", "Maintains order during sort", Some('m'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sort dataframe by one column",
example: "[[a b]; [6 2] [1 4] [4 1]] | polars into-df | polars sort-by a",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sort column using two columns",
example:
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | polars into-df | polars sort-by [a b] -r [false true]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(6),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(4),
Value::test_int(1),
Value::test_int(4),
Value::test_int(2),
],
),
], None)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let vals: Vec<Value> = call.rest(0)?;
let expr_value = Value::list(vals, call.head);
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
let nulls_last = call.has_flag("nulls-last")?;
let maintain_order = call.has_flag("maintain-order")?;
let reverse: Option<Vec<bool>> = call.get_flag("reverse")?;
let reverse = match reverse {
Some(list) => {
if expressions.len() != list.len() {
let span = call
.get_flag::<Value>("reverse")?
.expect("already checked and it exists")
.span();
Err(ShellError::GenericError {
error: "Incorrect list size".into(),
msg: "Size doesn't match expression list".into(),
span: Some(span),
help: None,
inner: vec![],
})?
} else {
list
}
}
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
);
to_pipeline_data(plugin, engine, call.head, lazy).map_err(LabeledError::from)
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&LazySortBy)
}
}


@ -0,0 +1,61 @@
use crate::{dataframe::values::NuSchema, values::CustomValueSupport, Cacheable, PolarsPlugin};
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, SyntaxShape, Type};
#[derive(Clone)]
pub struct ToLazyFrame;
impl PluginCommand for ToLazyFrame {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars into-lazy"
}
fn usage(&self) -> &str {
"Converts a dataframe into a lazy dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"schema",
SyntaxShape::Record(vec![]),
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s'),
)
.input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a dictionary and creates a lazy dataframe",
example: "[[a b];[1 2] [3 4]] | polars into-lazy",
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let maybe_schema = call
.get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema))
.transpose()?;
let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?;
let lazy = NuLazyFrame::from_dataframe(df);
Ok(PipelineData::Value(
lazy.cache(plugin, engine)?.into_value(call.head),
None,
))
}
}


@ -0,0 +1,19 @@
use nu_protocol::{ShellError, Span};
pub mod eager;
pub mod expressions;
pub mod lazy;
pub mod series;
pub mod stub;
mod utils;
pub mod values;
pub fn missing_flag_error(flag: &str, span: Span) -> ShellError {
ShellError::GenericError {
error: format!("Missing flag: {flag}"),
msg: "".into(),
span: Some(span),
help: None,
inner: vec![],
}
}


@ -0,0 +1,119 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct AllFalse;
impl PluginCommand for AllFalse {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars all-false"
}
fn usage(&self) -> &str {
"Returns true if all values are false."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are false",
example: "[false false false] | polars into-df | polars all-false",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(true)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 10] | polars into-df);
let res = ($s > 9);
$res | polars all-false"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![Value::test_bool(false)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
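// Coerces the input into a single-column boolean series and returns true only when no value is true.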
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|_| ShellError::GenericError {
error: "Error converting to bool".into(),
msg: "all-false only works with series of type bool".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let value = Value::bool(!bool.any(), call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_false".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AllFalse)
}
}


@ -0,0 +1,119 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct AllTrue;
impl PluginCommand for AllTrue {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars all-true"
}
fn usage(&self) -> &str {
"Returns true if all values are true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns true if all values are true",
example: "[true true true] | polars into-df | polars all-true",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(true)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | polars into-df);
let res = ($s > 9);
$res | polars all-true"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![Value::test_bool(false)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|_| ShellError::GenericError {
error: "Error converting to bool".into(),
msg: "all-false only works with series of type bool".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
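// all() is true only when every value in the boolean series is true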
let value = Value::bool(bool.all(), call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_true".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AllTrue)
}
}

View File

@ -0,0 +1,96 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
#[derive(Clone)]
pub struct ArgMax;
impl PluginCommand for ArgMax {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-max"
}
fn usage(&self) -> &str {
"Return index for max value in series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argmax", "maximum", "most", "largest", "greatest"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for max value",
example: "[1 3 2] | polars into-df | polars arg-max",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let res = series.arg_max();
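// arg_max returns None when there is no maximum (e.g. an empty series); map that case to an empty result column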
let chunked = match res {
Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]),
None => UInt32Chunked::from_slice("arg_max", &[]),
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgMax)
}
}

View File

@ -0,0 +1,96 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked};
#[derive(Clone)]
pub struct ArgMin;
impl PluginCommand for ArgMin {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-min"
}
fn usage(&self) -> &str {
"Return index for min value in series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argmin", "minimum", "least", "smallest", "lowest"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for min value",
example: "[1 3 2] | polars into-df | polars arg-min",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let res = series.arg_min();
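// as with arg-max, a missing minimum (e.g. an empty series) becomes an empty result column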
let chunked = match res {
Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]),
None => UInt32Chunked::from_slice("arg_min", &[]),
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgMin)
}
}

View File

@ -0,0 +1,159 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::prelude::{DataType, IntoSeries};
use polars_ops::prelude::{cum_max, cum_min, cum_sum};
enum CumulativeType {
Min,
Max,
Sum,
}
impl CumulativeType {
fn from_str(roll_type: &str, span: Span) -> Result<Self, ShellError> {
match roll_type {
"min" => Ok(Self::Min),
"max" => Ok(Self::Max),
"sum" => Ok(Self::Sum),
_ => Err(ShellError::GenericError {
error: "Wrong operation".into(),
msg: "Operation not valid for cumulative".into(),
span: Some(span),
help: Some("Allowed values: max, min, sum".into()),
inner: vec![],
}),
}
}
fn to_str(&self) -> &'static str {
match self {
CumulativeType::Min => "cumulative_min",
CumulativeType::Max => "cumulative_max",
CumulativeType::Sum => "cumulative_sum",
}
}
}
#[derive(Clone)]
pub struct Cumulative;
impl PluginCommand for Cumulative {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars cumulative"
}
fn usage(&self) -> &str {
"Cumulative calculation for a series."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("type", SyntaxShape::String, "rolling operation")
.switch("reverse", "Reverse cumulative calculation", Some('r'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Cumulative sum for a series",
example: "[1 2 3 4 5] | polars into-df | polars cumulative sum",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_cumulative_sum".to_string(),
vec![
Value::test_int(1),
Value::test_int(3),
Value::test_int(6),
Value::test_int(10),
Value::test_int(15),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let cum_type: Spanned<String> = call.req(0)?;
let reverse = call.has_flag("reverse")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
if let DataType::Object(..) = series.dtype() {
return Err(ShellError::GenericError {
error: "Found object series".into(),
msg: "Series of type object cannot be used for cumulative operation".into(),
span: Some(call.head),
help: None,
inner: vec![],
});
}
let cum_type = CumulativeType::from_str(&cum_type.item, cum_type.span)?;
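// delegate to the polars cumulative kernels, walking the series in reverse when --reverse is set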
let mut res = match cum_type {
CumulativeType::Max => cum_max(&series, reverse),
CumulativeType::Min => cum_min(&series, reverse),
CumulativeType::Sum => cum_sum(&series, reverse),
}
.map_err(|e| ShellError::GenericError {
error: "Error creating cumulative".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let name = format!("{}_{}", series.name(), cum_type.to_str());
res.rename(&name);
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Cumulative)
}
}

View File

@ -0,0 +1,101 @@
use crate::{values::to_pipeline_data, PolarsPlugin};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, SyntaxShape, Type,
};
use polars::prelude::{IntoSeries, StringMethods};
#[derive(Clone)]
pub struct AsDate;
impl PluginCommand for AsDate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as-date"
}
fn usage(&self) -> &str {
r#"Converts string to date."#
}
fn extra_usage(&self) -> &str {
r#"Format example:
"%Y-%m-%d" => 2021-12-31
"%d-%m-%Y" => 31-12-2021
"%Y%m%d" => 2021319 (2021-03-19)"#
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("format", SyntaxShape::String, "formatting date string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Converts string to date",
example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
result: None,
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let format: String = call.req(0)?;
let not_exact = call.has_flag("not-exact")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
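// with --not-exact the format only needs to match a substring of each value; otherwise the whole string must match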
let res = if not_exact {
casted.as_date_not_exact(Some(format.as_str()))
} else {
casted.as_date(Some(format.as_str()), false)
};
let mut res = res
.map_err(|e| ShellError::GenericError {
error: "Error creating datetime".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("date");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}

View File

@ -0,0 +1,198 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use chrono::DateTime;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
#[derive(Clone)]
pub struct AsDateTime;
impl PluginCommand for AsDateTime {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars as-datetime"
}
fn usage(&self) -> &str {
r#"Converts string to datetime."#
}
fn extra_usage(&self) -> &str {
r#"Format example:
"%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98
"%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
"%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01
"%y%m%d %H:%M:%S" => 210319 23:58:50
"%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98
"%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01
"%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01
"%Y%m%d %H:%M:%S" => 20210319 23:58:50
"%FT%H:%M:%S" => 2019-04-18T02:45:55
"%FT%H:%M:%S.%6f" => microseconds
"%FT%H:%M:%S.%9f" => nanoseconds"#
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("format", SyntaxShape::String, "formatting date time string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Converts string to datetime",
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Converts string to datetime with high resolutions",
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00.123456789 +0000",
"%Y-%m-%d %H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00.123456789 +0000",
"%Y-%m-%d %H:%M:%S.%9f %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let format: String = call.req(0)?;
let not_exact = call.has_flag("not-exact")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
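// parse with nanosecond precision; --not-exact lets the format match inside a longer string instead of requiring an exact match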
let res = if not_exact {
casted.as_datetime_not_exact(
Some(format.as_str()),
TimeUnit::Nanoseconds,
false,
None,
&Default::default(),
)
} else {
casted.as_datetime(
Some(format.as_str()),
TimeUnit::Nanoseconds,
false,
false,
None,
&Default::default(),
)
};
let mut res = res
.map_err(|e| ShellError::GenericError {
error: "Error creating datetime".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("datetime");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&AsDateTime)
}
}

View File

@ -0,0 +1,105 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetDay;
impl PluginCommand for GetDay {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-day"
}
fn usage(&self) -> &str {
"Gets day from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns day from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-day"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[4i8, 4]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
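// extract the day-of-month component as its own series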
let res = casted.day().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetDay)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetHour;
impl PluginCommand for GetHour {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-hour"
}
fn usage(&self) -> &str {
"Gets hour from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns hour from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-hour"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[16i8, 16]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.hour().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetHour)
}
}

View File

@ -0,0 +1,95 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use polars::{prelude::NamedFrom, series::Series};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)]
pub struct GetMinute;
impl PluginCommand for GetMinute {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-minute"
}
fn usage(&self) -> &str {
"Gets minute from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns minute from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-minute"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[39i8, 39]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.minute().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetMinute)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetMonth;
impl PluginCommand for GetMonth {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-month"
}
fn usage(&self) -> &str {
"Gets month from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns month from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-month"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[8i8, 8]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.month().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetMonth)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetNanosecond;
impl PluginCommand for GetNanosecond {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-nanosecond"
}
fn usage(&self) -> &str {
"Gets nanosecond from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns nanosecond from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-nanosecond"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[0i32, 0]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.nanosecond().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetNanosecond)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetOrdinal;
impl PluginCommand for GetOrdinal {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-ordinal"
}
fn usage(&self) -> &str {
"Gets ordinal from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns ordinal from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-ordinal"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[217i16, 217]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.ordinal().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetOrdinal)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetSecond;
impl PluginCommand for GetSecond {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-second"
}
fn usage(&self) -> &str {
"Gets second from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns second from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-second"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[18i8, 18]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.second().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetSecond)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetWeek;
impl PluginCommand for GetWeek {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-week"
}
fn usage(&self) -> &str {
"Gets week from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns week from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-week"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[32i8, 32]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.week().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetWeek)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetWeekDay;
impl PluginCommand for GetWeekDay {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-weekday"
}
fn usage(&self) -> &str {
"Gets weekday from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns weekday from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-weekday"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[2i8, 2]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.weekday().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetWeekDay)
}
}

View File

@ -0,0 +1,97 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::NuDataFrame;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type,
};
use polars::{
prelude::{DatetimeMethods, IntoSeries, NamedFrom},
series::Series,
};
#[derive(Clone)]
pub struct GetYear;
impl PluginCommand for GetYear {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars get-year"
}
fn usage(&self) -> &str {
"Gets year from date."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns year from a date",
example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC');
let df = ([$dt $dt] | polars into-df);
$df | polars get-year"#,
result: Some(
NuDataFrame::try_from_series(Series::new("0", &[2020i32, 2020]), Span::test_data())
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting to datetime type".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = casted.year().into_series();
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&GetYear)
}
}

View File

@ -0,0 +1,25 @@
mod as_date;
mod as_datetime;
mod get_day;
mod get_hour;
mod get_minute;
mod get_month;
mod get_nanosecond;
mod get_ordinal;
mod get_second;
mod get_week;
mod get_weekday;
mod get_year;
pub use as_date::AsDate;
pub use as_datetime::AsDateTime;
pub use get_day::GetDay;
pub use get_hour::GetHour;
pub use get_minute::GetMinute;
pub use get_month::GetMonth;
pub use get_nanosecond::GetNanosecond;
pub use get_ordinal::GetOrdinal;
pub use get_second::GetSecond;
pub use get_week::GetWeek;
pub use get_weekday::GetWeekDay;
pub use get_year::GetYear;

View File

@ -0,0 +1,140 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{IntoSeries, SortOptions};
#[derive(Clone)]
pub struct ArgSort;
impl PluginCommand for ArgSort {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-sort"
}
fn usage(&self) -> &str {
"Returns indexes for a sorted series."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argsort", "order", "arrange"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("reverse", "reverse order", Some('r'))
.switch("nulls-last", "nulls ordered last", Some('n'))
.switch(
"maintain-order",
"maintain order on sorted items",
Some('m'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | polars into-df | polars arg-sort",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
Value::test_int(4),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | polars into-df | polars arg-sort --reverse",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
Value::test_int(3),
Value::test_int(4),
Value::test_int(1),
Value::test_int(2),
Value::test_int(0),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
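// translate the command flags into polars sort options; multithreaded sorting is always enabled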
let sort_options = SortOptions {
descending: call.has_flag("reverse")?,
nulls_last: call.has_flag("nulls-last")?,
multithreaded: true,
maintain_order: call.has_flag("maintain-order")?,
};
let mut res = df
.as_series(call.head)?
.arg_sort(sort_options)
.into_series();
res.rename("arg_sort");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgSort)
}
}

View File

@ -0,0 +1,126 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::{arg_where, col, IntoLazy};
#[derive(Clone)]
pub struct ArgTrue;
impl PluginCommand for ArgTrue {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-true"
}
fn usage(&self) -> &str {
"Returns indexes where values are true."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argtrue", "truth", "boolean-true"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes where values are true",
example: "[false true false] | polars into-df | polars arg-true",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_true".to_string(),
vec![Value::test_int(1)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let columns = df.as_ref().get_column_names();
if columns.len() > 1 {
return Err(ShellError::GenericError {
error: "Error using as series".into(),
msg: "dataframe has more than one column".into(),
span: Some(call.head),
help: None,
inner: vec![],
});
}
match columns.first() {
Some(column) => {
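// arg_where yields the row indices where the single boolean column is true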
let expression = arg_where(col(column).eq(true)).alias("arg_true");
let res: NuDataFrame = df
.as_ref()
.clone()
.lazy()
.select(&[expression])
.collect()
.map_err(|err| ShellError::GenericError {
error: "Error creating index column".into(),
msg: err.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
to_pipeline_data(plugin, engine, call.head, res)
}
_ => Err(ShellError::UnsupportedInput {
msg: "Expected the dataframe to have a column".to_string(),
input: "".to_string(),
msg_span: call.head,
input_span: call.head,
}),
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgTrue)
}
}

View File

@ -0,0 +1,104 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct ArgUnique;
impl PluginCommand for ArgUnique {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars arg-unique"
}
fn usage(&self) -> &str {
"Returns indexes for unique values."
}
fn search_terms(&self) -> Vec<&str> {
vec!["argunique", "distinct", "noduplicate", "unrepeated"]
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | polars into-df | polars arg-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"arg_unique".to_string(),
vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut res = df
.as_series(call.head)?
.arg_unique()
.map_err(|e| ShellError::GenericError {
error: "Error extracting unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("arg_unique");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&ArgUnique)
}
}

View File

@ -0,0 +1,9 @@
mod arg_sort;
mod arg_true;
mod arg_unique;
mod set_with_idx;
pub use arg_sort::ArgSort;
pub use arg_true::ArgTrue;
pub use arg_unique::ArgUnique;
pub use set_with_idx::SetWithIndex;

View File

@ -0,0 +1,227 @@
use crate::{
missing_flag_error,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
#[derive(Clone)]
pub struct SetWithIndex;
impl PluginCommand for SetWithIndex {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars set-with-idx"
}
fn usage(&self) -> &str {
"Sets value in the given index."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("value", SyntaxShape::Any, "value to be inserted in series")
.required_named(
"indices",
SyntaxShape::Any,
"list of indices indicating where to set the value",
Some('i'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Set value in selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | polars into-df);
let indices = ([0 2] | polars into-df);
$series | polars set-with-idx 6 --indices $indices"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(6),
Value::test_int(1),
Value::test_int(6),
Value::test_int(2),
Value::test_int(4),
Value::test_int(3),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(0)?;
let indices_value: Value = call
.get_flag("indices")?
.ok_or_else(|| missing_flag_error("indices", call.head))?;
let indices_span = indices_value.span();
let indices = NuDataFrame::try_from_value_coerce(plugin, &indices_value, call.head)?
.as_series(indices_span)?;
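// only integer index series are accepted; normalize them to u32 before scattering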
let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref()
.cast(&DataType::UInt32)
.map_err(|e| ShellError::GenericError {
error: "Error casting indices".into(),
msg: e.to_string(),
span: Some(indices_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "Series with incorrect type".into(),
span: Some(indices_span),
help: Some("Consider using a Series with type int type".into()),
inner: vec![],
}),
}?;
let indices = casted
.u32()
.map_err(|e| ShellError::GenericError {
error: "Error casting indices".into(),
msg: e.to_string(),
span: Some(indices_span),
help: None,
inner: vec![],
})?
.into_iter()
.flatten();
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let span = value.span();
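// scatter the new value through the chunked array matching its type (i64, f64 or str)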
let res = match value {
Value::Int { val, .. } => {
let chunked = series.i64().map_err(|e| ShellError::GenericError {
error: "Error casting to i64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::Float { val, .. } => {
let chunked = series.f64().map_err(|e| ShellError::GenericError {
error: "Error casting to f64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.scatter_single(indices, Some(val)).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::String { val, .. } => {
let chunked = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.scatter_single(indices, Some(val.as_ref()))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let mut res = res.into_series();
res.rename("string");
NuDataFrame::try_from_series_vec(vec![res], call.head)
}
_ => Err(ShellError::GenericError {
error: "Incorrect value type".into(),
msg: format!(
"this value cannot be set in a series of type '{}'",
series.dtype()
),
span: Some(span),
help: None,
inner: vec![],
}),
}?;
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SetWithIndex)
}
}

View File

@ -0,0 +1,133 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsDuplicated;
impl PluginCommand for IsDuplicated {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-duplicated"
}
fn usage(&self) -> &str {
"Creates mask indicating duplicated values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-duplicated",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-duplicated",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
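// DataFrame::is_duplicated flags duplicated rows, so multi-column inputs are handled as well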
let mut res = df
.as_ref()
.is_duplicated()
.map_err(|e| ShellError::GenericError {
error: "Error finding duplicates".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_duplicated");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsDuplicated)
}
}

View File

@ -0,0 +1,130 @@
use crate::{
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsNotNull;
impl PluginCommand for IsNotNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-not-null"
}
fn usage(&self) -> &str {
"Creates mask where value is not null."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | polars into-df);
let res = ($s / $s);
$res | polars is-not-null"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_not_null".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is not null expression from a column",
example: "polars col a | polars is-not-null",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().is_not_null().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut res = df.as_series(call.head)?.is_not_null();
res.rename("is_not_null");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsNotNull)
}
}
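
The expression branch above just forwards to polars-lazy's `Expr::is_not_null`. A small sketch of that path in plain Rust (assuming the polars `lazy` feature; the sample data is invented):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    let s = Series::new("a", &[Some(1i64), None, Some(3)]);
    let df = DataFrame::new(vec![s])?;

    // Same operation as `polars col a | polars is-not-null`, expressed directly.
    let out = df
        .lazy()
        .select([col("a").is_not_null().alias("is_not_null")])
        .collect()?;
    println!("{out}");
    Ok(())
}
```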

View File

@ -0,0 +1,130 @@
use crate::{
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, NuExpression, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsNull;
impl PluginCommand for IsNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-null"
}
fn usage(&self) -> &str {
"Creates mask where value is null."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | polars into-df);
let res = ($s / $s);
$res | polars is-null"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_null".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is null expression from a column",
example: "polars col a | polars is-null",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().is_null().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mut res = df.as_series(call.head)?.is_null();
res.rename("is_null");
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsNull)
}
}

View File

@ -0,0 +1,133 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
#[derive(Clone)]
pub struct IsUnique;
impl PluginCommand for IsUnique {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars is-unique"
}
fn usage(&self) -> &str {
"Creates mask indicating unique values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | polars into-df | polars is-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create mask indicating duplicated rows in a dataframe",
example:
"[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | polars into-df | polars is-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(true),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let mut res = df
.as_ref()
.is_unique()
.map_err(|e| ShellError::GenericError {
error: "Error finding unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_unique");
let df = NuDataFrame::try_from_series_vec(vec![res], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&IsUnique)
}
}

View File

@ -0,0 +1,13 @@
mod is_duplicated;
mod is_not_null;
mod is_null;
mod is_unique;
mod not;
mod set;
pub use is_duplicated::IsDuplicated;
pub use is_not_null::IsNotNull;
pub use is_null::IsNull;
pub use is_unique::IsUnique;
pub use not::NotSeries;
pub use set::SetSeries;

View File

@ -0,0 +1,103 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use polars::prelude::IntoSeries;
use std::ops::Not;
#[derive(Clone)]
pub struct NotSeries;
impl PluginCommand for NotSeries {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars not"
}
fn usage(&self) -> &str {
"Inverts boolean mask."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Inverts boolean mask",
example: "[true false true] | polars into-df | polars not",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_bool(false),
Value::test_bool(true),
Value::test_bool(false),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
command(plugin, engine, call, df).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|e| ShellError::GenericError {
error: "Error inverting mask".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let res = bool.not();
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&NotSeries)
}
}
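
The core of this command is just `std::ops::Not` applied to a `BooleanChunked`. A minimal sketch with invented data:

```rust
use polars::prelude::*;
use std::ops::Not;

fn main() -> PolarsResult<()> {
    let s = Series::new("0", &[true, false, true]);

    // Series::bool gives a &BooleanChunked; Not inverts it element-wise.
    let inverted = s.bool()?.not().into_series();
    println!("{inverted}");
    Ok(())
}
```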

View File

@ -0,0 +1,210 @@
use crate::{
missing_flag_error,
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
#[derive(Clone)]
pub struct SetSeries;
impl PluginCommand for SetSeries {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars set"
}
fn usage(&self) -> &str {
"Sets value where given mask is true."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("value", SyntaxShape::Any, "value to be inserted in series")
.required_named(
"mask",
SyntaxShape::Any,
"mask indicating insertions",
Some('m'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: r#"let s = ([1 2 2 3 3] | polars into-df | polars shift 2);
let mask = ($s | polars is-null);
$s | polars set 0 --mask $mask"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(0)?;
let mask_value: Value = call
.get_flag("mask")?
.ok_or_else(|| missing_flag_error("mask", call.head))?;
let mask_span = mask_value.span();
let mask =
NuDataFrame::try_from_value_coerce(plugin, &mask_value, call.head)?.as_series(mask_span)?;
let bool_mask = match mask.dtype() {
DataType::Boolean => mask.bool().map_err(|e| ShellError::GenericError {
error: "Error casting to bool".into(),
msg: e.to_string(),
span: Some(mask_span),
help: None,
inner: vec![],
}),
_ => Err(ShellError::GenericError {
error: "Incorrect type".into(),
msg: "can only use bool series as mask".into(),
span: Some(mask_span),
help: None,
inner: vec![],
}),
}?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let span = value.span();
let res = match value {
Value::Int { val, .. } => {
let chunked = series.i64().map_err(|e| ShellError::GenericError {
error: "Error casting to i64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.set(bool_mask, Some(val))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::Float { val, .. } => {
let chunked = series.f64().map_err(|e| ShellError::GenericError {
error: "Error casting to f64".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked
.set(bool_mask, Some(val))
.map_err(|e| ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
Value::String { val, .. } => {
let chunked = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?;
let res = chunked.set(bool_mask, Some(val.as_ref())).map_err(|e| {
ShellError::GenericError {
error: "Error setting value".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
}
})?;
let mut res = res.into_series();
res.rename("string");
NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)
}
_ => Err(ShellError::GenericError {
error: "Incorrect value type".into(),
msg: format!(
"this value cannot be set in a series of type '{}'",
series.dtype()
),
span: Some(span),
help: None,
inner: vec![],
}),
}?;
to_pipeline_data(plugin, engine, call.head, res)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&SetSeries)
}
}
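
The per-type branches above all reduce to `ChunkSet::set`, which writes a value wherever the boolean mask is true. A sketch of the integer branch with invented data (polars API as imported in this file):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    // A series with leading nulls, like the shifted series in the example above.
    let s = Series::new("0", &[None, None, Some(1i64), Some(2), Some(2)]);
    let mask = s.is_null(); // BooleanChunked standing in for the --mask argument

    // Set 0 wherever the mask is true, then rebuild a Series.
    let filled = s.i64()?.set(&mask, Some(0))?.into_series();
    println!("{filled}");
    Ok(())
}
```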

View File

@ -0,0 +1,85 @@
mod date;
pub use date::*;
mod string;
pub use string::*;
mod masks;
pub use masks::*;
mod indexes;
pub use indexes::*;
mod all_false;
mod all_true;
mod arg_max;
mod arg_min;
mod cumulative;
mod n_null;
mod n_unique;
mod rolling;
mod shift;
mod unique;
mod value_counts;
pub use all_false::AllFalse;
use nu_plugin::PluginCommand;
use crate::PolarsPlugin;
pub use all_true::AllTrue;
pub use arg_max::ArgMax;
pub use arg_min::ArgMin;
pub use cumulative::Cumulative;
pub use n_null::NNull;
pub use n_unique::NUnique;
pub use rolling::Rolling;
pub use shift::Shift;
pub use unique::Unique;
pub use value_counts::ValueCount;
pub(crate) fn series_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![
Box::new(AllFalse),
Box::new(AllTrue),
Box::new(ArgMax),
Box::new(ArgMin),
Box::new(ArgSort),
Box::new(ArgTrue),
Box::new(ArgUnique),
Box::new(AsDate),
Box::new(AsDateTime),
Box::new(Concatenate),
Box::new(Contains),
Box::new(Cumulative),
Box::new(GetDay),
Box::new(GetHour),
Box::new(GetMinute),
Box::new(GetMonth),
Box::new(GetNanosecond),
Box::new(GetOrdinal),
Box::new(GetSecond),
Box::new(GetWeek),
Box::new(GetWeekDay),
Box::new(GetYear),
Box::new(IsDuplicated),
Box::new(IsNotNull),
Box::new(IsNull),
Box::new(IsUnique),
Box::new(NNull),
Box::new(NUnique),
Box::new(NotSeries),
Box::new(Replace),
Box::new(ReplaceAll),
Box::new(Rolling),
Box::new(SetSeries),
Box::new(SetWithIndex),
Box::new(Shift),
Box::new(StrLengths),
Box::new(StrSlice),
Box::new(StrFTime),
Box::new(ToLowerCase),
Box::new(ToUpperCase),
Box::new(Unique),
Box::new(ValueCount),
]
}

View File

@ -0,0 +1,93 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct NNull;
impl PluginCommand for NNull {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars count-null"
}
fn usage(&self) -> &str {
"Counts null values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Counts null values",
example: r#"let s = ([1 1 0 0 3 3 4] | polars into-df);
($s / $s) | polars count-null"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"count_null".to_string(),
vec![Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let res = df.as_series(call.head)?.null_count();
let value = Value::int(res as i64, call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("count_null".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&NNull)
}
}

View File

@ -0,0 +1,135 @@
use crate::{
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame, NuExpression};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct NUnique;
impl PluginCommand for NUnique {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars n-unique"
}
fn usage(&self) -> &str {
"Counts unique values."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | polars into-df | polars n-unique",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"count_unique".to_string(),
vec![Value::test_int(4)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is n-unique expression from a column",
example: "polars col a | polars n-unique",
result: None,
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr.to_polars().n_unique().into();
to_pipeline_data(plugin, engine, call.head, expr)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let res = df
.as_series(call.head)?
.n_unique()
.map_err(|e| ShellError::GenericError {
error: "Error counting unique values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let value = Value::int(res as i64, call.head);
let df = NuDataFrame::try_from_columns(
vec![Column::new("count_unique".to_string(), vec![value])],
None,
)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&NUnique)
}
}

View File

@ -0,0 +1,196 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime};
enum RollType {
Min,
Max,
Sum,
Mean,
}
impl RollType {
fn from_str(roll_type: &str, span: Span) -> Result<Self, ShellError> {
match roll_type {
"min" => Ok(Self::Min),
"max" => Ok(Self::Max),
"sum" => Ok(Self::Sum),
"mean" => Ok(Self::Mean),
_ => Err(ShellError::GenericError {
error: "Wrong operation".into(),
msg: "Operation not valid for cumulative".into(),
span: Some(span),
help: Some("Allowed values: min, max, sum, mean".into()),
inner: vec![],
}),
}
}
fn to_str(&self) -> &'static str {
match self {
RollType::Min => "rolling_min",
RollType::Max => "rolling_max",
RollType::Sum => "rolling_sum",
RollType::Mean => "rolling_mean",
}
}
}
#[derive(Clone)]
pub struct Rolling;
impl PluginCommand for Rolling {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars rolling"
}
fn usage(&self) -> &str {
"Rolling calculation for a series."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("type", SyntaxShape::String, "rolling operation")
.required("window", SyntaxShape::Int, "Window size for rolling")
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Rolling sum for a series",
example: "[1 2 3 4 5] | polars into-df | polars rolling sum 2 | polars drop-nulls",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_sum".to_string(),
vec![
Value::test_int(3),
Value::test_int(5),
Value::test_int(7),
Value::test_int(9),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Rolling max for a series",
example: "[1 2 3 4 5] | polars into-df | polars rolling max 2 | polars drop-nulls",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0_rolling_max".to_string(),
vec![
Value::test_int(2),
Value::test_int(3),
Value::test_int(4),
Value::test_int(5),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let roll_type: Spanned<String> = call.req(0)?;
let window_size: i64 = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
if let DataType::Object(..) = series.dtype() {
return Err(ShellError::GenericError {
error: "Found object series".into(),
msg: "Series of type object cannot be used for rolling operation".into(),
span: Some(call.head),
help: None,
inner: vec![],
});
}
let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?;
let rolling_opts = RollingOptionsImpl {
window_size: Duration::new(window_size),
min_periods: window_size as usize,
weights: None,
center: false,
by: None,
closed_window: None,
tu: None,
tz: None,
fn_params: None,
};
let res = match roll_type {
RollType::Max => series.rolling_max(rolling_opts),
RollType::Min => series.rolling_min(rolling_opts),
RollType::Sum => series.rolling_sum(rolling_opts),
RollType::Mean => series.rolling_mean(rolling_opts),
};
let mut res = res.map_err(|e| ShellError::GenericError {
error: "Error calculating rolling values".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let name = format!("{}_{}", series.name(), roll_type.to_str());
res.rename(&name);
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Rolling)
}
}
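
For reference, the same rolling computation done directly against polars, using the `RollingOptionsImpl` construction from the command above (sample data is invented; the rolling API sits behind polars' rolling-window feature):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    let s = Series::new("0", &[1i64, 2, 3, 4, 5]);

    // Fixed window of 2 with min_periods = 2, mirroring `polars rolling sum 2`.
    let opts = RollingOptionsImpl {
        window_size: Duration::new(2),
        min_periods: 2,
        weights: None,
        center: false,
        by: None,
        closed_window: None,
        tu: None,
        tz: None,
        fn_params: None,
    };

    let mut rolled = s.rolling_sum(opts)?;
    rolled.rename("0_rolling_sum");
    println!("{rolled}"); // null, 3, 5, 7, 9
    Ok(())
}
```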

View File

@ -0,0 +1,136 @@
use crate::{
dataframe::values::{NuExpression, NuLazyFrame},
values::{
cant_convert_err, to_pipeline_data, CustomValueSupport, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};
use super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars_plan::prelude::lit;
#[derive(Clone)]
pub struct Shift;
impl PluginCommand for Shift {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars shift"
}
fn usage(&self) -> &str {
"Shifts the values by a given period."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("period", SyntaxShape::Int, "shift period")
.named(
"fill",
SyntaxShape::Any,
"Expression used to fill the null values (lazy df)",
Some('f'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: "[1 2 2 3 3] | polars into-df | polars shift 2 | polars drop-nulls",
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
let value = input.into_value(call.head);
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
                    PolarsPluginType::NuLazyFrame,
],
)),
}
.map_err(LabeledError::from)
}
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let period: i64 = call.req(0)?;
let series = df.as_series(call.head)?.shift(period);
let df = NuDataFrame::try_from_series_vec(vec![series], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
fn command_lazy(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let shift: i64 = call.req(0)?;
let fill: Option<Value> = call.get_flag("fill")?;
let lazy = lazy.to_polars();
let lazy: NuLazyFrame = match fill {
Some(ref fill) => {
let expr = NuExpression::try_from_value(plugin, fill)?.to_polars();
lazy.shift_and_fill(lit(shift), expr).into()
}
None => lazy.shift(shift).into(),
};
to_pipeline_data(plugin, engine, call.head, lazy)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Shift)
}
}
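
A short sketch of the two paths above, eager `Series::shift` and lazy `shift_and_fill`, with invented data (the lazy calls mirror the `lit(shift)` usage in `command_lazy`):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    let s = Series::new("0", &[1i64, 2, 2, 3, 3]);

    // Eager path: the first two slots become null.
    println!("{}", s.shift(2));

    // Lazy path: shift and fill the introduced nulls with a literal 0.
    let df = DataFrame::new(vec![s])?;
    let filled = df.lazy().shift_and_fill(lit(2), lit(0)).collect()?;
    println!("{filled}");
    Ok(())
}
```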

View File

@ -0,0 +1,124 @@
use crate::{
values::{to_pipeline_data, CustomValueSupport},
PolarsPlugin,
};
use super::super::super::values::{Column, NuDataFrame};
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{IntoSeries, StringNameSpaceImpl};
#[derive(Clone)]
pub struct Concatenate;
impl PluginCommand for Concatenate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars concatenate"
}
fn usage(&self) -> &str {
"Concatenates strings with other array."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"other",
SyntaxShape::Any,
"Other array with string to be concatenated",
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Concatenate string",
example: r#"let other = ([za xs cd] | polars into-df);
[abc abc abc] | polars into-df | polars concatenate $other"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
Value::test_string("abcza"),
Value::test_string("abcxs"),
Value::test_string("abccd"),
],
)],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
}
fn command(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let other: Value = call.req(0)?;
let other_span = other.span();
let other_df = NuDataFrame::try_from_value_coerce(plugin, &other, other_span)?;
let other_series = other_df.as_series(other_span)?;
let other_chunked = other_series.str().map_err(|e| ShellError::GenericError {
error: "The concatenate only with string columns".into(),
msg: e.to_string(),
span: Some(other_span),
help: None,
inner: vec![],
})?;
let series = df.as_series(call.head)?;
let chunked = series.str().map_err(|e| ShellError::GenericError {
error: "The concatenate only with string columns".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?;
let mut res = chunked.concat(other_chunked);
res.rename(series.name());
let df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
to_pipeline_data(plugin, engine, call.head, df)
}
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command;
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command(&Concatenate)
}
}
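
The heavy lifting here is `StringNameSpaceImpl::concat`, which appends the other column element-wise. A sketch with invented data (polars' strings support is assumed to be enabled):

```rust
use polars::prelude::*;

fn main() -> PolarsResult<()> {
    let a = Series::new("0", &["abc", "abc", "abc"]);
    let other = Series::new("other", &["za", "xs", "cd"]);

    // Element-wise string concatenation, keeping the left-hand column's name.
    let mut res = a.str()?.concat(other.str()?);
    res.rename(a.name());
    println!("{}", res.into_series());
    Ok(())
}
```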

Some files were not shown because too many files have changed in this diff.