Lazy dataframes (#5687)

* change between lazy and eager

* when expressions

* examples for aggregations

* more examples for agg

* examples for dataframes

* checked examples

* cargo fmt
This commit is contained in:
Fernando Herrera 2022-05-31 07:29:55 +01:00 committed by GitHub
parent 0769e9b750
commit 997d56a288
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
50 changed files with 1781 additions and 810 deletions

View File

@ -29,23 +29,37 @@ impl Command for FilterWith {
SyntaxShape::Any,
"boolean mask used to filter data",
)
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([true false] | dfr to-df);
vec![
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([true false] | dfr to-df);
[[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with $mask"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(1)]),
Column::new("b".to_string(), vec![Value::test_int(2)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with ((dfr col a) > 1)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(3)]),
Column::new("b".to_string(), vec![Value::test_int(4)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -81,31 +95,42 @@ fn command_eager(
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let mask_value: Value = call.req(engine_state, stack, 0)?;
let mask_span = mask_value.span()?;
let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
let mask = mask.bool().map_err(|e| {
ShellError::GenericError(
"Error casting to bool".into(),
e.to_string(),
Some(mask_span),
Some("Perhaps you want to use a series with booleans as mask".into()),
Vec::new(),
)
})?;
df.as_ref()
.filter(mask)
.map_err(|e| {
if NuExpression::can_downcast(&mask_value) {
let expression = NuExpression::try_from_value(mask_value)?;
let lazy = NuLazyFrame::new(true, df.lazy());
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
Ok(PipelineData::Value(
NuLazyFrame::into_value(lazy, call.head)?,
None,
))
} else {
let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
let mask = mask.bool().map_err(|e| {
ShellError::GenericError(
"Error calculating dummies".into(),
"Error casting to bool".into(),
e.to_string(),
Some(call.head),
Some("The only allowed column types for dummies are String or Int".into()),
Some(mask_span),
Some("Perhaps you want to use a series with booleans as mask".into()),
Vec::new(),
)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
})?;
df.as_ref()
.filter(mask)
.map_err(|e| {
ShellError::GenericError(
"Error filtering dataframe".into(),
e.to_string(),
Some(call.head),
Some("The only allowed column types for dummies are String or Int".into()),
Vec::new(),
)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
}
fn command_lazy(
@ -120,7 +145,7 @@ fn command_lazy(
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
Ok(PipelineData::Value(
NuLazyFrame::into_value(lazy, call.head),
NuLazyFrame::into_value(lazy, call.head)?,
None,
))
}
@ -129,9 +154,10 @@ fn command_lazy(
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprCol;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(FilterWith {})])
test_dataframe(vec![Box::new(FilterWith {}), Box::new(ExprCol {})])
}
}

View File

@ -22,7 +22,7 @@ impl Command for FirstDF {
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional("rows", SyntaxShape::Int, "Number of rows for head")
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or expression".into()))
}
fn examples(&self) -> Vec<Example> {

View File

@ -22,7 +22,7 @@ impl Command for LastDF {
fn signature(&self) -> Signature {
Signature::build(self.name())
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {

View File

@ -18,7 +18,6 @@ mod rename;
mod sample;
mod shape;
mod slice;
mod sort;
mod take;
mod to_csv;
mod to_df;
@ -48,7 +47,6 @@ pub use rename::RenameDF;
pub use sample::SampleDF;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sort::SortDF;
pub use take::TakeDF;
pub use to_csv::ToCSV;
pub use to_df::ToDataFrame;
@ -88,7 +86,6 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
SampleDF,
ShapeDF,
SliceDF,
SortDF,
TakeDF,
ToCSV,
ToDataFrame,

View File

@ -14,7 +14,7 @@ pub struct RenameDF;
impl Command for RenameDF {
fn name(&self) -> &str {
"dfr rename-col"
"dfr rename"
}
fn usage(&self) -> &str {
@ -33,28 +33,65 @@ impl Command for RenameDF {
SyntaxShape::Any,
"New names for the selected column(s). A string or list of strings",
)
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename-col a a_new",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
vec![
Example {
description: "Renames a series",
example: "[5 6 7 8] | dfr to-df | dfr rename '0' new_name",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"new_name".to_string(),
vec![
Value::test_int(5),
Value::test_int(6),
Value::test_int(7),
Value::test_int(8),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames a dataframe column",
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename a a_new",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Renames two dataframe columns",
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename [a b] [a_new b_new]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a_new".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b_new".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -133,7 +170,7 @@ fn command_lazy(
let lazy = lazy.into_polars();
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
#[cfg(test)]

View File

@ -1,148 +0,0 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use crate::dataframe::values::{utils::convert_columns_string, Column};
use super::super::values::NuDataFrame;
#[derive(Clone)]
pub struct SortDF;
impl Command for SortDF {
fn name(&self) -> &str {
"dfr sort"
}
fn usage(&self) -> &str {
"Creates new sorted dataframe or series"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("reverse", "invert sort", Some('r'))
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create new sorted dataframe",
example: "[[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Create new sorted series",
example: "[3 4 1 2] | dfr to-df | dfr sort",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
Value::test_int(4),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let reverse = call.has_flag("reverse");
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
if df.is_series() {
let columns = df.as_ref().get_column_names();
df.as_ref()
.sort(columns, reverse)
.map_err(|e| {
ShellError::GenericError(
"Error sorting dataframe".into(),
e.to_string(),
Some(call.head),
None,
Vec::new(),
)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
} else {
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
if !columns.is_empty() {
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
df.as_ref()
.sort(&col_string, reverse)
.map_err(|e| {
ShellError::GenericError(
"Error sorting dataframe".into(),
e.to_string(),
Some(col_span),
None,
Vec::new(),
)
})
.map(|df| {
PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)
})
} else {
Err(ShellError::GenericError(
"Missing columns".into(),
"missing column name to perform sort".into(),
Some(call.head),
None,
Vec::new(),
))
}
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(SortDF {})])
}
}

View File

@ -27,7 +27,7 @@ impl Command for WithColumn {
SyntaxShape::Any,
"series to be added or expressions used to define the new columns",
)
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
@ -59,11 +59,34 @@ impl Command for WithColumn {
Example {
description: "Adds a series to the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| dfr to-df
| dfr to-lazy
| dfr with-column ((dfr col a) * 2 | dfr as "c")
| dfr with-column [
((dfr col a) * 2 | dfr as "c")
((dfr col a) * 3 | dfr as "d")
]
| dfr collect"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(2), Value::test_int(6)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(3), Value::test_int(9)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -85,7 +108,7 @@ impl Command for WithColumn {
command_eager(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
"lazy or eager dataframe".into(),
value.get_type().to_string(),
value.span()?,
None,
@ -100,34 +123,49 @@ fn command_eager(
call: &Call,
mut df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let other_value: Value = call.req(engine_state, stack, 0)?;
let other_span = other_value.span()?;
let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?;
let new_column: Value = call.req(engine_state, stack, 0)?;
let column_span = new_column.span()?;
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
Some(name) => name,
None => other.name().to_string(),
};
if NuExpression::can_downcast(&new_column) {
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
let value = Value::List {
vals,
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
let lazy = NuLazyFrame::new(true, df.lazy().with_columns(&expressions));
let series = other.rename(&name).clone();
let df = lazy.collect(call.head)?;
df.as_mut()
.with_column(series)
.map_err(|e| {
ShellError::GenericError(
"Error adding column to dataframe".into(),
e.to_string(),
Some(other_span),
None,
Vec::new(),
)
})
.map(|df| {
PipelineData::Value(
NuDataFrame::dataframe_into_value(df.clone(), call.head),
None,
)
})
Ok(PipelineData::Value(df.into_value(call.head), None))
} else {
let mut other = NuDataFrame::try_from_value(new_column)?.as_series(column_span)?;
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
Some(name) => name,
None => other.name().to_string(),
};
let series = other.rename(&name).clone();
df.as_mut()
.with_column(series)
.map_err(|e| {
ShellError::GenericError(
"Error adding column to dataframe".into(),
e.to_string(),
Some(column_span),
None,
Vec::new(),
)
})
.map(|df| {
PipelineData::Value(
NuDataFrame::dataframe_into_value(df.clone(), call.head),
None,
)
})
}
}
fn command_lazy(
@ -146,7 +184,7 @@ fn command_lazy(
let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into();
Ok(PipelineData::Value(
NuLazyFrame::into_value(lazy, call.head),
NuLazyFrame::into_value(lazy, call.head)?,
None,
))
}
@ -155,9 +193,15 @@ fn command_lazy(
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAlias;
use crate::dataframe::expressions::ExprCol;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(WithColumn {})])
test_dataframe(vec![
Box::new(WithColumn {}),
Box::new(ExprAlias {}),
Box::new(ExprCol {}),
])
}
}

View File

@ -4,7 +4,7 @@ use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
@ -26,14 +26,34 @@ impl Command for ExprAlias {
SyntaxShape::String,
"Alias name for the expression",
)
.category(Category::Custom("expressions".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates and alias expression",
example: "(dfr col a | df as new_a)",
result: None,
example: "dfr col a | dfr as new_a | dfr as-nu",
result: {
let cols = vec!["expr".into(), "value".into()];
let expr = Value::test_string("column");
let value = Value::test_string("a");
let expr = Value::Record {
cols,
vals: vec![expr, value],
span: Span::test_data(),
};
let cols = vec!["expr".into(), "alias".into()];
let value = Value::test_string("new_a");
let record = Value::Record {
cols,
vals: vec![expr, value],
span: Span::test_data(),
};
Some(record)
},
}]
}
@ -55,3 +75,20 @@ impl Command for ExprAlias {
))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAsNu;
use crate::dataframe::expressions::ExprCol;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(ExprAlias {}),
Box::new(ExprCol {}),
Box::new(ExprAsNu {}),
])
}
}

View File

@ -7,11 +7,11 @@ use nu_protocol::{
};
#[derive(Clone)]
pub struct ExprToNu;
pub struct ExprAsNu;
impl Command for ExprToNu {
impl Command for ExprAsNu {
fn name(&self) -> &str {
"dfr to-nu"
"dfr as-nu"
}
fn usage(&self) -> &str {
@ -19,13 +19,13 @@ impl Command for ExprToNu {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("expressions".into()))
Signature::build(self.name()).category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Convert a col expression into a nushell value",
example: "dfr col col_a | dfr to-nu",
example: "dfr col col_a | dfr as-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
@ -65,6 +65,6 @@ mod test {
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprToNu {}), Box::new(ExprCol {})])
test_dataframe(vec![Box::new(ExprAsNu {}), Box::new(ExprCol {})])
}
}

View File

@ -26,13 +26,13 @@ impl Command for ExprCol {
SyntaxShape::String,
"Name of column to be used",
)
.category(Category::Custom("expressions".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a named column expression and converts it to a nu object",
example: "dfr col col_a | dfr to-nu",
example: "dfr col col_a | dfr as-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
@ -66,12 +66,12 @@ impl Command for ExprCol {
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::as_nu::ExprAsNu;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprToNu {})])
test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprAsNu {})])
}
}

View File

@ -1,7 +0,0 @@
mod col;
mod lit;
mod when;
pub(super) use crate::dataframe::expressions::dsl::col::ExprCol;
pub(super) use crate::dataframe::expressions::dsl::lit::ExprLit;
pub(super) use crate::dataframe::expressions::dsl::when::ExprWhen;

View File

@ -1,96 +0,0 @@
use crate::dataframe::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use polars::prelude::when;
#[derive(Clone)]
pub struct ExprWhen;
impl Command for ExprWhen {
fn name(&self) -> &str {
"dfr when"
}
fn usage(&self) -> &str {
"Creates a when expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"when predicate",
SyntaxShape::Any,
"Name of column to be used",
)
.required_named(
"then",
SyntaxShape::Any,
"Expression that will be applied when predicate is true",
Some('t'),
)
.required_named(
"otherwise",
SyntaxShape::Any,
"Expression that will be applied when predicate is false",
Some('o'),
)
.category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| dfr to-df
| dfr to-lazy
| dfr with-column (
dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c"
)
| dfr collect"#,
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let predicate: Value = call.req(engine_state, stack, 0)?;
let predicate = NuExpression::try_from_value(predicate)?;
let then: Value = call
.get_flag(engine_state, stack, "then")?
.expect("it is a required named value");
let then = NuExpression::try_from_value(then)?;
let otherwise: Value = call
.get_flag(engine_state, stack, "otherwise")?
.expect("it is a required named value");
let otherwise = NuExpression::try_from_value(otherwise)?;
let expr: NuExpression = when(predicate.into_polars())
.then(then.into_polars())
.otherwise(otherwise.into_polars())
.into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprWhen {}), Box::new(ExprToNu {})])
}
}

View File

@ -26,7 +26,7 @@ macro_rules! expr_command {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
@ -107,3 +107,17 @@ expr_command!(
}],
explode
);
// ExprCount command
// Expands to a command definition for a count expression
expr_command!(
ExprCount,
"dfr count",
"creates a count expression",
vec![Example {
description: "",
example: "",
result: None,
}],
count
);

View File

@ -25,13 +25,13 @@ impl Command for ExprLit {
SyntaxShape::Any,
"literal to construct the expression",
)
.category(Category::Custom("expressions".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Created a literal expression and converts it to a nu object",
example: "dfr lit 2 | dfr to-nu",
example: "dfr lit 2 | dfr as-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
@ -68,12 +68,12 @@ impl Command for ExprLit {
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::as_nu::ExprAsNu;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprToNu {})])
test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprAsNu {})])
}
}

View File

@ -1,15 +1,20 @@
mod alias;
mod dsl;
mod as_nu;
mod col;
mod expressions_macro;
mod to_nu;
mod lit;
mod otherwise;
mod when;
use nu_protocol::engine::StateWorkingSet;
use crate::dataframe::expressions::dsl::*;
use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::expressions_macro::*;
use crate::dataframe::expressions::to_nu::ExprToNu;
pub(crate) use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::as_nu::ExprAsNu;
pub(super) use crate::dataframe::expressions::col::ExprCol;
pub(crate) use crate::dataframe::expressions::expressions_macro::*;
pub(super) use crate::dataframe::expressions::lit::ExprLit;
pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise;
pub(super) use crate::dataframe::expressions::when::ExprWhen;
pub fn add_expressions(working_set: &mut StateWorkingSet) {
macro_rules! bind_command {
@ -25,9 +30,11 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
bind_command!(
ExprAlias,
ExprCol,
ExprCount,
ExprLit,
ExprToNu,
ExprAsNu,
ExprWhen,
ExprOtherwise,
ExprList,
ExprAggGroups,
ExprFlatten,

View File

@ -0,0 +1,125 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct ExprOtherwise;
impl Command for ExprOtherwise {
fn name(&self) -> &str {
"dfr otherwise"
}
fn usage(&self) -> &str {
"completes a when expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"otherwise expression",
SyntaxShape::Any,
"expressioini to apply when no when predicate matches",
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "dfr when ((dfr col a) > 2) 4 | dfr otherwise 5",
result: None,
},
Example {
description: "Create a when conditions",
example:
"dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6 | dfr otherwise 0",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| dfr to-lazy
| dfr with-column (
dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c
)
| dfr with-column (
dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d
)
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let otherwise_predicate: Value = call.req(engine_state, stack, 0)?;
let otherwise_predicate = NuExpression::try_from_value(otherwise_predicate)?;
let value = input.into_value(call.head);
let complete: NuExpression = match NuWhen::try_from_value(value)? {
NuWhen::WhenThen(when_then) => when_then
.otherwise(otherwise_predicate.into_polars())
.into(),
NuWhen::WhenThenThen(when_then_then) => when_then_then
.otherwise(otherwise_predicate.into_polars())
.into(),
};
Ok(PipelineData::Value(complete.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use crate::dataframe::eager::WithColumn;
use crate::dataframe::expressions::when::ExprWhen;
use crate::dataframe::expressions::{ExprAlias, ExprAsNu, ExprCol};
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(WithColumn {}),
Box::new(ExprCol {}),
Box::new(ExprAlias {}),
Box::new(ExprWhen {}),
Box::new(ExprOtherwise {}),
Box::new(ExprAsNu {}),
])
}
}

View File

@ -0,0 +1,140 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::when;
#[derive(Clone)]
pub struct ExprWhen;
impl Command for ExprWhen {
fn name(&self) -> &str {
"dfr when"
}
fn usage(&self) -> &str {
"Creates and modifies a when expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"when expression",
SyntaxShape::Any,
"when expression used for matching",
)
.required(
"then expression",
SyntaxShape::Any,
"expression that will be applied when predicate is true",
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Create a when conditions",
example: "dfr when ((dfr col a) > 2) 4",
result: None,
},
Example {
description: "Create a when conditions",
example: "dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6",
result: None,
},
Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [6 2] [1 4] [4 1]]
| dfr to-lazy
| dfr with-column (
dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c
)
| dfr with-column (
dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d
)
| dfr collect"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)],
),
Column::new(
"d".to_string(),
vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let when_predicate: Value = call.req(engine_state, stack, 0)?;
let when_predicate = NuExpression::try_from_value(when_predicate)?;
let then_predicate: Value = call.req(engine_state, stack, 1)?;
let then_predicate = NuExpression::try_from_value(then_predicate)?;
let value = input.into_value(call.head);
let when_then: NuWhen = match value {
Value::Nothing { .. } => when(when_predicate.into_polars())
.then(then_predicate.into_polars())
.into(),
v => match NuWhen::try_from_value(v)? {
NuWhen::WhenThen(when_then) => when_then
.when(when_predicate.into_polars())
.then(then_predicate.into_polars())
.into(),
NuWhen::WhenThenThen(when_then_then) => when_then_then
.when(when_predicate.into_polars())
.then(then_predicate.into_polars())
.into(),
},
};
Ok(PipelineData::Value(when_then.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use crate::dataframe::eager::WithColumn;
use crate::dataframe::expressions::otherwise::ExprOtherwise;
use crate::dataframe::expressions::{ExprAlias, ExprAsNu, ExprCol};
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(WithColumn {}),
Box::new(ExprCol {}),
Box::new(ExprAlias {}),
Box::new(ExprWhen {}),
Box::new(ExprOtherwise {}),
Box::new(ExprAsNu {}),
])
}
}

View File

@ -1,10 +1,10 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
@ -12,7 +12,7 @@ pub struct LazyAggregate;
impl Command for LazyAggregate {
fn name(&self) -> &str {
"dfr aggregate"
"dfr agg"
}
fn usage(&self) -> &str {
@ -36,26 +36,67 @@ impl Command for LazyAggregate {
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr group-by a
| dfr aggregate [
| dfr agg [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr to-lazy
| dfr group-by a
| dfr aggregate [
| dfr agg [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]
| dfr collect"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -78,14 +119,33 @@ impl Command for LazyAggregate {
let from_eager = group_by.from_eager;
let group_by = group_by.into_polars();
let lazy: NuLazyFrame = group_by.agg(&expressions).into();
let res = if from_eager {
lazy.collect(call.head)?.into_value(call.head)
} else {
lazy.into_value(call.head)
let lazy = NuLazyFrame {
lazy: group_by.agg(&expressions).into(),
from_eager,
};
let res = lazy.into_value(call.head)?;
Ok(PipelineData::Value(res, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAlias;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::{LazyMax, LazyMin, LazySum};
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
Box::new(ExprAlias {}),
Box::new(LazyMin {}),
Box::new(LazyMax {}),
Box::new(LazySum {}),
])
}
}

View File

@ -1,9 +1,10 @@
use super::super::values::{NuDataFrame, NuLazyFrame};
use crate::dataframe::values::{Column, NuDataFrame};
use super::super::values::NuLazyFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
Category, Example, PipelineData, ShellError, Signature, Span, Value,
};
#[derive(Clone)]
@ -15,7 +16,7 @@ impl Command for LazyCollect {
}
fn usage(&self) -> &str {
"Collect lazy dataframe into dataframe"
"Collect lazy dataframe into eager dataframe"
}
fn signature(&self) -> Signature {
@ -24,9 +25,22 @@ impl Command for LazyCollect {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4]] | dfr to-lazy | dfr collect",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
@ -39,10 +53,22 @@ impl Command for LazyCollect {
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let eager = lazy.collect(call.head)?;
let value = Value::CustomValue {
val: Box::new(eager),
span: call.head,
};
Ok(PipelineData::Value(
NuDataFrame::into_value(eager, call.head),
None,
))
Ok(PipelineData::Value(value, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyCollect {})])
}
}

View File

@ -1,10 +1,10 @@
use super::super::values::NuLazyFrame;
use crate::dataframe::values::NuDataFrame;
use crate::dataframe::values::{Column, NuDataFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
@ -31,9 +31,22 @@ impl Command for LazyFetch {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
description: "Fetch a rows from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr fetch 2",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
@ -68,13 +81,13 @@ impl Command for LazyFetch {
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFetch {})])
// }
//}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyFetch {})])
}
}

View File

@ -44,22 +44,23 @@ impl Command for LazyFillNA {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let fill: Value = call.req(engine_state, stack, 0)?;
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy: NuLazyFrame = lazy.fill_nan(expr).into();
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let fill = NuExpression::try_from_value(fill)?.into_polars();
let expr: NuExpression = expr.into_polars().fill_nan(fill).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else {
let lazy = NuLazyFrame::try_from_value(value)?;
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_nan(expr));
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFillNA {})])
// }
//}

View File

@ -1,9 +1,9 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
@ -30,9 +30,22 @@ impl Command for LazyFillNull {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
description: "Fills the null values by 0",
example: "[1 2 2 3 3] | dfr to-df | dfr shift 2 | dfr fill-null 0",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![
Value::test_int(0),
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(2),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
@ -44,22 +57,35 @@ impl Command for LazyFillNull {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let fill: Value = call.req(engine_state, stack, 0)?;
let value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy: NuLazyFrame = lazy.fill_null(expr).into();
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let fill = NuExpression::try_from_value(fill)?.into_polars();
let expr: NuExpression = expr.into_polars().fill_null(fill).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else {
let lazy = NuLazyFrame::try_from_value(value)?;
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_null(expr));
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFillNull {})])
// }
//}
#[cfg(test)]
mod test {
use super::super::super::series::Shift;
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyFillNull {}), Box::new(Shift {})])
}
}

View File

@ -1,9 +1,9 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::Expr;
@ -36,26 +36,67 @@ impl Command for ToLazyGroupBy {
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr group-by a
| dfr aggregate [
| dfr agg [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr to-lazy
| dfr group-by a
| dfr aggregate [
| dfr agg [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]
| dfr collect"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
),
Column::new(
"b_min".to_string(),
vec![Value::test_int(2), Value::test_int(4)],
),
Column::new(
"b_max".to_string(),
vec![Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b_sum".to_string(),
vec![Value::test_int(6), Value::test_int(10)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -86,7 +127,8 @@ impl Command for ToLazyGroupBy {
}
let value = input.into_value(call.head);
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
let lazy = NuLazyFrame::try_from_value(value)?;
let from_eager = lazy.from_eager;
let group_by = NuLazyGroupBy {
group_by: Some(lazy.into_polars().groupby(&expressions)),
@ -96,3 +138,24 @@ impl Command for ToLazyGroupBy {
Ok(PipelineData::Value(group_by.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAlias;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::{LazyMax, LazyMin, LazySum};
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
Box::new(ExprAlias {}),
Box::new(LazyMin {}),
Box::new(LazyMax {}),
Box::new(LazySum {}),
])
}
}

View File

@ -1,9 +1,9 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::{Expr, JoinType};
@ -46,16 +46,116 @@ impl Command for LazyJoin {
Example {
description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr to-lazy);
$df_a | dfr join $df_b a foo | dfr collect"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr to-lazy);
$df_a | dfr join $df_b a foo"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(2),
Value::test_int(1),
Value::test_int(1),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_string("a"),
Value::test_string("b"),
Value::test_string("c"),
Value::test_string("c"),
],
),
Column::new(
"c".to_string(),
vec![
Value::test_int(0),
Value::test_int(1),
Value::test_int(2),
Value::test_int(3),
],
),
Column::new(
"bar".to_string(),
vec![
Value::test_string("a"),
Value::test_string("c"),
Value::test_string("a"),
Value::test_string("a"),
],
),
Column::new(
"ham".to_string(),
vec![
Value::test_string("let"),
Value::test_string("var"),
Value::test_string("let"),
Value::test_string("let"),
],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -82,7 +182,7 @@ impl Command for LazyJoin {
};
let other: Value = call.req(engine_state, stack, 0)?;
let (other, _) = NuLazyFrame::maybe_is_eager(other)?;
let other = NuLazyFrame::try_from_value(other)?;
let other = other.into_polars();
let left_on: Value = call.req(engine_state, stack, 1)?;
@ -114,10 +214,11 @@ impl Command for LazyJoin {
let suffix = suffix.unwrap_or_else(|| "_x".into());
let value = input.into_value(call.head);
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
let lazy = NuLazyFrame::try_from_value(value)?;
let from_eager = lazy.from_eager;
let lazy = lazy.into_polars();
let lazy: NuLazyFrame = lazy
let lazy = lazy
.join_builder()
.with(other)
.left_on(left_on)
@ -125,15 +226,21 @@ impl Command for LazyJoin {
.how(how)
.force_parallel(true)
.suffix(suffix)
.finish()
.into();
.finish();
let res = if from_eager {
lazy.collect(call.head)?.into_value(call.head)
} else {
lazy.into_value(call.head)
};
let lazy = NuLazyFrame::new(from_eager, lazy);
Ok(PipelineData::Value(res, None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyJoin {})])
}
}

View File

@ -1,15 +1,15 @@
/// Definition of multiple lazyframe commands using a macro rule
/// All of these commands have an identical body and only require
/// to have a change in the name, description and function
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
Category, Example, PipelineData, ShellError, Signature, Span, Value,
};
macro_rules! lazy_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
@ -37,10 +37,21 @@ macro_rules! lazy_command {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy.$func().into();
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func());
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
#[cfg(test)]
mod $test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new($command {})])
}
}
};
@ -53,11 +64,25 @@ lazy_command!(
"dfr reverse",
"Reverses the LazyFrame",
vec![Example {
description: "",
example: "",
result: None,
}],
reverse
description: "Reverses the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr reverse",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},],
reverse,
test_reverse
);
// LazyCache command
@ -67,17 +92,18 @@ lazy_command!(
"dfr cache",
"Caches operations in a new LazyFrame",
vec![Example {
description: "",
example: "",
description: "Caches the result into a new LazyFrame",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr reverse | dfr cache",
result: None,
}],
cache
cache,
test_cache
);
// Creates a command that may result in a lazy frame operation or
// lazy frame expression
macro_rules! lazy_expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => {
#[derive(Clone)]
pub struct $command;
@ -91,7 +117,8 @@ macro_rules! lazy_expr_command {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
Signature::build(self.name())
.category(Category::Custom("lazyframe or expression".into()))
}
fn examples(&self) -> Vec<Example> {
@ -115,21 +142,31 @@ macro_rules! lazy_expr_command {
NuExpression::into_value(expr, call.head),
None,
))
} else if NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value(value)?.into_polars();
let lazy: NuLazyFrame = lazy.$func().into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
} else {
Err(ShellError::CantConvert(
"expression or lazyframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
let lazy = NuLazyFrame::try_from_value(value)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func());
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
}
#[cfg(test)]
mod $test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new($command {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
])
}
}
};
}
@ -139,12 +176,43 @@ lazy_expr_command!(
LazyMax,
"dfr max",
"Aggregates columns to their max value or creates a max expression",
vec![Example {
description: "",
example: "",
result: None,
}],
max
vec![
Example {
description: "Max value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr max",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(6)],),
Column::new("b".to_string(), vec![Value::test_int(4)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Max aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr max)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
max,
test_max
);
// LazyMin command
@ -153,12 +221,43 @@ lazy_expr_command!(
LazyMin,
"dfr min",
"Aggregates columns to their min value or creates a min expression",
vec![Example {
description: "",
example: "",
result: None,
}],
min
vec![
Example {
description: "Min value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr min",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(1)],),
Column::new("b".to_string(), vec![Value::test_int(1)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Min aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr min)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(1)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
min,
test_min
);
// LazySum command
@ -166,13 +265,44 @@ lazy_expr_command!(
lazy_expr_command!(
LazySum,
"dfr sum",
"Aggregates columns to their sum value or creates a sum expression",
vec![Example {
description: "",
example: "",
result: None,
}],
sum
"Aggregates columns to their sum value or creates a sum expression for an aggregation",
vec![
Example {
description: "Sums all columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr sum",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(11)],),
Column::new("b".to_string(), vec![Value::test_int(7)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sum aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr sum)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_int(6), Value::test_int(1)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
sum,
test_sum
);
// LazyMean command
@ -180,13 +310,44 @@ lazy_expr_command!(
lazy_expr_command!(
LazyMean,
"dfr mean",
"Aggregates columns to their mean value or creates a mean expression",
vec![Example {
description: "",
example: "",
result: None,
}],
mean
"Aggregates columns to their mean value or creates a mean expression for an aggregation",
vec![
Example {
description: "Mean value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr mean",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Mean aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr mean)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
mean,
test_mean
);
// LazyMedian command
@ -194,13 +355,44 @@ lazy_expr_command!(
lazy_expr_command!(
LazyMedian,
"dfr median",
"Aggregates columns to their median value or creates a median expression",
vec![Example {
description: "",
example: "",
result: None,
}],
median
"Aggregates columns to their median value or creates a median expression for an aggregation",
vec![
Example {
description: "Median value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr median",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(2.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Median aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr median)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(3.0), Value::test_float(1.0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
median,
test_median
);
// LazyStd command
@ -208,13 +400,44 @@ lazy_expr_command!(
lazy_expr_command!(
LazyStd,
"dfr std",
"Aggregates columns to their std value",
vec![Example {
description: "",
example: "",
result: None,
}],
std
"Aggregates columns to their std value or creates a std expression for an aggregation",
vec![
Example {
description: "Std value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr std",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(2.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Std aggregation for a group by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr std)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
std,
test_std
);
// LazyVar command
@ -222,11 +445,42 @@ lazy_expr_command!(
lazy_expr_command!(
LazyVar,
"dfr var",
"Aggregates columns to their var value",
vec![Example {
description: "",
example: "",
result: None,
}],
var
"Aggregates columns to their var value or create a var expression for an aggregation",
vec![
Example {
description: "Var value from columns in a dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr var",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)],),
Column::new("b".to_string(), vec![Value::test_float(0.0)],),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Var aggregation for a group by",
example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr var)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(0.0), Value::test_float(0.0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
],
var,
test_var
);

View File

@ -13,10 +13,10 @@ mod to_lazy;
use nu_protocol::engine::StateWorkingSet;
use crate::dataframe::lazy::macro_commands::*;
pub(crate) use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::collect::LazyCollect;
pub use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_na::LazyFillNA;
use crate::dataframe::lazy::fill_null::LazyFillNull;
@ -25,7 +25,7 @@ use crate::dataframe::lazy::join::LazyJoin;
use crate::dataframe::lazy::quantile::LazyQuantile;
use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
use crate::dataframe::lazy::to_lazy::ToLazyFrame;
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
macro_rules! bind_command {

View File

@ -1,9 +1,9 @@
use crate::dataframe::values::NuLazyFrame;
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::QuantileInterpolOptions;
@ -30,11 +30,41 @@ impl Command for LazyQuantile {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
vec![
Example {
description: "quantile value from columns in a dataframe",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr quantile 0.5",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_float(4.0)]),
Column::new("b".to_string(), vec![Value::test_float(2.0)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Quantile aggregation for a group by",
example: r#"[[a b]; [one 2] [one 4] [two 1]]
| dfr to-df
| dfr group-by a
| dfr agg ("b" | dfr quantile 0.5)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_string("one"), Value::test_string("two")],
),
Column::new(
"b".to_string(),
vec![Value::test_float(4.0), Value::test_float(1.0)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -44,24 +74,46 @@ impl Command for LazyQuantile {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
let quantile: f64 = call.req(engine_state, stack, 0)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy
.quantile(quantile, QuantileInterpolOptions::default())
.into();
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr
.into_polars()
.quantile(quantile, QuantileInterpolOptions::default())
.into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else {
let lazy = NuLazyFrame::try_from_value(value)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars()
.quantile(quantile, QuantileInterpolOptions::default()),
);
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyQuantile {})])
// }
//}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(LazyQuantile {}),
Box::new(LazyAggregate {}),
Box::new(ToLazyGroupBy {}),
])
}
}

View File

@ -1,10 +1,10 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::Expr;
@ -22,7 +22,7 @@ impl Command for LazySelect {
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
.rest(
"select expressions",
SyntaxShape::Any,
"Expression(s) that define the column selection",
@ -32,9 +32,16 @@ impl Command for LazySelect {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
description: "Select a column from the dataframe",
example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr select a",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
@ -45,7 +52,11 @@ impl Command for LazySelect {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
let value = Value::List {
vals,
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
if expressions
@ -59,20 +70,20 @@ impl Command for LazySelect {
));
}
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy.select(&expressions).into();
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions));
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazySelect {})])
// }
//}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazySelect {})])
}
}

View File

@ -1,10 +1,10 @@
use super::super::values::NuLazyFrame;
use crate::dataframe::values::NuExpression;
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
@ -21,26 +21,70 @@ impl Command for LazySortBy {
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
.rest(
"sort expression",
SyntaxShape::Any,
"filtering expression",
"sort expression for the dataframe",
)
.named(
"reverse",
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
"list indicating if reverse search should be done in the column. Default is false",
"Reverse sorting. Default is false",
Some('r'),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
vec![
Example {
description: "Sort dataframe by one column",
example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr sort-by a",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Sort column using two columns",
example:
"[[a b]; [6 2] [1 1] [1 4] [2 4]] | dfr to-df | dfr sort-by [a b] -r [false true]",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_int(1),
Value::test_int(1),
Value::test_int(2),
Value::test_int(6),
],
),
Column::new(
"b".to_string(),
vec![
Value::test_int(4),
Value::test_int(1),
Value::test_int(4),
Value::test_int(2),
],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
@ -50,7 +94,11 @@ impl Command for LazySortBy {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
let value = Value::List {
vals,
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
@ -76,25 +124,25 @@ impl Command for LazySortBy {
};
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy: NuLazyFrame = lazy
.into_polars()
.sort_by_exprs(&expressions, reverse)
.into();
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars().sort_by_exprs(&expressions, reverse),
);
Ok(PipelineData::Value(
NuLazyFrame::into_value(lazy, call.head),
NuLazyFrame::into_value(lazy, call.head)?,
None,
))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazySortBy {})])
// }
//}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazySortBy {})])
}
}

View File

@ -3,7 +3,7 @@ use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
Category, Example, PipelineData, ShellError, Signature, Value,
};
#[derive(Clone)]
@ -39,7 +39,11 @@ impl Command for ToLazyFrame {
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?;
let lazy = NuLazyFrame::from_dataframe(df);
let value = Value::CustomValue {
val: Box::new(lazy),
span: call.head,
};
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(value, None))
}
}

View File

@ -20,7 +20,7 @@ impl Command for IsNotNull {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {

View File

@ -20,7 +20,7 @@ impl Command for IsNull {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe or expression".into()))
}
fn examples(&self) -> Vec<Example> {

View File

@ -22,7 +22,7 @@ impl Command for NotSeries {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe or lazyframes".into()))
}
fn examples(&self) -> Vec<Example> {
@ -45,7 +45,7 @@ impl Command for NotSeries {
},
Example {
description: "Creates a not expression from a column",
example: "dfr col a | dfr not",
example: "((dfr col a) > 2) | dfr not",
result: None,
},
]

View File

@ -17,7 +17,6 @@ mod arg_min;
mod cumulative;
mod n_null;
mod n_unique;
mod rename;
mod rolling;
mod shift;
mod unique;
@ -32,7 +31,6 @@ pub use arg_min::ArgMin;
pub use cumulative::Cumulative;
pub use n_null::NNull;
pub use n_unique::NUnique;
pub use rename::Rename;
pub use rolling::Rolling;
pub use shift::Shift;
pub use unique::Unique;
@ -80,7 +78,6 @@ pub fn add_series_decls(working_set: &mut StateWorkingSet) {
NNull,
NUnique,
NotSeries,
Rename,
Replace,
ReplaceAll,
Rolling,

View File

@ -11,7 +11,7 @@ pub struct NUnique;
impl Command for NUnique {
fn name(&self) -> &str {
"dfr count-unique"
"dfr n-unique"
}
fn usage(&self) -> &str {
@ -19,14 +19,14 @@ impl Command for NUnique {
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
Signature::build(self.name()).category(Category::Custom("dataframe or expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique",
example: "[1 1 2 2 3 3 4] | dfr to-df | dfr n-unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"count_unique".to_string(),

View File

@ -1,84 +0,0 @@
use super::super::values::{Column, NuDataFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct Rename;
impl Command for Rename {
fn name(&self) -> &str {
"dfr rename"
}
fn usage(&self) -> &str {
"Renames a series"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("name", SyntaxShape::String, "new series name")
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Renames a series",
example: "[5 6 7 8] | dfr to-df | dfr rename new_name",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"new_name".to_string(),
vec![
Value::test_int(5),
Value::test_int(6),
Value::test_int(7),
Value::test_int(8),
],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let name: String = call.req(engine_state, stack, 0)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut series = df.as_series(call.head)?;
series.rename(&name);
NuDataFrame::try_from_series(vec![series], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(Rename {})])
}
}

View File

@ -27,10 +27,10 @@ impl Command for Shift {
.named(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values (lazy df)",
"Expression used to fill the null values (lazy df)",
Some('f'),
)
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
@ -106,7 +106,7 @@ fn command_lazy(
None => lazy.shift(shift).into(),
};
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
#[cfg(test)]

View File

@ -40,22 +40,29 @@ impl Command for Unique {
"Keep the same order as the original DataFrame (lazy df)",
Some('k'),
)
.category(Category::Custom("dataframe".into()))
.category(Category::Custom("dataframe or expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns unique values from a series",
example: "[2 2 2 2 2] | dfr to-df | dfr unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![Value::test_int(2)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
vec![
Example {
description: "Returns unique values from a series",
example: "[2 2 2 2 2] | dfr to-df | dfr unique",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![Value::test_int(2)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Creates a is unique expression from a column",
example: "dfr col a | dfr unique",
result: None,
},
]
}
fn run(
@ -134,7 +141,7 @@ fn command_lazy(
lazy.unique_stable(subset, strategy).into()
};
Ok(PipelineData::Value(lazy.into_value(call.head), None))
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
#[cfg(test)]

View File

@ -6,6 +6,7 @@ use nu_protocol::{
};
use super::eager::ToDataFrame;
use super::lazy::{LazyCollect, ToLazyFrame};
use crate::Let;
pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {
@ -23,6 +24,8 @@ pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {
let mut working_set = StateWorkingSet::new(&*engine_state);
working_set.add_decl(Box::new(Let));
working_set.add_decl(Box::new(ToDataFrame));
working_set.add_decl(Box::new(ToLazyFrame));
working_set.add_decl(Box::new(LazyCollect));
// Adding the command that is being tested to the working set
for cmd in cmds {

View File

@ -2,9 +2,11 @@ mod nu_dataframe;
mod nu_expression;
mod nu_lazyframe;
mod nu_lazygroupby;
mod nu_when;
pub mod utils;
pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_expression::NuExpression;
pub use nu_lazyframe::NuLazyFrame;
pub use nu_lazygroupby::NuLazyGroupBy;
pub use nu_when::NuWhen;

View File

@ -715,7 +715,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
}
DataFrame::new(df_series)
.map(NuDataFrame::new)
.map(|df| NuDataFrame::new(false, df))
.map_err(|e| {
ShellError::GenericError(
"Error creating dataframe".into(),

View File

@ -12,7 +12,10 @@ impl CustomValue for NuDataFrame {
}
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuDataFrame(self.0.clone());
let cloned = NuDataFrame {
df: self.df.clone(),
from_lazy: false,
};
Value::CustomValue {
val: Box::new(cloned),

View File

@ -8,11 +8,11 @@ pub use operations::Axis;
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
use super::utils::DEFAULT_ROWS;
use super::{utils::DEFAULT_ROWS, NuLazyFrame};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
@ -70,29 +70,39 @@ impl PolarsObject for DataFrameValue {
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
pub struct NuDataFrame {
pub df: DataFrame,
pub from_lazy: bool,
}
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
&self.df
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
&mut self.df
}
}
impl From<DataFrame> for NuDataFrame {
fn from(dataframe: DataFrame) -> Self {
Self(dataframe)
fn from(df: DataFrame) -> Self {
Self {
df,
from_lazy: false,
}
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
pub fn new(from_lazy: bool, df: DataFrame) -> Self {
Self { df, from_lazy }
}
pub fn lazy(&self) -> LazyFrame {
self.df.clone().lazy()
}
fn default_value(span: Span) -> Value {
@ -102,15 +112,23 @@ impl NuDataFrame {
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
val: Box::new(Self::new(false, dataframe)),
span,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
if self.from_lazy {
let lazy = NuLazyFrame::from_dataframe(self);
Value::CustomValue {
val: Box::new(lazy),
span,
}
} else {
Value::CustomValue {
val: Box::new(self),
span,
}
}
}
@ -170,7 +188,7 @@ impl NuDataFrame {
)
})?;
Ok(Self::new(dataframe))
Ok(Self::new(false, dataframe))
}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
@ -187,9 +205,30 @@ impl NuDataFrame {
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
if Self::can_downcast(&value) {
Ok(Self::get_df(value)?)
} else if NuLazyFrame::can_downcast(&value) {
let span = value.span()?;
let lazy = NuLazyFrame::try_from_value(value)?;
let df = lazy.collect(span)?;
Ok(df)
} else {
Err(ShellError::CantConvert(
"lazy or eager dataframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
}
pub fn get_df(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(df) => Ok(NuDataFrame(df.0.clone())),
Some(df) => Ok(NuDataFrame {
df: df.df.clone(),
from_lazy: false,
}),
None => Err(ShellError::CantConvert(
"dataframe".into(),
"non-dataframe".into(),
@ -220,9 +259,9 @@ impl NuDataFrame {
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let s = self.df.column(column).map_err(|_| {
let possibilities = self
.0
.df
.get_column_names()
.iter()
.map(|name| name.to_string())
@ -232,7 +271,7 @@ impl NuDataFrame {
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
let df = DataFrame::new(vec![s.clone()]).map_err(|e| {
ShellError::GenericError(
"Error creating dataframe".into(),
e.to_string(),
@ -242,11 +281,14 @@ impl NuDataFrame {
)
})?;
Ok(Self(dataframe))
Ok(Self {
df,
from_lazy: false,
})
}
pub fn is_series(&self) -> bool {
self.0.width() == 1
self.df.width() == 1
}
pub fn as_series(&self, span: Span) -> Result<Series, ShellError> {
@ -261,7 +303,7 @@ impl NuDataFrame {
}
let series = self
.0
.df
.get_columns()
.get(0)
.expect("We have already checked that the width is 1");
@ -286,7 +328,7 @@ impl NuDataFrame {
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let df = &self.df;
let size: usize = 20;
if df.height() > size {
@ -305,7 +347,7 @@ impl NuDataFrame {
}
pub fn height(&self) -> usize {
self.0.height()
self.df.height()
}
pub fn head(&self, rows: Option<usize>, span: Span) -> Result<Vec<Value>, ShellError> {
@ -316,7 +358,7 @@ impl NuDataFrame {
}
pub fn tail(&self, rows: Option<usize>, span: Span) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let df = &self.df;
let to_row = df.height();
let size = rows.unwrap_or(DEFAULT_ROWS);
let from_row = to_row.saturating_sub(size);
@ -332,12 +374,12 @@ impl NuDataFrame {
to_row: usize,
span: Span,
) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let df = &self.df;
let upper_row = to_row.min(df.height());
let mut size: usize = 0;
let columns = self
.0
.df
.get_columns()
.iter()
.map(

View File

@ -73,11 +73,11 @@ impl NuDataFrame {
)
}
_ => {
if self.0.height() != rhs.0.height() {
if self.df.height() != rhs.df.height() {
return Err(ShellError::IncompatibleParameters {
left_message: format!("rows {}", self.0.height()),
left_message: format!("rows {}", self.df.height()),
left_span: lhs_span,
right_message: format!("rows {}", rhs.0.height()),
right_message: format!("rows {}", rhs.df.height()),
right_span: *rhs_span,
});
}
@ -119,10 +119,10 @@ impl NuDataFrame {
let mut columns: Vec<&str> = Vec::new();
let new_cols = self
.0
.df
.get_columns()
.iter()
.chain(other.0.get_columns())
.chain(other.df.get_columns())
.map(|s| {
let name = if columns.contains(&s.name()) {
format!("{}_{}", s.name(), "x")
@ -147,10 +147,10 @@ impl NuDataFrame {
)
})?;
Ok(NuDataFrame::new(df_new))
Ok(NuDataFrame::new(false, df_new))
}
Axis::Column => {
if self.0.width() != other.0.width() {
if self.df.width() != other.df.width() {
return Err(ShellError::IncompatibleParametersSingle(
"Dataframes with different number of columns".into(),
span,
@ -158,10 +158,10 @@ impl NuDataFrame {
}
if !self
.0
.df
.get_column_names()
.iter()
.all(|col| other.0.get_column_names().contains(col))
.all(|col| other.df.get_column_names().contains(col))
{
return Err(ShellError::IncompatibleParametersSingle(
"Dataframes with different columns names".into(),
@ -170,12 +170,12 @@ impl NuDataFrame {
}
let new_cols = self
.0
.df
.get_columns()
.iter()
.map(|s| {
let other_col = other
.0
.df
.column(s.name())
.expect("Already checked that dataframes have same columns");
@ -207,7 +207,7 @@ impl NuDataFrame {
)
})?;
Ok(NuDataFrame::new(df_new))
Ok(NuDataFrame::new(false, df_new))
}
}
}

View File

@ -100,6 +100,7 @@ impl NuExpression {
pub fn can_downcast(value: &Value) -> bool {
match value {
Value::CustomValue { val, .. } => val.as_any().downcast_ref::<Self>().is_some(),
Value::List { vals, .. } => vals.iter().all(Self::can_downcast),
Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => {
true
}

View File

@ -12,7 +12,10 @@ impl CustomValue for NuLazyFrame {
}
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuLazyFrame(self.0.clone());
let cloned = NuLazyFrame {
lazy: self.lazy.clone(),
from_eager: self.from_eager,
};
Value::CustomValue {
val: Box::new(cloned),

View File

@ -10,7 +10,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Polars LazyFrame is behind and Option to allow easy implementation of
// the Deserialize trait
#[derive(Default)]
pub struct NuLazyFrame(Option<LazyFrame>);
pub struct NuLazyFrame {
pub lazy: Option<LazyFrame>,
pub from_eager: bool,
}
// Mocked serialization of the LazyFrame object
impl Serialize for NuLazyFrame {
@ -43,7 +46,7 @@ impl AsRef<LazyFrame> for NuLazyFrame {
fn as_ref(&self) -> &polars::prelude::LazyFrame {
// The only case when there cannot be a lazy frame is if it is created
// using the default function or if created by deserializing something
self.0.as_ref().expect("there should always be a frame")
self.lazy.as_ref().expect("there should always be a frame")
}
}
@ -51,35 +54,56 @@ impl AsMut<LazyFrame> for NuLazyFrame {
fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame {
// The only case when there cannot be a lazy frame is if it is created
// using the default function or if created by deserializing something
self.0.as_mut().expect("there should always be a frame")
self.lazy.as_mut().expect("there should always be a frame")
}
}
impl From<LazyFrame> for NuLazyFrame {
fn from(lazy_frame: LazyFrame) -> Self {
Self(Some(lazy_frame))
Self {
lazy: Some(lazy_frame),
from_eager: false,
}
}
}
impl NuLazyFrame {
pub fn from_dataframe(df: NuDataFrame) -> Self {
let lazy = df.as_ref().clone().lazy();
Self(Some(lazy))
pub fn new(from_eager: bool, lazy: LazyFrame) -> Self {
Self {
lazy: Some(lazy),
from_eager,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
pub fn from_dataframe(df: NuDataFrame) -> Self {
let lazy = df.as_ref().clone().lazy();
Self {
lazy: Some(lazy),
from_eager: true,
}
}
pub fn into_value(self, span: Span) -> Result<Value, ShellError> {
if self.from_eager {
let df = self.collect(span)?;
Ok(Value::CustomValue {
val: Box::new(df),
span,
})
} else {
Ok(Value::CustomValue {
val: Box::new(self),
span,
})
}
}
pub fn into_polars(self) -> LazyFrame {
self.0.expect("lazyframe cannot be none to convert")
self.lazy.expect("lazyframe cannot be none to convert")
}
pub fn collect(self, span: Span) -> Result<NuDataFrame, ShellError> {
self.0
self.lazy
.expect("No empty lazy for collect")
.collect()
.map_err(|e| {
@ -91,13 +115,40 @@ impl NuLazyFrame {
Vec::new(),
)
})
.map(NuDataFrame::new)
.map(|df| NuDataFrame {
df,
from_lazy: !self.from_eager,
})
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
if Self::can_downcast(&value) {
Ok(Self::get_lazy_df(value)?)
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
Ok(NuLazyFrame::from_dataframe(df))
} else {
Err(ShellError::CantConvert(
"lazy or eager dataframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
Self::try_from_value(value)
}
pub fn get_lazy_df(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(expr) => Ok(Self(expr.0.clone())),
Some(expr) => Ok(Self {
lazy: expr.lazy.clone(),
from_eager: false,
}),
None => Err(ShellError::CantConvert(
"lazy frame".into(),
"non-dataframe".into(),
@ -114,11 +165,6 @@ impl NuLazyFrame {
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
Self::try_from_value(value)
}
pub fn can_downcast(value: &Value) -> bool {
if let Value::CustomValue { val, .. } = value {
val.as_any().downcast_ref::<Self>().is_some()
@ -127,30 +173,17 @@ impl NuLazyFrame {
}
}
pub fn maybe_is_eager(value: Value) -> Result<(Self, bool), ShellError> {
if Self::can_downcast(&value) {
Ok((Self::try_from_value(value)?, false))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
Ok((NuLazyFrame::from_dataframe(df), true))
} else {
Err(ShellError::CantConvert(
"lazy or eager dataframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
}
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self
where
F: Fn(LazyFrame, Expr) -> LazyFrame,
{
let df = self.0.expect("Lazy frame must not be empty to apply");
let df = self.lazy.expect("Lazy frame must not be empty to apply");
let expr = expr.into_polars();
let new_frame = f(df, expr);
new_frame.into()
Self {
from_eager: self.from_eager,
lazy: Some(new_frame),
}
}
}

View File

@ -91,8 +91,8 @@ impl NuLazyGroupBy {
from_eager: group.from_eager,
}),
None => Err(ShellError::CantConvert(
"lazy frame".into(),
"non-dataframe".into(),
"lazy groupby".into(),
"custom value".into(),
span,
None,
)),

View File

@ -0,0 +1,44 @@
use super::NuWhen;
use nu_protocol::{CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuDataFrame
impl CustomValue for NuWhen {
fn typetag_name(&self) -> &'static str {
"when"
}
fn typetag_deserialize(&self) {
unimplemented!("typetag_deserialize")
}
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = self.clone();
Value::CustomValue {
val: Box::new(cloned),
span,
}
}
fn value_string(&self) -> String {
self.typetag_name().to_string()
}
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let val = match self {
NuWhen::WhenThen(_) => "whenthen".into(),
NuWhen::WhenThenThen(_) => "whenthenthen".into(),
};
let value = Value::String { val, span };
Ok(value)
}
fn to_json(&self) -> nu_json::Value {
nu_json::Value::Null
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
}

View File

@ -0,0 +1,79 @@
mod custom_value;
use core::fmt;
use nu_protocol::{ShellError, Span, Value};
use polars::prelude::{col, when, WhenThen, WhenThenThen};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Clone)]
pub enum NuWhen {
WhenThen(WhenThen),
WhenThenThen(WhenThenThen),
}
// Mocked serialization of the LazyFrame object
impl Serialize for NuWhen {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_none()
}
}
// Mocked deserialization of the LazyFrame object
impl<'de> Deserialize<'de> for NuWhen {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(NuWhen::WhenThen(when(col("a")).then(col("b"))))
}
}
impl fmt::Debug for NuWhen {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "NuWhen")
}
}
impl From<WhenThen> for NuWhen {
fn from(when_then: WhenThen) -> Self {
NuWhen::WhenThen(when_then)
}
}
impl From<WhenThenThen> for NuWhen {
fn from(when_then_then: WhenThenThen) -> Self {
NuWhen::WhenThenThen(when_then_then)
}
}
impl NuWhen {
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(expr) => Ok(expr.clone()),
None => Err(ShellError::CantConvert(
"when expression".into(),
"non when expression".into(),
span,
None,
)),
},
x => Err(ShellError::CantConvert(
"when expression".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
}