mirror of
https://github.com/nushell/nushell.git
synced 2025-08-18 18:28:25 +02:00
Lazy dataframes (#5687)
* change between lazy and eager * when expressions * examples for aggregations * more examples for agg * examples for dataframes * checked examples * cargo fmt
This commit is contained in:
@@ -29,23 +29,37 @@ impl Command for FilterWith {
|
||||
SyntaxShape::Any,
|
||||
"boolean mask used to filter data",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Filter dataframe using a bool mask",
|
||||
example: r#"let mask = ([true false] | dfr to-df);
|
||||
vec![
|
||||
Example {
|
||||
description: "Filter dataframe using a bool mask",
|
||||
example: r#"let mask = ([true false] | dfr to-df);
|
||||
[[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with $mask"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with ((dfr col a) > 1)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@@ -81,31 +95,42 @@ fn command_eager(
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mask_value: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let mask_span = mask_value.span()?;
|
||||
let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
|
||||
let mask = mask.bool().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to bool".into(),
|
||||
e.to_string(),
|
||||
Some(mask_span),
|
||||
Some("Perhaps you want to use a series with booleans as mask".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
df.as_ref()
|
||||
.filter(mask)
|
||||
.map_err(|e| {
|
||||
if NuExpression::can_downcast(&mask_value) {
|
||||
let expression = NuExpression::try_from_value(mask_value)?;
|
||||
let lazy = NuLazyFrame::new(true, df.lazy());
|
||||
let lazy = lazy.apply_with_expr(expression, LazyFrame::filter);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
} else {
|
||||
let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
|
||||
let mask = mask.bool().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating dummies".into(),
|
||||
"Error casting to bool".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
Some("The only allowed column types for dummies are String or Int".into()),
|
||||
Some(mask_span),
|
||||
Some("Perhaps you want to use a series with booleans as mask".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
})?;
|
||||
|
||||
df.as_ref()
|
||||
.filter(mask)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error filtering dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
Some("The only allowed column types for dummies are String or Int".into()),
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
@@ -120,7 +145,7 @@ fn command_lazy(
|
||||
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head),
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
@@ -129,9 +154,10 @@ fn command_lazy(
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::expressions::ExprCol;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(FilterWith {})])
|
||||
test_dataframe(vec![Box::new(FilterWith {}), Box::new(ExprCol {})])
|
||||
}
|
||||
}
|
||||
|
@@ -22,7 +22,7 @@ impl Command for FirstDF {
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional("rows", SyntaxShape::Int, "Number of rows for head")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
|
@@ -22,7 +22,7 @@ impl Command for LastDF {
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
|
@@ -18,7 +18,6 @@ mod rename;
|
||||
mod sample;
|
||||
mod shape;
|
||||
mod slice;
|
||||
mod sort;
|
||||
mod take;
|
||||
mod to_csv;
|
||||
mod to_df;
|
||||
@@ -48,7 +47,6 @@ pub use rename::RenameDF;
|
||||
pub use sample::SampleDF;
|
||||
pub use shape::ShapeDF;
|
||||
pub use slice::SliceDF;
|
||||
pub use sort::SortDF;
|
||||
pub use take::TakeDF;
|
||||
pub use to_csv::ToCSV;
|
||||
pub use to_df::ToDataFrame;
|
||||
@@ -88,7 +86,6 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
SampleDF,
|
||||
ShapeDF,
|
||||
SliceDF,
|
||||
SortDF,
|
||||
TakeDF,
|
||||
ToCSV,
|
||||
ToDataFrame,
|
||||
|
@@ -14,7 +14,7 @@ pub struct RenameDF;
|
||||
|
||||
impl Command for RenameDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr rename-col"
|
||||
"dfr rename"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@@ -33,28 +33,65 @@ impl Command for RenameDF {
|
||||
SyntaxShape::Any,
|
||||
"New names for the selected column(s). A string or list of strings",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Renames a dataframe column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename-col a a_new",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Renames a series",
|
||||
example: "[5 6 7 8] | dfr to-df | dfr rename '0' new_name",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"new_name".to_string(),
|
||||
vec![
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(7),
|
||||
Value::test_int(8),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames a dataframe column",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename a a_new",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Renames two dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename [a b] [a_new b_new]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a_new".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b_new".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@@ -133,7 +170,7 @@ fn command_lazy(
|
||||
let lazy = lazy.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@@ -1,148 +0,0 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::{utils::convert_columns_string, Column};
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SortDF;
|
||||
|
||||
impl Command for SortDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr sort"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new sorted dataframe or series"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.switch("reverse", "invert sort", Some('r'))
|
||||
.rest("rest", SyntaxShape::Any, "column names to sort dataframe")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new sorted dataframe",
|
||||
example: "[[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Create new sorted series",
|
||||
example: "[3 4 1 2] | dfr to-df | dfr sort",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(4),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let reverse = call.has_flag("reverse");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
if df.is_series() {
|
||||
let columns = df.as_ref().get_column_names();
|
||||
|
||||
df.as_ref()
|
||||
.sort(columns, reverse)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error sorting dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
} else {
|
||||
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
|
||||
if !columns.is_empty() {
|
||||
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
|
||||
|
||||
df.as_ref()
|
||||
.sort(&col_string, reverse)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error sorting dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| {
|
||||
PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)
|
||||
})
|
||||
} else {
|
||||
Err(ShellError::GenericError(
|
||||
"Missing columns".into(),
|
||||
"missing column name to perform sort".into(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(SortDF {})])
|
||||
}
|
||||
}
|
@@ -27,7 +27,7 @@ impl Command for WithColumn {
|
||||
SyntaxShape::Any,
|
||||
"series to be added or expressions used to define the new columns",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.category(Category::Custom("dataframe or lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
@@ -59,11 +59,34 @@ impl Command for WithColumn {
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr to-df
|
||||
| dfr to-lazy
|
||||
| dfr with-column ((dfr col a) * 2 | dfr as "c")
|
||||
| dfr with-column [
|
||||
((dfr col a) * 2 | dfr as "c")
|
||||
((dfr col a) * 3 | dfr as "d")
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: None,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"d".to_string(),
|
||||
vec![Value::test_int(3), Value::test_int(9)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
@@ -85,7 +108,7 @@ impl Command for WithColumn {
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
"lazy or eager dataframe".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
@@ -100,34 +123,49 @@ fn command_eager(
|
||||
call: &Call,
|
||||
mut df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let other_value: Value = call.req(engine_state, stack, 0)?;
|
||||
let other_span = other_value.span()?;
|
||||
let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?;
|
||||
let new_column: Value = call.req(engine_state, stack, 0)?;
|
||||
let column_span = new_column.span()?;
|
||||
|
||||
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
|
||||
Some(name) => name,
|
||||
None => other.name().to_string(),
|
||||
};
|
||||
if NuExpression::can_downcast(&new_column) {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
let lazy = NuLazyFrame::new(true, df.lazy().with_columns(&expressions));
|
||||
|
||||
let series = other.rename(&name).clone();
|
||||
let df = lazy.collect(call.head)?;
|
||||
|
||||
df.as_mut()
|
||||
.with_column(series)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error adding column to dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(other_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| {
|
||||
PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(df.clone(), call.head),
|
||||
None,
|
||||
)
|
||||
})
|
||||
Ok(PipelineData::Value(df.into_value(call.head), None))
|
||||
} else {
|
||||
let mut other = NuDataFrame::try_from_value(new_column)?.as_series(column_span)?;
|
||||
|
||||
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
|
||||
Some(name) => name,
|
||||
None => other.name().to_string(),
|
||||
};
|
||||
|
||||
let series = other.rename(&name).clone();
|
||||
|
||||
df.as_mut()
|
||||
.with_column(series)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error adding column to dataframe".into(),
|
||||
e.to_string(),
|
||||
Some(column_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| {
|
||||
PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(df.clone(), call.head),
|
||||
None,
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
@@ -146,7 +184,7 @@ fn command_lazy(
|
||||
let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head),
|
||||
NuLazyFrame::into_value(lazy, call.head)?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
@@ -155,9 +193,15 @@ fn command_lazy(
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::expressions::ExprAlias;
|
||||
use crate::dataframe::expressions::ExprCol;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(WithColumn {})])
|
||||
test_dataframe(vec![
|
||||
Box::new(WithColumn {}),
|
||||
Box::new(ExprAlias {}),
|
||||
Box::new(ExprCol {}),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user