Lazy dataframes (#5546)

* lazyframe definition

* expressions and lazy frames

* new alias expression

* more expression commands

* updated to polars main

* more expressions and groupby

* more expressions, fetch and sort-by

* csv reader

* removed open csv

* unique function

* joining functions

* join lazy frames commands with eager commands

* corrected tests

* Update .gitignore

* Update .gitignore

Co-authored-by: JT <547158+jntrnr@users.noreply.github.com>
Fernando Herrera authored on 2022-05-16 08:27:43 +01:00, committed by GitHub
parent 2062e33c37
commit 8bd68416e3
71 changed files with 3304 additions and 1364 deletions
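In short: the eager-only group-by, aggregate, join and pivot commands are removed and replaced by Polars lazy frames plus an expression DSL (dfr col, dfr lit, dfr as, dfr when), while several eager commands (filter-with, first, last, rename, with-column) now dispatch to either an eager or a lazy implementation. A minimal usage sketch, pieced together from the examples that appear in the diffs below; the pipeline is illustrative and its output is not shown in this commit:

    # build an eager dataframe, switch to a lazy frame, derive a column
    # from an expression, and collect the result back into a dataframe
    [[a b]; [1 2] [3 4]]
    | dfr to-df
    | dfr to-lazy
    | dfr with-column ((dfr col a) * 2 | dfr as "c")
    | dfr collect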

.gitignore

@@ -23,4 +23,5 @@ debian/nu/
 .vscode/*
 # Helix configuration folder
+.helix/*
 .helix

Cargo.lock

@@ -141,9 +141,9 @@ dependencies = [
 [[package]]
 name = "arrow2"
-version = "0.10.1"
+version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
+checksum = "b040061368d1314b0fd8b8f1fde0671eba1afc63a1c61a4dafaf2d4fc10c96f9"
 dependencies = [
 "arrow-format",
 "base64",
@@ -2980,15 +2980,6 @@ dependencies = [
 "vcpkg",
 ]
-[[package]]
-name = "ordered-float"
-version = "1.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7"
-dependencies = [
-"num-traits",
-]
 [[package]]
 name = "output_vt100"
 version = "0.1.3"
@@ -3060,22 +3051,20 @@ dependencies = [
 [[package]]
 name = "parquet-format-async-temp"
-version = "0.2.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03abc2f9c83fe9ceec83f47c76cc071bfd56caba33794340330f35623ab1f544"
+checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf"
 dependencies = [
 "async-trait",
-"byteorder",
 "futures",
 "integer-encoding",
-"ordered-float",
 ]
 [[package]]
 name = "parquet2"
-version = "0.10.3"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b085f9e78e4842865151b693f6d94bdf7b280af66daa6e3587adeb3106a07e9"
+checksum = "98f99f9724402d81faadd9cfa1e8dc78055fd0ddfdbefb7adab3a3a13e893408"
 dependencies = [
 "async-stream",
 "bitpacking",
@@ -3247,33 +3236,35 @@ dependencies = [
 [[package]]
 name = "polars"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "656db3b86c338a8a717476eb29436a380ebdf74915a71cff6ecce78d52173e53"
+checksum = "b140da767e129c60c41c8e1968ffab5f114bcf823182edb7fa900464a31bf421"
 dependencies = [
 "polars-core",
 "polars-io",
 "polars-lazy",
+"polars-ops",
 "polars-time",
 ]
 [[package]]
 name = "polars-arrow"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcedf44a7b15b60c69e811c9d343ac459788e961dc4136f002ed1b68a1fada07"
+checksum = "6d27df11ee28956bd6f5aed54e7e05ce87b886871995e1da501134627ec89077"
 dependencies = [
 "arrow2",
 "hashbrown 0.12.0",
 "num 0.4.0",
+"serde",
 "thiserror",
 ]
 [[package]]
 name = "polars-core"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dfed0e21ac4d4c85df45b5864a68cfc5b2a97e9fba8a981be7b09c6f02a7eaa"
+checksum = "fdf8d12cb7ec278516228fc86469f98c62ab81ca31e4e76d2c0ccf5a09c70491"
 dependencies = [
 "ahash",
 "anyhow",
@@ -3284,8 +3275,8 @@ dependencies = [
 "indexmap",
 "lazy_static",
 "num 0.4.0",
-"num_cpus",
 "polars-arrow",
+"polars-utils",
 "rand 0.8.5",
 "rand_distr",
 "rayon",
@@ -3297,9 +3288,9 @@ dependencies = [
 [[package]]
 name = "polars-io"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8770fb4233ab88affac80c410be090dc7a2c044a9e4e7b942132e94ceeb732b"
+checksum = "fdd4b762e5694f359ded21ca0627b5bc95b6eb49f6b330569afc1d20f0564b01"
 dependencies = [
 "ahash",
 "anyhow",
@@ -3311,21 +3302,22 @@ dependencies = [
 "memchr",
 "memmap2",
 "num 0.4.0",
-"num_cpus",
 "polars-arrow",
 "polars-core",
+"polars-time",
 "polars-utils",
 "rayon",
 "regex",
+"serde",
 "serde_json",
 "simdutf8",
 ]
 [[package]]
 name = "polars-lazy"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4eca1fed3b88ae1bb9b7f1d7b2958f1655d9c1aed33495d6ba30ff84a0c1e9e9"
+checksum = "eedc21001f05611e41bb7439b38d0f4ef9406aa49c17f3b289b5f57d8fa40c59"
 dependencies = [
 "ahash",
 "glob",
@@ -3336,24 +3328,36 @@ dependencies = [
 "polars-time",
 "polars-utils",
 "rayon",
+"serde",
 ]
 [[package]]
-name = "polars-time"
-version = "0.20.0"
+name = "polars-ops"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fe48c759ca778a8b6fb30f70e9a81b56f0987a82dc71e61c5b2d3c236b6b8d6"
+checksum = "86fae68f0992955f224f09d1f15648a6fb76d8e3b962efac2f97ccc2aa58977a"
 dependencies = [
-"chrono",
-"polars-arrow",
 "polars-core",
 ]
 [[package]]
-name = "polars-utils"
-version = "0.20.0"
+name = "polars-time"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71011e8ed52f123ce23d110b496c8704d0a59c5fd4115cd938e7ff19d4bcb7ca"
+checksum = "be499f73749e820f96689c5f9ec59669b7cdd551d864358e2bdaebb5944e4bfb"
+dependencies = [
+"chrono",
+"lexical",
+"polars-arrow",
+"polars-core",
+"serde",
+]
+[[package]]
+name = "polars-utils"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7f4cd569d383f5f000abbd6d5146550e6cb4e43fac30d1af98699499a440d56"
 dependencies = [
 "parking_lot 0.12.0",
 "rayon",
@@ -5333,18 +5337,18 @@ dependencies = [
 [[package]]
 name = "zstd"
-version = "0.10.0+zstd.1.5.2"
+version = "0.11.1+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd"
+checksum = "77a16b8414fde0414e90c612eba70985577451c4c504b99885ebed24762cb81a"
 dependencies = [
 "zstd-safe",
 ]
 [[package]]
 name = "zstd-safe"
-version = "4.1.4+zstd.1.5.2"
+version = "5.0.1+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee"
+checksum = "7c12659121420dd6365c5c3de4901f97145b79651fb1d25814020ed2ed0585ae"
 dependencies = [
 "libc",
 "zstd-sys",
@@ -5352,9 +5356,9 @@ dependencies = [
 [[package]]
 name = "zstd-sys"
-version = "1.6.3+zstd.1.5.2"
+version = "2.0.1+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
+checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b"
 dependencies = [
 "cc",
 "libc",


@@ -96,13 +96,15 @@ version = "2.0.2"
 optional = true
 [dependencies.polars]
-version = "0.20.0"
+version = "0.21.1"
+# path = "../../../../polars/polars"
 optional = true
 features = [
-"default", "parquet", "json", "serde", "object",
-"checked_arithmetic", "strings", "cum_agg", "is_in",
-"rolling_window", "strings", "rows", "random",
-"dtype-datetime"
+"default", "to_dummies", "parquet", "json", "serde", "serde-lazy",
+"object", "checked_arithmetic", "strings", "cum_agg", "is_in",
+"rolling_window", "strings", "rows", "random",
+"dtype-datetime", "dtype-struct", "lazy", "cross_join",
+"dynamic_groupby"
 ]
 [features]


@@ -1,403 +0,0 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
did_you_mean,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use polars::{
frame::groupby::GroupBy,
prelude::{PolarsError, QuantileInterpolOptions},
};
use crate::dataframe::values::NuGroupBy;
use super::super::values::{Column, NuDataFrame};
enum Operation {
Mean,
Sum,
Min,
Max,
First,
Last,
Nunique,
Quantile(f64),
Median,
Var,
Std,
Count,
}
impl Operation {
fn from_tagged(
name: &Spanned<String>,
quantile: Option<Spanned<f64>>,
) -> Result<Operation, ShellError> {
match name.item.as_ref() {
"mean" => Ok(Operation::Mean),
"sum" => Ok(Operation::Sum),
"min" => Ok(Operation::Min),
"max" => Ok(Operation::Max),
"first" => Ok(Operation::First),
"last" => Ok(Operation::Last),
"nunique" => Ok(Operation::Nunique),
"quantile" => match quantile {
None => Err(ShellError::GenericError(
"Quantile value not fount".into(),
"Quantile operation requires quantile value".into(),
Some(name.span),
None,
Vec::new(),
)),
Some(value) => {
if (value.item < 0.0) | (value.item > 1.0) {
Err(ShellError::GenericError(
"Inappropriate quantile".into(),
"Quantile value should be between 0.0 and 1.0".into(),
Some(value.span),
None,
Vec::new(),
))
} else {
Ok(Operation::Quantile(value.item))
}
}
},
"median" => Ok(Operation::Median),
"var" => Ok(Operation::Var),
"std" => Ok(Operation::Std),
"count" => Ok(Operation::Count),
selection => {
let possibilities = [
"mean".to_string(),
"sum".to_string(),
"min".to_string(),
"max".to_string(),
"first".to_string(),
"last".to_string(),
"nunique".to_string(),
"quantile".to_string(),
"median".to_string(),
"var".to_string(),
"std".to_string(),
"count".to_string(),
];
match did_you_mean(&possibilities, selection) {
Some(suggestion) => Err(ShellError::DidYouMean(suggestion, name.span)),
None => Err(ShellError::GenericError(
"Operation not fount".into(),
"Operation does not exist".into(),
Some(name.span),
Some("Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count".into()),
Vec::new(),
))
}
}
}
}
fn to_str(&self) -> &'static str {
match self {
Self::Mean => "mean",
Self::Sum => "sum",
Self::Min => "min",
Self::Max => "max",
Self::First => "first",
Self::Last => "last",
Self::Nunique => "nunique",
Self::Quantile(_) => "quantile",
Self::Median => "median",
Self::Var => "var",
Self::Std => "std",
Self::Count => "count",
}
}
}
#[derive(Clone)]
pub struct Aggregate;
impl Command for Aggregate {
fn name(&self) -> &str {
"dfr aggregate"
}
fn usage(&self) -> &str {
"Performs an aggregation operation on a dataframe and groupby object"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"operation_name",
SyntaxShape::String,
"\n\tDataframes: mean, sum, min, max, quantile, median, var, std
\tGroupBy: mean, sum, min, max, first, last, nunique, quantile, median, var, std, count",
)
.named(
"quantile",
SyntaxShape::Number,
"quantile value for quantile operation",
Some('q'),
)
.switch(
"explicit",
"returns explicit names for groupby aggregations",
Some('e'),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Aggregate sum by grouping by column a and summing on col b",
example:
"[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a | dfr aggregate sum",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_string("one")]),
Column::new("b".to_string(), vec![Value::test_int(3)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Aggregate sum in dataframe columns",
example: "[[a b]; [4 1] [5 2]] | dfr to-df | dfr aggregate sum",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new("a".to_string(), vec![Value::test_int(9)]),
Column::new("b".to_string(), vec![Value::test_int(3)]),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Aggregate sum in series",
example: "[4 1 5 6] | dfr to-df | dfr aggregate sum",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"0".to_string(),
vec![Value::test_int(16)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let operation: Spanned<String> = call.req(engine_state, stack, 0)?;
let quantile: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "quantile")?;
let op = Operation::from_tagged(&operation, quantile)?;
match input {
PipelineData::Value(Value::CustomValue { val, span }, _) => {
let df = val.as_any().downcast_ref::<NuDataFrame>();
let groupby = val.as_any().downcast_ref::<NuGroupBy>();
match (df, groupby) {
(Some(df), None) => {
let df = df.as_ref();
let res = perform_dataframe_aggregation(df, op, operation.span)?;
Ok(PipelineData::Value(
NuDataFrame::dataframe_into_value(res, span),
None,
))
}
(None, Some(nu_groupby)) => {
let groupby = nu_groupby.to_groupby()?;
let res = perform_groupby_aggregation(
groupby,
op,
operation.span,
call.head,
call.has_flag("explicit"),
)?;
Ok(PipelineData::Value(
NuDataFrame::dataframe_into_value(res, span),
None,
))
}
_ => Err(ShellError::GenericError(
"Incorrect datatype".into(),
"no groupby or dataframe found in input stream".into(),
Some(call.head),
None,
Vec::new(),
)),
}
}
_ => Err(ShellError::GenericError(
"Incorrect datatype".into(),
"no groupby or dataframe found in input stream".into(),
Some(call.head),
None,
Vec::new(),
)),
}
}
fn perform_groupby_aggregation(
groupby: GroupBy,
operation: Operation,
operation_span: Span,
agg_span: Span,
explicit: bool,
) -> Result<polars::prelude::DataFrame, ShellError> {
let mut res = match operation {
Operation::Mean => groupby.mean(),
Operation::Sum => groupby.sum(),
Operation::Min => groupby.min(),
Operation::Max => groupby.max(),
Operation::First => groupby.first(),
Operation::Last => groupby.last(),
Operation::Nunique => groupby.n_unique(),
Operation::Quantile(quantile) => {
groupby.quantile(quantile, QuantileInterpolOptions::default())
}
Operation::Median => groupby.median(),
Operation::Var => groupby.var(),
Operation::Std => groupby.std(),
Operation::Count => groupby.count(),
}
.map_err(|e| {
let span = match &e {
PolarsError::NotFound(_) => agg_span,
_ => operation_span,
};
ShellError::GenericError(
"Error calculating aggregation".into(),
e.to_string(),
Some(span),
None,
Vec::new(),
)
})?;
if !explicit {
let col_names = res
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
for col in col_names {
let from = match operation {
Operation::Mean => "_mean",
Operation::Sum => "_sum",
Operation::Min => "_min",
Operation::Max => "_max",
Operation::First => "_first",
Operation::Last => "_last",
Operation::Nunique => "_n_unique",
Operation::Quantile(_) => "_quantile",
Operation::Median => "_median",
Operation::Var => "_agg_var",
Operation::Std => "_agg_std",
Operation::Count => "_count",
};
let new_col = match col.find(from) {
Some(index) => &col[..index],
None => &col[..],
};
res.rename(&col, new_col)
.expect("Column is always there. Looping with known names");
}
}
Ok(res)
}
fn perform_dataframe_aggregation(
dataframe: &polars::prelude::DataFrame,
operation: Operation,
operation_span: Span,
) -> Result<polars::prelude::DataFrame, ShellError> {
match operation {
Operation::Mean => Ok(dataframe.mean()),
Operation::Sum => Ok(dataframe.sum()),
Operation::Min => Ok(dataframe.min()),
Operation::Max => Ok(dataframe.max()),
Operation::Quantile(quantile) => dataframe
.quantile(quantile, QuantileInterpolOptions::default())
.map_err(|e| {
ShellError::GenericError(
"Error calculating quantile".into(),
e.to_string(),
Some(operation_span),
None,
Vec::new(),
)
}),
Operation::Median => Ok(dataframe.median()),
Operation::Var => Ok(dataframe.var()),
Operation::Std => Ok(dataframe.std()),
operation => {
let possibilities = [
"mean".to_string(),
"sum".to_string(),
"min".to_string(),
"max".to_string(),
"quantile".to_string(),
"median".to_string(),
"var".to_string(),
"std".to_string(),
];
match did_you_mean(&possibilities, operation.to_str()) {
Some(suggestion) => Err(ShellError::DidYouMean(suggestion, operation_span)),
None => Err(ShellError::GenericError(
"Operation not fount".into(),
"Operation does not exist".into(),
Some(operation_span),
Some(
"Perhaps you want: mean, sum, min, max, quantile, median, var, or std"
.into(),
),
Vec::new(),
)),
}
}
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::super::CreateGroupBy;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(Aggregate {}), Box::new(CreateGroupBy {})])
}
}


@@ -4,7 +4,7 @@ use nu_protocol::{
 engine::{Command, EngineState, Stack},
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
-use polars::prelude::DistinctKeepStrategy;
+use polars::prelude::UniqueKeepStrategy;
 use super::super::values::utils::convert_columns_string;
 use super::super::values::{Column, NuDataFrame};
@@ -89,13 +89,13 @@ fn command(
 let subset_slice = subset.as_ref().map(|cols| &cols[..]);
 let keep_strategy = if call.has_flag("last") {
-DistinctKeepStrategy::Last
+UniqueKeepStrategy::Last
 } else {
-DistinctKeepStrategy::First
+UniqueKeepStrategy::First
 };
 df.as_ref()
-.distinct(subset_slice, keep_strategy)
+.unique(subset_slice, keep_strategy)
 .map_err(|e| {
 ShellError::GenericError(
 "Error dropping duplicates".into(),


@@ -1,10 +1,10 @@
+use super::super::values::{Column, NuDataFrame};
 use nu_protocol::{
 ast::Call,
 engine::{Command, EngineState, Stack},
 Category, Example, PipelineData, ShellError, Signature, Span, Value,
 };
-use super::super::values::{Column, NuDataFrame};
+use polars::prelude::DataFrameOps;
 #[derive(Clone)]
 pub struct Dummies;


@@ -4,6 +4,9 @@ use nu_protocol::{
 engine::{Command, EngineState, Stack},
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
+use polars::prelude::LazyFrame;
+use crate::dataframe::values::{NuExpression, NuLazyFrame};
 use super::super::values::{Column, NuDataFrame};
@@ -16,12 +19,16 @@ impl Command for FilterWith {
 }
 fn usage(&self) -> &str {
-"Filters dataframe using a mask as reference"
+"Filters dataframe using a mask or expression as reference"
 }
 fn signature(&self) -> Signature {
 Signature::build(self.name())
-.required("mask", SyntaxShape::Any, "boolean mask used to filter data")
+.required(
+"mask or expression",
+SyntaxShape::Any,
+"boolean mask used to filter data",
+)
 .category(Category::Custom("dataframe".into()))
 }
@@ -48,15 +55,30 @@
 call: &Call,
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
-command(engine_state, stack, call, input)
+let value = input.into_value(call.head);
+if NuLazyFrame::can_downcast(&value) {
+let df = NuLazyFrame::try_from_value(value)?;
+command_lazy(engine_state, stack, call, df)
+} else if NuDataFrame::can_downcast(&value) {
+let df = NuDataFrame::try_from_value(value)?;
+command_eager(engine_state, stack, call, df)
+} else {
+Err(ShellError::CantConvert(
+"expression or query".into(),
+value.get_type().to_string(),
+value.span()?,
+None,
+))
+}
 }
 }
-fn command(
+fn command_eager(
 engine_state: &EngineState,
 stack: &mut Stack,
 call: &Call,
-input: PipelineData,
+df: NuDataFrame,
 ) -> Result<PipelineData, ShellError> {
 let mask_value: Value = call.req(engine_state, stack, 0)?;
@@ -72,8 +94,6 @@ fn command(
 )
 })?;
-let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 df.as_ref()
 .filter(mask)
 .map_err(|e| {
@@ -88,6 +108,23 @@ fn command(
 .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
 }
+fn command_lazy(
+engine_state: &EngineState,
+stack: &mut Stack,
+call: &Call,
+lazy: NuLazyFrame,
+) -> Result<PipelineData, ShellError> {
+let expr: Value = call.req(engine_state, stack, 0)?;
+let expr = NuExpression::try_from_value(expr)?;
+let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
+Ok(PipelineData::Value(
+NuLazyFrame::into_value(lazy, call.head),
+None,
+))
+}
 #[cfg(test)]
 mod test {
 use super::super::super::test_dataframe::test_dataframe;
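With this dispatch in place, dfr filter-with keeps accepting a boolean mask on an eager dataframe and additionally accepts an expression on a lazy frame. A hedged sketch of both call styles; the mask pipeline and the comparison operator on the expression are illustrative assumptions, not examples taken from this commit:

    # eager: filter with a boolean mask built from a list
    [[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with ([true false] | dfr to-df)
    # lazy: filter with an expression and collect the result
    [[a b]; [1 2] [3 4]] | dfr to-df | dfr to-lazy | dfr filter-with ((dfr col a) > 1) | dfr collect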


@@ -1,3 +1,5 @@
+use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
+use crate::dataframe::values::NuExpression;
 use nu_engine::CallExt;
 use nu_protocol::{
 ast::Call,
@@ -5,8 +7,6 @@ use nu_protocol::{
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
-use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
 #[derive(Clone)]
 pub struct FirstDF;
@@ -16,7 +16,7 @@ impl Command for FirstDF {
 }
 fn usage(&self) -> &str {
-"Creates new dataframe with first rows"
+"Creates new dataframe with first rows or creates a first expression"
 }
 fn signature(&self) -> Signature {
@@ -26,18 +26,25 @@
 }
 fn examples(&self) -> Vec<Example> {
-vec![Example {
-description: "Create new dataframe with head rows",
-example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
-result: Some(
-NuDataFrame::try_from_columns(vec![
-Column::new("a".to_string(), vec![Value::test_int(1)]),
-Column::new("b".to_string(), vec![Value::test_int(2)]),
-])
-.expect("simple df for test should not fail")
-.into_value(Span::test_data()),
-),
-}]
+vec![
+Example {
+description: "Create new dataframe with head rows",
+example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
+result: Some(
+NuDataFrame::try_from_columns(vec![
+Column::new("a".to_string(), vec![Value::test_int(1)]),
+Column::new("b".to_string(), vec![Value::test_int(2)]),
+])
+.expect("simple df for test should not fail")
+.into_value(Span::test_data()),
+),
+},
+Example {
+description: "Creates a first expression from a column",
+example: "dfr col a | dfr first",
+result: None,
+},
+]
 }
 fn run(
@@ -47,7 +54,27 @@
 call: &Call,
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
-command(engine_state, stack, call, input)
+let value = input.into_value(call.head);
+if NuExpression::can_downcast(&value) {
+let expr = NuExpression::try_from_value(value)?;
+let expr: NuExpression = expr.into_polars().is_null().into();
+Ok(PipelineData::Value(
+NuExpression::into_value(expr, call.head),
+None,
+))
+} else if NuDataFrame::can_downcast(&value) {
+let df = NuDataFrame::try_from_value(value)?;
+command(engine_state, stack, call, df)
+} else {
+Err(ShellError::CantConvert(
+"expression or query".into(),
+value.get_type().to_string(),
+value.span()?,
+None,
+))
+}
 }
 }
@@ -55,12 +82,11 @@ fn command(
 engine_state: &EngineState,
 stack: &mut Stack,
 call: &Call,
-input: PipelineData,
+df: NuDataFrame,
 ) -> Result<PipelineData, ShellError> {
 let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
 let rows = rows.unwrap_or(DEFAULT_ROWS);
-let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 let res = df.as_ref().head(Some(rows));
 Ok(PipelineData::Value(
 NuDataFrame::dataframe_into_value(res, call.head),


@@ -1,77 +0,0 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use super::super::values::{utils::convert_columns_string, NuDataFrame, NuGroupBy};
#[derive(Clone)]
pub struct CreateGroupBy;
impl Command for CreateGroupBy {
fn name(&self) -> &str {
"dfr group-by"
}
fn usage(&self) -> &str {
"Creates a groupby object that can be used for other aggregations"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest("rest", SyntaxShape::Any, "groupby columns")
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Grouping by column a",
example: "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
// Extracting the names of the columns to perform the groupby
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
// This is the expensive part of the groupby; to create the
// groups that will be used for grouping the data in the
// dataframe. Once it has been done these values can be stored
// in a NuGroupBy
let groupby = df.as_ref().groupby(&col_string).map_err(|e| {
ShellError::GenericError(
"Error creating groupby".into(),
e.to_string(),
Some(col_span),
None,
Vec::new(),
)
})?;
let groups = groupby.get_groups();
let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups);
Ok(PipelineData::Value(groupby.into_value(call.head), None))
}


@@ -1,235 +0,0 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use polars::prelude::JoinType;
use crate::dataframe::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
#[derive(Clone)]
pub struct JoinDF;
impl Command for JoinDF {
fn name(&self) -> &str {
"dfr join"
}
fn usage(&self) -> &str {
"Joins a dataframe using columns as reference"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("dataframe", SyntaxShape::Any, "right dataframe to join")
.required_named(
"left",
SyntaxShape::Table,
"left column names to perform join",
Some('l'),
)
.required_named(
"right",
SyntaxShape::Table,
"right column names to perform join",
Some('r'),
)
.named(
"type",
SyntaxShape::String,
"type of join. Inner by default",
Some('t'),
)
.named(
"suffix",
SyntaxShape::String,
"suffix for the columns of the right dataframe",
Some('s'),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "inner join dataframe",
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dfr to-df);
$right | dfr join $right -l [a b] -r [a b]"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
),
Column::new(
"c".to_string(),
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
),
Column::new(
"c_right".to_string(),
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let r_df: Value = call.req(engine_state, stack, 0)?;
let l_col: Vec<Value> = call
.get_flag(engine_state, stack, "left")?
.expect("required value in syntax");
let r_col: Vec<Value> = call
.get_flag(engine_state, stack, "right")?
.expect("required value in syntax");
let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;
let join_type_op: Option<Spanned<String>> = call.get_flag(engine_state, stack, "type")?;
let join_type = match join_type_op {
None => JoinType::Inner,
Some(val) => match val.item.as_ref() {
"inner" => JoinType::Inner,
"outer" => JoinType::Outer,
"left" => JoinType::Left,
_ => {
return Err(ShellError::GenericError(
"Incorrect join type".into(),
"Invalid join type".into(),
Some(val.span),
Some("Options: inner, outer or left".into()),
Vec::new(),
))
}
},
};
let (l_col_string, l_col_span) = convert_columns_string(l_col, call.head)?;
let (r_col_string, r_col_span) = convert_columns_string(r_col, call.head)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let r_df = NuDataFrame::try_from_value(r_df)?;
check_column_datatypes(
df.as_ref(),
r_df.as_ref(),
&l_col_string,
l_col_span,
&r_col_string,
r_col_span,
)?;
df.as_ref()
.join(
r_df.as_ref(),
&l_col_string,
&r_col_string,
join_type,
suffix,
)
.map_err(|e| {
ShellError::GenericError(
"Error joining dataframes".into(),
e.to_string(),
Some(l_col_span),
None,
Vec::new(),
)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
fn check_column_datatypes<T: AsRef<str>>(
df_l: &polars::prelude::DataFrame,
df_r: &polars::prelude::DataFrame,
l_cols: &[T],
l_col_span: Span,
r_cols: &[T],
r_col_span: Span,
) -> Result<(), ShellError> {
if l_cols.len() != r_cols.len() {
return Err(ShellError::GenericError(
"Mismatched number of column names".into(),
format!(
"found {} left names vs {} right names",
l_cols.len(),
r_cols.len()
),
Some(l_col_span),
Some("perhaps you need to change the number of columns to join".into()),
Vec::new(),
));
}
for (l, r) in l_cols.iter().zip(r_cols) {
let l_series = df_l.column(l.as_ref()).map_err(|e| {
ShellError::GenericError(
"Error selecting the columns".into(),
e.to_string(),
Some(l_col_span),
None,
Vec::new(),
)
})?;
let r_series = df_r.column(r.as_ref()).map_err(|e| {
ShellError::GenericError(
"Error selecting the columns".into(),
e.to_string(),
Some(r_col_span),
None,
Vec::new(),
)
})?;
if l_series.dtype() != r_series.dtype() {
return Err(ShellError::GenericError(
"Mismatched datatypes".into(),
format!(
"left column type '{}' doesn't match '{}' right column match",
l_series.dtype(),
r_series.dtype()
),
Some(l_col_span),
Some("perhaps you need to select other column to match".into()),
Vec::new(),
));
}
}
Ok(())
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(JoinDF {})])
}
}


@@ -1,3 +1,5 @@
+use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
+use crate::dataframe::values::NuExpression;
 use nu_engine::CallExt;
 use nu_protocol::{
 ast::Call,
@@ -5,8 +7,6 @@ use nu_protocol::{
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
-use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
 #[derive(Clone)]
 pub struct LastDF;
@@ -16,7 +16,7 @@ impl Command for LastDF {
 }
 fn usage(&self) -> &str {
-"Creates new dataframe with tail rows"
+"Creates new dataframe with tail rows or creates a last expression"
 }
 fn signature(&self) -> Signature {
@@ -26,18 +26,25 @@
 }
 fn examples(&self) -> Vec<Example> {
-vec![Example {
-description: "Create new dataframe with last rows",
-example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
-result: Some(
-NuDataFrame::try_from_columns(vec![
-Column::new("a".to_string(), vec![Value::test_int(3)]),
-Column::new("b".to_string(), vec![Value::test_int(4)]),
-])
-.expect("simple df for test should not fail")
-.into_value(Span::test_data()),
-),
-}]
+vec![
+Example {
+description: "Create new dataframe with last rows",
+example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
+result: Some(
+NuDataFrame::try_from_columns(vec![
+Column::new("a".to_string(), vec![Value::test_int(3)]),
+Column::new("b".to_string(), vec![Value::test_int(4)]),
+])
+.expect("simple df for test should not fail")
+.into_value(Span::test_data()),
+),
+},
+Example {
+description: "Creates a last expression from a column",
+example: "dfr col a | dfr last",
+result: None,
+},
+]
 }
 fn run(
@@ -47,7 +54,27 @@
 call: &Call,
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
-command(engine_state, stack, call, input)
+let value = input.into_value(call.head);
+if NuExpression::can_downcast(&value) {
+let expr = NuExpression::try_from_value(value)?;
+let expr: NuExpression = expr.into_polars().is_null().into();
+Ok(PipelineData::Value(
+NuExpression::into_value(expr, call.head),
+None,
+))
+} else if NuDataFrame::can_downcast(&value) {
+let df = NuDataFrame::try_from_value(value)?;
+command(engine_state, stack, call, df)
+} else {
+Err(ShellError::CantConvert(
+"expression or query".into(),
+value.get_type().to_string(),
+value.span()?,
+None,
+))
+}
 }
 }
@@ -55,12 +82,11 @@ fn command(
 engine_state: &EngineState,
 stack: &mut Stack,
 call: &Call,
-input: PipelineData,
+df: NuDataFrame,
 ) -> Result<PipelineData, ShellError> {
 let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
 let rows = rows.unwrap_or(DEFAULT_ROWS);
-let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 let res = df.as_ref().tail(Some(rows));
 Ok(PipelineData::Value(
 NuDataFrame::dataframe_into_value(res, call.head),


@@ -11,7 +11,7 @@ pub struct ListDF;
 impl Command for ListDF {
 fn name(&self) -> &str {
-"dfr list"
+"dfr ls"
 }
 fn usage(&self) -> &str {
@@ -26,7 +26,7 @@ impl Command for ListDF {
 vec![Example {
 description: "Creates a new dataframe and shows it in the dataframe list",
 example: r#"let test = ([[a b];[1 2] [3 4]] | dfr to-df);
-dfr list"#,
+dfr ls"#,
 result: None,
 }]
 }


@@ -1,4 +1,3 @@
-mod aggregate;
 mod append;
 mod column;
 mod command;
@@ -11,13 +10,10 @@ mod dummies;
 mod filter_with;
 mod first;
 mod get;
-mod groupby;
-mod join;
 mod last;
 mod list;
 mod melt;
 mod open;
-mod pivot;
 mod rename;
 mod sample;
 mod shape;
@@ -32,7 +28,6 @@ mod with_column;
 use nu_protocol::engine::StateWorkingSet;
-pub use aggregate::Aggregate;
 pub use append::AppendDF;
 pub use column::ColumnDF;
 pub use command::Dataframe;
@@ -45,13 +40,10 @@ pub use dummies::Dummies;
 pub use filter_with::FilterWith;
 pub use first::FirstDF;
 pub use get::GetDF;
-pub use groupby::CreateGroupBy;
-pub use join::JoinDF;
 pub use last::LastDF;
 pub use list::ListDF;
 pub use melt::MeltDF;
 pub use open::OpenDataFrame;
-pub use pivot::PivotDF;
 pub use rename::RenameDF;
 pub use sample::SampleDF;
 pub use shape::ShapeDF;
@@ -76,10 +68,8 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
 // Dataframe commands
 bind_command!(
-Aggregate,
 AppendDF,
 ColumnDF,
-CreateGroupBy,
 Dataframe,
 DataTypes,
 DescribeDF,
@@ -90,12 +80,10 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
 FilterWith,
 FirstDF,
 GetDF,
-JoinDF,
 LastDF,
 ListDF,
 MeltDF,
 OpenDataFrame,
-PivotDF,
 RenameDF,
 SampleDF,
 ShapeDF,


@@ -1,198 +0,0 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
};
use polars::prelude::DataType;
use crate::dataframe::values::NuGroupBy;
use super::super::values::NuDataFrame;
enum Operation {
First,
Sum,
Min,
Max,
Mean,
Median,
}
impl Operation {
fn from_tagged(name: Spanned<String>) -> Result<Operation, ShellError> {
match name.item.as_ref() {
"first" => Ok(Operation::First),
"sum" => Ok(Operation::Sum),
"min" => Ok(Operation::Min),
"max" => Ok(Operation::Max),
"mean" => Ok(Operation::Mean),
"median" => Ok(Operation::Median),
_ => Err(ShellError::GenericError(
"Operation not fount".into(),
"Operation does not exist for pivot".into(),
Some(name.span),
Some("Options: first, sum, min, max, mean, median".into()),
Vec::new(),
)),
}
}
}
#[derive(Clone)]
pub struct PivotDF;
impl Command for PivotDF {
fn name(&self) -> &str {
"dfr pivot"
}
fn usage(&self) -> &str {
"Performs a pivot operation on a groupby object"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"pivot_column",
SyntaxShape::String,
"pivot column to perform pivot",
)
.required(
"value_column",
SyntaxShape::String,
"value column to perform pivot",
)
.required("operation", SyntaxShape::String, "aggregate operation")
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Pivot a dataframe on b and aggregation on col c",
example:
"[[a b c]; [one x 1] [two y 2]] | dfr to-df | dfr group-by a | dfr pivot b c sum",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let pivot_col: Spanned<String> = call.req(engine_state, stack, 0)?;
let value_col: Spanned<String> = call.req(engine_state, stack, 1)?;
let operation: Spanned<String> = call.req(engine_state, stack, 2)?;
let op = Operation::from_tagged(operation)?;
let nu_groupby = NuGroupBy::try_from_pipeline(input, call.head)?;
let df_ref = nu_groupby.as_ref();
check_pivot_column(df_ref, &pivot_col)?;
check_value_column(df_ref, &value_col)?;
let mut groupby = nu_groupby.to_groupby()?;
let pivot = groupby.pivot(vec![&pivot_col.item], vec![&value_col.item]);
match op {
Operation::Mean => pivot.mean(),
Operation::Sum => pivot.sum(),
Operation::Min => pivot.min(),
Operation::Max => pivot.max(),
Operation::First => pivot.first(),
Operation::Median => pivot.median(),
}
.map_err(|e| {
ShellError::GenericError(
"Error creating pivot".into(),
e.to_string(),
Some(call.head),
None,
Vec::new(),
)
})
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
fn check_pivot_column(
df: &polars::prelude::DataFrame,
col: &Spanned<String>,
) -> Result<(), ShellError> {
let series = df.column(&col.item).map_err(|e| {
ShellError::GenericError(
"Column not found".into(),
e.to_string(),
Some(col.span),
None,
Vec::new(),
)
})?;
match series.dtype() {
DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::Utf8 => Ok(()),
_ => Err(ShellError::GenericError(
"Pivot error".into(),
format!("Unsupported datatype {}", series.dtype()),
Some(col.span),
None,
Vec::new(),
)),
}
}
fn check_value_column(
df: &polars::prelude::DataFrame,
col: &Spanned<String>,
) -> Result<(), ShellError> {
let series = df.column(&col.item).map_err(|e| {
ShellError::GenericError(
"Column not found".into(),
e.to_string(),
Some(col.span),
None,
Vec::new(),
)
})?;
match series.dtype() {
DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::Float32
| DataType::Float64 => Ok(()),
_ => Err(ShellError::GenericError(
"Pivot error".into(),
format!("Unsupported datatype {}", series.dtype()),
Some(col.span),
None,
Vec::new(),
)),
}
}


@@ -5,6 +5,8 @@ use nu_protocol::{
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
+use crate::dataframe::{utils::extract_strings, values::NuLazyFrame};
 use super::super::values::{Column, NuDataFrame};
 #[derive(Clone)]
@@ -21,8 +23,16 @@ impl Command for RenameDF {
 fn signature(&self) -> Signature {
 Signature::build(self.name())
-.required("from", SyntaxShape::String, "column name to be renamed")
-.required("to", SyntaxShape::String, "new column name")
+.required(
+"columns",
+SyntaxShape::Any,
+"Column(s) to be renamed. A string or list of strings",
+)
+.required(
+"new names",
+SyntaxShape::Any,
+"New names for the selected column(s). A string or list of strings",
+)
 .category(Category::Custom("dataframe".into()))
 }
@@ -54,24 +64,39 @@
 call: &Call,
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
-command(engine_state, stack, call, input)
+let value = input.into_value(call.head);
+if NuLazyFrame::can_downcast(&value) {
+let df = NuLazyFrame::try_from_value(value)?;
+command_lazy(engine_state, stack, call, df)
+} else if NuDataFrame::can_downcast(&value) {
+let df = NuDataFrame::try_from_value(value)?;
+command_eager(engine_state, stack, call, df)
+} else {
+Err(ShellError::CantConvert(
+"expression or query".into(),
+value.get_type().to_string(),
+value.span()?,
+None,
+))
+}
 }
 }
-fn command(
+fn command_eager(
 engine_state: &EngineState,
 stack: &mut Stack,
 call: &Call,
-input: PipelineData,
+mut df: NuDataFrame,
 ) -> Result<PipelineData, ShellError> {
-let from: String = call.req(engine_state, stack, 0)?;
-let to: String = call.req(engine_state, stack, 1)?;
-let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
+let columns: Value = call.req(engine_state, stack, 0)?;
+let columns = extract_strings(columns)?;
+let new_names: Value = call.req(engine_state, stack, 1)?;
+let new_names = extract_strings(new_names)?;
-df.as_mut()
-.rename(&from, &to)
-.map_err(|e| {
+for (from, to) in columns.iter().zip(new_names.iter()) {
+df.as_mut().rename(from, to).map_err(|e| {
 ShellError::GenericError(
 "Error renaming".into(),
 e.to_string(),
@@ -79,13 +104,36 @@ fn command(
 None,
 Vec::new(),
 )
-})
-.map(|df| {
-PipelineData::Value(
-NuDataFrame::dataframe_into_value(df.clone(), call.head),
-None,
-)
-})
+})?;
+}
+Ok(PipelineData::Value(df.into_value(call.head), None))
+}
+fn command_lazy(
+engine_state: &EngineState,
+stack: &mut Stack,
+call: &Call,
+lazy: NuLazyFrame,
+) -> Result<PipelineData, ShellError> {
+let columns: Value = call.req(engine_state, stack, 0)?;
+let columns = extract_strings(columns)?;
+let new_names: Value = call.req(engine_state, stack, 1)?;
+let new_names = extract_strings(new_names)?;
+if columns.len() != new_names.len() {
+let value: Value = call.req(engine_state, stack, 1)?;
+return Err(ShellError::IncompatibleParametersSingle(
+"New name list has different size to column list".into(),
+value.span()?,
+));
+}
+let lazy = lazy.into_polars();
+let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
+Ok(PipelineData::Value(lazy.into_value(call.head), None))
 }
 #[cfg(test)]
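Since dfr rename now takes either a single name or a list of names for both arguments, several columns can be renamed in one call on an eager or a lazy frame. A small sketch assuming the list form described by the new signature above; the command name and exact list syntax follow the surrounding module's conventions rather than an example in this hunk:

    [[a b]; [1 2] [3 4]] | dfr to-df | dfr rename [a b] [x y]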


@@ -33,6 +33,12 @@ impl Command for SampleDF {
 "fraction of dataframe to be taken",
 Some('f'),
 )
+.named(
+"seed",
+SyntaxShape::Number,
+"seed for the selection",
+Some('s'),
+)
 .switch("replace", "sample with replace", Some('e'))
 .category(Category::Custom("dataframe".into()))
 }
@@ -71,12 +77,15 @@ fn command(
 ) -> Result<PipelineData, ShellError> {
 let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
 let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
+let seed: Option<u64> = call
+.get_flag::<i64>(engine_state, stack, "seed")?
+.map(|val| val as u64);
 let replace: bool = call.has_flag("replace");
 let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 match (rows, fraction) {
-(Some(rows), None) => df.as_ref().sample_n(rows.item, replace, 0).map_err(|e| {
+(Some(rows), None) => df.as_ref().sample_n(rows.item, replace, seed).map_err(|e| {
 ShellError::GenericError(
 "Error creating sample".into(),
 e.to_string(),
@@ -85,15 +94,18 @@ fn command(
 Vec::new(),
 )
 }),
-(None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace, 0).map_err(|e| {
-ShellError::GenericError(
-"Error creating sample".into(),
-e.to_string(),
-Some(frac.span),
-None,
-Vec::new(),
-)
-}),
+(None, Some(frac)) => df
+.as_ref()
+.sample_frac(frac.item, replace, seed)
+.map_err(|e| {
+ShellError::GenericError(
+"Error creating sample".into(),
+e.to_string(),
+Some(frac.span),
+None,
+Vec::new(),
+)
+}),
 (Some(_), Some(_)) => Err(ShellError::GenericError(
 "Incompatible flags".into(),
 "Only one selection criterion allowed".into(),


@@ -1,12 +1,12 @@
+use super::super::values::{Column, NuDataFrame};
+use crate::dataframe::values::{NuExpression, NuLazyFrame};
 use nu_engine::CallExt;
 use nu_protocol::{
 ast::Call,
 engine::{Command, EngineState, Stack},
-Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
+Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
 };
-use super::super::values::{Column, NuDataFrame};
 #[derive(Clone)]
 pub struct WithColumn;
@@ -21,35 +21,51 @@ impl Command for WithColumn {
 fn signature(&self) -> Signature {
 Signature::build(self.name())
-.required("series", SyntaxShape::Any, "series to be added")
-.required_named("name", SyntaxShape::String, "column name", Some('n'))
+.named("name", SyntaxShape::String, "new column name", Some('n'))
+.rest(
+"series or expressions",
+SyntaxShape::Any,
+"series to be added or expressions used to define the new columns",
+)
 .category(Category::Custom("dataframe".into()))
 }
 fn examples(&self) -> Vec<Example> {
-vec![Example {
-description: "Adds a series to the dataframe",
-example:
-"[[a b]; [1 2] [3 4]] | dfr to-df | dfr with-column ([5 6] | dfr to-df) --name c",
-result: Some(
-NuDataFrame::try_from_columns(vec![
-Column::new(
-"a".to_string(),
-vec![Value::test_int(1), Value::test_int(3)],
-),
-Column::new(
-"b".to_string(),
-vec![Value::test_int(2), Value::test_int(4)],
-),
-Column::new(
-"c".to_string(),
-vec![Value::test_int(5), Value::test_int(6)],
-),
-])
-.expect("simple df for test should not fail")
-.into_value(Span::test_data()),
-),
-}]
+vec![
+Example {
+description: "Adds a series to the dataframe",
+example: r#"[[a b]; [1 2] [3 4]]
+| dfr to-df
+| dfr with-column ([5 6] | dfr to-df) --name c"#,
+result: Some(
+NuDataFrame::try_from_columns(vec![
+Column::new(
+"a".to_string(),
+vec![Value::test_int(1), Value::test_int(3)],
+),
+Column::new(
+"b".to_string(),
+vec![Value::test_int(2), Value::test_int(4)],
+),
+Column::new(
+"c".to_string(),
+vec![Value::test_int(5), Value::test_int(6)],
+),
+])
+.expect("simple df for test should not fail")
+.into_value(Span::test_data()),
+),
+},
+Example {
+description: "Adds a series to the dataframe",
+example: r#"[[a b]; [1 2] [3 4]]
+| dfr to-df
+| dfr to-lazy
+| dfr with-column ((dfr col a) * 2 | dfr as "c")
+| dfr collect"#,
+result: None,
+},
+]
 }
 fn run(
@@ -59,26 +75,41 @@ impl Command for WithColumn {
 call: &Call,
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
-command(engine_state, stack, call, input)
+let value = input.into_value(call.head);
+if NuLazyFrame::can_downcast(&value) {
+let df = NuLazyFrame::try_from_value(value)?;
+command_lazy(engine_state, stack, call, df)
+} else if NuDataFrame::can_downcast(&value) {
+let df = NuDataFrame::try_from_value(value)?;
+command_eager(engine_state, stack, call, df)
+} else {
+Err(ShellError::CantConvert(
+"expression or query".into(),
+value.get_type().to_string(),
+value.span()?,
+None,
+))
+}
 }
 }
-fn command(
+fn command_eager(
 engine_state: &EngineState,
 stack: &mut Stack,
 call: &Call,
-input: PipelineData,
+mut df: NuDataFrame,
 ) -> Result<PipelineData, ShellError> {
-let name: Spanned<String> = call
-.get_flag(engine_state, stack, "name")?
-.expect("required named value");
 let other_value: Value = call.req(engine_state, stack, 0)?;
 let other_span = other_value.span()?;
 let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?;
-let series = other.rename(&name.item).clone();
-let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
+let name = match call.get_flag::<String>(engine_state, stack, "name")? {
+Some(name) => name,
+None => other.name().to_string(),
+};
+let series = other.rename(&name).clone();
 df.as_mut()
 .with_column(series)
@@ -99,6 +130,27 @@ fn command(
 })
 }
+fn command_lazy(
+engine_state: &EngineState,
+stack: &mut Stack,
+call: &Call,
+lazy: NuLazyFrame,
+) -> Result<PipelineData, ShellError> {
+let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
+let value = Value::List {
+vals,
+span: call.head,
+};
+let expressions = NuExpression::extract_exprs(value)?;
+let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into();
+Ok(PipelineData::Value(
+NuLazyFrame::into_value(lazy, call.head),
+None,
+))
+}
 #[cfg(test)]
 mod test {
 use super::super::super::test_dataframe::test_dataframe;


@@ -0,0 +1,57 @@
use super::super::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
};
#[derive(Clone)]
pub struct ExprAlias;
impl Command for ExprAlias {
fn name(&self) -> &str {
"dfr as"
}
fn usage(&self) -> &str {
"Creates an alias expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"Alias name",
SyntaxShape::String,
"Alias name for the expression",
)
.category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates and alias expression",
example: "(dfr col a | df as new_a)",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let alias: String = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let expr: NuExpression = expr.into_polars().alias(alias.as_str()).into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
}
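dfr as only re-labels whatever expression it receives, so it is typically chained after dfr col and inspected with dfr to-nu (both defined in the files that follow). An assumed combination of those commands, not an example taken from this file:

    dfr col a | dfr as new_a | dfr to-nu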


@@ -0,0 +1,77 @@
use crate::dataframe::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::col;
#[derive(Clone)]
pub struct ExprCol;
impl Command for ExprCol {
fn name(&self) -> &str {
"dfr col"
}
fn usage(&self) -> &str {
"Creates a named column expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"column name",
SyntaxShape::String,
"Name of column to be used",
)
.category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a named column expression and converts it to a nu object",
example: "dfr col col_a | dfr to-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
Value::String {
val: "column".into(),
span: Span::test_data(),
},
Value::String {
val: "col_a".into(),
span: Span::test_data(),
},
],
span: Span::test_data(),
}),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let name: String = call.req(engine_state, stack, 0)?;
let expr: NuExpression = col(name.as_str()).into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprToNu {})])
}
}


@ -0,0 +1,79 @@
use crate::dataframe::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct ExprLit;
impl Command for ExprLit {
fn name(&self) -> &str {
"dfr lit"
}
fn usage(&self) -> &str {
"Creates a literal expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"literal",
SyntaxShape::Any,
"literal to construct the expression",
)
.category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Created a literal expression and converts it to a nu object",
example: "dfr lit 2 | dfr to-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
Value::String {
val: "literal".into(),
span: Span::test_data(),
},
Value::String {
val: "2i64".into(),
span: Span::test_data(),
},
],
span: Span::test_data(),
}),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let literal: Value = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_value(literal)?;
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
}
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprToNu {})])
}
}


@ -0,0 +1,7 @@
mod col;
mod lit;
mod when;
pub(super) use crate::dataframe::expressions::dsl::col::ExprCol;
pub(super) use crate::dataframe::expressions::dsl::lit::ExprLit;
pub(super) use crate::dataframe::expressions::dsl::when::ExprWhen;


@ -0,0 +1,96 @@
use crate::dataframe::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use polars::prelude::when;
#[derive(Clone)]
pub struct ExprWhen;
impl Command for ExprWhen {
fn name(&self) -> &str {
"dfr when"
}
fn usage(&self) -> &str {
"Creates a when expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"when predicate",
SyntaxShape::Any,
"Name of column to be used",
)
.required_named(
"then",
SyntaxShape::Any,
"Expression that will be applied when predicate is true",
Some('t'),
)
.required_named(
"otherwise",
SyntaxShape::Any,
"Expression that will be applied when predicate is false",
Some('o'),
)
.category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create a new column for the dataframe",
example: r#"[[a b]; [1 2] [3 4]]
| dfr to-df
| dfr to-lazy
| dfr with-column (
dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c"
)
| dfr collect"#,
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let predicate: Value = call.req(engine_state, stack, 0)?;
let predicate = NuExpression::try_from_value(predicate)?;
let then: Value = call
.get_flag(engine_state, stack, "then")?
.expect("it is a required named value");
let then = NuExpression::try_from_value(then)?;
let otherwise: Value = call
.get_flag(engine_state, stack, "otherwise")?
.expect("it is a required named value");
let otherwise = NuExpression::try_from_value(otherwise)?;
let expr: NuExpression = when(predicate.into_polars())
.then(then.into_polars())
.otherwise(otherwise.into_polars())
.into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::super::test_dataframe::test_dataframe;
use super::super::super::ExprToNu;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprWhen {}), Box::new(ExprToNu {})])
}
}
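
For reference, the pipeline in the example above maps onto the polars DSL roughly as follows; the column name, literals and alias are just the ones from that example, not anything this command defines.

use polars::prelude::{col, lit, when, Expr};

// Rough polars equivalent of:
//   dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c"
fn example_when_expr() -> Expr {
    when(col("a").gt(lit(2)))
        .then(lit(4))
        .otherwise(lit(5))
        .alias("c")
}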


@ -0,0 +1,109 @@
/// Definition of multiple Expression commands using a macro rule
/// All of these expressions have an identical body and only require
/// a change in the name, description and expression function
use super::super::values::NuExpression;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
// The structs defined in this file are structs that form part of other commands
// since they share a similar name
macro_rules! expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
#[derive(Clone)]
pub struct $command;
impl Command for $command {
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let expr: NuExpression = expr.into_polars().$func().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
}
}
};
}
// ExprList command
// Expands to a command definition for a list expression
expr_command!(
ExprList,
"dfr list",
"Aggregates a group to a Series",
vec![Example {
description: "",
example: "",
result: None,
}],
list
);
// ExprAggGroups command
// Expands to a command definition for an agg-groups expression
expr_command!(
ExprAggGroups,
"dfr agg-groups",
"creates an agg_groups expression",
vec![Example {
description: "",
example: "",
result: None,
}],
agg_groups
);
// ExprFlatten command
// Expands to a command definition for a flatten expression
expr_command!(
ExprFlatten,
"dfr flatten",
"creates a flatten expression",
vec![Example {
description: "",
example: "",
result: None,
}],
flatten
);
// ExprExplode command
// Expands to a command definition for an explode expression
expr_command!(
ExprExplode,
"dfr explode",
"creates an explode expression",
vec![Example {
description: "",
example: "",
result: None,
}],
explode
);
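
Since every one of these commands differs only in its name, description and the polars Expr method it calls, adding another is a single macro invocation. A hypothetical example (the struct, command name and description below are made up, not part of this commit), wiring up polars' Expr::first:

// Hypothetical: expose Expr::first through the same macro.
expr_command!(
    ExprFirst,
    "dfr first",
    "creates a first expression",
    vec![Example {
        description: "",
        example: "",
        result: None,
    }],
    first
);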


@ -0,0 +1,36 @@
mod alias;
mod dsl;
mod expressions_macro;
mod to_nu;
use nu_protocol::engine::StateWorkingSet;
use crate::dataframe::expressions::dsl::*;
use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::expressions_macro::*;
use crate::dataframe::expressions::to_nu::ExprToNu;
pub fn add_expressions(working_set: &mut StateWorkingSet) {
macro_rules! bind_command {
( $command:expr ) => {
working_set.add_decl(Box::new($command));
};
( $( $command:expr ),* ) => {
$( working_set.add_decl(Box::new($command)); )*
};
}
// Dataframe commands
bind_command!(
ExprAlias,
ExprCol,
ExprLit,
ExprToNu,
ExprWhen,
ExprList,
ExprAggGroups,
ExprFlatten,
ExprExplode
);
}


@ -0,0 +1,70 @@
use super::super::values::NuExpression;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value,
};
#[derive(Clone)]
pub struct ExprToNu;
impl Command for ExprToNu {
fn name(&self) -> &str {
"dfr to-nu"
}
fn usage(&self) -> &str {
"Convert expression to a nu value for access and exploration"
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("expressions".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Convert a col expression into a nushell value",
example: "dfr col col_a | dfr to-nu",
result: Some(Value::Record {
cols: vec!["expr".into(), "value".into()],
vals: vec![
Value::String {
val: "column".into(),
span: Span::test_data(),
},
Value::String {
val: "col_a".into(),
span: Span::test_data(),
},
],
span: Span::test_data(),
}),
}]
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let value = expr.to_value(call.head);
Ok(PipelineData::Value(value, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::super::ExprCol;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ExprToNu {}), Box::new(ExprCol {})])
}
}


@ -0,0 +1,91 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct LazyAggregate;
impl Command for LazyAggregate {
fn name(&self) -> &str {
"dfr aggregate"
}
fn usage(&self) -> &str {
"Performs a series of aggregations from a group by"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group by expressions",
SyntaxShape::Any,
"Expression(s) that define the aggregations to be applied",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr group-by a
| dfr aggregate [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]"#,
result: None,
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr to-lazy
| dfr group-by a
| dfr aggregate [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]
| dfr collect"#,
result: None,
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
let value = Value::List {
vals,
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
let from_eager = group_by.from_eager;
let group_by = group_by.into_polars();
let lazy: NuLazyFrame = group_by.agg(&expressions).into();
let res = if from_eager {
lazy.collect(call.head)?.into_value(call.head)
} else {
lazy.into_value(call.head)
};
Ok(PipelineData::Value(res, None))
}
}
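
The from_eager flag carried by NuLazyGroupBy is what lets the same group-by/aggregate pipeline accept both eager and lazy input: when the chain started from an eager dataframe the result is collected back into one, otherwise the lazy plan is returned as-is. A condensed sketch of that convention (the helper name is made up; the real code inlines the logic as shown above):

use crate::dataframe::values::NuLazyFrame;
use nu_protocol::{ShellError, Span, Value};

// Hypothetical helper mirroring the from_eager handling in `run`.
fn finish_frame(lazy: NuLazyFrame, from_eager: bool, span: Span) -> Result<Value, ShellError> {
    if from_eager {
        // input was an eager NuDataFrame, so hand an eager frame back
        Ok(lazy.collect(span)?.into_value(span))
    } else {
        // input was already lazy, so keep the plan unevaluated
        Ok(lazy.into_value(span))
    }
}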


@ -0,0 +1,48 @@
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
#[derive(Clone)]
pub struct LazyCollect;
impl Command for LazyCollect {
fn name(&self) -> &str {
"dfr collect"
}
fn usage(&self) -> &str {
"Collect lazy dataframe into dataframe"
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let eager = lazy.collect(call.head)?;
Ok(PipelineData::Value(
NuDataFrame::into_value(eager, call.head),
None,
))
}
}


@ -0,0 +1,80 @@
use super::super::values::NuLazyFrame;
use crate::dataframe::values::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
};
#[derive(Clone)]
pub struct LazyFetch;
impl Command for LazyFetch {
fn name(&self) -> &str {
"dfr fetch"
}
fn usage(&self) -> &str {
"collects the lazyframe to the selected rows"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"rows",
SyntaxShape::Int,
"number of rows to be fetched from lazyframe",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let rows: i64 = call.req(engine_state, stack, 0)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let eager: NuDataFrame = lazy
.into_polars()
.fetch(rows as usize)
.map_err(|e| {
ShellError::GenericError(
"Error fetching rows".into(),
e.to_string(),
Some(call.head),
None,
Vec::new(),
)
})?
.into();
Ok(PipelineData::Value(
NuDataFrame::into_value(eager, call.head),
None,
))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFetch {})])
// }
//}
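
In polars terms, the difference between dfr fetch and dfr collect is only how much of the source the plan is executed against; a minimal sketch, assuming a plain polars LazyFrame:

use polars::prelude::*;

// `dfr fetch 5`: run the plan against roughly the first 5 source rows,
// handy for a quick look at a large pipeline.
fn preview(lazy: LazyFrame) -> Result<DataFrame, PolarsError> {
    lazy.fetch(5)
}

// `dfr collect`: run the plan against all rows.
fn materialize(lazy: LazyFrame) -> Result<DataFrame, PolarsError> {
    lazy.collect()
}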


@ -0,0 +1,65 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct LazyFillNA;
impl Command for LazyFillNA {
fn name(&self) -> &str {
"dfr fill-na"
}
fn usage(&self) -> &str {
"Replaces NA values with the given expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the NAN values",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let fill: Value = call.req(engine_state, stack, 0)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy: NuLazyFrame = lazy.fill_nan(expr).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFillNA {})])
// }
//}


@ -0,0 +1,65 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct LazyFillNull;
impl Command for LazyFillNull {
fn name(&self) -> &str {
"dfr fill-null"
}
fn usage(&self) -> &str {
"Replaces NULL values with the given expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let fill: Value = call.req(engine_state, stack, 0)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let expr = NuExpression::try_from_value(fill)?.into_polars();
let lazy: NuLazyFrame = lazy.fill_null(expr).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyFillNull {})])
// }
//}
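
Note the split between the two fill commands introduced here: dfr fill-na maps to polars' fill_nan (floating point NaN values), while dfr fill-null maps to fill_null (missing values). A minimal polars-level sketch of the null case, assuming a numeric column:

use polars::prelude::*;

// Roughly what `... | dfr to-lazy | dfr fill-null (dfr lit 0) | dfr collect` does.
fn fill_nulls_with_zero(df: DataFrame) -> Result<DataFrame, PolarsError> {
    df.lazy().fill_null(lit(0)).collect()
}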


@ -0,0 +1,98 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct ToLazyGroupBy;
impl Command for ToLazyGroupBy {
fn name(&self) -> &str {
"dfr group-by"
}
fn usage(&self) -> &str {
"Creates a groupby object that can be used for other aggregations"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.rest(
"Group by expressions",
SyntaxShape::Any,
"Expression(s) that define the lazy group by",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr group-by a
| dfr aggregate [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]"#,
result: None,
},
Example {
description: "Group by and perform an aggregation",
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| dfr to-df
| dfr to-lazy
| dfr group-by a
| dfr aggregate [
("b" | dfr min | dfr as "b_min")
("b" | dfr max | dfr as "b_max")
("b" | dfr sum | dfr as "b_sum")
]
| dfr collect"#,
result: None,
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
let value = Value::List {
vals,
span: call.head,
};
let expressions = NuExpression::extract_exprs(value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(engine_state, stack, 0)?;
return Err(ShellError::IncompatibleParametersSingle(
"Expected only Col expressions".into(),
value.span()?,
));
}
let value = input.into_value(call.head);
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
let group_by = NuLazyGroupBy {
group_by: Some(lazy.into_polars().groupby(&expressions)),
from_eager,
};
Ok(PipelineData::Value(group_by.into_value(call.head), None))
}
}


@ -0,0 +1,139 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use polars::prelude::{Expr, JoinType};
#[derive(Clone)]
pub struct LazyJoin;
impl Command for LazyJoin {
fn name(&self) -> &str {
"dfr join"
}
fn usage(&self) -> &str {
"Joins a lazy frame with other lazy frame"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("other", SyntaxShape::Any, "LazyFrame to join with")
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
.switch(
"inner",
"inner joing between lazyframes (default)",
Some('i'),
)
.switch("left", "left join between lazyframes", Some('l'))
.switch("outer", "outer join between lazyframes", Some('o'))
.switch("cross", "cross join between lazyframes", Some('c'))
.named(
"suffix",
SyntaxShape::String,
"Suffix to use on columns with same name",
Some('s'),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Join two lazy dataframes",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-lazy);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
$df_a | dfr join $df_b a foo | dfr collect"#,
result: None,
},
Example {
description: "Join one eager dataframe with a lazy dataframe",
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-df);
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
$df_a | dfr join $df_b a foo"#,
result: None,
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let left = call.has_flag("left");
let outer = call.has_flag("outer");
let cross = call.has_flag("cross");
let how = if left {
JoinType::Left
} else if outer {
JoinType::Outer
} else if cross {
JoinType::Cross
} else {
JoinType::Inner
};
let other: Value = call.req(engine_state, stack, 0)?;
let (other, _) = NuLazyFrame::maybe_is_eager(other)?;
let other = other.into_polars();
let left_on: Value = call.req(engine_state, stack, 1)?;
let left_on = NuExpression::extract_exprs(left_on)?;
let right_on: Value = call.req(engine_state, stack, 2)?;
let right_on = NuExpression::extract_exprs(right_on)?;
if left_on.len() != right_on.len() {
let right_on: Value = call.req(engine_state, stack, 2)?;
return Err(ShellError::IncompatibleParametersSingle(
"The right column list has a different size to the left column list".into(),
right_on.span()?,
));
}
// Checking that both list of expressions are made out of col expressions or strings
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
let value: Value = call.req(engine_state, stack, *index)?;
return Err(ShellError::IncompatibleParametersSingle(
"Expected only a string, col expressions or list of strings".into(),
value.span()?,
));
}
}
let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;
let suffix = suffix.unwrap_or_else(|| "_x".into());
let value = input.into_value(call.head);
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
let lazy = lazy.into_polars();
let lazy: NuLazyFrame = lazy
.join_builder()
.with(other)
.left_on(left_on)
.right_on(right_on)
.how(how)
.force_parallel(true)
.suffix(suffix)
.finish()
.into();
let res = if from_eager {
lazy.collect(call.head)?.into_value(call.head)
} else {
lazy.into_value(call.head)
};
Ok(PipelineData::Value(res, None))
}
}
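
For reference, the first example above corresponds roughly to the following call chain on the polars side; the column names come from the example and "_x" is simply the command's default suffix.

use polars::prelude::*;

// Rough polars equivalent of `$df_a | dfr join $df_b a foo | dfr collect`.
fn join_example(df_a: LazyFrame, df_b: LazyFrame) -> Result<DataFrame, PolarsError> {
    df_a.join_builder()
        .with(df_b)
        .left_on(vec![col("a")])
        .right_on(vec![col("foo")])
        .how(JoinType::Inner)
        .suffix("_x".to_string())
        .finish()
        .collect()
}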


@ -0,0 +1,232 @@
/// Definition of multiple lazyframe commands using a macro rule
/// All of these commands have an identical body and only require
/// a change in the name, description and function
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
macro_rules! lazy_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
#[derive(Clone)]
pub struct $command;
impl Command for $command {
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy.$func().into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}
};
}
// LazyReverse command
// Expands to a command definition for reverse
lazy_command!(
LazyReverse,
"dfr reverse",
"Reverses the LazyFrame",
vec![Example {
description: "",
example: "",
result: None,
}],
reverse
);
// LazyCache command
// Expands to a command definition for cache
lazy_command!(
LazyCache,
"dfr cache",
"Caches operations in a new LazyFrame",
vec![Example {
description: "",
example: "",
result: None,
}],
cache
);
// Creates a command that may result in a lazy frame operation or
// lazy frame expression
macro_rules! lazy_expr_command {
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
#[derive(Clone)]
pub struct $command;
impl Command for $command {
fn name(&self) -> &str {
$name
}
fn usage(&self) -> &str {
$desc
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
$examples
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(call.head);
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().$func().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else if NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value(value)?.into_polars();
let lazy: NuLazyFrame = lazy.$func().into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
} else {
Err(ShellError::CantConvert(
"expression or lazyframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
}
}
};
}
// LazyMax command
// Expands to a command definition for max aggregation
lazy_expr_command!(
LazyMax,
"dfr max",
"Aggregates columns to their max value or creates a max expression",
vec![Example {
description: "",
example: "",
result: None,
}],
max
);
// LazyMin command
// Expands to a command definition for min aggregation
lazy_expr_command!(
LazyMin,
"dfr min",
"Aggregates columns to their min value or creates a min expression",
vec![Example {
description: "",
example: "",
result: None,
}],
min
);
// LazySum command
// Expands to a command definition for sum aggregation
lazy_expr_command!(
LazySum,
"dfr sum",
"Aggregates columns to their sum value or creates a sum expression",
vec![Example {
description: "",
example: "",
result: None,
}],
sum
);
// LazyMean command
// Expands to a command definition for mean aggregation
lazy_expr_command!(
LazyMean,
"dfr mean",
"Aggregates columns to their mean value or creates a mean expression",
vec![Example {
description: "",
example: "",
result: None,
}],
mean
);
// LazyMedian command
// Expands to a command definition for median aggregation
lazy_expr_command!(
LazyMedian,
"dfr median",
"Aggregates columns to their median value or creates a median expression",
vec![Example {
description: "",
example: "",
result: None,
}],
median
);
// LazyStd command
// Expands to a command definition for std aggregation
lazy_expr_command!(
LazyStd,
"dfr std",
"Aggregates columns to their std value",
vec![Example {
description: "",
example: "",
result: None,
}],
std
);
// LazyVar command
// Expands to a command definition for var aggregation
lazy_expr_command!(
LazyVar,
"dfr var",
"Aggregates columns to their var value",
vec![Example {
description: "",
example: "",
result: None,
}],
var
);
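
Written out for max, the two shapes that a lazy_expr_command-generated command handles look like this on the polars side (the column name a is arbitrary); the same pattern applies to min, sum, mean and median.

use polars::prelude::*;

// Expression form: `dfr col a | dfr max` builds a max expression.
fn max_expression() -> Expr {
    col("a").max()
}

// Lazyframe form: `$lazy_df | dfr max` aggregates every column to its max.
fn max_frame(lf: LazyFrame) -> LazyFrame {
    lf.max()
}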


@ -0,0 +1,63 @@
mod aggregate;
mod collect;
mod fetch;
mod fill_na;
mod fill_null;
mod groupby;
mod join;
mod macro_commands;
mod quantile;
mod select;
mod sort_by_expr;
mod to_lazy;
use nu_protocol::engine::StateWorkingSet;
use crate::dataframe::lazy::macro_commands::*;
use crate::dataframe::lazy::aggregate::LazyAggregate;
use crate::dataframe::lazy::collect::LazyCollect;
use crate::dataframe::lazy::fetch::LazyFetch;
use crate::dataframe::lazy::fill_na::LazyFillNA;
use crate::dataframe::lazy::fill_null::LazyFillNull;
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
use crate::dataframe::lazy::join::LazyJoin;
use crate::dataframe::lazy::quantile::LazyQuantile;
use crate::dataframe::lazy::select::LazySelect;
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
use crate::dataframe::lazy::to_lazy::ToLazyFrame;
pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
macro_rules! bind_command {
( $command:expr ) => {
working_set.add_decl(Box::new($command));
};
( $( $command:expr ),* ) => {
$( working_set.add_decl(Box::new($command)); )*
};
}
// Dataframe commands
bind_command!(
LazyAggregate,
LazyCache,
LazyCollect,
LazyFetch,
LazyFillNA,
LazyFillNull,
LazyJoin,
LazyQuantile,
LazyMax,
LazyMin,
LazySum,
LazyMean,
LazyMedian,
LazyStd,
LazyVar,
LazyReverse,
LazySelect,
LazySortBy,
ToLazyFrame,
ToLazyGroupBy
);
}


@ -0,0 +1,67 @@
use crate::dataframe::values::NuLazyFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
};
use polars::prelude::QuantileInterpolOptions;
#[derive(Clone)]
pub struct LazyQuantile;
impl Command for LazyQuantile {
fn name(&self) -> &str {
"dfr quantile"
}
fn usage(&self) -> &str {
"Aggregates the columns to the selected quantile"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"quantile",
SyntaxShape::Number,
"quantile value for quantile operation",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let quantile: f64 = call.req(engine_state, stack, 0)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy
.quantile(quantile, QuantileInterpolOptions::default())
.into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazyQuantile {})])
// }
//}


@ -0,0 +1,78 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use polars::prelude::Expr;
#[derive(Clone)]
pub struct LazySelect;
impl Command for LazySelect {
fn name(&self) -> &str {
"dfr select"
}
fn usage(&self) -> &str {
"Selects columns from lazyframe"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"select expressions",
SyntaxShape::Any,
"Expression(s) that define the column selection",
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expressions = NuExpression::extract_exprs(value)?;
if expressions
.iter()
.any(|expr| !matches!(expr, Expr::Column(..)))
{
let value: Value = call.req(engine_state, stack, 0)?;
return Err(ShellError::IncompatibleParametersSingle(
"Expected only Col expressions".into(),
value.span()?,
));
}
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
let lazy: NuLazyFrame = lazy.select(&expressions).into();
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazySelect {})])
// }
//}


@ -0,0 +1,100 @@
use super::super::values::NuLazyFrame;
use crate::dataframe::values::NuExpression;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
#[derive(Clone)]
pub struct LazySortBy;
impl Command for LazySortBy {
fn name(&self) -> &str {
"dfr sort-by"
}
fn usage(&self) -> &str {
"sorts a lazy dataframe based on expression(s)"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"filtering expression",
)
.named(
"reverse",
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
"list indicating if reverse search should be done in the column. Default is false",
Some('r'),
)
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "",
example: "",
result: None,
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expressions = NuExpression::extract_exprs(value)?;
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
let reverse = match reverse {
Some(list) => {
if expressions.len() != list.len() {
let span = call
.get_flag::<Value>(engine_state, stack, "reverse")?
.expect("already checked and it exists")
.span()?;
return Err(ShellError::GenericError(
"Incorrect list size".into(),
"Size doesn't match expression list".into(),
Some(span),
None,
Vec::new(),
));
} else {
list
}
}
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy: NuLazyFrame = lazy
.into_polars()
.sort_by_exprs(&expressions, reverse)
.into();
Ok(PipelineData::Value(
NuLazyFrame::into_value(lazy, call.head),
None,
))
}
}
//#[cfg(test)]
//mod test {
// use super::super::super::test_dataframe::test_dataframe;
// use super::*;
//
// #[test]
// fn test_examples() {
// test_dataframe(vec![Box::new(LazySortBy {})])
// }
//}


@ -0,0 +1,45 @@
use super::super::values::{NuDataFrame, NuLazyFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
#[derive(Clone)]
pub struct ToLazyFrame;
impl Command for ToLazyFrame {
fn name(&self) -> &str {
"dfr to-lazy"
}
fn usage(&self) -> &str {
"Converts a dataframe into a lazy dataframe"
}
fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a dictionary and creates a lazy dataframe",
example: "[[a b];[1 2] [3 4]] | dfr to-df | dfl to-lazy",
result: None,
}]
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?;
let lazy = NuLazyFrame::from_dataframe(df);
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
}


@ -1,8 +1,13 @@
mod eager; mod eager;
mod expressions;
mod lazy;
mod series; mod series;
mod utils;
mod values; mod values;
pub use eager::add_eager_decls; pub use eager::add_eager_decls;
pub use expressions::add_expressions;
pub use lazy::add_lazy_decls;
pub use series::add_series_decls; pub use series::add_series_decls;
use nu_protocol::engine::StateWorkingSet; use nu_protocol::engine::StateWorkingSet;
@ -10,6 +15,8 @@ use nu_protocol::engine::StateWorkingSet;
pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) { pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) {
add_series_decls(working_set); add_series_decls(working_set);
add_eager_decls(working_set); add_eager_decls(working_set);
add_expressions(working_set);
add_lazy_decls(working_set);
} }
#[cfg(test)] #[cfg(test)]


@ -6,7 +6,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{IntoSeries, Utf8Methods};
#[derive(Clone)] #[derive(Clone)]
pub struct AsDate; pub struct AsDate;


@ -7,7 +7,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use polars::prelude::{IntoSeries, TimeUnit}; use polars::prelude::{IntoSeries, TimeUnit, Utf8Methods};
#[derive(Clone)] #[derive(Clone)]
pub struct AsDateTime; pub struct AsDateTime;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetDay; pub struct GetDay;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetHour; pub struct GetHour;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetMinute; pub struct GetMinute;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetMonth; pub struct GetMonth;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetNanosecond; pub struct GetNanosecond;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetOrdinal; pub struct GetOrdinal;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetSecond; pub struct GetSecond;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetWeek; pub struct GetWeek;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetWeekDay; pub struct GetWeekDay;


@ -5,7 +5,7 @@ use nu_protocol::{
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{DatetimeMethods, IntoSeries};
#[derive(Clone)] #[derive(Clone)]
pub struct GetYear; pub struct GetYear;


@ -1,5 +1,5 @@
use super::super::super::values::{Column, NuDataFrame}; use super::super::super::values::{Column, NuDataFrame};
use crate::dataframe::values::NuExpression;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -16,7 +16,7 @@ impl Command for IsNotNull {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Creates mask where value is not null" "Creates mask where value is not null or creates a is-not-null expression"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
@ -24,25 +24,32 @@ impl Command for IsNotNull {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Create mask where values are not null", Example {
example: r#"let s = ([5 6 0 8] | dfr to-df); description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | dfr to-df);
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-not-null"#, $res | dfr is-not-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(vec![Column::new(
"is_not_null".to_string(), "is_not_null".to_string(),
vec![ vec![
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
], ],
)]) )])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}] },
Example {
description: "Creates a is not null expression from a column",
example: "dfr col a | dfr is-not-null",
result: None,
},
]
} }
fn run( fn run(
@ -52,7 +59,27 @@ impl Command for IsNotNull {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().is_not_null().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
@ -60,10 +87,8 @@ fn command(
_engine_state: &EngineState, _engine_state: &EngineState,
_stack: &mut Stack, _stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut res = df.as_series(call.head)?.is_not_null(); let mut res = df.as_series(call.head)?.is_not_null();
res.rename("is_not_null"); res.rename("is_not_null");


@ -1,5 +1,5 @@
use super::super::super::values::{Column, NuDataFrame}; use super::super::super::values::{Column, NuDataFrame};
use crate::dataframe::values::NuExpression;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -16,7 +16,7 @@ impl Command for IsNull {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Creates mask where value is null" "Creates mask where value is null or creates a is-null expression"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
@ -24,25 +24,32 @@ impl Command for IsNull {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Create mask where values are null", Example {
example: r#"let s = ([5 6 0 8] | dfr to-df); description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | dfr to-df);
let res = ($s / $s); let res = ($s / $s);
$res | dfr is-null"#, $res | dfr is-null"#,
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![Column::new( NuDataFrame::try_from_columns(vec![Column::new(
"is_null".to_string(), "is_null".to_string(),
vec![ vec![
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(false), Value::test_bool(false),
], ],
)]) )])
.expect("simple df for test should not fail") .expect("simple df for test should not fail")
.into_value(Span::test_data()), .into_value(Span::test_data()),
), ),
}] },
Example {
description: "Creates a is not null expression from a column",
example: "dfr col a | dfr is-null",
result: None,
},
]
} }
fn run( fn run(
@ -52,7 +59,27 @@ impl Command for IsNull {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().is_null().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
@ -60,10 +87,8 @@ fn command(
_engine_state: &EngineState, _engine_state: &EngineState,
_stack: &mut Stack, _stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let mut res = df.as_series(call.head)?.is_null(); let mut res = df.as_series(call.head)?.is_null();
res.rename("is_null"); res.rename("is_null");


@ -1,5 +1,5 @@
use super::super::super::values::{Column, NuDataFrame}; use super::super::super::values::{Column, NuDataFrame};
use crate::dataframe::values::NuExpression;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -18,7 +18,7 @@ impl Command for NotSeries {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Inverts boolean mask" "Inverts boolean mask or creates a not expression"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
@ -26,22 +26,29 @@ impl Command for NotSeries {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Inverts boolean mask", Example {
example: "[true false true] | dfr to-df | dfr not", description: "Inverts boolean mask",
result: Some( example: "[true false true] | dfr to-df | dfr not",
NuDataFrame::try_from_columns(vec![Column::new( result: Some(
"0".to_string(), NuDataFrame::try_from_columns(vec![Column::new(
vec![ "0".to_string(),
Value::test_bool(false), vec![
Value::test_bool(true), Value::test_bool(false),
Value::test_bool(false), Value::test_bool(true),
], Value::test_bool(false),
)]) ],
.expect("simple df for test should not fail") )])
.into_value(Span::test_data()), .expect("simple df for test should not fail")
), .into_value(Span::test_data()),
}] ),
},
Example {
description: "Creates a not expression from a column",
example: "dfr col a | dfr not",
result: None,
},
]
} }
fn run( fn run(
@ -51,7 +58,27 @@ impl Command for NotSeries {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().not().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
@ -59,9 +86,8 @@ fn command(
_engine_state: &EngineState, _engine_state: &EngineState,
_stack: &mut Stack, _stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?; let series = df.as_series(call.head)?;
let bool = series.bool().map_err(|e| { let bool = series.bool().map_err(|e| {


@ -1,5 +1,5 @@
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use crate::dataframe::values::NuExpression;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -15,7 +15,7 @@ impl Command for NUnique {
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
"Counts unique values" "Counts unique values or creates a n-unique expression"
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
@ -23,18 +23,25 @@ impl Command for NUnique {
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![
description: "Counts unique values", Example {
example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique", description: "Counts unique values",
result: Some( example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique",
NuDataFrame::try_from_columns(vec![Column::new( result: Some(
"count_unique".to_string(), NuDataFrame::try_from_columns(vec![Column::new(
vec![Value::test_int(4)], "count_unique".to_string(),
)]) vec![Value::test_int(4)],
.expect("simple df for test should not fail") )])
.into_value(Span::test_data()), .expect("simple df for test should not fail")
), .into_value(Span::test_data()),
}] ),
},
Example {
description: "Creates a is n-unique expression from a column",
example: "dfr col a | dfr n-unique",
result: None,
},
]
} }
fn run( fn run(
@ -44,7 +51,27 @@ impl Command for NUnique {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuExpression::can_downcast(&value) {
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = expr.into_polars().n_unique().into();
Ok(PipelineData::Value(
NuExpression::into_value(expr, call.head),
None,
))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
@ -52,10 +79,8 @@ fn command(
_engine_state: &EngineState, _engine_state: &EngineState,
_stack: &mut Stack, _stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let res = df.as_series(call.head)?.n_unique().map_err(|e| { let res = df.as_series(call.head)?.n_unique().map_err(|e| {
ShellError::GenericError( ShellError::GenericError(
"Error counting unique values".into(), "Error counting unique values".into(),


@ -1,3 +1,5 @@
use crate::dataframe::values::{NuExpression, NuLazyFrame};
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_engine::CallExt; use nu_engine::CallExt;
@ -22,6 +24,12 @@ impl Command for Shift {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.required("period", SyntaxShape::Int, "shift period") .required("period", SyntaxShape::Int, "shift period")
.named(
"fill",
SyntaxShape::Any,
"Expression to use to fill the null values (lazy df)",
Some('f'),
)
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -47,25 +55,60 @@ impl Command for Shift {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuLazyFrame::can_downcast(&value) {
let df = NuLazyFrame::try_from_value(value)?;
command_lazy(engine_state, stack, call, df)
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command_eager(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
fn command( fn command_eager(
engine_state: &EngineState, engine_state: &EngineState,
stack: &mut Stack, stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let period: i64 = call.req(engine_state, stack, 0)?; let period: i64 = call.req(engine_state, stack, 0)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?.shift(period); let series = df.as_series(call.head)?.shift(period);
NuDataFrame::try_from_series(vec![series], call.head) NuDataFrame::try_from_series(vec![series], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }
fn command_lazy(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let shift: i64 = call.req(engine_state, stack, 0)?;
let fill: Option<Value> = call.get_flag(engine_state, stack, "fill")?;
let lazy = lazy.into_polars();
let lazy: NuLazyFrame = match fill {
Some(fill) => {
let expr = NuExpression::try_from_value(fill)?.into_polars();
lazy.shift_and_fill(shift, expr).into()
}
None => lazy.shift(shift).into(),
};
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::eager::DropNulls; use super::super::super::eager::DropNulls;


@ -1,11 +1,14 @@
use crate::dataframe::{utils::extract_strings, values::NuLazyFrame};
use super::super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use polars::prelude::IntoSeries; use polars::prelude::{IntoSeries, UniqueKeepStrategy};
#[derive(Clone)] #[derive(Clone)]
pub struct Unique; pub struct Unique;
@ -20,7 +23,24 @@ impl Command for Unique {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()).category(Category::Custom("dataframe".into())) Signature::build(self.name())
.named(
"subset",
SyntaxShape::Any,
"Subset of column(s) to use to maintain rows (lazy df)",
Some('s'),
)
.switch(
"last",
"Keeps last unique value. Default keeps first value (lazy df)",
Some('l'),
)
.switch(
"maintain-order",
"Keep the same order as the original DataFrame (lazy df)",
Some('k'),
)
.category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
@ -45,17 +65,31 @@ impl Command for Unique {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input) let value = input.into_value(call.head);
if NuLazyFrame::can_downcast(&value) {
let df = NuLazyFrame::try_from_value(value)?;
command_lazy(engine_state, stack, call, df)
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
command_eager(engine_state, stack, call, df)
} else {
Err(ShellError::CantConvert(
"expression or query".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
} }
} }
fn command( fn command_eager(
_engine_state: &EngineState, _engine_state: &EngineState,
_stack: &mut Stack, _stack: &mut Stack,
call: &Call, call: &Call,
input: PipelineData, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?; let series = df.as_series(call.head)?;
let res = series.unique().map_err(|e| { let res = series.unique().map_err(|e| {
@ -72,6 +106,37 @@ fn command(
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }
fn command_lazy(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let last = call.has_flag("last");
let maintain = call.has_flag("maintain-order");
let subset: Option<Value> = call.get_flag(engine_state, stack, "subset")?;
let subset = match subset {
Some(value) => Some(extract_strings(value)?),
None => None,
};
let strategy = if last {
UniqueKeepStrategy::Last
} else {
UniqueKeepStrategy::First
};
let lazy = lazy.into_polars();
let lazy: NuLazyFrame = if maintain {
lazy.unique_stable(subset, strategy).into()
} else {
lazy.unique(subset, strategy).into()
};
Ok(PipelineData::Value(lazy.into_value(call.head), None))
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;

View File

@ -63,7 +63,7 @@ fn command(
let df = NuDataFrame::try_from_pipeline(input, call.head)?; let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let series = df.as_series(call.head)?; let series = df.as_series(call.head)?;
let res = series.value_counts().map_err(|e| { let res = series.value_counts(false).map_err(|e| {
ShellError::GenericError( ShellError::GenericError(
"Error calculating value counts values".into(), "Error calculating value counts values".into(),
e.to_string(), e.to_string(),

View File

@ -0,0 +1,15 @@
use nu_protocol::{FromValue, ShellError, Value};
pub fn extract_strings(value: Value) -> Result<Vec<String>, ShellError> {
match (
<String as FromValue>::from_value(&value),
<Vec<String> as FromValue>::from_value(&value),
) {
(Ok(col), Err(_)) => Ok(vec![col]),
(Err(_), Ok(cols)) => Ok(cols),
_ => Err(ShellError::IncompatibleParametersSingle(
"Expected a string or list of strings".into(),
value.span()?,
)),
}
}
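A small self-contained sketch of the coercion extract_strings performs, where a bare string becomes a one-element list and anything else is rejected; the Val enum is a stand-in for the nushell Value type:
enum Val {
    Str(String),
    List(Vec<String>),
    Int(i64),
}

fn extract(value: Val) -> Result<Vec<String>, String> {
    match value {
        // A single column name is treated as a one-element subset.
        Val::Str(s) => Ok(vec![s]),
        Val::List(items) => Ok(items),
        _ => Err("expected a string or list of strings".into()),
    }
}

fn main() {
    assert_eq!(extract(Val::Str("a".into())).unwrap(), vec!["a"]);
    assert_eq!(extract(Val::List(vec!["a".into(), "b".into()])).unwrap(), vec!["a", "b"]);
    assert!(extract(Val::Int(1)).is_err());
}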

View File

@ -1,6 +1,10 @@
mod nu_dataframe; mod nu_dataframe;
mod nu_groupby; mod nu_expression;
mod nu_lazyframe;
mod nu_lazygroupby;
pub mod utils; pub mod utils;
pub use nu_dataframe::{Axis, Column, NuDataFrame}; pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_groupby::NuGroupBy; pub use nu_expression::NuExpression;
pub use nu_lazyframe::NuLazyFrame;
pub use nu_lazygroupby::NuLazyGroupBy;

View File

@ -76,39 +76,33 @@ pub(super) fn compute_between_series(
} }
} }
Operator::Equal => { Operator::Equal => {
let mut res = Series::equal(lhs, rhs).into_series();
let name = format!("eq_{}_{}", lhs.name(), rhs.name()); let name = format!("eq_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::NotEqual => { Operator::NotEqual => {
let mut res = Series::not_equal(lhs, rhs).into_series();
let name = format!("neq_{}_{}", lhs.name(), rhs.name()); let name = format!("neq_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::not_equal)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::LessThan => { Operator::LessThan => {
let mut res = Series::lt(lhs, rhs).into_series();
let name = format!("lt_{}_{}", lhs.name(), rhs.name()); let name = format!("lt_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::lt)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::LessThanOrEqual => { Operator::LessThanOrEqual => {
let mut res = Series::lt_eq(lhs, rhs).into_series();
let name = format!("lte_{}_{}", lhs.name(), rhs.name()); let name = format!("lte_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::lt_eq)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::GreaterThan => { Operator::GreaterThan => {
let mut res = Series::gt(lhs, rhs).into_series();
let name = format!("gt_{}_{}", lhs.name(), rhs.name()); let name = format!("gt_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::gt)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::GreaterThanOrEqual => { Operator::GreaterThanOrEqual => {
let mut res = Series::gt_eq(lhs, rhs).into_series();
let name = format!("gte_{}_{}", lhs.name(), rhs.name()); let name = format!("gte_{}_{}", lhs.name(), rhs.name());
res.rename(&name); let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::gt_eq)?;
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::And => match lhs.dtype() { Operator::And => match lhs.dtype() {
@ -179,6 +173,32 @@ pub(super) fn compute_between_series(
} }
} }
fn compare_series<'s, F>(
lhs: &'s Series,
rhs: &'s Series,
name: &'s str,
span: Option<Span>,
f: F,
) -> Result<Series, ShellError>
where
F: Fn(&'s Series, &'s Series) -> Result<ChunkedArray<BooleanType>, PolarsError>,
{
let mut res = f(lhs, rhs)
.map_err(|e| {
ShellError::GenericError(
"Equality error".into(),
e.to_string(),
span,
None,
Vec::new(),
)
})?
.into_series();
res.rename(name);
Ok(res)
}
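compare_series factors the renaming and error mapping out of the comparison arms, which only supply the polars comparison they want (not_equal, lt, gt, and so on). A stand-alone sketch of the same higher-order pattern with plain slices instead of Series:
fn compare_with<F>(lhs: &[i64], rhs: &[i64], name: &str, f: F) -> (String, Vec<bool>)
where
    F: Fn(i64, i64) -> bool,
{
    // The caller picks the comparison; naming the result stays in one place.
    let mask = lhs.iter().zip(rhs).map(|(a, b)| f(*a, *b)).collect();
    (name.to_string(), mask)
}

fn main() {
    // Each operator supplies its own closure, mirroring Series::gt, Series::lt, ...
    let (name, mask) = compare_with(&[1, 5, 3], &[2, 2, 3], "gt_a_b", |a, b| a > b);
    println!("{name}: {mask:?}");
}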
pub(super) fn compute_series_single_value( pub(super) fn compute_series_single_value(
operator: Spanned<Operator>, operator: Spanned<Operator>,
left: &Value, left: &Value,

View File

@ -7,7 +7,7 @@ use polars::chunked_array::object::builder::ObjectChunkedBuilder;
use polars::chunked_array::ChunkedArray; use polars::chunked_array::ChunkedArray;
use polars::prelude::{ use polars::prelude::{
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
ObjectType, Series, TimeUnit, ObjectType, Series, TemporalMethods, TimeUnit,
}; };
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};

View File

@ -84,6 +84,12 @@ impl AsMut<DataFrame> for NuDataFrame {
} }
} }
impl From<DataFrame> for NuDataFrame {
fn from(dataframe: DataFrame) -> Self {
Self(dataframe)
}
}
impl NuDataFrame { impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self { pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe) Self(dataframe)
@ -132,6 +138,7 @@ impl NuDataFrame {
for value in iter { for value in iter {
match value { match value {
Value::CustomValue { .. } => return Self::try_from_value(value),
Value::List { vals, .. } => { Value::List { vals, .. } => {
let cols = (0..vals.len()) let cols = (0..vals.len())
.map(|i| format!("{}", i)) .map(|i| format!("{}", i))
@ -181,7 +188,7 @@ impl NuDataFrame {
pub fn try_from_value(value: Value) -> Result<Self, ShellError> { pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value { match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() { Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(df) => Ok(NuDataFrame(df.0.clone())), Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert( None => Err(ShellError::CantConvert(
"dataframe".into(), "dataframe".into(),
@ -201,7 +208,15 @@ impl NuDataFrame {
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> { pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span); let value = input.into_value(span);
NuDataFrame::try_from_value(value) Self::try_from_value(value)
}
pub fn can_downcast(value: &Value) -> bool {
if let Value::CustomValue { val, .. } = value {
val.as_any().downcast_ref::<Self>().is_some()
} else {
false
}
} }
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> { pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {

View File

@ -0,0 +1,149 @@
use std::ops::{Add, Div, Mul, Rem, Sub};
use super::NuExpression;
use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Type, Value};
use polars::prelude::Expr;
// CustomValue implementation for NuExpression
impl CustomValue for NuExpression {
fn typetag_name(&self) -> &'static str {
"expression"
}
fn typetag_deserialize(&self) {
unimplemented!("typetag_deserialize")
}
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuExpression(self.0.clone());
Value::CustomValue {
val: Box::new(cloned),
span,
}
}
fn value_string(&self) -> String {
self.typetag_name().to_string()
}
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
Ok(self.to_value(span))
}
fn to_json(&self) -> nu_json::Value {
nu_json::Value::Null
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn operation(
&self,
lhs_span: Span,
operator: Operator,
op: Span,
right: &Value,
) -> Result<Value, ShellError> {
compute_with_value(self, lhs_span, operator, op, right)
}
}
fn compute_with_value(
left: &NuExpression,
lhs_span: Span,
operator: Operator,
op: Span,
right: &Value,
) -> Result<Value, ShellError> {
match right {
Value::CustomValue {
val: rhs,
span: rhs_span,
} => {
let rhs = rhs.as_any().downcast_ref::<NuExpression>().ok_or_else(|| {
ShellError::DowncastNotPossible(
"Unable to create expression".to_string(),
*rhs_span,
)
})?;
match rhs.as_ref() {
polars::prelude::Expr::Literal(..) => {
with_operator(operator, left, rhs, lhs_span, right.span()?, op)
}
_ => Err(ShellError::TypeMismatch(
"Only literal expressions or number".into(),
right.span()?,
)),
}
}
_ => {
let rhs = NuExpression::try_from_value(right.clone())?;
with_operator(operator, left, &rhs, lhs_span, right.span()?, op)
}
}
}
fn with_operator(
operator: Operator,
left: &NuExpression,
right: &NuExpression,
lhs_span: Span,
rhs_span: Span,
op_span: Span,
) -> Result<Value, ShellError> {
match operator {
Operator::Plus => apply_arithmetic(left, right, lhs_span, Add::add),
Operator::Minus => apply_arithmetic(left, right, lhs_span, Sub::sub),
Operator::Multiply => apply_arithmetic(left, right, lhs_span, Mul::mul),
Operator::Divide => apply_arithmetic(left, right, lhs_span, Div::div),
Operator::Modulo => apply_arithmetic(left, right, lhs_span, Rem::rem),
Operator::Equal => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::eq)
.into_value(lhs_span)),
Operator::NotEqual => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::neq)
.into_value(lhs_span)),
Operator::GreaterThan => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::gt)
.into_value(lhs_span)),
Operator::GreaterThanOrEqual => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::gt_eq)
.into_value(lhs_span)),
Operator::LessThan => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::lt)
.into_value(lhs_span)),
Operator::LessThanOrEqual => Ok(left
.clone()
.apply_with_expr(right.clone(), Expr::lt_eq)
.into_value(lhs_span)),
_ => Err(ShellError::OperatorMismatch {
op_span,
lhs_ty: Type::Custom,
lhs_span,
rhs_ty: Type::Custom,
rhs_span,
}),
}
}
fn apply_arithmetic<F>(
left: &NuExpression,
right: &NuExpression,
span: Span,
f: F,
) -> Result<Value, ShellError>
where
F: Fn(Expr, Expr) -> Expr,
{
let expr: NuExpression = f(left.as_ref().clone(), right.as_ref().clone()).into();
Ok(expr.into_value(span))
}

View File

@ -0,0 +1,325 @@
mod custom_value;
use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::{col, AggExpr, Expr, Literal};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Polars Expression wrapper for Nushell operations
// Object is behind an Option to allow easy implementation of
// the Deserialize trait
#[derive(Default, Clone)]
pub struct NuExpression(Option<Expr>);
// Mocked serialization of the LazyFrame object
impl Serialize for NuExpression {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_none()
}
}
// Mocked deserialization of the LazyFrame object
impl<'de> Deserialize<'de> for NuExpression {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(NuExpression::default())
}
}
impl fmt::Debug for NuExpression {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "NuExpression")
}
}
// Referenced access to the real LazyFrame
impl AsRef<Expr> for NuExpression {
fn as_ref(&self) -> &polars::prelude::Expr {
// The only case when there cannot be an expr is if it is created
// using the default function or if created by deserializing something
self.0.as_ref().expect("there should always be a frame")
}
}
impl AsMut<Expr> for NuExpression {
fn as_mut(&mut self) -> &mut polars::prelude::Expr {
// The only case when there cannot be an expr is if it is created
// using the default function or if created by deserializing something
self.0.as_mut().expect("there should always be a frame")
}
}
impl From<Expr> for NuExpression {
fn from(expr: Expr) -> Self {
Self(Some(expr))
}
}
impl NuExpression {
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(expr) => Ok(NuExpression(expr.0.clone())),
None => Err(ShellError::CantConvert(
"lazy expression".into(),
"non-dataframe".into(),
span,
None,
)),
},
Value::String { val, .. } => Ok(col(val.as_str()).into()),
Value::Int { val, .. } => Ok(val.lit().into()),
Value::Bool { val, .. } => Ok(val.lit().into()),
Value::Float { val, .. } => Ok(val.lit().into()),
x => Err(ShellError::CantConvert(
"lazy expression".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
Self::try_from_value(value)
}
pub fn can_downcast(value: &Value) -> bool {
match value {
Value::CustomValue { val, .. } => val.as_any().downcast_ref::<Self>().is_some(),
Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => {
true
}
_ => false,
}
}
pub fn into_polars(self) -> Expr {
self.0.expect("Expression cannot be none to convert")
}
pub fn apply_with_expr<F>(self, other: NuExpression, f: F) -> Self
where
F: Fn(Expr, Expr) -> Expr,
{
let expr = self.0.expect("Lazy expression must not be empty to apply");
let other = other.0.expect("Lazy expression must not be empty to apply");
f(expr, other).into()
}
pub fn to_value(&self, span: Span) -> Value {
expr_to_value(self.as_ref(), span)
}
// Convenience function to extract multiple Expr that could be inside a nushell Value
pub fn extract_exprs(value: Value) -> Result<Vec<Expr>, ShellError> {
ExtractedExpr::extract_exprs(value).map(ExtractedExpr::into_exprs)
}
}
// Enum to represent the parsing of the expressions from Value
enum ExtractedExpr {
Single(Expr),
List(Vec<ExtractedExpr>),
}
impl ExtractedExpr {
fn into_exprs(self) -> Vec<Expr> {
match self {
Self::Single(expr) => vec![expr],
Self::List(expressions) => expressions
.into_iter()
.flat_map(ExtractedExpr::into_exprs)
.collect(),
}
}
fn extract_exprs(value: Value) -> Result<ExtractedExpr, ShellError> {
match value {
Value::String { val, .. } => Ok(ExtractedExpr::Single(col(val.as_str()))),
Value::CustomValue { .. } => NuExpression::try_from_value(value)
.map(NuExpression::into_polars)
.map(ExtractedExpr::Single),
Value::List { vals, .. } => vals
.into_iter()
.map(Self::extract_exprs)
.collect::<Result<Vec<ExtractedExpr>, ShellError>>()
.map(ExtractedExpr::List),
x => Err(ShellError::CantConvert(
"expression".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
}
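ExtractedExpr parses a value into a tree of single expressions and lists, then flattens it into one Vec. A self-contained sketch of that recursive flattening with simple stand-in types:
enum Parsed {
    Single(String),
    List(Vec<Parsed>),
}

impl Parsed {
    // Depth-first flatten: leaves become one-element vectors, lists recurse.
    fn flatten(self) -> Vec<String> {
        match self {
            Parsed::Single(s) => vec![s],
            Parsed::List(items) => items.into_iter().flat_map(Parsed::flatten).collect(),
        }
    }
}

fn main() {
    let tree = Parsed::List(vec![
        Parsed::Single("a".into()),
        Parsed::List(vec![Parsed::Single("b".into()), Parsed::Single("c".into())]),
    ]);
    assert_eq!(tree.flatten(), vec!["a", "b", "c"]);
}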
pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
let cols = vec!["expr".to_string(), "value".to_string()];
match expr {
Expr::Not(_) => todo!(),
Expr::Alias(expr, alias) => {
let expr = expr_to_value(expr.as_ref(), span);
let alias = Value::String {
val: alias.as_ref().into(),
span,
};
let cols = vec!["expr".to_string(), "alias".to_string()];
Value::Record {
cols,
vals: vec![expr, alias],
span,
}
}
Expr::Column(name) => {
let expr_type = Value::String {
val: "column".into(),
span,
};
let value = Value::String {
val: name.to_string(),
span,
};
let vals = vec![expr_type, value];
Value::Record { cols, vals, span }
}
Expr::Columns(columns) => {
let expr_type = Value::String {
val: "columns".into(),
span,
};
let value = Value::List {
vals: columns
.iter()
.map(|col| Value::String {
val: col.clone(),
span,
})
.collect(),
span,
};
let vals = vec![expr_type, value];
Value::Record { cols, vals, span }
}
Expr::DtypeColumn(_) => todo!(),
Expr::Literal(literal) => {
let expr_type = Value::String {
val: "literal".into(),
span,
};
let value = Value::String {
val: format!("{:?}", literal),
span,
};
let vals = vec![expr_type, value];
Value::Record { cols, vals, span }
}
Expr::BinaryExpr { left, op, right } => {
let left_val = expr_to_value(left, span);
let right_val = expr_to_value(right, span);
let operator = Value::String {
val: format!("{:?}", op),
span,
};
let cols = vec!["left".to_string(), "op".to_string(), "right".to_string()];
Value::Record {
cols,
vals: vec![left_val, operator, right_val],
span,
}
}
Expr::Ternary {
predicate,
truthy,
falsy,
} => {
let predicate = expr_to_value(predicate.as_ref(), span);
let truthy = expr_to_value(truthy.as_ref(), span);
let falsy = expr_to_value(falsy.as_ref(), span);
let cols = vec![
"predicate".to_string(),
"truthy".to_string(),
"falsy".to_string(),
];
Value::Record {
cols,
vals: vec![predicate, truthy, falsy],
span,
}
}
Expr::Agg(agg_expr) => {
let value = match agg_expr {
AggExpr::Min(expr)
| AggExpr::Max(expr)
| AggExpr::Median(expr)
| AggExpr::NUnique(expr)
| AggExpr::First(expr)
| AggExpr::Last(expr)
| AggExpr::Mean(expr)
| AggExpr::List(expr)
| AggExpr::Count(expr)
| AggExpr::Sum(expr)
| AggExpr::AggGroups(expr)
| AggExpr::Std(expr)
| AggExpr::Var(expr) => expr_to_value(expr.as_ref(), span),
AggExpr::Quantile { .. } => todo!(),
};
let expr_type = Value::String {
val: "agg".into(),
span,
};
let vals = vec![expr_type, value];
Value::Record { cols, vals, span }
}
Expr::IsNotNull(_) => todo!(),
Expr::IsNull(_) => todo!(),
Expr::Cast { .. } => todo!(),
Expr::Sort { .. } => todo!(),
Expr::Take { .. } => todo!(),
Expr::SortBy { .. } => todo!(),
Expr::Function { .. } => todo!(),
Expr::Shift { .. } => todo!(),
Expr::Reverse(_) => todo!(),
Expr::Duplicated(_) => todo!(),
Expr::IsUnique(_) => todo!(),
Expr::Explode(_) => todo!(),
Expr::Filter { .. } => todo!(),
Expr::Window { .. } => todo!(),
Expr::Wildcard => todo!(),
Expr::Slice { .. } => todo!(),
Expr::Exclude(_, _) => todo!(),
Expr::KeepName(_) => todo!(),
Expr::RenameAlias { .. } => todo!(),
Expr::Count => todo!(),
Expr::Nth(_) => todo!(),
Expr::AnonymousFunction { .. } => todo!(),
}
}
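expr_to_value renders the expression tree as nested records for display, recursing into binary nodes. A stand-alone sketch of the same rendering idea with a tiny stand-in expression type:
enum SimpleExpr {
    Column(String),
    Literal(i64),
    Binary { left: Box<SimpleExpr>, op: String, right: Box<SimpleExpr> },
}

fn to_record(expr: &SimpleExpr) -> String {
    // Each node becomes an "expr"/"value" style pair; binary nodes recurse.
    match expr {
        SimpleExpr::Column(name) => format!("{{expr: column, value: {name}}}"),
        SimpleExpr::Literal(v) => format!("{{expr: literal, value: {v}}}"),
        SimpleExpr::Binary { left, op, right } => format!(
            "{{left: {}, op: {op}, right: {}}}",
            to_record(left),
            to_record(right)
        ),
    }
}

fn main() {
    let expr = SimpleExpr::Binary {
        left: Box::new(SimpleExpr::Column("a".into())),
        op: "+".into(),
        right: Box::new(SimpleExpr::Literal(1)),
    };
    println!("{}", to_record(&expr));
}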

View File

@ -1,140 +0,0 @@
mod custom_value;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::frame::groupby::{GroupBy, GroupsProxy};
use polars::prelude::{DataFrame, GroupsIdx};
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum NuGroupsProxy {
Idx {
sorted: bool,
all: Vec<(u32, Vec<u32>)>,
},
Slice(Vec<[u32; 2]>),
}
impl NuGroupsProxy {
fn from_polars(groups: &GroupsProxy) -> Self {
match groups {
GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx {
sorted: indexes.is_sorted(),
all: indexes
.iter()
.map(|(index, values)| (index, values.clone()))
.collect(),
},
GroupsProxy::Slice(slice) => NuGroupsProxy::Slice(slice.clone()),
}
}
fn to_polars(&self) -> GroupsProxy {
match self {
Self::Idx { sorted, all } => {
let mut groups: GroupsIdx = all.clone().into();
if *sorted {
groups.sort()
}
GroupsProxy::Idx(groups)
}
Self::Slice(slice) => GroupsProxy::Slice(slice.clone()),
}
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuGroupBy {
dataframe: DataFrame,
by: Vec<String>,
groups: NuGroupsProxy,
}
impl NuGroupBy {
pub fn new(dataframe: DataFrame, by: Vec<String>, groups: &GroupsProxy) -> Self {
NuGroupBy {
dataframe,
by,
groups: NuGroupsProxy::from_polars(groups),
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuGroupBy>() {
Some(groupby) => Ok(NuGroupBy {
dataframe: groupby.dataframe.clone(),
by: groupby.by.clone(),
groups: groupby.groups.clone(),
}),
None => Err(ShellError::CantConvert(
"groupby".into(),
"non-dataframe".into(),
span,
None,
)),
},
x => Err(ShellError::CantConvert(
"groupby".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<NuGroupBy, ShellError> {
let value = input.into_value(span);
NuGroupBy::try_from_value(value)
}
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
let by = self.dataframe.select_series(&self.by).map_err(|e| {
ShellError::GenericError(
"Error creating groupby".into(),
"".to_string(),
None,
Some(e.to_string()),
Vec::new(),
)
})?;
Ok(GroupBy::new(
&self.dataframe,
by,
self.groups.to_polars(),
None,
))
}
pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
let values = self
.by
.iter()
.map(|col| {
let cols = vec!["group by".to_string()];
let vals = vec![Value::String {
val: col.into(),
span,
}];
Value::Record { cols, vals, span }
})
.collect::<Vec<Value>>();
Ok(values)
}
}
impl AsRef<DataFrame> for NuGroupBy {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.dataframe
}
}

View File

@ -0,0 +1,53 @@
use super::NuLazyFrame;
use nu_protocol::{CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuLazyFrame
impl CustomValue for NuLazyFrame {
fn typetag_name(&self) -> &'static str {
"lazyframe"
}
fn typetag_deserialize(&self) {
unimplemented!("typetag_deserialize")
}
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuLazyFrame(self.0.clone());
Value::CustomValue {
val: Box::new(cloned),
span,
}
}
fn value_string(&self) -> String {
self.typetag_name().to_string()
}
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let cols = vec!["plan".into(), "optimized_plan".into()];
let vals = vec![
Value::String {
val: self.as_ref().describe_plan(),
span,
},
Value::String {
val: self
.as_ref()
.describe_optimized_plan()
.unwrap_or_else(|_| "<NOT AVAILABLE>".to_string()),
span,
},
];
Ok(Value::Record { cols, vals, span })
}
fn to_json(&self) -> nu_json::Value {
nu_json::Value::Null
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
}

View File

@ -0,0 +1,156 @@
mod custom_value;
use super::{NuDataFrame, NuExpression};
use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::{Expr, IntoLazy, LazyFrame};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
// Lazyframe wrapper for Nushell operations
// Polars LazyFrame is behind an Option to allow easy implementation of
// the Deserialize trait
#[derive(Default)]
pub struct NuLazyFrame(Option<LazyFrame>);
// Mocked serialization of the LazyFrame object
impl Serialize for NuLazyFrame {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_none()
}
}
// Mocked deserialization of the LazyFrame object
impl<'de> Deserialize<'de> for NuLazyFrame {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(NuLazyFrame::default())
}
}
impl fmt::Debug for NuLazyFrame {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "NuLazyframe")
}
}
// Referenced access to the real LazyFrame
impl AsRef<LazyFrame> for NuLazyFrame {
fn as_ref(&self) -> &polars::prelude::LazyFrame {
// The only case when there cannot be a lazy frame is if it is created
// using the default function or if created by deserializing something
self.0.as_ref().expect("there should always be a frame")
}
}
impl AsMut<LazyFrame> for NuLazyFrame {
fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame {
// The only case when there cannot be a lazy frame is if it is created
// using the default function or if created by deserializing something
self.0.as_mut().expect("there should always be a frame")
}
}
impl From<LazyFrame> for NuLazyFrame {
fn from(lazy_frame: LazyFrame) -> Self {
Self(Some(lazy_frame))
}
}
impl NuLazyFrame {
pub fn from_dataframe(df: NuDataFrame) -> Self {
let lazy = df.as_ref().clone().lazy();
Self(Some(lazy))
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn into_polars(self) -> LazyFrame {
self.0.expect("lazyframe cannot be none to convert")
}
pub fn collect(self, span: Span) -> Result<NuDataFrame, ShellError> {
self.0
.expect("No empty lazy for collect")
.collect()
.map_err(|e| {
ShellError::GenericError(
"Error collecting lazy frame".to_string(),
e.to_string(),
Some(span),
None,
Vec::new(),
)
})
.map(NuDataFrame::new)
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
Some(expr) => Ok(Self(expr.0.clone())),
None => Err(ShellError::CantConvert(
"lazy frame".into(),
"non-dataframe".into(),
span,
None,
)),
},
x => Err(ShellError::CantConvert(
"lazy frame".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
Self::try_from_value(value)
}
pub fn can_downcast(value: &Value) -> bool {
if let Value::CustomValue { val, .. } = value {
val.as_any().downcast_ref::<Self>().is_some()
} else {
false
}
}
pub fn maybe_is_eager(value: Value) -> Result<(Self, bool), ShellError> {
if Self::can_downcast(&value) {
Ok((Self::try_from_value(value)?, false))
} else if NuDataFrame::can_downcast(&value) {
let df = NuDataFrame::try_from_value(value)?;
Ok((NuLazyFrame::from_dataframe(df), true))
} else {
Err(ShellError::CantConvert(
"lazy or eager dataframe".into(),
value.get_type().to_string(),
value.span()?,
None,
))
}
}
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self
where
F: Fn(LazyFrame, Expr) -> LazyFrame,
{
let df = self.0.expect("Lazy frame must not be empty to apply");
let expr = expr.into_polars();
let new_frame = f(df, expr);
new_frame.into()
}
}
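maybe_is_eager lets a command accept either kind of frame: eager input is promoted to a lazy plan and the returned flag records that, so the result can be collected back to an eager frame at the end, while genuinely lazy input stays lazy. A self-contained sketch of that round trip with stand-in types (not the nushell ones):
#[derive(Debug)]
struct Eager(Vec<i64>);
struct Lazy(Vec<i64>);

enum Input {
    Eager(Eager),
    Lazy(Lazy),
}

impl Lazy {
    fn reverse(self) -> Self { Lazy(self.0.into_iter().rev().collect()) }
    fn collect(self) -> Eager { Eager(self.0) }
}

// Analogue of maybe_is_eager: always hand back a lazy plan plus a flag telling
// the caller whether the pipeline started from an eager frame.
fn maybe_is_eager(input: Input) -> (Lazy, bool) {
    match input {
        Input::Lazy(lazy) => (lazy, false),
        Input::Eager(df) => (Lazy(df.0), true),
    }
}

fn main() {
    let (lazy, from_eager) = maybe_is_eager(Input::Eager(Eager(vec![1, 2, 3])));
    let result = lazy.reverse();
    if from_eager {
        println!("{:?}", result.collect()); // eager in, eager out
    }

    // Lazy input stays lazy: no collect, the plan could keep growing.
    let (still_lazy, from_eager) = maybe_is_eager(Input::Lazy(Lazy(vec![4, 5])));
    assert!(!from_eager);
    let _ = still_lazy.reverse();
}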

View File

@ -1,10 +1,10 @@
use super::NuGroupBy; use super::NuLazyGroupBy;
use nu_protocol::{CustomValue, ShellError, Span, Value}; use nu_protocol::{CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuDataFrame // CustomValue implementation for NuLazyGroupBy
impl CustomValue for NuGroupBy { impl CustomValue for NuLazyGroupBy {
fn typetag_name(&self) -> &'static str { fn typetag_name(&self) -> &'static str {
"groupby" "lazygroupby"
} }
fn typetag_deserialize(&self) { fn typetag_deserialize(&self) {
@ -12,10 +12,9 @@ impl CustomValue for NuGroupBy {
} }
fn clone_value(&self, span: nu_protocol::Span) -> Value { fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuGroupBy { let cloned = NuLazyGroupBy {
dataframe: self.dataframe.clone(), group_by: self.group_by.clone(),
by: self.by.clone(), from_eager: self.from_eager,
groups: self.groups.clone(),
}; };
Value::CustomValue { Value::CustomValue {
@ -29,9 +28,13 @@ impl CustomValue for NuGroupBy {
} }
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> { fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let vals = self.print(span)?; let cols = vec!["LazyGroupBy".into()];
let vals = vec![Value::String {
val: "apply aggregation to complete execution plan".into(),
span,
}];
Ok(Value::List { vals, span }) Ok(Value::Record { cols, vals, span })
} }
fn to_json(&self) -> nu_json::Value { fn to_json(&self) -> nu_json::Value {

View File

@ -0,0 +1,114 @@
mod custom_value;
use core::fmt;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::prelude::LazyGroupBy;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
// LazyGroupBy wrapper for Nushell operations
// Polars LazyGroupBy is behind an Option to allow easy implementation of
// the Deserialize trait
#[derive(Default)]
pub struct NuLazyGroupBy {
pub group_by: Option<LazyGroupBy>,
pub from_eager: bool,
}
// Mocked serialization of the LazyFrame object
impl Serialize for NuLazyGroupBy {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_none()
}
}
// Mocked deserialization of the LazyFrame object
impl<'de> Deserialize<'de> for NuLazyGroupBy {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Ok(NuLazyGroupBy::default())
}
}
impl fmt::Debug for NuLazyGroupBy {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "NuLazyGroupBy")
}
}
// Referenced access to the real LazyFrame
impl AsRef<LazyGroupBy> for NuLazyGroupBy {
fn as_ref(&self) -> &polars::prelude::LazyGroupBy {
// The only case when there cannot be a group by is if it is created
// using the default function or if created by deserializing something
self.group_by
.as_ref()
.expect("there should always be a frame")
}
}
impl AsMut<LazyGroupBy> for NuLazyGroupBy {
fn as_mut(&mut self) -> &mut polars::prelude::LazyGroupBy {
// The only case when there cannot be a group by is if it is created
// using the default function or if created by deserializing something
self.group_by
.as_mut()
.expect("there should always be a frame")
}
}
impl From<LazyGroupBy> for NuLazyGroupBy {
fn from(group_by: LazyGroupBy) -> Self {
Self {
group_by: Some(group_by),
from_eager: false,
}
}
}
impl NuLazyGroupBy {
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn into_polars(self) -> LazyGroupBy {
self.group_by.expect("GroupBy cannot be none to convert")
}
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match value {
Value::CustomValue { val, span } => {
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
Some(group) => Ok(Self {
group_by: group.group_by.clone(),
from_eager: group.from_eager,
}),
None => Err(ShellError::CantConvert(
"lazy frame".into(),
"non-dataframe".into(),
span,
None,
)),
}
}
x => Err(ShellError::CantConvert(
"lazy groupby".into(),
x.get_type().to_string(),
x.span()?,
None,
)),
}
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
Self::try_from_value(value)
}
}

View File

@ -238,6 +238,31 @@ impl FromValue for Vec<String> {
} }
} }
impl FromValue for Vec<bool> {
fn from_value(v: &Value) -> Result<Self, ShellError> {
match v {
Value::List { vals, .. } => vals
.iter()
.map(|val| match val {
Value::Bool { val, .. } => Ok(*val),
c => Err(ShellError::CantConvert(
"bool".into(),
c.get_type().to_string(),
c.span()?,
None,
)),
})
.collect::<Result<Vec<bool>, ShellError>>(),
v => Err(ShellError::CantConvert(
"bool".into(),
v.get_type().to_string(),
v.span()?,
None,
)),
}
}
}
impl FromValue for CellPath { impl FromValue for CellPath {
fn from_value(v: &Value) -> Result<Self, ShellError> { fn from_value(v: &Value) -> Result<Self, ShellError> {
let span = v.span()?; let span = v.span()?;