mirror of
https://github.com/nushell/nushell.git
synced 2024-12-22 15:13:01 +01:00
Lazy dataframes (#5546)
* lazyframe definition * expressions and lazy frames * new alias expression * more expression commands * updated to polars main * more expressions and groupby * more expressions, fetch and sort-by * csv reader * removed open csv * unique function * joining functions * join lazy frames commands with eager commands * corrected tests * Update .gitignore * Update .gitignore Co-authored-by: JT <547158+jntrnr@users.noreply.github.com>
This commit is contained in:
parent
2062e33c37
commit
8bd68416e3
1
.gitignore
vendored
1
.gitignore
vendored
@ -23,4 +23,5 @@ debian/nu/
|
||||
.vscode/*
|
||||
|
||||
# Helix configuration folder
|
||||
.helix/*
|
||||
.helix
|
||||
|
90
Cargo.lock
generated
90
Cargo.lock
generated
@ -141,9 +141,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.10.1"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
|
||||
checksum = "b040061368d1314b0fd8b8f1fde0671eba1afc63a1c61a4dafaf2d4fc10c96f9"
|
||||
dependencies = [
|
||||
"arrow-format",
|
||||
"base64",
|
||||
@ -2980,15 +2980,6 @@ dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "output_vt100"
|
||||
version = "0.1.3"
|
||||
@ -3060,22 +3051,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet-format-async-temp"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03abc2f9c83fe9ceec83f47c76cc071bfd56caba33794340330f35623ab1f544"
|
||||
checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"futures",
|
||||
"integer-encoding",
|
||||
"ordered-float",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parquet2"
|
||||
version = "0.10.3"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b085f9e78e4842865151b693f6d94bdf7b280af66daa6e3587adeb3106a07e9"
|
||||
checksum = "98f99f9724402d81faadd9cfa1e8dc78055fd0ddfdbefb7adab3a3a13e893408"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"bitpacking",
|
||||
@ -3247,33 +3236,35 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "656db3b86c338a8a717476eb29436a380ebdf74915a71cff6ecce78d52173e53"
|
||||
checksum = "b140da767e129c60c41c8e1968ffab5f114bcf823182edb7fa900464a31bf421"
|
||||
dependencies = [
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
"polars-lazy",
|
||||
"polars-ops",
|
||||
"polars-time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcedf44a7b15b60c69e811c9d343ac459788e961dc4136f002ed1b68a1fada07"
|
||||
checksum = "6d27df11ee28956bd6f5aed54e7e05ce87b886871995e1da501134627ec89077"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"hashbrown 0.12.0",
|
||||
"num 0.4.0",
|
||||
"serde",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dfed0e21ac4d4c85df45b5864a68cfc5b2a97e9fba8a981be7b09c6f02a7eaa"
|
||||
checksum = "fdf8d12cb7ec278516228fc86469f98c62ab81ca31e4e76d2c0ccf5a09c70491"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@ -3284,8 +3275,8 @@ dependencies = [
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
"num 0.4.0",
|
||||
"num_cpus",
|
||||
"polars-arrow",
|
||||
"polars-utils",
|
||||
"rand 0.8.5",
|
||||
"rand_distr",
|
||||
"rayon",
|
||||
@ -3297,9 +3288,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8770fb4233ab88affac80c410be090dc7a2c044a9e4e7b942132e94ceeb732b"
|
||||
checksum = "fdd4b762e5694f359ded21ca0627b5bc95b6eb49f6b330569afc1d20f0564b01"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@ -3311,21 +3302,22 @@ dependencies = [
|
||||
"memchr",
|
||||
"memmap2",
|
||||
"num 0.4.0",
|
||||
"num_cpus",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-time",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simdutf8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4eca1fed3b88ae1bb9b7f1d7b2958f1655d9c1aed33495d6ba30ff84a0c1e9e9"
|
||||
checksum = "eedc21001f05611e41bb7439b38d0f4ef9406aa49c17f3b289b5f57d8fa40c59"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"glob",
|
||||
@ -3336,24 +3328,36 @@ dependencies = [
|
||||
"polars-time",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-time"
|
||||
version = "0.20.0"
|
||||
name = "polars-ops"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fe48c759ca778a8b6fb30f70e9a81b56f0987a82dc71e61c5b2d3c236b6b8d6"
|
||||
checksum = "86fae68f0992955f224f09d1f15648a6fb76d8e3b962efac2f97ccc2aa58977a"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.20.0"
|
||||
name = "polars-time"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71011e8ed52f123ce23d110b496c8704d0a59c5fd4115cd938e7ff19d4bcb7ca"
|
||||
checksum = "be499f73749e820f96689c5f9ec59669b7cdd551d864358e2bdaebb5944e4bfb"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"lexical",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7f4cd569d383f5f000abbd6d5146550e6cb4e43fac30d1af98699499a440d56"
|
||||
dependencies = [
|
||||
"parking_lot 0.12.0",
|
||||
"rayon",
|
||||
@ -5333,18 +5337,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.10.0+zstd.1.5.2"
|
||||
version = "0.11.1+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd"
|
||||
checksum = "77a16b8414fde0414e90c612eba70985577451c4c504b99885ebed24762cb81a"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "4.1.4+zstd.1.5.2"
|
||||
version = "5.0.1+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee"
|
||||
checksum = "7c12659121420dd6365c5c3de4901f97145b79651fb1d25814020ed2ed0585ae"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zstd-sys",
|
||||
@ -5352,9 +5356,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "1.6.3+zstd.1.5.2"
|
||||
version = "2.0.1+zstd.1.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8"
|
||||
checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
|
@ -96,13 +96,15 @@ version = "2.0.2"
|
||||
optional = true
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.20.0"
|
||||
version = "0.21.1"
|
||||
# path = "../../../../polars/polars"
|
||||
optional = true
|
||||
features = [
|
||||
"default", "parquet", "json", "serde", "object",
|
||||
"checked_arithmetic", "strings", "cum_agg", "is_in",
|
||||
"default", "to_dummies", "parquet", "json", "serde", "serde-lazy",
|
||||
"object", "checked_arithmetic", "strings", "cum_agg", "is_in",
|
||||
"rolling_window", "strings", "rows", "random",
|
||||
"dtype-datetime"
|
||||
"dtype-datetime", "dtype-struct", "lazy", "cross_join",
|
||||
"dynamic_groupby"
|
||||
]
|
||||
|
||||
[features]
|
||||
|
@ -1,403 +0,0 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
did_you_mean,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
use polars::{
|
||||
frame::groupby::GroupBy,
|
||||
prelude::{PolarsError, QuantileInterpolOptions},
|
||||
};
|
||||
|
||||
use crate::dataframe::values::NuGroupBy;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
enum Operation {
|
||||
Mean,
|
||||
Sum,
|
||||
Min,
|
||||
Max,
|
||||
First,
|
||||
Last,
|
||||
Nunique,
|
||||
Quantile(f64),
|
||||
Median,
|
||||
Var,
|
||||
Std,
|
||||
Count,
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
fn from_tagged(
|
||||
name: &Spanned<String>,
|
||||
quantile: Option<Spanned<f64>>,
|
||||
) -> Result<Operation, ShellError> {
|
||||
match name.item.as_ref() {
|
||||
"mean" => Ok(Operation::Mean),
|
||||
"sum" => Ok(Operation::Sum),
|
||||
"min" => Ok(Operation::Min),
|
||||
"max" => Ok(Operation::Max),
|
||||
"first" => Ok(Operation::First),
|
||||
"last" => Ok(Operation::Last),
|
||||
"nunique" => Ok(Operation::Nunique),
|
||||
"quantile" => match quantile {
|
||||
None => Err(ShellError::GenericError(
|
||||
"Quantile value not fount".into(),
|
||||
"Quantile operation requires quantile value".into(),
|
||||
Some(name.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
Some(value) => {
|
||||
if (value.item < 0.0) | (value.item > 1.0) {
|
||||
Err(ShellError::GenericError(
|
||||
"Inappropriate quantile".into(),
|
||||
"Quantile value should be between 0.0 and 1.0".into(),
|
||||
Some(value.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
))
|
||||
} else {
|
||||
Ok(Operation::Quantile(value.item))
|
||||
}
|
||||
}
|
||||
},
|
||||
"median" => Ok(Operation::Median),
|
||||
"var" => Ok(Operation::Var),
|
||||
"std" => Ok(Operation::Std),
|
||||
"count" => Ok(Operation::Count),
|
||||
selection => {
|
||||
let possibilities = [
|
||||
"mean".to_string(),
|
||||
"sum".to_string(),
|
||||
"min".to_string(),
|
||||
"max".to_string(),
|
||||
"first".to_string(),
|
||||
"last".to_string(),
|
||||
"nunique".to_string(),
|
||||
"quantile".to_string(),
|
||||
"median".to_string(),
|
||||
"var".to_string(),
|
||||
"std".to_string(),
|
||||
"count".to_string(),
|
||||
];
|
||||
|
||||
match did_you_mean(&possibilities, selection) {
|
||||
Some(suggestion) => Err(ShellError::DidYouMean(suggestion, name.span)),
|
||||
None => Err(ShellError::GenericError(
|
||||
"Operation not fount".into(),
|
||||
"Operation does not exist".into(),
|
||||
Some(name.span),
|
||||
Some("Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count".into()),
|
||||
Vec::new(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Mean => "mean",
|
||||
Self::Sum => "sum",
|
||||
Self::Min => "min",
|
||||
Self::Max => "max",
|
||||
Self::First => "first",
|
||||
Self::Last => "last",
|
||||
Self::Nunique => "nunique",
|
||||
Self::Quantile(_) => "quantile",
|
||||
Self::Median => "median",
|
||||
Self::Var => "var",
|
||||
Self::Std => "std",
|
||||
Self::Count => "count",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Aggregate;
|
||||
|
||||
impl Command for Aggregate {
|
||||
fn name(&self) -> &str {
|
||||
"dfr aggregate"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs an aggregation operation on a dataframe and groupby object"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"operation_name",
|
||||
SyntaxShape::String,
|
||||
"\n\tDataframes: mean, sum, min, max, quantile, median, var, std
|
||||
\tGroupBy: mean, sum, min, max, first, last, nunique, quantile, median, var, std, count",
|
||||
)
|
||||
.named(
|
||||
"quantile",
|
||||
SyntaxShape::Number,
|
||||
"quantile value for quantile operation",
|
||||
Some('q'),
|
||||
)
|
||||
.switch(
|
||||
"explicit",
|
||||
"returns explicit names for groupby aggregations",
|
||||
Some('e'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Aggregate sum by grouping by column a and summing on col b",
|
||||
example:
|
||||
"[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a | dfr aggregate sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_string("one")]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(3)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Aggregate sum in dataframe columns",
|
||||
example: "[[a b]; [4 1] [5 2]] | dfr to-df | dfr aggregate sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(9)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(3)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Aggregate sum in series",
|
||||
example: "[4 1 5 6] | dfr to-df | dfr aggregate sum",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(16)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let operation: Spanned<String> = call.req(engine_state, stack, 0)?;
|
||||
let quantile: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "quantile")?;
|
||||
let op = Operation::from_tagged(&operation, quantile)?;
|
||||
|
||||
match input {
|
||||
PipelineData::Value(Value::CustomValue { val, span }, _) => {
|
||||
let df = val.as_any().downcast_ref::<NuDataFrame>();
|
||||
let groupby = val.as_any().downcast_ref::<NuGroupBy>();
|
||||
|
||||
match (df, groupby) {
|
||||
(Some(df), None) => {
|
||||
let df = df.as_ref();
|
||||
let res = perform_dataframe_aggregation(df, op, operation.span)?;
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, span),
|
||||
None,
|
||||
))
|
||||
}
|
||||
(None, Some(nu_groupby)) => {
|
||||
let groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let res = perform_groupby_aggregation(
|
||||
groupby,
|
||||
op,
|
||||
operation.span,
|
||||
call.head,
|
||||
call.has_flag("explicit"),
|
||||
)?;
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, span),
|
||||
None,
|
||||
))
|
||||
}
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Incorrect datatype".into(),
|
||||
"no groupby or dataframe found in input stream".into(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Incorrect datatype".into(),
|
||||
"no groupby or dataframe found in input stream".into(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn perform_groupby_aggregation(
|
||||
groupby: GroupBy,
|
||||
operation: Operation,
|
||||
operation_span: Span,
|
||||
agg_span: Span,
|
||||
explicit: bool,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let mut res = match operation {
|
||||
Operation::Mean => groupby.mean(),
|
||||
Operation::Sum => groupby.sum(),
|
||||
Operation::Min => groupby.min(),
|
||||
Operation::Max => groupby.max(),
|
||||
Operation::First => groupby.first(),
|
||||
Operation::Last => groupby.last(),
|
||||
Operation::Nunique => groupby.n_unique(),
|
||||
Operation::Quantile(quantile) => {
|
||||
groupby.quantile(quantile, QuantileInterpolOptions::default())
|
||||
}
|
||||
Operation::Median => groupby.median(),
|
||||
Operation::Var => groupby.var(),
|
||||
Operation::Std => groupby.std(),
|
||||
Operation::Count => groupby.count(),
|
||||
}
|
||||
.map_err(|e| {
|
||||
let span = match &e {
|
||||
PolarsError::NotFound(_) => agg_span,
|
||||
_ => operation_span,
|
||||
};
|
||||
|
||||
ShellError::GenericError(
|
||||
"Error calculating aggregation".into(),
|
||||
e.to_string(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
if !explicit {
|
||||
let col_names = res
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|name| name.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
for col in col_names {
|
||||
let from = match operation {
|
||||
Operation::Mean => "_mean",
|
||||
Operation::Sum => "_sum",
|
||||
Operation::Min => "_min",
|
||||
Operation::Max => "_max",
|
||||
Operation::First => "_first",
|
||||
Operation::Last => "_last",
|
||||
Operation::Nunique => "_n_unique",
|
||||
Operation::Quantile(_) => "_quantile",
|
||||
Operation::Median => "_median",
|
||||
Operation::Var => "_agg_var",
|
||||
Operation::Std => "_agg_std",
|
||||
Operation::Count => "_count",
|
||||
};
|
||||
|
||||
let new_col = match col.find(from) {
|
||||
Some(index) => &col[..index],
|
||||
None => &col[..],
|
||||
};
|
||||
|
||||
res.rename(&col, new_col)
|
||||
.expect("Column is always there. Looping with known names");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn perform_dataframe_aggregation(
|
||||
dataframe: &polars::prelude::DataFrame,
|
||||
operation: Operation,
|
||||
operation_span: Span,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
match operation {
|
||||
Operation::Mean => Ok(dataframe.mean()),
|
||||
Operation::Sum => Ok(dataframe.sum()),
|
||||
Operation::Min => Ok(dataframe.min()),
|
||||
Operation::Max => Ok(dataframe.max()),
|
||||
Operation::Quantile(quantile) => dataframe
|
||||
.quantile(quantile, QuantileInterpolOptions::default())
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating quantile".into(),
|
||||
e.to_string(),
|
||||
Some(operation_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
Operation::Median => Ok(dataframe.median()),
|
||||
Operation::Var => Ok(dataframe.var()),
|
||||
Operation::Std => Ok(dataframe.std()),
|
||||
operation => {
|
||||
let possibilities = [
|
||||
"mean".to_string(),
|
||||
"sum".to_string(),
|
||||
"min".to_string(),
|
||||
"max".to_string(),
|
||||
"quantile".to_string(),
|
||||
"median".to_string(),
|
||||
"var".to_string(),
|
||||
"std".to_string(),
|
||||
];
|
||||
|
||||
match did_you_mean(&possibilities, operation.to_str()) {
|
||||
Some(suggestion) => Err(ShellError::DidYouMean(suggestion, operation_span)),
|
||||
None => Err(ShellError::GenericError(
|
||||
"Operation not fount".into(),
|
||||
"Operation does not exist".into(),
|
||||
Some(operation_span),
|
||||
Some(
|
||||
"Perhaps you want: mean, sum, min, max, quantile, median, var, or std"
|
||||
.into(),
|
||||
),
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::CreateGroupBy;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(Aggregate {}), Box::new(CreateGroupBy {})])
|
||||
}
|
||||
}
|
@ -4,7 +4,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::DistinctKeepStrategy;
|
||||
use polars::prelude::UniqueKeepStrategy;
|
||||
|
||||
use super::super::values::utils::convert_columns_string;
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
@ -89,13 +89,13 @@ fn command(
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let keep_strategy = if call.has_flag("last") {
|
||||
DistinctKeepStrategy::Last
|
||||
UniqueKeepStrategy::Last
|
||||
} else {
|
||||
DistinctKeepStrategy::First
|
||||
UniqueKeepStrategy::First
|
||||
};
|
||||
|
||||
df.as_ref()
|
||||
.distinct(subset_slice, keep_strategy)
|
||||
.unique(subset_slice, keep_strategy)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error dropping duplicates".into(),
|
||||
|
@ -1,10 +1,10 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use polars::prelude::DataFrameOps;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Dummies;
|
||||
|
@ -4,6 +4,9 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::LazyFrame;
|
||||
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
@ -16,12 +19,16 @@ impl Command for FilterWith {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filters dataframe using a mask as reference"
|
||||
"Filters dataframe using a mask or expression as reference"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("mask", SyntaxShape::Any, "boolean mask used to filter data")
|
||||
.required(
|
||||
"mask or expression",
|
||||
SyntaxShape::Any,
|
||||
"boolean mask used to filter data",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
@ -48,15 +55,30 @@ impl Command for FilterWith {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let mask_value: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
@ -72,8 +94,6 @@ fn command(
|
||||
)
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
df.as_ref()
|
||||
.filter(mask)
|
||||
.map_err(|e| {
|
||||
@ -88,6 +108,23 @@ fn command(
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr: Value = call.req(engine_state, stack, 0)?;
|
||||
let expr = NuExpression::try_from_value(expr)?;
|
||||
|
||||
let lazy = lazy.apply_with_expr(expr, LazyFrame::filter);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
|
@ -1,3 +1,5 @@
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
@ -5,8 +7,6 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FirstDF;
|
||||
|
||||
@ -16,7 +16,7 @@ impl Command for FirstDF {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe with first rows"
|
||||
"Creates new dataframe with first rows or creates a first expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -26,18 +26,25 @@ impl Command for FirstDF {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with head rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new dataframe with head rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(1)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(2)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a first expression from a column",
|
||||
example: "dfr col a | dfr first",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -47,7 +54,27 @@ impl Command for FirstDF {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().is_null().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -55,12 +82,11 @@ fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
|
||||
let rows = rows.unwrap_or(DEFAULT_ROWS);
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
|
@ -1,77 +0,0 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{utils::convert_columns_string, NuDataFrame, NuGroupBy};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CreateGroupBy;
|
||||
|
||||
impl Command for CreateGroupBy {
|
||||
fn name(&self) -> &str {
|
||||
"dfr group-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a groupby object that can be used for other aggregations"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest("rest", SyntaxShape::Any, "groupby columns")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Grouping by column a",
|
||||
example: "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
// Extracting the names of the columns to perform the groupby
|
||||
let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let (col_string, col_span) = convert_columns_string(columns, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
// This is the expensive part of the groupby; to create the
|
||||
// groups that will be used for grouping the data in the
|
||||
// dataframe. Once it has been done these values can be stored
|
||||
// in a NuGroupBy
|
||||
let groupby = df.as_ref().groupby(&col_string).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating groupby".into(),
|
||||
e.to_string(),
|
||||
Some(col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let groups = groupby.get_groups();
|
||||
let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups);
|
||||
|
||||
Ok(PipelineData::Value(groupby.into_value(call.head), None))
|
||||
}
|
@ -1,235 +0,0 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::JoinType;
|
||||
|
||||
use crate::dataframe::values::utils::convert_columns_string;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct JoinDF;
|
||||
|
||||
impl Command for JoinDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr join"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Joins a dataframe using columns as reference"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("dataframe", SyntaxShape::Any, "right dataframe to join")
|
||||
.required_named(
|
||||
"left",
|
||||
SyntaxShape::Table,
|
||||
"left column names to perform join",
|
||||
Some('l'),
|
||||
)
|
||||
.required_named(
|
||||
"right",
|
||||
SyntaxShape::Table,
|
||||
"right column names to perform join",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"type",
|
||||
SyntaxShape::String,
|
||||
"type of join. Inner by default",
|
||||
Some('t'),
|
||||
)
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"suffix for the columns of the right dataframe",
|
||||
Some('s'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "inner join dataframe",
|
||||
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dfr to-df);
|
||||
$right | dfr join $right -l [a b] -r [a b]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"c_right".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let r_df: Value = call.req(engine_state, stack, 0)?;
|
||||
let l_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "left")?
|
||||
.expect("required value in syntax");
|
||||
let r_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "right")?
|
||||
.expect("required value in syntax");
|
||||
let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;
|
||||
let join_type_op: Option<Spanned<String>> = call.get_flag(engine_state, stack, "type")?;
|
||||
|
||||
let join_type = match join_type_op {
|
||||
None => JoinType::Inner,
|
||||
Some(val) => match val.item.as_ref() {
|
||||
"inner" => JoinType::Inner,
|
||||
"outer" => JoinType::Outer,
|
||||
"left" => JoinType::Left,
|
||||
_ => {
|
||||
return Err(ShellError::GenericError(
|
||||
"Incorrect join type".into(),
|
||||
"Invalid join type".into(),
|
||||
Some(val.span),
|
||||
Some("Options: inner, outer or left".into()),
|
||||
Vec::new(),
|
||||
))
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let (l_col_string, l_col_span) = convert_columns_string(l_col, call.head)?;
|
||||
let (r_col_string, r_col_span) = convert_columns_string(r_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let r_df = NuDataFrame::try_from_value(r_df)?;
|
||||
|
||||
check_column_datatypes(
|
||||
df.as_ref(),
|
||||
r_df.as_ref(),
|
||||
&l_col_string,
|
||||
l_col_span,
|
||||
&r_col_string,
|
||||
r_col_span,
|
||||
)?;
|
||||
|
||||
df.as_ref()
|
||||
.join(
|
||||
r_df.as_ref(),
|
||||
&l_col_string,
|
||||
&r_col_string,
|
||||
join_type,
|
||||
suffix,
|
||||
)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error joining dataframes".into(),
|
||||
e.to_string(),
|
||||
Some(l_col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df_l: &polars::prelude::DataFrame,
|
||||
df_r: &polars::prelude::DataFrame,
|
||||
l_cols: &[T],
|
||||
l_col_span: Span,
|
||||
r_cols: &[T],
|
||||
r_col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if l_cols.len() != r_cols.len() {
|
||||
return Err(ShellError::GenericError(
|
||||
"Mismatched number of column names".into(),
|
||||
format!(
|
||||
"found {} left names vs {} right names",
|
||||
l_cols.len(),
|
||||
r_cols.len()
|
||||
),
|
||||
Some(l_col_span),
|
||||
Some("perhaps you need to change the number of columns to join".into()),
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
for (l, r) in l_cols.iter().zip(r_cols) {
|
||||
let l_series = df_l.column(l.as_ref()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error selecting the columns".into(),
|
||||
e.to_string(),
|
||||
Some(l_col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let r_series = df_r.column(r.as_ref()).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error selecting the columns".into(),
|
||||
e.to_string(),
|
||||
Some(r_col_span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError(
|
||||
"Mismatched datatypes".into(),
|
||||
format!(
|
||||
"left column type '{}' doesn't match '{}' right column match",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
),
|
||||
Some(l_col_span),
|
||||
Some("perhaps you need to select other column to match".into()),
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(JoinDF {})])
|
||||
}
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
@ -5,8 +7,6 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LastDF;
|
||||
|
||||
@ -16,7 +16,7 @@ impl Command for LastDF {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe with tail rows"
|
||||
"Creates new dataframe with tail rows or creates a last expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -26,18 +26,25 @@ impl Command for LastDF {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a last expression from a column",
|
||||
example: "dfr col a | dfr last",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -47,7 +54,27 @@ impl Command for LastDF {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().is_null().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -55,12 +82,11 @@ fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
|
||||
let rows = rows.unwrap_or(DEFAULT_ROWS);
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
|
@ -11,7 +11,7 @@ pub struct ListDF;
|
||||
|
||||
impl Command for ListDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr list"
|
||||
"dfr ls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -26,7 +26,7 @@ impl Command for ListDF {
|
||||
vec![Example {
|
||||
description: "Creates a new dataframe and shows it in the dataframe list",
|
||||
example: r#"let test = ([[a b];[1 2] [3 4]] | dfr to-df);
|
||||
dfr list"#,
|
||||
dfr ls"#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
mod aggregate;
|
||||
mod append;
|
||||
mod column;
|
||||
mod command;
|
||||
@ -11,13 +10,10 @@ mod dummies;
|
||||
mod filter_with;
|
||||
mod first;
|
||||
mod get;
|
||||
mod groupby;
|
||||
mod join;
|
||||
mod last;
|
||||
mod list;
|
||||
mod melt;
|
||||
mod open;
|
||||
mod pivot;
|
||||
mod rename;
|
||||
mod sample;
|
||||
mod shape;
|
||||
@ -32,7 +28,6 @@ mod with_column;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
pub use aggregate::Aggregate;
|
||||
pub use append::AppendDF;
|
||||
pub use column::ColumnDF;
|
||||
pub use command::Dataframe;
|
||||
@ -45,13 +40,10 @@ pub use dummies::Dummies;
|
||||
pub use filter_with::FilterWith;
|
||||
pub use first::FirstDF;
|
||||
pub use get::GetDF;
|
||||
pub use groupby::CreateGroupBy;
|
||||
pub use join::JoinDF;
|
||||
pub use last::LastDF;
|
||||
pub use list::ListDF;
|
||||
pub use melt::MeltDF;
|
||||
pub use open::OpenDataFrame;
|
||||
pub use pivot::PivotDF;
|
||||
pub use rename::RenameDF;
|
||||
pub use sample::SampleDF;
|
||||
pub use shape::ShapeDF;
|
||||
@ -76,10 +68,8 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
Aggregate,
|
||||
AppendDF,
|
||||
ColumnDF,
|
||||
CreateGroupBy,
|
||||
Dataframe,
|
||||
DataTypes,
|
||||
DescribeDF,
|
||||
@ -90,12 +80,10 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
FilterWith,
|
||||
FirstDF,
|
||||
GetDF,
|
||||
JoinDF,
|
||||
LastDF,
|
||||
ListDF,
|
||||
MeltDF,
|
||||
OpenDataFrame,
|
||||
PivotDF,
|
||||
RenameDF,
|
||||
SampleDF,
|
||||
ShapeDF,
|
||||
|
@ -1,198 +0,0 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
};
|
||||
use polars::prelude::DataType;
|
||||
|
||||
use crate::dataframe::values::NuGroupBy;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
enum Operation {
|
||||
First,
|
||||
Sum,
|
||||
Min,
|
||||
Max,
|
||||
Mean,
|
||||
Median,
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
fn from_tagged(name: Spanned<String>) -> Result<Operation, ShellError> {
|
||||
match name.item.as_ref() {
|
||||
"first" => Ok(Operation::First),
|
||||
"sum" => Ok(Operation::Sum),
|
||||
"min" => Ok(Operation::Min),
|
||||
"max" => Ok(Operation::Max),
|
||||
"mean" => Ok(Operation::Mean),
|
||||
"median" => Ok(Operation::Median),
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Operation not fount".into(),
|
||||
"Operation does not exist for pivot".into(),
|
||||
Some(name.span),
|
||||
Some("Options: first, sum, min, max, mean, median".into()),
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct PivotDF;
|
||||
|
||||
impl Command for PivotDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr pivot"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs a pivot operation on a groupby object"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"pivot_column",
|
||||
SyntaxShape::String,
|
||||
"pivot column to perform pivot",
|
||||
)
|
||||
.required(
|
||||
"value_column",
|
||||
SyntaxShape::String,
|
||||
"value column to perform pivot",
|
||||
)
|
||||
.required("operation", SyntaxShape::String, "aggregate operation")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Pivot a dataframe on b and aggregation on col c",
|
||||
example:
|
||||
"[[a b c]; [one x 1] [two y 2]] | dfr to-df | dfr group-by a | dfr pivot b c sum",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let pivot_col: Spanned<String> = call.req(engine_state, stack, 0)?;
|
||||
let value_col: Spanned<String> = call.req(engine_state, stack, 1)?;
|
||||
let operation: Spanned<String> = call.req(engine_state, stack, 2)?;
|
||||
let op = Operation::from_tagged(operation)?;
|
||||
|
||||
let nu_groupby = NuGroupBy::try_from_pipeline(input, call.head)?;
|
||||
let df_ref = nu_groupby.as_ref();
|
||||
|
||||
check_pivot_column(df_ref, &pivot_col)?;
|
||||
check_value_column(df_ref, &value_col)?;
|
||||
|
||||
let mut groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let pivot = groupby.pivot(vec![&pivot_col.item], vec![&value_col.item]);
|
||||
|
||||
match op {
|
||||
Operation::Mean => pivot.mean(),
|
||||
Operation::Sum => pivot.sum(),
|
||||
Operation::Min => pivot.min(),
|
||||
Operation::Max => pivot.max(),
|
||||
Operation::First => pivot.first(),
|
||||
Operation::Median => pivot.median(),
|
||||
}
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating pivot".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn check_pivot_column(
|
||||
df: &polars::prelude::DataFrame,
|
||||
col: &Spanned<String>,
|
||||
) -> Result<(), ShellError> {
|
||||
let series = df.column(&col.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Column not found".into(),
|
||||
e.to_string(),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
match series.dtype() {
|
||||
DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::Utf8 => Ok(()),
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Pivot error".into(),
|
||||
format!("Unsupported datatype {}", series.dtype()),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_value_column(
|
||||
df: &polars::prelude::DataFrame,
|
||||
col: &Spanned<String>,
|
||||
) -> Result<(), ShellError> {
|
||||
let series = df.column(&col.item).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Column not found".into(),
|
||||
e.to_string(),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
match series.dtype() {
|
||||
DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::Float32
|
||||
| DataType::Float64 => Ok(()),
|
||||
_ => Err(ShellError::GenericError(
|
||||
"Pivot error".into(),
|
||||
format!("Unsupported datatype {}", series.dtype()),
|
||||
Some(col.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)),
|
||||
}
|
||||
}
|
@ -5,6 +5,8 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::{utils::extract_strings, values::NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -21,8 +23,16 @@ impl Command for RenameDF {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("from", SyntaxShape::String, "column name to be renamed")
|
||||
.required("to", SyntaxShape::String, "new column name")
|
||||
.required(
|
||||
"columns",
|
||||
SyntaxShape::Any,
|
||||
"Column(s) to be renamed. A string or list of strings",
|
||||
)
|
||||
.required(
|
||||
"new names",
|
||||
SyntaxShape::Any,
|
||||
"New names for the selected column(s). A string or list of strings",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
@ -54,24 +64,39 @@ impl Command for RenameDF {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
mut df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let from: String = call.req(engine_state, stack, 0)?;
|
||||
let to: String = call.req(engine_state, stack, 1)?;
|
||||
let columns: Value = call.req(engine_state, stack, 0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let new_names: Value = call.req(engine_state, stack, 1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
df.as_mut()
|
||||
.rename(&from, &to)
|
||||
.map_err(|e| {
|
||||
for (from, to) in columns.iter().zip(new_names.iter()) {
|
||||
df.as_mut().rename(from, to).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error renaming".into(),
|
||||
e.to_string(),
|
||||
@ -79,13 +104,36 @@ fn command(
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(|df| {
|
||||
PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(df.clone(), call.head),
|
||||
None,
|
||||
)
|
||||
})
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let columns: Value = call.req(engine_state, stack, 0)?;
|
||||
let columns = extract_strings(columns)?;
|
||||
|
||||
let new_names: Value = call.req(engine_state, stack, 1)?;
|
||||
let new_names = extract_strings(new_names)?;
|
||||
|
||||
if columns.len() != new_names.len() {
|
||||
let value: Value = call.req(engine_state, stack, 1)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"New name list has different size to column list".into(),
|
||||
value.span()?,
|
||||
));
|
||||
}
|
||||
|
||||
let lazy = lazy.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -33,6 +33,12 @@ impl Command for SampleDF {
|
||||
"fraction of dataframe to be taken",
|
||||
Some('f'),
|
||||
)
|
||||
.named(
|
||||
"seed",
|
||||
SyntaxShape::Number,
|
||||
"seed for the selection",
|
||||
Some('s'),
|
||||
)
|
||||
.switch("replace", "sample with replace", Some('e'))
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
@ -71,12 +77,15 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
|
||||
let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
|
||||
let seed: Option<u64> = call
|
||||
.get_flag::<i64>(engine_state, stack, "seed")?
|
||||
.map(|val| val as u64);
|
||||
let replace: bool = call.has_flag("replace");
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
match (rows, fraction) {
|
||||
(Some(rows), None) => df.as_ref().sample_n(rows.item, replace, 0).map_err(|e| {
|
||||
(Some(rows), None) => df.as_ref().sample_n(rows.item, replace, seed).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
@ -85,15 +94,18 @@ fn command(
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
(None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace, 0).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
Some(frac.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(frac.item, replace, seed)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating sample".into(),
|
||||
e.to_string(),
|
||||
Some(frac.span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
}),
|
||||
(Some(_), Some(_)) => Err(ShellError::GenericError(
|
||||
"Incompatible flags".into(),
|
||||
"Only one selection criterion allowed".into(),
|
||||
|
@ -1,12 +1,12 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WithColumn;
|
||||
|
||||
@ -21,35 +21,51 @@ impl Command for WithColumn {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("series", SyntaxShape::Any, "series to be added")
|
||||
.required_named("name", SyntaxShape::String, "column name", Some('n'))
|
||||
.named("name", SyntaxShape::String, "new column name", Some('n'))
|
||||
.rest(
|
||||
"series or expressions",
|
||||
SyntaxShape::Any,
|
||||
"series to be added or expressions used to define the new columns",
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4]] | dfr to-df | dfr with-column ([5 6] | dfr to-df) --name c",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr to-df
|
||||
| dfr with-column ([5 6] | dfr to-df) --name c"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr to-df
|
||||
| dfr to-lazy
|
||||
| dfr with-column ((dfr col a) * 2 | dfr as "c")
|
||||
| dfr collect"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -59,26 +75,41 @@ impl Command for WithColumn {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
mut df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let name: Spanned<String> = call
|
||||
.get_flag(engine_state, stack, "name")?
|
||||
.expect("required named value");
|
||||
|
||||
let other_value: Value = call.req(engine_state, stack, 0)?;
|
||||
let other_span = other_value.span()?;
|
||||
let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?;
|
||||
let series = other.rename(&name.item).clone();
|
||||
|
||||
let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let name = match call.get_flag::<String>(engine_state, stack, "name")? {
|
||||
Some(name) => name,
|
||||
None => other.name().to_string(),
|
||||
};
|
||||
|
||||
let series = other.rename(&name).clone();
|
||||
|
||||
df.as_mut()
|
||||
.with_column(series)
|
||||
@ -99,6 +130,27 @@ fn command(
|
||||
})
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
|
57
crates/nu-command/src/dataframe/expressions/alias.rs
Normal file
57
crates/nu-command/src/dataframe/expressions/alias.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use super::super::values::NuExpression;
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprAlias;
|
||||
|
||||
impl Command for ExprAlias {
|
||||
fn name(&self) -> &str {
|
||||
"dfr as"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an alias expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"Alias name",
|
||||
SyntaxShape::String,
|
||||
"Alias name for the expression",
|
||||
)
|
||||
.category(Category::Custom("expressions".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates and alias expression",
|
||||
example: "(dfr col a | df as new_a)",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let alias: String = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)?;
|
||||
let expr: NuExpression = expr.into_polars().alias(alias.as_str()).into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
77
crates/nu-command/src/dataframe/expressions/dsl/col.rs
Normal file
77
crates/nu-command/src/dataframe/expressions/dsl/col.rs
Normal file
@ -0,0 +1,77 @@
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::col;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprCol;
|
||||
|
||||
impl Command for ExprCol {
|
||||
fn name(&self) -> &str {
|
||||
"dfr col"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a named column expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"column name",
|
||||
SyntaxShape::String,
|
||||
"Name of column to be used",
|
||||
)
|
||||
.category(Category::Custom("expressions".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a named column expression and converts it to a nu object",
|
||||
example: "dfr col col_a | dfr to-nu",
|
||||
result: Some(Value::Record {
|
||||
cols: vec!["expr".into(), "value".into()],
|
||||
vals: vec![
|
||||
Value::String {
|
||||
val: "column".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::String {
|
||||
val: "col_a".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let name: String = call.req(engine_state, stack, 0)?;
|
||||
let expr: NuExpression = col(name.as_str()).into();
|
||||
|
||||
Ok(PipelineData::Value(expr.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::super::ExprToNu;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprToNu {})])
|
||||
}
|
||||
}
|
79
crates/nu-command/src/dataframe/expressions/dsl/lit.rs
Normal file
79
crates/nu-command/src/dataframe/expressions/dsl/lit.rs
Normal file
@ -0,0 +1,79 @@
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprLit;
|
||||
|
||||
impl Command for ExprLit {
|
||||
fn name(&self) -> &str {
|
||||
"dfr lit"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a literal expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"literal",
|
||||
SyntaxShape::Any,
|
||||
"literal to construct the expression",
|
||||
)
|
||||
.category(Category::Custom("expressions".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Created a literal expression and converts it to a nu object",
|
||||
example: "dfr lit 2 | dfr to-nu",
|
||||
result: Some(Value::Record {
|
||||
cols: vec!["expr".into(), "value".into()],
|
||||
vals: vec![
|
||||
Value::String {
|
||||
val: "literal".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::String {
|
||||
val: "2i64".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let literal: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let expr = NuExpression::try_from_value(literal)?;
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::super::ExprToNu;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprToNu {})])
|
||||
}
|
||||
}
|
7
crates/nu-command/src/dataframe/expressions/dsl/mod.rs
Normal file
7
crates/nu-command/src/dataframe/expressions/dsl/mod.rs
Normal file
@ -0,0 +1,7 @@
|
||||
mod col;
|
||||
mod lit;
|
||||
mod when;
|
||||
|
||||
pub(super) use crate::dataframe::expressions::dsl::col::ExprCol;
|
||||
pub(super) use crate::dataframe::expressions::dsl::lit::ExprLit;
|
||||
pub(super) use crate::dataframe::expressions::dsl::when::ExprWhen;
|
96
crates/nu-command/src/dataframe/expressions/dsl/when.rs
Normal file
96
crates/nu-command/src/dataframe/expressions/dsl/when.rs
Normal file
@ -0,0 +1,96 @@
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::when;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprWhen;
|
||||
|
||||
impl Command for ExprWhen {
|
||||
fn name(&self) -> &str {
|
||||
"dfr when"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a when expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"when predicate",
|
||||
SyntaxShape::Any,
|
||||
"Name of column to be used",
|
||||
)
|
||||
.required_named(
|
||||
"then",
|
||||
SyntaxShape::Any,
|
||||
"Expression that will be applied when predicate is true",
|
||||
Some('t'),
|
||||
)
|
||||
.required_named(
|
||||
"otherwise",
|
||||
SyntaxShape::Any,
|
||||
"Expression that will be applied when predicate is false",
|
||||
Some('o'),
|
||||
)
|
||||
.category(Category::Custom("expressions".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create a new column for the dataframe",
|
||||
example: r#"[[a b]; [1 2] [3 4]]
|
||||
| dfr to-df
|
||||
| dfr to-lazy
|
||||
| dfr with-column (
|
||||
dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c"
|
||||
)
|
||||
| dfr collect"#,
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let predicate: Value = call.req(engine_state, stack, 0)?;
|
||||
let predicate = NuExpression::try_from_value(predicate)?;
|
||||
|
||||
let then: Value = call
|
||||
.get_flag(engine_state, stack, "then")?
|
||||
.expect("it is a required named value");
|
||||
let then = NuExpression::try_from_value(then)?;
|
||||
let otherwise: Value = call
|
||||
.get_flag(engine_state, stack, "otherwise")?
|
||||
.expect("it is a required named value");
|
||||
let otherwise = NuExpression::try_from_value(otherwise)?;
|
||||
|
||||
let expr: NuExpression = when(predicate.into_polars())
|
||||
.then(then.into_polars())
|
||||
.otherwise(otherwise.into_polars())
|
||||
.into();
|
||||
|
||||
Ok(PipelineData::Value(expr.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::super::ExprToNu;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ExprWhen {}), Box::new(ExprToNu {})])
|
||||
}
|
||||
}
|
109
crates/nu-command/src/dataframe/expressions/expressions_macro.rs
Normal file
109
crates/nu-command/src/dataframe/expressions/expressions_macro.rs
Normal file
@ -0,0 +1,109 @@
|
||||
/// Definition of multiple Expression commands using a macro rule
|
||||
/// All of these expressions have an identical body and only require
|
||||
/// to have a change in the name, description and expression function
|
||||
use super::super::values::NuExpression;
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
// The structs defined in this file are structs that form part of other commands
|
||||
// since they share a similar name
|
||||
macro_rules! expr_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl Command for $command {
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)?;
|
||||
let expr: NuExpression = expr.into_polars().$func().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// ExprList command
|
||||
// Expands to a command definition for a list expression
|
||||
expr_command!(
|
||||
ExprList,
|
||||
"dfr list",
|
||||
"Aggregates a group to a Series",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
list
|
||||
);
|
||||
|
||||
// ExprAggGroups command
|
||||
// Expands to a command definition for a agg groups expression
|
||||
expr_command!(
|
||||
ExprAggGroups,
|
||||
"dfr agg-groups",
|
||||
"creates an agg_groups expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
agg_groups
|
||||
);
|
||||
|
||||
// ExprFlatten command
|
||||
// Expands to a command definition for a flatten expression
|
||||
expr_command!(
|
||||
ExprFlatten,
|
||||
"dfr flatten",
|
||||
"creates a flatten expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
flatten
|
||||
);
|
||||
|
||||
// ExprExplode command
|
||||
// Expands to a command definition for a explode expression
|
||||
expr_command!(
|
||||
ExprExplode,
|
||||
"dfr explode",
|
||||
"creates an explode expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
explode
|
||||
);
|
36
crates/nu-command/src/dataframe/expressions/mod.rs
Normal file
36
crates/nu-command/src/dataframe/expressions/mod.rs
Normal file
@ -0,0 +1,36 @@
|
||||
mod alias;
|
||||
mod dsl;
|
||||
mod expressions_macro;
|
||||
mod to_nu;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
use crate::dataframe::expressions::dsl::*;
|
||||
|
||||
use crate::dataframe::expressions::alias::ExprAlias;
|
||||
use crate::dataframe::expressions::expressions_macro::*;
|
||||
use crate::dataframe::expressions::to_nu::ExprToNu;
|
||||
|
||||
pub fn add_expressions(working_set: &mut StateWorkingSet) {
|
||||
macro_rules! bind_command {
|
||||
( $command:expr ) => {
|
||||
working_set.add_decl(Box::new($command));
|
||||
};
|
||||
( $( $command:expr ),* ) => {
|
||||
$( working_set.add_decl(Box::new($command)); )*
|
||||
};
|
||||
}
|
||||
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
ExprAlias,
|
||||
ExprCol,
|
||||
ExprLit,
|
||||
ExprToNu,
|
||||
ExprWhen,
|
||||
ExprList,
|
||||
ExprAggGroups,
|
||||
ExprFlatten,
|
||||
ExprExplode
|
||||
);
|
||||
}
|
70
crates/nu-command/src/dataframe/expressions/to_nu.rs
Normal file
70
crates/nu-command/src/dataframe/expressions/to_nu.rs
Normal file
@ -0,0 +1,70 @@
|
||||
use super::super::values::NuExpression;
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprToNu;
|
||||
|
||||
impl Command for ExprToNu {
|
||||
fn name(&self) -> &str {
|
||||
"dfr to-nu"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Convert expression to a nu value for access and exploration"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("expressions".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Convert a col expression into a nushell value",
|
||||
example: "dfr col col_a | dfr to-nu",
|
||||
result: Some(Value::Record {
|
||||
cols: vec!["expr".into(), "value".into()],
|
||||
vals: vec![
|
||||
Value::String {
|
||||
val: "column".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::String {
|
||||
val: "col_a".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)?;
|
||||
let value = expr.to_value(call.head);
|
||||
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::super::ExprCol;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ExprToNu {}), Box::new(ExprCol {})])
|
||||
}
|
||||
}
|
91
crates/nu-command/src/dataframe/lazy/aggregate.rs
Normal file
91
crates/nu-command/src/dataframe/lazy/aggregate.rs
Normal file
@ -0,0 +1,91 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyAggregate;
|
||||
|
||||
impl Command for LazyAggregate {
|
||||
fn name(&self) -> &str {
|
||||
"dfr aggregate"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs a series of aggregations from a group by"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the aggregations to be applied",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| dfr to-df
|
||||
| dfr group-by a
|
||||
| dfr aggregate [
|
||||
("b" | dfr min | dfr as "b_min")
|
||||
("b" | dfr max | dfr as "b_max")
|
||||
("b" | dfr sum | dfr as "b_sum")
|
||||
]"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| dfr to-df
|
||||
| dfr to-lazy
|
||||
| dfr group-by a
|
||||
| dfr aggregate [
|
||||
("b" | dfr min | dfr as "b_min")
|
||||
("b" | dfr max | dfr as "b_max")
|
||||
("b" | dfr sum | dfr as "b_sum")
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?;
|
||||
let from_eager = group_by.from_eager;
|
||||
|
||||
let group_by = group_by.into_polars();
|
||||
let lazy: NuLazyFrame = group_by.agg(&expressions).into();
|
||||
|
||||
let res = if from_eager {
|
||||
lazy.collect(call.head)?.into_value(call.head)
|
||||
} else {
|
||||
lazy.into_value(call.head)
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(res, None))
|
||||
}
|
||||
}
|
48
crates/nu-command/src/dataframe/lazy/collect.rs
Normal file
48
crates/nu-command/src/dataframe/lazy/collect.rs
Normal file
@ -0,0 +1,48 @@
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyCollect;
|
||||
|
||||
impl Command for LazyCollect {
|
||||
fn name(&self) -> &str {
|
||||
"dfr collect"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Collect lazy dataframe into dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let eager = lazy.collect(call.head)?;
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::into_value(eager, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
80
crates/nu-command/src/dataframe/lazy/fetch.rs
Normal file
80
crates/nu-command/src/dataframe/lazy/fetch.rs
Normal file
@ -0,0 +1,80 @@
|
||||
use super::super::values::NuLazyFrame;
|
||||
use crate::dataframe::values::NuDataFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFetch;
|
||||
|
||||
impl Command for LazyFetch {
|
||||
fn name(&self) -> &str {
|
||||
"dfr fetch"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"collects the lazyframe to the selected rows"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"rows",
|
||||
SyntaxShape::Int,
|
||||
"number of rows to be fetched from lazyframe",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: i64 = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let eager: NuDataFrame = lazy
|
||||
.into_polars()
|
||||
.fetch(rows as usize)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error fetching rows".into(),
|
||||
e.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::into_value(eager, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazyFetch {})])
|
||||
// }
|
||||
//}
|
65
crates/nu-command/src/dataframe/lazy/fill_na.rs
Normal file
65
crates/nu-command/src/dataframe/lazy/fill_na.rs
Normal file
@ -0,0 +1,65 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFillNA;
|
||||
|
||||
impl Command for LazyFillNA {
|
||||
fn name(&self) -> &str {
|
||||
"dfr fill-na"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NA values with the given expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the NAN values",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
|
||||
let expr = NuExpression::try_from_value(fill)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.fill_nan(expr).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazyFillNA {})])
|
||||
// }
|
||||
//}
|
65
crates/nu-command/src/dataframe/lazy/fill_null.rs
Normal file
65
crates/nu-command/src/dataframe/lazy/fill_null.rs
Normal file
@ -0,0 +1,65 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFillNull;
|
||||
|
||||
impl Command for LazyFillNull {
|
||||
fn name(&self) -> &str {
|
||||
"dfr fill-null"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Replaces NULL values with the given expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the null values",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let fill: Value = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
|
||||
let expr = NuExpression::try_from_value(fill)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.fill_null(expr).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazyFillNull {})])
|
||||
// }
|
||||
//}
|
98
crates/nu-command/src/dataframe/lazy/groupby.rs
Normal file
98
crates/nu-command/src/dataframe/lazy/groupby.rs
Normal file
@ -0,0 +1,98 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyGroupBy;
|
||||
|
||||
impl Command for ToLazyGroupBy {
|
||||
fn name(&self) -> &str {
|
||||
"dfr group-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates a groupby object that can be used for other aggregations"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"Group by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the lazy group by",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| dfr to-df
|
||||
| dfr group-by a
|
||||
| dfr aggregate [
|
||||
("b" | dfr min | dfr as "b_min")
|
||||
("b" | dfr max | dfr as "b_max")
|
||||
("b" | dfr sum | dfr as "b_sum")
|
||||
]"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Group by and perform an aggregation",
|
||||
example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
|
||||
| dfr to-df
|
||||
| dfr to-lazy
|
||||
| dfr group-by a
|
||||
| dfr aggregate [
|
||||
("b" | dfr min | dfr as "b_min")
|
||||
("b" | dfr max | dfr as "b_max")
|
||||
("b" | dfr sum | dfr as "b_sum")
|
||||
]
|
||||
| dfr collect"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let vals: Vec<Value> = call.rest(engine_state, stack, 0)?;
|
||||
let value = Value::List {
|
||||
vals,
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
{
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"Expected only Col expressions".into(),
|
||||
value.span()?,
|
||||
));
|
||||
}
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
|
||||
|
||||
let group_by = NuLazyGroupBy {
|
||||
group_by: Some(lazy.into_polars().groupby(&expressions)),
|
||||
from_eager,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(group_by.into_value(call.head), None))
|
||||
}
|
||||
}
|
139
crates/nu-command/src/dataframe/lazy/join.rs
Normal file
139
crates/nu-command/src/dataframe/lazy/join.rs
Normal file
@ -0,0 +1,139 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::{Expr, JoinType};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyJoin;
|
||||
|
||||
impl Command for LazyJoin {
|
||||
fn name(&self) -> &str {
|
||||
"dfr join"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Joins a lazy frame with other lazy frame"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("other", SyntaxShape::Any, "LazyFrame to join with")
|
||||
.required("left_on", SyntaxShape::Any, "Left column(s) to join on")
|
||||
.required("right_on", SyntaxShape::Any, "Right column(s) to join on")
|
||||
.switch(
|
||||
"inner",
|
||||
"inner joing between lazyframes (default)",
|
||||
Some('i'),
|
||||
)
|
||||
.switch("left", "left join between lazyframes", Some('l'))
|
||||
.switch("outer", "outer join between lazyframes", Some('o'))
|
||||
.switch("cross", "cross join between lazyframes", Some('c'))
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"Suffix to use on columns with same name",
|
||||
Some('s'),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Join two lazy dataframes",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-lazy);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
|
||||
$df_a | dfr join $df_b a foo | dfr collect"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Join one eager dataframe with a lazy dataframe",
|
||||
example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-df);
|
||||
let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy);
|
||||
$df_a | dfr join $df_b a foo"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let left = call.has_flag("left");
|
||||
let outer = call.has_flag("outer");
|
||||
let cross = call.has_flag("cross");
|
||||
|
||||
let how = if left {
|
||||
JoinType::Left
|
||||
} else if outer {
|
||||
JoinType::Outer
|
||||
} else if cross {
|
||||
JoinType::Cross
|
||||
} else {
|
||||
JoinType::Inner
|
||||
};
|
||||
|
||||
let other: Value = call.req(engine_state, stack, 0)?;
|
||||
let (other, _) = NuLazyFrame::maybe_is_eager(other)?;
|
||||
let other = other.into_polars();
|
||||
|
||||
let left_on: Value = call.req(engine_state, stack, 1)?;
|
||||
let left_on = NuExpression::extract_exprs(left_on)?;
|
||||
|
||||
let right_on: Value = call.req(engine_state, stack, 2)?;
|
||||
let right_on = NuExpression::extract_exprs(right_on)?;
|
||||
|
||||
if left_on.len() != right_on.len() {
|
||||
let right_on: Value = call.req(engine_state, stack, 2)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"The right column list has a different size to the left column list".into(),
|
||||
right_on.span()?,
|
||||
));
|
||||
}
|
||||
|
||||
// Checking that both list of expressions are made out of col expressions or strings
|
||||
for (index, list) in &[(1usize, &left_on), (2, &left_on)] {
|
||||
if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) {
|
||||
let value: Value = call.req(engine_state, stack, *index)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"Expected only a string, col expressions or list of strings".into(),
|
||||
value.span()?,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;
|
||||
let suffix = suffix.unwrap_or_else(|| "_x".into());
|
||||
|
||||
let value = input.into_value(call.head);
|
||||
let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?;
|
||||
let lazy = lazy.into_polars();
|
||||
|
||||
let lazy: NuLazyFrame = lazy
|
||||
.join_builder()
|
||||
.with(other)
|
||||
.left_on(left_on)
|
||||
.right_on(right_on)
|
||||
.how(how)
|
||||
.force_parallel(true)
|
||||
.suffix(suffix)
|
||||
.finish()
|
||||
.into();
|
||||
|
||||
let res = if from_eager {
|
||||
lazy.collect(call.head)?.into_value(call.head)
|
||||
} else {
|
||||
lazy.into_value(call.head)
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(res, None))
|
||||
}
|
||||
}
|
232
crates/nu-command/src/dataframe/lazy/macro_commands.rs
Normal file
232
crates/nu-command/src/dataframe/lazy/macro_commands.rs
Normal file
@ -0,0 +1,232 @@
|
||||
/// Definition of multiple lazyframe commands using a macro rule
|
||||
/// All of these commands have an identical body and only require
|
||||
/// to have a change in the name, description and function
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
macro_rules! lazy_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl Command for $command {
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.$func().into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// LazyReverse command
|
||||
// Expands to a command definition for reverse
|
||||
lazy_command!(
|
||||
LazyReverse,
|
||||
"dfr reverse",
|
||||
"Reverses the LazyFrame",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
reverse
|
||||
);
|
||||
|
||||
// LazyCache command
|
||||
// Expands to a command definition for cache
|
||||
lazy_command!(
|
||||
LazyCache,
|
||||
"dfr cache",
|
||||
"Caches operations in a new LazyFrame",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
cache
|
||||
);
|
||||
|
||||
// Creates a command that may result in a lazy frame operation or
|
||||
// lazy frame expression
|
||||
macro_rules! lazy_expr_command {
|
||||
($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => {
|
||||
#[derive(Clone)]
|
||||
pub struct $command;
|
||||
|
||||
impl Command for $command {
|
||||
fn name(&self) -> &str {
|
||||
$name
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
$desc
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
$examples
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().$func().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuLazyFrame::can_downcast(&value) {
|
||||
let lazy = NuLazyFrame::try_from_value(value)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.$func().into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or lazyframe".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// LazyMax command
|
||||
// Expands to a command definition for max aggregation
|
||||
lazy_expr_command!(
|
||||
LazyMax,
|
||||
"dfr max",
|
||||
"Aggregates columns to their max value or creates a max expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
max
|
||||
);
|
||||
|
||||
// LazyMin command
|
||||
// Expands to a command definition for min aggregation
|
||||
lazy_expr_command!(
|
||||
LazyMin,
|
||||
"dfr min",
|
||||
"Aggregates columns to their min value or creates a min expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
min
|
||||
);
|
||||
|
||||
// LazySum command
|
||||
// Expands to a command definition for sum aggregation
|
||||
lazy_expr_command!(
|
||||
LazySum,
|
||||
"dfr sum",
|
||||
"Aggregates columns to their sum value or creates a sum expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
sum
|
||||
);
|
||||
|
||||
// LazyMean command
|
||||
// Expands to a command definition for mean aggregation
|
||||
lazy_expr_command!(
|
||||
LazyMean,
|
||||
"dfr mean",
|
||||
"Aggregates columns to their mean value or creates a mean expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
mean
|
||||
);
|
||||
|
||||
// LazyMedian command
|
||||
// Expands to a command definition for median aggregation
|
||||
lazy_expr_command!(
|
||||
LazyMedian,
|
||||
"dfr median",
|
||||
"Aggregates columns to their median value or creates a median expression",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
median
|
||||
);
|
||||
|
||||
// LazyStd command
|
||||
// Expands to a command definition for std aggregation
|
||||
lazy_expr_command!(
|
||||
LazyStd,
|
||||
"dfr std",
|
||||
"Aggregates columns to their std value",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
std
|
||||
);
|
||||
|
||||
// LazyVar command
|
||||
// Expands to a command definition for var aggregation
|
||||
lazy_expr_command!(
|
||||
LazyVar,
|
||||
"dfr var",
|
||||
"Aggregates columns to their var value",
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}],
|
||||
var
|
||||
);
|
63
crates/nu-command/src/dataframe/lazy/mod.rs
Normal file
63
crates/nu-command/src/dataframe/lazy/mod.rs
Normal file
@ -0,0 +1,63 @@
|
||||
mod aggregate;
|
||||
mod collect;
|
||||
mod fetch;
|
||||
mod fill_na;
|
||||
mod fill_null;
|
||||
mod groupby;
|
||||
mod join;
|
||||
mod macro_commands;
|
||||
mod quantile;
|
||||
mod select;
|
||||
mod sort_by_expr;
|
||||
mod to_lazy;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
use crate::dataframe::lazy::macro_commands::*;
|
||||
|
||||
use crate::dataframe::lazy::aggregate::LazyAggregate;
|
||||
use crate::dataframe::lazy::collect::LazyCollect;
|
||||
use crate::dataframe::lazy::fetch::LazyFetch;
|
||||
use crate::dataframe::lazy::fill_na::LazyFillNA;
|
||||
use crate::dataframe::lazy::fill_null::LazyFillNull;
|
||||
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
|
||||
use crate::dataframe::lazy::join::LazyJoin;
|
||||
use crate::dataframe::lazy::quantile::LazyQuantile;
|
||||
use crate::dataframe::lazy::select::LazySelect;
|
||||
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
|
||||
use crate::dataframe::lazy::to_lazy::ToLazyFrame;
|
||||
|
||||
pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
|
||||
macro_rules! bind_command {
|
||||
( $command:expr ) => {
|
||||
working_set.add_decl(Box::new($command));
|
||||
};
|
||||
( $( $command:expr ),* ) => {
|
||||
$( working_set.add_decl(Box::new($command)); )*
|
||||
};
|
||||
}
|
||||
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
LazyAggregate,
|
||||
LazyCache,
|
||||
LazyCollect,
|
||||
LazyFetch,
|
||||
LazyFillNA,
|
||||
LazyFillNull,
|
||||
LazyJoin,
|
||||
LazyQuantile,
|
||||
LazyMax,
|
||||
LazyMin,
|
||||
LazySum,
|
||||
LazyMean,
|
||||
LazyMedian,
|
||||
LazyStd,
|
||||
LazyVar,
|
||||
LazyReverse,
|
||||
LazySelect,
|
||||
LazySortBy,
|
||||
ToLazyFrame,
|
||||
ToLazyGroupBy
|
||||
);
|
||||
}
|
67
crates/nu-command/src/dataframe/lazy/quantile.rs
Normal file
67
crates/nu-command/src/dataframe/lazy/quantile.rs
Normal file
@ -0,0 +1,67 @@
|
||||
use crate::dataframe::values::NuLazyFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
|
||||
};
|
||||
use polars::prelude::QuantileInterpolOptions;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyQuantile;
|
||||
|
||||
impl Command for LazyQuantile {
|
||||
fn name(&self) -> &str {
|
||||
"dfr quantile"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Aggregates the columns to the selected quantile"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"quantile",
|
||||
SyntaxShape::Number,
|
||||
"quantile value for quantile operation",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let quantile: f64 = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy
|
||||
.quantile(quantile, QuantileInterpolOptions::default())
|
||||
.into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazyQuantile {})])
|
||||
// }
|
||||
//}
|
78
crates/nu-command/src/dataframe/lazy/select.rs
Normal file
78
crates/nu-command/src/dataframe/lazy/select.rs
Normal file
@ -0,0 +1,78 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySelect;
|
||||
|
||||
impl Command for LazySelect {
|
||||
fn name(&self) -> &str {
|
||||
"dfr select"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Selects columns from lazyframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"select expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the column selection",
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
{
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"Expected only Col expressions".into(),
|
||||
value.span()?,
|
||||
));
|
||||
}
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars();
|
||||
let lazy: NuLazyFrame = lazy.select(&expressions).into();
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazySelect {})])
|
||||
// }
|
||||
//}
|
100
crates/nu-command/src/dataframe/lazy/sort_by_expr.rs
Normal file
100
crates/nu-command/src/dataframe/lazy/sort_by_expr.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use super::super::values::NuLazyFrame;
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySortBy;
|
||||
|
||||
impl Command for LazySortBy {
|
||||
fn name(&self) -> &str {
|
||||
"dfr sort-by"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"sorts a lazy dataframe based on expression(s)"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"filter expression",
|
||||
SyntaxShape::Any,
|
||||
"filtering expression",
|
||||
)
|
||||
.named(
|
||||
"reverse",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Boolean)),
|
||||
"list indicating if reverse search should be done in the column. Default is false",
|
||||
Some('r'),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "",
|
||||
example: "",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
|
||||
let reverse = match reverse {
|
||||
Some(list) => {
|
||||
if expressions.len() != list.len() {
|
||||
let span = call
|
||||
.get_flag::<Value>(engine_state, stack, "reverse")?
|
||||
.expect("already checked and it exists")
|
||||
.span()?;
|
||||
return Err(ShellError::GenericError(
|
||||
"Incorrect list size".into(),
|
||||
"Size doesn't match expression list".into(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
} else {
|
||||
list
|
||||
}
|
||||
}
|
||||
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
|
||||
};
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let lazy: NuLazyFrame = lazy
|
||||
.into_polars()
|
||||
.sort_by_exprs(&expressions, reverse)
|
||||
.into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuLazyFrame::into_value(lazy, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
//#[cfg(test)]
|
||||
//mod test {
|
||||
// use super::super::super::test_dataframe::test_dataframe;
|
||||
// use super::*;
|
||||
//
|
||||
// #[test]
|
||||
// fn test_examples() {
|
||||
// test_dataframe(vec![Box::new(LazySortBy {})])
|
||||
// }
|
||||
//}
|
45
crates/nu-command/src/dataframe/lazy/to_lazy.rs
Normal file
45
crates/nu-command/src/dataframe/lazy/to_lazy.rs
Normal file
@ -0,0 +1,45 @@
|
||||
use super::super::values::{NuDataFrame, NuLazyFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ToLazyFrame;
|
||||
|
||||
impl Command for ToLazyFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dfr to-lazy"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a dataframe into a lazy dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes a dictionary and creates a lazy dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | dfr to-df | dfl to-lazy",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
let lazy = NuLazyFrame::from_dataframe(df);
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
}
|
@ -1,8 +1,13 @@
|
||||
mod eager;
|
||||
mod expressions;
|
||||
mod lazy;
|
||||
mod series;
|
||||
mod utils;
|
||||
mod values;
|
||||
|
||||
pub use eager::add_eager_decls;
|
||||
pub use expressions::add_expressions;
|
||||
pub use lazy::add_lazy_decls;
|
||||
pub use series::add_series_decls;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
@ -10,6 +15,8 @@ use nu_protocol::engine::StateWorkingSet;
|
||||
pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) {
|
||||
add_series_decls(working_set);
|
||||
add_eager_decls(working_set);
|
||||
add_expressions(working_set);
|
||||
add_lazy_decls(working_set);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -6,7 +6,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, SyntaxShape,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{IntoSeries, Utf8Methods};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDate;
|
||||
|
@ -7,7 +7,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, TimeUnit};
|
||||
use polars::prelude::{IntoSeries, TimeUnit, Utf8Methods};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDateTime;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetDay;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetHour;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetMinute;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetMonth;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetNanosecond;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetOrdinal;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetSecond;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetWeek;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetWeekDay;
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{DatetimeMethods, IntoSeries};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GetYear;
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -16,7 +16,7 @@ impl Command for IsNotNull {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask where value is not null"
|
||||
"Creates mask where value is not null or creates a is-not-null expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -24,25 +24,32 @@ impl Command for IsNotNull {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask where values are not null",
|
||||
example: r#"let s = ([5 6 0 8] | dfr to-df);
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask where values are not null",
|
||||
example: r#"let s = ([5 6 0 8] | dfr to-df);
|
||||
let res = ($s / $s);
|
||||
$res | dfr is-not-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_not_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_not_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a is not null expression from a column",
|
||||
example: "dfr col a | dfr is-not-null",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -52,7 +59,27 @@ impl Command for IsNotNull {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().is_not_null().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,10 +87,8 @@ fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut res = df.as_series(call.head)?.is_not_null();
|
||||
res.rename("is_not_null");
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -16,7 +16,7 @@ impl Command for IsNull {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates mask where value is null"
|
||||
"Creates mask where value is null or creates a is-null expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -24,25 +24,32 @@ impl Command for IsNull {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask where values are null",
|
||||
example: r#"let s = ([5 6 0 8] | dfr to-df);
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask where values are null",
|
||||
example: r#"let s = ([5 6 0 8] | dfr to-df);
|
||||
let res = ($s / $s);
|
||||
$res | dfr is-null"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_null".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a is not null expression from a column",
|
||||
example: "dfr col a | dfr is-null",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -52,7 +59,27 @@ impl Command for IsNull {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().is_null().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,10 +87,8 @@ fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut res = df.as_series(call.head)?.is_null();
|
||||
res.rename("is_null");
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -18,7 +18,7 @@ impl Command for NotSeries {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Inverts boolean mask"
|
||||
"Inverts boolean mask or creates a not expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -26,22 +26,29 @@ impl Command for NotSeries {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Inverts boolean mask",
|
||||
example: "[true false true] | dfr to-df | dfr not",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Inverts boolean mask",
|
||||
example: "[true false true] | dfr to-df | dfr not",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a not expression from a column",
|
||||
example: "dfr col a | dfr not",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -51,7 +58,27 @@ impl Command for NotSeries {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().is_null().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -59,9 +86,8 @@ fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let bool = series.bool().map_err(|e| {
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use crate::dataframe::values::NuExpression;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -15,7 +15,7 @@ impl Command for NUnique {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Counts unique values"
|
||||
"Counts unique values or creates a n-unique expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -23,18 +23,25 @@ impl Command for NUnique {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Counts unique values",
|
||||
example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"count_unique".to_string(),
|
||||
vec![Value::test_int(4)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Counts unique values",
|
||||
example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"count_unique".to_string(),
|
||||
vec![Value::test_int(4)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Creates a is n-unique expression from a column",
|
||||
example: "dfr col a | dfr n-unique",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -44,7 +51,27 @@ impl Command for NUnique {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuExpression::can_downcast(&value) {
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = expr.into_polars().n_unique().into();
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuExpression::into_value(expr, call.head),
|
||||
None,
|
||||
))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,10 +79,8 @@ fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let res = df.as_series(call.head)?.n_unique().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error counting unique values".into(),
|
||||
|
@ -1,3 +1,5 @@
|
||||
use crate::dataframe::values::{NuExpression, NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
@ -22,6 +24,12 @@ impl Command for Shift {
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("period", SyntaxShape::Int, "shift period")
|
||||
.named(
|
||||
"fill",
|
||||
SyntaxShape::Any,
|
||||
"Expression to use to fill the null values (lazy df)",
|
||||
Some('f'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
@ -47,25 +55,60 @@ impl Command for Shift {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
fn command_eager(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let period: i64 = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?.shift(period);
|
||||
|
||||
NuDataFrame::try_from_series(vec![series], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let shift: i64 = call.req(engine_state, stack, 0)?;
|
||||
let fill: Option<Value> = call.get_flag(engine_state, stack, "fill")?;
|
||||
|
||||
let lazy = lazy.into_polars();
|
||||
|
||||
let lazy: NuLazyFrame = match fill {
|
||||
Some(fill) => {
|
||||
let expr = NuExpression::try_from_value(fill)?.into_polars();
|
||||
lazy.shift_and_fill(shift, expr).into()
|
||||
}
|
||||
None => lazy.shift(shift).into(),
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::eager::DropNulls;
|
||||
|
@ -1,11 +1,14 @@
|
||||
use crate::dataframe::{utils::extract_strings, values::NuLazyFrame};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Value,
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{IntoSeries, UniqueKeepStrategy};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Unique;
|
||||
@ -20,7 +23,24 @@ impl Command for Unique {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"subset",
|
||||
SyntaxShape::Any,
|
||||
"Subset of column(s) to use to maintain rows (lazy df)",
|
||||
Some('s'),
|
||||
)
|
||||
.switch(
|
||||
"last",
|
||||
"Keeps last unique value. Default keeps first value (lazy df)",
|
||||
Some('l'),
|
||||
)
|
||||
.switch(
|
||||
"maintain-order",
|
||||
"Keep the same order as the original DataFrame (lazy df)",
|
||||
Some('k'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
@ -45,17 +65,31 @@ impl Command for Unique {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
let value = input.into_value(call.head);
|
||||
|
||||
if NuLazyFrame::can_downcast(&value) {
|
||||
let df = NuLazyFrame::try_from_value(value)?;
|
||||
command_lazy(engine_state, stack, call, df)
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
command_eager(engine_state, stack, call, df)
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"expression or query".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
fn command_eager(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series.unique().map_err(|e| {
|
||||
@ -72,6 +106,37 @@ fn command(
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
lazy: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let last = call.has_flag("last");
|
||||
let maintain = call.has_flag("maintain-order");
|
||||
|
||||
let subset: Option<Value> = call.get_flag(engine_state, stack, "subset")?;
|
||||
let subset = match subset {
|
||||
Some(value) => Some(extract_strings(value)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let strategy = if last {
|
||||
UniqueKeepStrategy::Last
|
||||
} else {
|
||||
UniqueKeepStrategy::First
|
||||
};
|
||||
|
||||
let lazy = lazy.into_polars();
|
||||
let lazy: NuLazyFrame = if maintain {
|
||||
lazy.unique(subset, strategy).into()
|
||||
} else {
|
||||
lazy.unique_stable(subset, strategy).into()
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
|
@ -63,7 +63,7 @@ fn command(
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series.value_counts().map_err(|e| {
|
||||
let res = series.value_counts(false).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating value counts values".into(),
|
||||
e.to_string(),
|
||||
|
15
crates/nu-command/src/dataframe/utils.rs
Normal file
15
crates/nu-command/src/dataframe/utils.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use nu_protocol::{FromValue, ShellError, Value};
|
||||
|
||||
pub fn extract_strings(value: Value) -> Result<Vec<String>, ShellError> {
|
||||
match (
|
||||
<String as FromValue>::from_value(&value),
|
||||
<Vec<String> as FromValue>::from_value(&value),
|
||||
) {
|
||||
(Ok(col), Err(_)) => Ok(vec![col]),
|
||||
(Err(_), Ok(cols)) => Ok(cols),
|
||||
_ => Err(ShellError::IncompatibleParametersSingle(
|
||||
"Expected a string or list of strings".into(),
|
||||
value.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
@ -1,6 +1,10 @@
|
||||
mod nu_dataframe;
|
||||
mod nu_groupby;
|
||||
mod nu_expression;
|
||||
mod nu_lazyframe;
|
||||
mod nu_lazygroupby;
|
||||
pub mod utils;
|
||||
|
||||
pub use nu_dataframe::{Axis, Column, NuDataFrame};
|
||||
pub use nu_groupby::NuGroupBy;
|
||||
pub use nu_expression::NuExpression;
|
||||
pub use nu_lazyframe::NuLazyFrame;
|
||||
pub use nu_lazygroupby::NuLazyGroupBy;
|
||||
|
@ -76,39 +76,33 @@ pub(super) fn compute_between_series(
|
||||
}
|
||||
}
|
||||
Operator::Equal => {
|
||||
let mut res = Series::equal(lhs, rhs).into_series();
|
||||
let name = format!("eq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::NotEqual => {
|
||||
let mut res = Series::not_equal(lhs, rhs).into_series();
|
||||
let name = format!("neq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::LessThan => {
|
||||
let mut res = Series::lt(lhs, rhs).into_series();
|
||||
let name = format!("lt_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::LessThanOrEqual => {
|
||||
let mut res = Series::lt_eq(lhs, rhs).into_series();
|
||||
let name = format!("lte_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::GreaterThan => {
|
||||
let mut res = Series::gt(lhs, rhs).into_series();
|
||||
let name = format!("gt_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::GreaterThanOrEqual => {
|
||||
let mut res = Series::gt_eq(lhs, rhs).into_series();
|
||||
let name = format!("gte_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
let res = compare_series(lhs, rhs, name.as_str(), right.span().ok(), Series::equal)?;
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::And => match lhs.dtype() {
|
||||
@ -179,6 +173,32 @@ pub(super) fn compute_between_series(
|
||||
}
|
||||
}
|
||||
|
||||
fn compare_series<'s, F>(
|
||||
lhs: &'s Series,
|
||||
rhs: &'s Series,
|
||||
name: &'s str,
|
||||
span: Option<Span>,
|
||||
f: F,
|
||||
) -> Result<Series, ShellError>
|
||||
where
|
||||
F: Fn(&'s Series, &'s Series) -> Result<ChunkedArray<BooleanType>, PolarsError>,
|
||||
{
|
||||
let mut res = f(lhs, rhs)
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Equality error".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename(name);
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
pub(super) fn compute_series_single_value(
|
||||
operator: Spanned<Operator>,
|
||||
left: &Value,
|
||||
|
@ -7,7 +7,7 @@ use polars::chunked_array::object::builder::ObjectChunkedBuilder;
|
||||
use polars::chunked_array::ChunkedArray;
|
||||
use polars::prelude::{
|
||||
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
|
||||
ObjectType, Series, TimeUnit,
|
||||
ObjectType, Series, TemporalMethods, TimeUnit,
|
||||
};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
|
@ -84,6 +84,12 @@ impl AsMut<DataFrame> for NuDataFrame {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DataFrame> for NuDataFrame {
|
||||
fn from(dataframe: DataFrame) -> Self {
|
||||
Self(dataframe)
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn new(dataframe: DataFrame) -> Self {
|
||||
Self(dataframe)
|
||||
@ -132,6 +138,7 @@ impl NuDataFrame {
|
||||
|
||||
for value in iter {
|
||||
match value {
|
||||
Value::CustomValue { .. } => return Self::try_from_value(value),
|
||||
Value::List { vals, .. } => {
|
||||
let cols = (0..vals.len())
|
||||
.map(|i| format!("{}", i))
|
||||
@ -181,7 +188,7 @@ impl NuDataFrame {
|
||||
|
||||
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
|
||||
match value {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
|
||||
Some(df) => Ok(NuDataFrame(df.0.clone())),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"dataframe".into(),
|
||||
@ -201,7 +208,15 @@ impl NuDataFrame {
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
let value = input.into_value(span);
|
||||
NuDataFrame::try_from_value(value)
|
||||
Self::try_from_value(value)
|
||||
}
|
||||
|
||||
pub fn can_downcast(value: &Value) -> bool {
|
||||
if let Value::CustomValue { val, .. } = value {
|
||||
val.as_any().downcast_ref::<Self>().is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
|
||||
|
@ -0,0 +1,149 @@
|
||||
use std::ops::{Add, Div, Mul, Rem, Sub};
|
||||
|
||||
use super::NuExpression;
|
||||
use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Type, Value};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
// CustomValue implementation for NuDataFrame
|
||||
impl CustomValue for NuExpression {
|
||||
fn typetag_name(&self) -> &'static str {
|
||||
"expression"
|
||||
}
|
||||
|
||||
fn typetag_deserialize(&self) {
|
||||
unimplemented!("typetag_deserialize")
|
||||
}
|
||||
|
||||
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
||||
let cloned = NuExpression(self.0.clone());
|
||||
|
||||
Value::CustomValue {
|
||||
val: Box::new(cloned),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
fn value_string(&self) -> String {
|
||||
self.typetag_name().to_string()
|
||||
}
|
||||
|
||||
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
|
||||
Ok(self.to_value(span))
|
||||
}
|
||||
|
||||
fn to_json(&self) -> nu_json::Value {
|
||||
nu_json::Value::Null
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn operation(
|
||||
&self,
|
||||
lhs_span: Span,
|
||||
operator: Operator,
|
||||
op: Span,
|
||||
right: &Value,
|
||||
) -> Result<Value, ShellError> {
|
||||
compute_with_value(self, lhs_span, operator, op, right)
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_with_value(
|
||||
left: &NuExpression,
|
||||
lhs_span: Span,
|
||||
operator: Operator,
|
||||
op: Span,
|
||||
right: &Value,
|
||||
) -> Result<Value, ShellError> {
|
||||
match right {
|
||||
Value::CustomValue {
|
||||
val: rhs,
|
||||
span: rhs_span,
|
||||
} => {
|
||||
let rhs = rhs.as_any().downcast_ref::<NuExpression>().ok_or_else(|| {
|
||||
ShellError::DowncastNotPossible(
|
||||
"Unable to create expression".to_string(),
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
|
||||
match rhs.as_ref() {
|
||||
polars::prelude::Expr::Literal(..) => {
|
||||
with_operator(operator, left, rhs, lhs_span, right.span()?, op)
|
||||
}
|
||||
_ => Err(ShellError::TypeMismatch(
|
||||
"Only literal expressions or number".into(),
|
||||
right.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let rhs = NuExpression::try_from_value(right.clone())?;
|
||||
with_operator(operator, left, &rhs, lhs_span, right.span()?, op)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn with_operator(
|
||||
operator: Operator,
|
||||
left: &NuExpression,
|
||||
right: &NuExpression,
|
||||
lhs_span: Span,
|
||||
rhs_span: Span,
|
||||
op_span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
match operator {
|
||||
Operator::Plus => apply_arithmetic(left, right, lhs_span, Add::add),
|
||||
Operator::Minus => apply_arithmetic(left, right, lhs_span, Sub::sub),
|
||||
Operator::Multiply => apply_arithmetic(left, right, lhs_span, Mul::mul),
|
||||
Operator::Divide => apply_arithmetic(left, right, lhs_span, Div::div),
|
||||
Operator::Modulo => apply_arithmetic(left, right, lhs_span, Rem::rem),
|
||||
Operator::Equal => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::eq)
|
||||
.into_value(lhs_span)),
|
||||
Operator::NotEqual => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::neq)
|
||||
.into_value(lhs_span)),
|
||||
Operator::GreaterThan => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::gt)
|
||||
.into_value(lhs_span)),
|
||||
Operator::GreaterThanOrEqual => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::gt_eq)
|
||||
.into_value(lhs_span)),
|
||||
Operator::LessThan => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::lt)
|
||||
.into_value(lhs_span)),
|
||||
Operator::LessThanOrEqual => Ok(left
|
||||
.clone()
|
||||
.apply_with_expr(right.clone(), Expr::lt_eq)
|
||||
.into_value(lhs_span)),
|
||||
_ => Err(ShellError::OperatorMismatch {
|
||||
op_span,
|
||||
lhs_ty: Type::Custom,
|
||||
lhs_span,
|
||||
rhs_ty: Type::Custom,
|
||||
rhs_span,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_arithmetic<F>(
|
||||
left: &NuExpression,
|
||||
right: &NuExpression,
|
||||
span: Span,
|
||||
f: F,
|
||||
) -> Result<Value, ShellError>
|
||||
where
|
||||
F: Fn(Expr, Expr) -> Expr,
|
||||
{
|
||||
let expr: NuExpression = f(left.as_ref().clone(), right.as_ref().clone()).into();
|
||||
|
||||
Ok(expr.into_value(span))
|
||||
}
|
325
crates/nu-command/src/dataframe/values/nu_expression/mod.rs
Normal file
325
crates/nu-command/src/dataframe/values/nu_expression/mod.rs
Normal file
@ -0,0 +1,325 @@
|
||||
mod custom_value;
|
||||
|
||||
use core::fmt;
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{col, AggExpr, Expr, Literal};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
// Polars Expression wrapper for Nushell operations
|
||||
// Object is behind and Option to allow easy implementation of
|
||||
// the Deserialize trait
|
||||
#[derive(Default, Clone)]
|
||||
pub struct NuExpression(Option<Expr>);
|
||||
|
||||
// Mocked serialization of the LazyFrame object
|
||||
impl Serialize for NuExpression {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_none()
|
||||
}
|
||||
}
|
||||
|
||||
// Mocked deserialization of the LazyFrame object
|
||||
impl<'de> Deserialize<'de> for NuExpression {
|
||||
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Ok(NuExpression::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NuExpression {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "NuExpression")
|
||||
}
|
||||
}
|
||||
|
||||
// Referenced access to the real LazyFrame
|
||||
impl AsRef<Expr> for NuExpression {
|
||||
fn as_ref(&self) -> &polars::prelude::Expr {
|
||||
// The only case when there cannot be an expr is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.0.as_ref().expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<Expr> for NuExpression {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::Expr {
|
||||
// The only case when there cannot be an expr is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.0.as_mut().expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Expr> for NuExpression {
|
||||
fn from(expr: Expr) -> Self {
|
||||
Self(Some(expr))
|
||||
}
|
||||
}
|
||||
|
||||
impl NuExpression {
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
|
||||
match value {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
|
||||
Some(expr) => Ok(NuExpression(expr.0.clone())),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"lazy expression".into(),
|
||||
"non-dataframe".into(),
|
||||
span,
|
||||
None,
|
||||
)),
|
||||
},
|
||||
Value::String { val, .. } => Ok(col(val.as_str()).into()),
|
||||
Value::Int { val, .. } => Ok(val.lit().into()),
|
||||
Value::Bool { val, .. } => Ok(val.lit().into()),
|
||||
Value::Float { val, .. } => Ok(val.lit().into()),
|
||||
x => Err(ShellError::CantConvert(
|
||||
"lazy expression".into(),
|
||||
x.get_type().to_string(),
|
||||
x.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
let value = input.into_value(span);
|
||||
Self::try_from_value(value)
|
||||
}
|
||||
|
||||
pub fn can_downcast(value: &Value) -> bool {
|
||||
match value {
|
||||
Value::CustomValue { val, .. } => val.as_any().downcast_ref::<Self>().is_some(),
|
||||
Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => {
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_polars(self) -> Expr {
|
||||
self.0.expect("Expression cannot be none to convert")
|
||||
}
|
||||
|
||||
pub fn apply_with_expr<F>(self, other: NuExpression, f: F) -> Self
|
||||
where
|
||||
F: Fn(Expr, Expr) -> Expr,
|
||||
{
|
||||
let expr = self.0.expect("Lazy expression must not be empty to apply");
|
||||
let other = other.0.expect("Lazy expression must not be empty to apply");
|
||||
|
||||
f(expr, other).into()
|
||||
}
|
||||
|
||||
pub fn to_value(&self, span: Span) -> Value {
|
||||
expr_to_value(self.as_ref(), span)
|
||||
}
|
||||
|
||||
// Convenient function to extrac multiple Expr that could be inside a nushell Value
|
||||
pub fn extract_exprs(value: Value) -> Result<Vec<Expr>, ShellError> {
|
||||
ExtractedExpr::extract_exprs(value).map(ExtractedExpr::into_exprs)
|
||||
}
|
||||
}
|
||||
|
||||
// Enum to represent the parsing of the expressions from Value
|
||||
enum ExtractedExpr {
|
||||
Single(Expr),
|
||||
List(Vec<ExtractedExpr>),
|
||||
}
|
||||
|
||||
impl ExtractedExpr {
|
||||
fn into_exprs(self) -> Vec<Expr> {
|
||||
match self {
|
||||
Self::Single(expr) => vec![expr],
|
||||
Self::List(expressions) => expressions
|
||||
.into_iter()
|
||||
.flat_map(ExtractedExpr::into_exprs)
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_exprs(value: Value) -> Result<ExtractedExpr, ShellError> {
|
||||
match value {
|
||||
Value::String { val, .. } => Ok(ExtractedExpr::Single(col(val.as_str()))),
|
||||
Value::CustomValue { .. } => NuExpression::try_from_value(value)
|
||||
.map(NuExpression::into_polars)
|
||||
.map(ExtractedExpr::Single),
|
||||
Value::List { vals, .. } => vals
|
||||
.into_iter()
|
||||
.map(Self::extract_exprs)
|
||||
.collect::<Result<Vec<ExtractedExpr>, ShellError>>()
|
||||
.map(ExtractedExpr::List),
|
||||
x => Err(ShellError::CantConvert(
|
||||
"expression".into(),
|
||||
x.get_type().to_string(),
|
||||
x.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
|
||||
let cols = vec!["expr".to_string(), "value".to_string()];
|
||||
|
||||
match expr {
|
||||
Expr::Not(_) => todo!(),
|
||||
Expr::Alias(expr, alias) => {
|
||||
let expr = expr_to_value(expr.as_ref(), span);
|
||||
let alias = Value::String {
|
||||
val: alias.as_ref().into(),
|
||||
span,
|
||||
};
|
||||
|
||||
let cols = vec!["expr".to_string(), "alias".to_string()];
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals: vec![expr, alias],
|
||||
span,
|
||||
}
|
||||
}
|
||||
Expr::Column(name) => {
|
||||
let expr_type = Value::String {
|
||||
val: "column".into(),
|
||||
span,
|
||||
};
|
||||
let value = Value::String {
|
||||
val: name.to_string(),
|
||||
span,
|
||||
};
|
||||
|
||||
let vals = vec![expr_type, value];
|
||||
Value::Record { cols, vals, span }
|
||||
}
|
||||
Expr::Columns(columns) => {
|
||||
let expr_type = Value::String {
|
||||
val: "columns".into(),
|
||||
span,
|
||||
};
|
||||
let value = Value::List {
|
||||
vals: columns
|
||||
.iter()
|
||||
.map(|col| Value::String {
|
||||
val: col.clone(),
|
||||
span,
|
||||
})
|
||||
.collect(),
|
||||
span,
|
||||
};
|
||||
|
||||
let vals = vec![expr_type, value];
|
||||
Value::Record { cols, vals, span }
|
||||
}
|
||||
Expr::DtypeColumn(_) => todo!(),
|
||||
Expr::Literal(literal) => {
|
||||
let expr_type = Value::String {
|
||||
val: "literal".into(),
|
||||
span,
|
||||
};
|
||||
let value = Value::String {
|
||||
val: format!("{:?}", literal),
|
||||
span,
|
||||
};
|
||||
|
||||
let vals = vec![expr_type, value];
|
||||
Value::Record { cols, vals, span }
|
||||
}
|
||||
Expr::BinaryExpr { left, op, right } => {
|
||||
let left_val = expr_to_value(left, span);
|
||||
let right_val = expr_to_value(right, span);
|
||||
|
||||
let operator = Value::String {
|
||||
val: format!("{:?}", op),
|
||||
span,
|
||||
};
|
||||
|
||||
let cols = vec!["left".to_string(), "op".to_string(), "right".to_string()];
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals: vec![left_val, operator, right_val],
|
||||
span,
|
||||
}
|
||||
}
|
||||
Expr::Ternary {
|
||||
predicate,
|
||||
truthy,
|
||||
falsy,
|
||||
} => {
|
||||
let predicate = expr_to_value(predicate.as_ref(), span);
|
||||
let truthy = expr_to_value(truthy.as_ref(), span);
|
||||
let falsy = expr_to_value(falsy.as_ref(), span);
|
||||
|
||||
let cols = vec![
|
||||
"predicate".to_string(),
|
||||
"truthy".to_string(),
|
||||
"falsy".to_string(),
|
||||
];
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals: vec![predicate, truthy, falsy],
|
||||
span,
|
||||
}
|
||||
}
|
||||
Expr::Agg(agg_expr) => {
|
||||
let value = match agg_expr {
|
||||
AggExpr::Min(expr)
|
||||
| AggExpr::Max(expr)
|
||||
| AggExpr::Median(expr)
|
||||
| AggExpr::NUnique(expr)
|
||||
| AggExpr::First(expr)
|
||||
| AggExpr::Last(expr)
|
||||
| AggExpr::Mean(expr)
|
||||
| AggExpr::List(expr)
|
||||
| AggExpr::Count(expr)
|
||||
| AggExpr::Sum(expr)
|
||||
| AggExpr::AggGroups(expr)
|
||||
| AggExpr::Std(expr)
|
||||
| AggExpr::Var(expr) => expr_to_value(expr.as_ref(), span),
|
||||
AggExpr::Quantile { .. } => todo!(),
|
||||
};
|
||||
|
||||
let expr_type = Value::String {
|
||||
val: "agg".into(),
|
||||
span,
|
||||
};
|
||||
|
||||
let vals = vec![expr_type, value];
|
||||
Value::Record { cols, vals, span }
|
||||
}
|
||||
Expr::IsNotNull(_) => todo!(),
|
||||
Expr::IsNull(_) => todo!(),
|
||||
Expr::Cast { .. } => todo!(),
|
||||
Expr::Sort { .. } => todo!(),
|
||||
Expr::Take { .. } => todo!(),
|
||||
Expr::SortBy { .. } => todo!(),
|
||||
Expr::Function { .. } => todo!(),
|
||||
Expr::Shift { .. } => todo!(),
|
||||
Expr::Reverse(_) => todo!(),
|
||||
Expr::Duplicated(_) => todo!(),
|
||||
Expr::IsUnique(_) => todo!(),
|
||||
Expr::Explode(_) => todo!(),
|
||||
Expr::Filter { .. } => todo!(),
|
||||
Expr::Window { .. } => todo!(),
|
||||
Expr::Wildcard => todo!(),
|
||||
Expr::Slice { .. } => todo!(),
|
||||
Expr::Exclude(_, _) => todo!(),
|
||||
Expr::KeepName(_) => todo!(),
|
||||
Expr::RenameAlias { .. } => todo!(),
|
||||
Expr::Count => todo!(),
|
||||
Expr::Nth(_) => todo!(),
|
||||
Expr::AnonymousFunction { .. } => todo!(),
|
||||
}
|
||||
}
|
@ -1,140 +0,0 @@
|
||||
mod custom_value;
|
||||
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::frame::groupby::{GroupBy, GroupsProxy};
|
||||
use polars::prelude::{DataFrame, GroupsIdx};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub enum NuGroupsProxy {
|
||||
Idx {
|
||||
sorted: bool,
|
||||
all: Vec<(u32, Vec<u32>)>,
|
||||
},
|
||||
Slice(Vec<[u32; 2]>),
|
||||
}
|
||||
|
||||
impl NuGroupsProxy {
|
||||
fn from_polars(groups: &GroupsProxy) -> Self {
|
||||
match groups {
|
||||
GroupsProxy::Idx(indexes) => NuGroupsProxy::Idx {
|
||||
sorted: indexes.is_sorted(),
|
||||
all: indexes
|
||||
.iter()
|
||||
.map(|(index, values)| (index, values.clone()))
|
||||
.collect(),
|
||||
},
|
||||
GroupsProxy::Slice(slice) => NuGroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_polars(&self) -> GroupsProxy {
|
||||
match self {
|
||||
Self::Idx { sorted, all } => {
|
||||
let mut groups: GroupsIdx = all.clone().into();
|
||||
if *sorted {
|
||||
groups.sort()
|
||||
}
|
||||
|
||||
GroupsProxy::Idx(groups)
|
||||
}
|
||||
Self::Slice(slice) => GroupsProxy::Slice(slice.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NuGroupBy {
|
||||
dataframe: DataFrame,
|
||||
by: Vec<String>,
|
||||
groups: NuGroupsProxy,
|
||||
}
|
||||
|
||||
impl NuGroupBy {
|
||||
pub fn new(dataframe: DataFrame, by: Vec<String>, groups: &GroupsProxy) -> Self {
|
||||
NuGroupBy {
|
||||
dataframe,
|
||||
by,
|
||||
groups: NuGroupsProxy::from_polars(groups),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
|
||||
match value {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuGroupBy>() {
|
||||
Some(groupby) => Ok(NuGroupBy {
|
||||
dataframe: groupby.dataframe.clone(),
|
||||
by: groupby.by.clone(),
|
||||
groups: groupby.groups.clone(),
|
||||
}),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"groupby".into(),
|
||||
"non-dataframe".into(),
|
||||
span,
|
||||
None,
|
||||
)),
|
||||
},
|
||||
x => Err(ShellError::CantConvert(
|
||||
"groupby".into(),
|
||||
x.get_type().to_string(),
|
||||
x.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<NuGroupBy, ShellError> {
|
||||
let value = input.into_value(span);
|
||||
NuGroupBy::try_from_value(value)
|
||||
}
|
||||
|
||||
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
|
||||
let by = self.dataframe.select_series(&self.by).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error creating groupby".into(),
|
||||
"".to_string(),
|
||||
None,
|
||||
Some(e.to_string()),
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(GroupBy::new(
|
||||
&self.dataframe,
|
||||
by,
|
||||
self.groups.to_polars(),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
|
||||
let values = self
|
||||
.by
|
||||
.iter()
|
||||
.map(|col| {
|
||||
let cols = vec!["group by".to_string()];
|
||||
let vals = vec![Value::String {
|
||||
val: col.into(),
|
||||
span,
|
||||
}];
|
||||
|
||||
Value::Record { cols, vals, span }
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<DataFrame> for NuGroupBy {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
&self.dataframe
|
||||
}
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
use super::NuLazyFrame;
|
||||
use nu_protocol::{CustomValue, ShellError, Span, Value};
|
||||
|
||||
// CustomValue implementation for NuDataFrame
|
||||
impl CustomValue for NuLazyFrame {
|
||||
fn typetag_name(&self) -> &'static str {
|
||||
"lazyframe"
|
||||
}
|
||||
|
||||
fn typetag_deserialize(&self) {
|
||||
unimplemented!("typetag_deserialize")
|
||||
}
|
||||
|
||||
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
||||
let cloned = NuLazyFrame(self.0.clone());
|
||||
|
||||
Value::CustomValue {
|
||||
val: Box::new(cloned),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
fn value_string(&self) -> String {
|
||||
self.typetag_name().to_string()
|
||||
}
|
||||
|
||||
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
|
||||
let cols = vec!["plan".into(), "optimized_plan".into()];
|
||||
let vals = vec![
|
||||
Value::String {
|
||||
val: self.as_ref().describe_plan(),
|
||||
span,
|
||||
},
|
||||
Value::String {
|
||||
val: self
|
||||
.as_ref()
|
||||
.describe_optimized_plan()
|
||||
.unwrap_or_else(|_| "<NOT AVAILABLE>".to_string()),
|
||||
span,
|
||||
},
|
||||
];
|
||||
|
||||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
fn to_json(&self) -> nu_json::Value {
|
||||
nu_json::Value::Null
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
}
|
156
crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs
Normal file
156
crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs
Normal file
@ -0,0 +1,156 @@
|
||||
mod custom_value;
|
||||
|
||||
use super::{NuDataFrame, NuExpression};
|
||||
use core::fmt;
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{Expr, IntoLazy, LazyFrame};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
// Lazyframe wrapper for Nushell operations
|
||||
// Polars LazyFrame is behind and Option to allow easy implementation of
|
||||
// the Deserialize trait
|
||||
#[derive(Default)]
|
||||
pub struct NuLazyFrame(Option<LazyFrame>);
|
||||
|
||||
// Mocked serialization of the LazyFrame object
|
||||
impl Serialize for NuLazyFrame {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_none()
|
||||
}
|
||||
}
|
||||
|
||||
// Mocked deserialization of the LazyFrame object
|
||||
impl<'de> Deserialize<'de> for NuLazyFrame {
|
||||
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Ok(NuLazyFrame::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NuLazyFrame {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "NuLazyframe")
|
||||
}
|
||||
}
|
||||
|
||||
// Referenced access to the real LazyFrame
|
||||
impl AsRef<LazyFrame> for NuLazyFrame {
|
||||
fn as_ref(&self) -> &polars::prelude::LazyFrame {
|
||||
// The only case when there cannot be a lazy frame is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.0.as_ref().expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<LazyFrame> for NuLazyFrame {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame {
|
||||
// The only case when there cannot be a lazy frame is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.0.as_mut().expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LazyFrame> for NuLazyFrame {
|
||||
fn from(lazy_frame: LazyFrame) -> Self {
|
||||
Self(Some(lazy_frame))
|
||||
}
|
||||
}
|
||||
|
||||
impl NuLazyFrame {
|
||||
pub fn from_dataframe(df: NuDataFrame) -> Self {
|
||||
let lazy = df.as_ref().clone().lazy();
|
||||
Self(Some(lazy))
|
||||
}
|
||||
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_polars(self) -> LazyFrame {
|
||||
self.0.expect("lazyframe cannot be none to convert")
|
||||
}
|
||||
|
||||
pub fn collect(self, span: Span) -> Result<NuDataFrame, ShellError> {
|
||||
self.0
|
||||
.expect("No empty lazy for collect")
|
||||
.collect()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error collecting lazy frame".to_string(),
|
||||
e.to_string(),
|
||||
Some(span),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})
|
||||
.map(NuDataFrame::new)
|
||||
}
|
||||
|
||||
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
|
||||
match value {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<Self>() {
|
||||
Some(expr) => Ok(Self(expr.0.clone())),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"lazy frame".into(),
|
||||
"non-dataframe".into(),
|
||||
span,
|
||||
None,
|
||||
)),
|
||||
},
|
||||
x => Err(ShellError::CantConvert(
|
||||
"lazy frame".into(),
|
||||
x.get_type().to_string(),
|
||||
x.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
let value = input.into_value(span);
|
||||
Self::try_from_value(value)
|
||||
}
|
||||
|
||||
pub fn can_downcast(value: &Value) -> bool {
|
||||
if let Value::CustomValue { val, .. } = value {
|
||||
val.as_any().downcast_ref::<Self>().is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn maybe_is_eager(value: Value) -> Result<(Self, bool), ShellError> {
|
||||
if Self::can_downcast(&value) {
|
||||
Ok((Self::try_from_value(value)?, false))
|
||||
} else if NuDataFrame::can_downcast(&value) {
|
||||
let df = NuDataFrame::try_from_value(value)?;
|
||||
Ok((NuLazyFrame::from_dataframe(df), true))
|
||||
} else {
|
||||
Err(ShellError::CantConvert(
|
||||
"lazy or eager dataframe".into(),
|
||||
value.get_type().to_string(),
|
||||
value.span()?,
|
||||
None,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_with_expr<F>(self, expr: NuExpression, f: F) -> Self
|
||||
where
|
||||
F: Fn(LazyFrame, Expr) -> LazyFrame,
|
||||
{
|
||||
let df = self.0.expect("Lazy frame must not be empty to apply");
|
||||
let expr = expr.into_polars();
|
||||
let new_frame = f(df, expr);
|
||||
|
||||
new_frame.into()
|
||||
}
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
use super::NuGroupBy;
|
||||
use super::NuLazyGroupBy;
|
||||
use nu_protocol::{CustomValue, ShellError, Span, Value};
|
||||
|
||||
// CustomValue implementation for NuDataFrame
|
||||
impl CustomValue for NuGroupBy {
|
||||
impl CustomValue for NuLazyGroupBy {
|
||||
fn typetag_name(&self) -> &'static str {
|
||||
"groupby"
|
||||
"lazygroupby"
|
||||
}
|
||||
|
||||
fn typetag_deserialize(&self) {
|
||||
@ -12,10 +12,9 @@ impl CustomValue for NuGroupBy {
|
||||
}
|
||||
|
||||
fn clone_value(&self, span: nu_protocol::Span) -> Value {
|
||||
let cloned = NuGroupBy {
|
||||
dataframe: self.dataframe.clone(),
|
||||
by: self.by.clone(),
|
||||
groups: self.groups.clone(),
|
||||
let cloned = NuLazyGroupBy {
|
||||
group_by: self.group_by.clone(),
|
||||
from_eager: self.from_eager,
|
||||
};
|
||||
|
||||
Value::CustomValue {
|
||||
@ -29,9 +28,13 @@ impl CustomValue for NuGroupBy {
|
||||
}
|
||||
|
||||
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
|
||||
let vals = self.print(span)?;
|
||||
let cols = vec!["LazyGroupBy".into()];
|
||||
let vals = vec![Value::String {
|
||||
val: "apply aggregation to complete execution plan".into(),
|
||||
span,
|
||||
}];
|
||||
|
||||
Ok(Value::List { vals, span })
|
||||
Ok(Value::Record { cols, vals, span })
|
||||
}
|
||||
|
||||
fn to_json(&self) -> nu_json::Value {
|
114
crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs
Normal file
114
crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs
Normal file
@ -0,0 +1,114 @@
|
||||
mod custom_value;
|
||||
|
||||
use core::fmt;
|
||||
use nu_protocol::{PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::LazyGroupBy;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
// Lazyframe wrapper for Nushell operations
|
||||
// Polars LazyFrame is behind and Option to allow easy implementation of
|
||||
// the Deserialize trait
|
||||
#[derive(Default)]
|
||||
pub struct NuLazyGroupBy {
|
||||
pub group_by: Option<LazyGroupBy>,
|
||||
pub from_eager: bool,
|
||||
}
|
||||
|
||||
// Mocked serialization of the LazyFrame object
|
||||
impl Serialize for NuLazyGroupBy {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_none()
|
||||
}
|
||||
}
|
||||
|
||||
// Mocked deserialization of the LazyFrame object
|
||||
impl<'de> Deserialize<'de> for NuLazyGroupBy {
|
||||
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Ok(NuLazyGroupBy::default())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NuLazyGroupBy {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "NuLazyGroupBy")
|
||||
}
|
||||
}
|
||||
|
||||
// Referenced access to the real LazyFrame
|
||||
impl AsRef<LazyGroupBy> for NuLazyGroupBy {
|
||||
fn as_ref(&self) -> &polars::prelude::LazyGroupBy {
|
||||
// The only case when there cannot be a lazy frame is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.group_by
|
||||
.as_ref()
|
||||
.expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<LazyGroupBy> for NuLazyGroupBy {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::LazyGroupBy {
|
||||
// The only case when there cannot be a lazy frame is if it is created
|
||||
// using the default function or if created by deserializing something
|
||||
self.group_by
|
||||
.as_mut()
|
||||
.expect("there should always be a frame")
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LazyGroupBy> for NuLazyGroupBy {
|
||||
fn from(group_by: LazyGroupBy) -> Self {
|
||||
Self {
|
||||
group_by: Some(group_by),
|
||||
from_eager: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl NuLazyGroupBy {
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_polars(self) -> LazyGroupBy {
|
||||
self.group_by.expect("GroupBy cannot be none to convert")
|
||||
}
|
||||
|
||||
pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
|
||||
match value {
|
||||
Value::CustomValue { val, span } => {
|
||||
match val.as_any().downcast_ref::<NuLazyGroupBy>() {
|
||||
Some(group) => Ok(Self {
|
||||
group_by: group.group_by.clone(),
|
||||
from_eager: group.from_eager,
|
||||
}),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"lazy frame".into(),
|
||||
"non-dataframe".into(),
|
||||
span,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
x => Err(ShellError::CantConvert(
|
||||
"lazy groupby".into(),
|
||||
x.get_type().to_string(),
|
||||
x.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
let value = input.into_value(span);
|
||||
Self::try_from_value(value)
|
||||
}
|
||||
}
|
@ -238,6 +238,31 @@ impl FromValue for Vec<String> {
|
||||
}
|
||||
}
|
||||
|
||||
impl FromValue for Vec<bool> {
|
||||
fn from_value(v: &Value) -> Result<Self, ShellError> {
|
||||
match v {
|
||||
Value::List { vals, .. } => vals
|
||||
.iter()
|
||||
.map(|val| match val {
|
||||
Value::Bool { val, .. } => Ok(*val),
|
||||
c => Err(ShellError::CantConvert(
|
||||
"bool".into(),
|
||||
c.get_type().to_string(),
|
||||
c.span()?,
|
||||
None,
|
||||
)),
|
||||
})
|
||||
.collect::<Result<Vec<bool>, ShellError>>(),
|
||||
v => Err(ShellError::CantConvert(
|
||||
"bool".into(),
|
||||
v.get_type().to_string(),
|
||||
v.span()?,
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromValue for CellPath {
|
||||
fn from_value(v: &Value) -> Result<Self, ShellError> {
|
||||
let span = v.span()?;
|
||||
|
Loading…
Reference in New Issue
Block a user