Polars upgrade 0.23 (#6303)

* more lazy expressions

* upgrade polars and correct functions

* arg-where example

* cargo clippy

* restore modified filter files

* correct string addition with str

* correct string addition with str

* correct message in test
Fernando Herrera 2022-08-12 14:10:36 +02:00 committed by GitHub
parent ff6868b329
commit ae64c58f59
22 changed files with 583 additions and 121 deletions

Cargo.lock (generated)
View File

@@ -130,9 +130,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
 [[package]]
 name = "arrow-format"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd"
+checksum = "8df5d25bc6d676271277120c41ef28760fe0a9f070677a58db621c0f983f9c20"
 dependencies = [
  "planus",
  "serde",
@@ -140,16 +140,19 @@ dependencies = [
 [[package]]
 name = "arrow2"
-version = "0.12.0"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0"
+checksum = "afc54f0b14083abaf6bc71cf1aeccd7831a24b1e29d07683ba9a4a0f6c5d9326"
 dependencies = [
+ "ahash",
  "arrow-format",
  "base64",
  "bytemuck",
  "chrono",
+ "dyn-clone",
  "either",
  "fallible-streaming-iterator",
+ "foreign_vec",
  "futures",
  "hash_hasher",
  "indexmap",
@@ -1024,6 +1027,12 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541"
+[[package]]
+name = "dyn-clone"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d07a982d1fb29db01e5a59b1918e03da4df7297eaeee7686ac45542fd4e59c8"
 [[package]]
 name = "ego-tree"
 version = "0.6.2"
@@ -1204,6 +1213,12 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+[[package]]
+name = "foreign_vec"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
 [[package]]
 name = "form_urlencoded"
 version = "1.0.1"
@@ -3172,9 +3187,9 @@ dependencies = [
 [[package]]
 name = "parquet2"
-version = "0.13.2"
+version = "0.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73fd2690ad041f9296876daef1f2706f6347073bdbcc719090887f1691e4a09d"
+checksum = "33e434af3293ba384075a56d4b400ce659868ca7823142194ef204f01ab35e50"
 dependencies = [
  "async-stream",
  "bitpacking",
@@ -3381,18 +3396,18 @@ checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
 [[package]]
 name = "planus"
-version = "0.2.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bffebaf174d6cad46a5f0f1bb1c45c6eb509571688bcb18dfab217f3c9f9b151"
+checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f"
 dependencies = [
  "array-init-cursor",
 ]
 [[package]]
 name = "polars"
-version = "0.22.8"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d175c67e80ceaef7219258cfc3a8686531d9510875b0cefa25404e5b80a7933"
+checksum = "a75b1077fda63c0f67acc1cdc8586e7afce419be1e85bf7dfa8935e0e266d6b3"
 dependencies = [
  "polars-core",
  "polars-io",
@@ -3403,9 +3418,9 @@ dependencies = [
 [[package]]
 name = "polars-arrow"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f66c7d3da2c10a09131294dbe7802fac792f570be639dc6ebf207bfc3e144287"
+checksum = "f7b28f858b252436550679609a23be34d62705faf783887f172f845eb58bcb8b"
 dependencies = [
  "arrow2",
  "hashbrown",
@@ -3416,13 +3431,14 @@ dependencies = [
 [[package]]
 name = "polars-core"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7f15f443a90d5367c4fbbb151e203f03b5b96055c8b928c6bc30655a3644f13"
+checksum = "eeaec1ca3ac4829ca24b33743adeeb323a43b5a85515bfce20c2c81799c82790"
 dependencies = [
  "ahash",
  "anyhow",
  "arrow2",
+ "bitflags",
  "chrono",
  "comfy-table",
  "hashbrown",
@@ -3437,14 +3453,15 @@ dependencies = [
  "regex",
  "serde",
  "serde_json",
+ "smartstring",
  "thiserror",
 ]
 [[package]]
 name = "polars-io"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "058d0a847ce5009b974c69ec878ed416e306436f21b626543019f738cee12315"
+checksum = "51405e46f93e306a3c9280c60ba1101c662e8a6dab33344680d31c3161045f1c"
 dependencies = [
  "ahash",
  "anyhow",
@@ -3470,11 +3487,12 @@ dependencies = [
 [[package]]
 name = "polars-lazy"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dad86a4ce7e32540ff12089bce6f77270fd133a5b263328a92be61defdd6b151"
+checksum = "1340af778bc8124180d8ca1a566f076a5339566a207a42130796048b087fe977"
 dependencies = [
  "ahash",
+ "bitflags",
  "glob",
  "parking_lot",
  "polars-arrow",
@@ -3489,9 +3507,9 @@ dependencies = [
 [[package]]
 name = "polars-ops"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "030ecd473be113cd0264f1bc19de39a844fa12fa565db9dc52c859cbc292cf04"
+checksum = "4a1812e5d5e589d5bd23f8d89dcd8bd4508082c50d055b8ff5fafb6f2a519c9a"
 dependencies = [
  "polars-arrow",
  "polars-core",
@@ -3499,9 +3517,9 @@ dependencies = [
 [[package]]
 name = "polars-time"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94047b20d2da3bcc55c421be187a0c6f316cf1eea7fe7ed7347c1160a32d017c"
+checksum = "fc4ebe97d601a4b443337df71d0b7e673fce953654871c3311850ea394d48297"
 dependencies = [
  "chrono",
  "lexical",
@@ -3513,9 +3531,9 @@ dependencies = [
 [[package]]
 name = "polars-utils"
-version = "0.22.7"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcd3d0238462d5d9f7fbeaaea46e73ed4d58f6fae8b70d53cbe51d7538cc43f5"
+checksum = "4ea836afadcddee3f1a513dae7624f6d7d0d64abb129063ec7476b8347c8725b"
 dependencies = [
  "parking_lot",
  "rayon",
@@ -4479,6 +4497,17 @@ version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
+[[package]]
+name = "smartstring"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
+dependencies = [
+ "autocfg",
+ "static_assertions",
+ "version_check",
+]
 [[package]]
 name = "smawk"
 version = "0.3.1"

View File

@@ -99,15 +99,14 @@ version = "2.1.3"
 optional = true
 [dependencies.polars]
-version = "0.22.8"
+version = "0.23.2"
-# path = "../../../../polars/polars"
 optional = true
 features = [
  "default", "to_dummies", "parquet", "json", "serde", "serde-lazy",
  "object", "checked_arithmetic", "strings", "cum_agg", "is_in",
  "rolling_window", "strings", "rows", "random",
  "dtype-datetime", "dtype-struct", "lazy", "cross_join",
- "dynamic_groupby", "dtype-categorical", "concat_str"
+ "dynamic_groupby", "dtype-categorical", "concat_str", "arg_where"
 ]
 [target.'cfg(windows)'.dependencies.windows]

View File

@@ -0,0 +1,94 @@
use super::super::values::NuDataFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
};
#[derive(Clone)]
pub struct ColumnsDF;
impl Command for ColumnsDF {
fn name(&self) -> &str {
"columns"
}
fn usage(&self) -> &str {
"Show dataframe columns"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_type(Type::Custom("dataframe".into()))
.output_type(Type::Any)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Dataframe columns",
example: "[[a b]; [1 2] [3 4]] | into df | columns",
result: Some(Value::List {
vals: vec![
Value::String {
val: "a".into(),
span: Span::test_data(),
},
Value::String {
val: "b".into(),
span: Span::test_data(),
},
],
span: Span::test_data(),
}),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
#[allow(clippy::needless_collect)]
fn command(
_engine_state: &EngineState,
_stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| Value::String {
val: v.to_string(),
span: call.head,
})
.collect();
let names = Value::List {
vals: names,
span: call.head,
};
Ok(PipelineData::Value(names, None))
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(ColumnsDF {})])
}
}

View File

@@ -1,4 +1,5 @@
 mod append;
+mod columns;
 mod describe;
 mod drop;
 mod drop_duplicates;
@@ -26,6 +27,7 @@ mod with_column;
 use nu_protocol::engine::StateWorkingSet;
 pub use append::AppendDF;
+pub use columns::ColumnsDF;
 pub use describe::DescribeDF;
 pub use drop::DropDF;
 pub use drop_duplicates::DropDuplicates;
@@ -63,6 +65,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
 // Dataframe commands
 bind_command!(
  AppendDF,
+ ColumnsDF,
  DataTypes,
  DescribeDF,
  DropDF,

View File

@@ -0,0 +1,76 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::arg_where;
#[derive(Clone)]
pub struct ExprArgWhere;
impl Command for ExprArgWhere {
fn name(&self) -> &str {
"arg-where"
}
fn usage(&self) -> &str {
"Creates an expression that returns the arguments where expression is true"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required("column name", SyntaxShape::Any, "Expression to evaluate")
.input_type(Type::Any)
.output_type(Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Return a dataframe where the value match the expression",
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
$df | select (arg-where ((col b) >= 2) | as b_arg)",
result: Some(
NuDataFrame::try_from_columns(vec![Column::new(
"b_arg".to_string(),
vec![Value::test_int(1), Value::test_int(2)],
)])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_value(value)?;
let expr: NuExpression = arg_where(expr.into_polars()).into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::expressions::ExprAlias;
use crate::dataframe::lazy::LazySelect;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(ExprArgWhere {}),
Box::new(ExprAlias {}),
Box::new(LazySelect {}),
])
}
}
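For reference, the new `arg-where` command is a thin wrapper over the `arg_where` lazy expression that polars 0.23 exposes behind the new `arg_where` cargo feature. A minimal sketch of that call against plain polars, assuming the same feature set as the Cargo.toml above; the frame and column names are illustrative, not from the commit:

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    // Illustrative frame mirroring the command example above.
    let df = DataFrame::new(vec![
        Series::new("a", &["one", "two", "three"]),
        Series::new("b", &[1i64, 2, 3]),
    ])?;

    // arg_where(predicate) yields the row indices where the predicate is true,
    // which is all the new command forwards to.
    let out = df
        .lazy()
        .select(&[arg_where(col("b").gt_eq(lit(2))).alias("b_arg")])
        .collect()?;

    println!("{}", out);
    Ok(())
}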

View File

@@ -0,0 +1,110 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
use polars::prelude::{lit, DataType};
#[derive(Clone)]
pub struct ExprIsIn;
impl Command for ExprIsIn {
fn name(&self) -> &str {
"is-in"
}
fn usage(&self) -> &str {
"Creates an is-in expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"list",
SyntaxShape::List(Box::new(SyntaxShape::Any)),
"List to check if values are in",
)
.input_type(Type::Custom("expression".into()))
.output_type(Type::Custom("expression".into()))
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Creates a is-in expression",
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
$df | with-column (col a | is-in [one two] | as a_in)"#,
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![
Value::test_string("one"),
Value::test_string("two"),
Value::test_string("three"),
],
),
Column::new(
"b".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
),
Column::new(
"a_in".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let list: Vec<Value> = call.req(engine_state, stack, 0)?;
let expr = NuExpression::try_from_pipeline(input, call.head)?;
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?;
let list = values.as_series(call.head)?;
if matches!(list.dtype(), DataType::Object(..)) {
return Err(ShellError::IncompatibleParametersSingle(
"Cannot use a mixed list as argument".into(),
call.head,
));
}
let expr: NuExpression = expr.into_polars().is_in(lit(list)).into();
Ok(PipelineData::Value(expr.into_value(call.head), None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
use crate::dataframe::eager::WithColumn;
use crate::dataframe::expressions::alias::ExprAlias;
use crate::dataframe::expressions::col::ExprCol;
#[test]
fn test_examples() {
test_dataframe(vec![
Box::new(ExprIsIn {}),
Box::new(ExprAlias {}),
Box::new(ExprCol {}),
Box::new(WithColumn {}),
])
}
}
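The polars detail worth noting above is that `is_in` compares against another expression, so the command wraps the checked list in `lit(Series)`. A hedged sketch of the same shape against plain polars (assuming the `is_in` feature is enabled, as in the Cargo.toml above; names are illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![Series::new("a", &["one", "two", "three"])])?;

    // The checked values are wrapped in a Series and lifted into an expression
    // with lit(), the same shape as the command's is_in(lit(list)) call.
    let allowed = Series::new("list", &["one", "two"]);
    let out = df
        .lazy()
        .with_column(col("a").is_in(lit(allowed)).alias("a_in"))
        .collect()?;

    println!("{}", out);
    Ok(())
}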

View File

@@ -1,8 +1,10 @@
 mod alias;
+mod arg_where;
 mod as_nu;
 mod col;
 mod concat_str;
 mod expressions_macro;
+mod is_in;
 mod lit;
 mod otherwise;
 mod quantile;
@@ -11,10 +13,12 @@ mod when;
 use nu_protocol::engine::StateWorkingSet;
 pub(crate) use crate::dataframe::expressions::alias::ExprAlias;
+use crate::dataframe::expressions::arg_where::ExprArgWhere;
 use crate::dataframe::expressions::as_nu::ExprAsNu;
 pub(super) use crate::dataframe::expressions::col::ExprCol;
 pub(super) use crate::dataframe::expressions::concat_str::ExprConcatStr;
 pub(crate) use crate::dataframe::expressions::expressions_macro::*;
+pub(super) use crate::dataframe::expressions::is_in::ExprIsIn;
 pub(super) use crate::dataframe::expressions::lit::ExprLit;
 pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise;
 pub(super) use crate::dataframe::expressions::quantile::ExprQuantile;
@@ -33,6 +37,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
 // Dataframe commands
 bind_command!(
  ExprAlias,
+ ExprArgWhere,
  ExprCol,
  ExprConcatStr,
  ExprCount,
@@ -49,6 +54,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
  ExprFirst,
  ExprLast,
  ExprNUnique,
+ ExprIsIn,
  ExprIsNotNull,
  ExprIsNull,
  ExprNot,

View File

@@ -0,0 +1,84 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
};
#[derive(Clone)]
pub struct LazyFilter;
impl Command for LazyFilter {
fn name(&self) -> &str {
"filter"
}
fn usage(&self) -> &str {
"Filter dataframe based in expression"
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.required(
"filter expression",
SyntaxShape::Any,
"Expression that define the column selection",
)
.input_type(Type::Custom("dataframe".into()))
.output_type(Type::Custom("dataframe".into()))
.category(Category::Custom("lazyframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe using an expression",
example: "[[a b]; [6 2] [4 2] [2 2]] | into df | filter ((col a) >= 4)",
result: Some(
NuDataFrame::try_from_columns(vec![
Column::new(
"a".to_string(),
vec![Value::test_int(6), Value::test_int(4)],
),
Column::new(
"b".to_string(),
vec![Value::test_int(2), Value::test_int(2)],
),
])
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value: Value = call.req(engine_state, stack, 0)?;
let expression = NuExpression::try_from_value(value)?;
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars().filter(expression.into_polars()),
);
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
}
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LazyFilter {})])
}
}
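The new lazy `filter` command delegates straight to `LazyFrame::filter`. A minimal sketch of that polars call, mirroring the command example above (frame contents are illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![
        Series::new("a", &[6i64, 4, 2]),
        Series::new("b", &[2i64, 2, 2]),
    ])?;

    // LazyFrame::filter keeps the rows where the predicate expression is true.
    let out = df.lazy().filter(col("a").gt_eq(lit(4))).collect()?;

    println!("{}", out);
    Ok(())
}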

View File

@@ -3,6 +3,7 @@ mod collect;
 mod fetch;
 mod fill_na;
 mod fill_null;
+mod filter;
 pub mod groupby;
 mod join;
 mod macro_commands;
@@ -13,17 +14,17 @@ mod to_lazy;
 use nu_protocol::engine::StateWorkingSet;
-pub(crate) use crate::dataframe::lazy::macro_commands::*;
 use crate::dataframe::lazy::aggregate::LazyAggregate;
 pub use crate::dataframe::lazy::collect::LazyCollect;
 use crate::dataframe::lazy::fetch::LazyFetch;
 use crate::dataframe::lazy::fill_na::LazyFillNA;
 use crate::dataframe::lazy::fill_null::LazyFillNull;
+use crate::dataframe::lazy::filter::LazyFilter;
 use crate::dataframe::lazy::groupby::ToLazyGroupBy;
 use crate::dataframe::lazy::join::LazyJoin;
+pub(crate) use crate::dataframe::lazy::macro_commands::*;
 use crate::dataframe::lazy::quantile::LazyQuantile;
-use crate::dataframe::lazy::select::LazySelect;
+pub(crate) use crate::dataframe::lazy::select::LazySelect;
 use crate::dataframe::lazy::sort_by_expr::LazySortBy;
 pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
@@ -45,6 +46,7 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
  LazyFetch,
  LazyFillNA,
  LazyFillNull,
+ LazyFilter,
  LazyJoin,
  LazyQuantile,
  LazyMax,

View File

@@ -6,8 +6,6 @@ use nu_protocol::{
 engine::{Command, EngineState, Stack},
 Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
 };
-use polars::prelude::Expr;
 #[derive(Clone)]
 pub struct LazySelect;
@@ -61,17 +59,6 @@ impl Command for LazySelect {
 };
 let expressions = NuExpression::extract_exprs(value)?;
-if expressions
-    .iter()
-    .any(|expr| !matches!(expr, Expr::Column(..)))
-{
-    let value: Value = call.req(engine_state, stack, 0)?;
-    return Err(ShellError::IncompatibleParametersSingle(
-        "Expected only Col expressions".into(),
-        value.span()?,
-    ));
-}
 let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
 let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions));
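With the guard removed, `select` now forwards arbitrary expressions to polars instead of only bare column references. A hedged sketch of what that allows at the polars level (the derived column name is illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![Series::new("a", &[1i64, 2, 3])])?;

    // A computed expression alongside a plain column reference.
    let out = df
        .lazy()
        .select(&[col("a"), (col("a") * lit(2)).alias("a_doubled")])
        .collect()?;

    println!("{}", out);
    Ok(())
}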

View File

@@ -32,6 +32,11 @@ impl Command for LazySortBy {
 "Reverse sorting. Default is false",
 Some('r'),
 )
+.switch(
+    "nulls-last",
+    "nulls are shown last in the dataframe",
+    Some('n'),
+)
 .input_type(Type::Custom("dataframe".into()))
 .output_type(Type::Custom("dataframe".into()))
 .category(Category::Custom("lazyframe".into()))
@@ -102,6 +107,7 @@ impl Command for LazySortBy {
 span: call.head,
 };
 let expressions = NuExpression::extract_exprs(value)?;
+let nulls_last = call.has_flag("nulls-last");
 let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
 let reverse = match reverse {
@@ -128,7 +134,8 @@ impl Command for LazySortBy {
 let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
 let lazy = NuLazyFrame::new(
     lazy.from_eager,
-    lazy.into_polars().sort_by_exprs(&expressions, reverse),
+    lazy.into_polars()
+        .sort_by_exprs(&expressions, reverse, nulls_last),
 );
 Ok(PipelineData::Value(
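polars 0.23 adds a third `nulls_last` argument to `sort_by_exprs`, which is why the command grows a `--nulls-last` switch. A minimal sketch of the new call shape (column values here are illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![Series::new("a", &[Some(2i64), None, Some(1)])])?;

    // sort_by_exprs now takes (expressions, reverse flags, nulls_last).
    let exprs = vec![col("a")];
    let out = df.lazy().sort_by_exprs(&exprs, vec![false], true).collect()?;

    println!("{}", out);
    Ok(())
}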

View File

@@ -35,7 +35,7 @@ impl Command for GetWeekDay {
 result: Some(
     NuDataFrame::try_from_columns(vec![Column::new(
         "0".to_string(),
-        vec![Value::test_int(2), Value::test_int(2)],
+        vec![Value::test_int(1), Value::test_int(1)],
     )])
     .expect("simple df for test should not fail")
     .into_value(Span::test_data()),

View File

@@ -5,7 +5,7 @@ use nu_protocol::{
 engine::{Command, EngineState, Stack},
 Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
 };
-use polars::prelude::IntoSeries;
+use polars::prelude::{arg_where, col, IntoLazy};
 #[derive(Clone)]
 pub struct ArgTrue;
@@ -59,23 +59,41 @@ fn command(
 input: PipelineData,
 ) -> Result<PipelineData, ShellError> {
 let df = NuDataFrame::try_from_pipeline(input, call.head)?;
-let series = df.as_series(call.head)?;
-let bool = series.bool().map_err(|_| {
-    ShellError::GenericError(
-        "Error converting to bool".into(),
-        "all-false only works with series of type bool".into(),
-        Some(call.head),
-        None,
-        Vec::new(),
-    )
-})?;
-let mut res = bool.arg_true().into_series();
-res.rename("arg_true");
-NuDataFrame::try_from_series(vec![res], call.head)
-    .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
+let columns = df.as_ref().get_column_names();
+if columns.len() > 1 {
+    return Err(ShellError::GenericError(
+        "Error using as series".into(),
+        "dataframe has more than one column".into(),
+        Some(call.head),
+        None,
+        Vec::new(),
+    ));
+}
+match columns.first() {
+    Some(column) => {
+        let expression = arg_where(col(column).eq(true)).alias("arg_true");
+        let res = df
+            .as_ref()
+            .clone()
+            .lazy()
+            .select(&[expression])
+            .collect()
+            .map_err(|err| {
+                ShellError::GenericError(
+                    "Error creating index column".into(),
+                    err.to_string(),
+                    Some(call.head),
+                    None,
+                    Vec::new(),
+                )
+            })?;
+        let value = NuDataFrame::dataframe_into_value(res, call.head);
+        Ok(PipelineData::Value(value, None))
+    }
+    _ => todo!(),
+}
 }
 #[cfg(test)]
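The rewritten `arg-true` body above leans on the same lazy `arg_where` machinery instead of `Series::bool()` plus `arg_true()`. A hedged standalone sketch of that pattern (frame and column name are illustrative; the command additionally rejects multi-column frames):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![Series::new("a", &[false, true, false, true])])?;

    // Same shape as the rewritten command body: arg_where over an equality
    // check against true, collected into a one-column frame of indices.
    let out = df
        .lazy()
        .select(&[arg_where(col("a").eq(lit(true))).alias("arg_true")])
        .collect()?;

    println!("{}", out);
    Ok(())
}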

View File

@@ -146,7 +146,7 @@ fn command(
 NuDataFrame::try_from_series(vec![res.into_series()], call.head)
 }
 Value::Float { val, span } => {
-let chunked = series.as_ref().f64().map_err(|e| {
+let chunked = series.f64().map_err(|e| {
 ShellError::GenericError(
 "Error casting to f64".into(),
 e.to_string(),
@@ -169,7 +169,7 @@ fn command(
 NuDataFrame::try_from_series(vec![res.into_series()], call.head)
 }
 Value::String { val, span } => {
-let chunked = series.as_ref().utf8().map_err(|e| {
+let chunked = series.utf8().map_err(|e| {
 ShellError::GenericError(
 "Error casting to string".into(),
 e.to_string(),

View File

@@ -27,7 +27,8 @@ impl Command for IsDuplicated {
 }
 fn examples(&self) -> Vec<Example> {
-    vec![Example {
+    vec![
+        Example {
             description: "Create mask indicating duplicated values",
             example: "[5 6 6 6 8 8 8] | into df | is-duplicated",
             result: Some(
@@ -46,7 +47,26 @@ impl Command for IsDuplicated {
             .expect("simple df for test should not fail")
             .into_value(Span::test_data()),
             ),
-    }]
+        },
+        Example {
+            description: "Create mask indicating duplicated rows in a dataframe",
+            example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-duplicated",
+            result: Some(
+                NuDataFrame::try_from_columns(vec![Column::new(
+                    "is_duplicated".to_string(),
+                    vec![
+                        Value::test_bool(true),
+                        Value::test_bool(true),
+                        Value::test_bool(true),
+                        Value::test_bool(true),
+                        Value::test_bool(false),
+                    ],
+                )])
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        },
+    ]
 }
 fn run(
@@ -69,7 +89,7 @@ fn command(
 let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 let mut res = df
-    .as_series(call.head)?
+    .as_ref()
     .is_duplicated()
     .map_err(|e| {
         ShellError::GenericError(
@@ -84,7 +104,7 @@ fn command(
 res.rename("is_duplicated");
-NuDataFrame::try_from_series(vec![res.into_series()], call.head)
+NuDataFrame::try_from_series(vec![res], call.head)
     .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
 }

View File

@@ -27,7 +27,8 @@ impl Command for IsUnique {
 }
 fn examples(&self) -> Vec<Example> {
-    vec![Example {
+    vec![
+        Example {
             description: "Create mask indicating unique values",
             example: "[5 6 6 6 8 8 8] | into df | is-unique",
             result: Some(
@@ -46,7 +47,26 @@ impl Command for IsUnique {
             .expect("simple df for test should not fail")
             .into_value(Span::test_data()),
             ),
-    }]
+        },
+        Example {
+            description: "Create mask indicating duplicated rows in a dataframe",
+            example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-unique",
+            result: Some(
+                NuDataFrame::try_from_columns(vec![Column::new(
+                    "is_unique".to_string(),
+                    vec![
+                        Value::test_bool(false),
+                        Value::test_bool(false),
+                        Value::test_bool(false),
+                        Value::test_bool(false),
+                        Value::test_bool(true),
+                    ],
+                )])
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        },
+    ]
 }
 fn run(
@@ -68,7 +88,10 @@ fn command(
 ) -> Result<PipelineData, ShellError> {
 let df = NuDataFrame::try_from_pipeline(input, call.head)?;
-let mut res = df.as_series(call.head)?.is_unique().map_err(|e| {
+let mut res = df
+    .as_ref()
+    .is_unique()
+    .map_err(|e| {
     ShellError::GenericError(
         "Error finding unique values".into(),
         e.to_string(),
@@ -76,10 +99,12 @@ fn command(
         None,
         Vec::new(),
     )
-})?;
+})?
+.into_series();
 res.rename("is_unique");
-NuDataFrame::try_from_series(vec![res.into_series()], call.head)
+NuDataFrame::try_from_series(vec![res], call.head)
     .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
 }
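Both masks are now computed over the whole DataFrame (`DataFrame::is_unique` / `is_duplicated`) rather than a single Series, which is what enables the new multi-column examples. A brief sketch of the underlying call, assuming the same polars feature set as above (frame contents are illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    let df = DataFrame::new(vec![
        Series::new("a", &[1i64, 1, 3]),
        Series::new("b", &[2i64, 2, 3]),
    ])?;

    // Row-wise mask over the whole frame, as the updated commands now use.
    let mut mask = df.is_unique()?.into_series();
    mask.rename("is_unique");

    println!("{}", mask);
    Ok(())
}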

View File

@@ -129,7 +129,7 @@ fn command(
 NuDataFrame::try_from_series(vec![res.into_series()], call.head)
 }
 Value::Float { val, span } => {
-let chunked = series.as_ref().f64().map_err(|e| {
+let chunked = series.f64().map_err(|e| {
 ShellError::GenericError(
 "Error casting to f64".into(),
 e.to_string(),
@@ -152,7 +152,7 @@ fn command(
 NuDataFrame::try_from_series(vec![res.into_series()], call.head)
 }
 Value::String { val, span } => {
-let chunked = series.as_ref().utf8().map_err(|e| {
+let chunked = series.utf8().map_err(|e| {
 ShellError::GenericError(
 "Error casting to string".into(),
 e.to_string(),

View File

@@ -6,6 +6,8 @@ use nu_protocol::{
 Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
 };
+use polars::prelude::SeriesMethods;
 #[derive(Clone)]
 pub struct ValueCount;
@@ -66,7 +68,7 @@ fn command(
 let df = NuDataFrame::try_from_pipeline(input, call.head)?;
 let series = df.as_series(call.head)?;
-let res = series.value_counts(false).map_err(|e| {
+let res = series.value_counts(false, false).map_err(|e| {
 ShellError::GenericError(
 "Error calculating value counts values".into(),
 e.to_string(),
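`SeriesMethods::value_counts` takes a second boolean argument in polars 0.23, hence the `(false, false)` call and the new import above. A minimal hedged sketch that simply mirrors the command's argument values (I do not name the parameters, and the series contents are illustrative):

use polars::prelude::*;

fn main() -> Result<(), PolarsError> {
    // SeriesMethods (from the prelude) must be in scope for value_counts.
    let s = Series::new("a", &[1i64, 1, 2, 2, 2]);
    let counts = s.value_counts(false, false)?;

    println!("{}", counts);
    Ok(())
}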

View File

@@ -190,7 +190,7 @@ fn find_with_regex(
 (true, true, true) => "(?ims)",
 };
-let regex = flags.to_string() + &regex;
+let regex = flags.to_string() + regex.as_str();
 let re = Regex::new(regex.as_str())
     .map_err(|e| ShellError::UnsupportedInput(format!("incorrect regex: {}", e), span))?;

View File

@@ -199,7 +199,7 @@ impl Iterator for RawStreamLinesAdapter {
 if !self.incomplete_line.is_empty() {
     if let Some(first) = lines.first() {
         let new_incomplete_line =
-            self.incomplete_line.to_string() + first;
+            self.incomplete_line.to_string() + first.as_str();
         lines.splice(0..1, vec![new_incomplete_line]);
         self.incomplete_line = String::new();
     }

View File

@@ -133,7 +133,7 @@ fn convert_yaml_value_to_nu_value(v: &serde_yaml::Value, span: Span) -> Result<V
 .and_then(|e| match e {
     (serde_yaml::Value::String(s), serde_yaml::Value::Null) => {
         Some(Value::String {
-            val: "{{ ".to_owned() + s + " }}",
+            val: "{{ ".to_owned() + s.as_str() + " }}",
             span,
         })
     }

View File

@@ -59,7 +59,7 @@ fn try_source_foo_with_double_quotes_in(testdir: &str, playdir: &str) {
 sandbox.mkdir(&testdir);
 sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
-let cmd = String::from("source ") + r#"""# + &foo_file + r#"""#;
+let cmd = String::from("source ") + r#"""# + foo_file.as_str() + r#"""#;
 let actual = nu!(cwd: dirs.test(), &cmd);
@@ -76,7 +76,7 @@ fn try_source_foo_with_single_quotes_in(testdir: &str, playdir: &str) {
 sandbox.mkdir(&testdir);
 sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
-let cmd = String::from("source ") + r#"'"# + &foo_file + r#"'"#;
+let cmd = String::from("source ") + r#"'"# + foo_file.as_str() + r#"'"#;
 let actual = nu!(cwd: dirs.test(), &cmd);
@@ -93,7 +93,7 @@ fn try_source_foo_without_quotes_in(testdir: &str, playdir: &str) {
 sandbox.mkdir(&testdir);
 sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
-let cmd = String::from("source ") + &foo_file;
+let cmd = String::from("source ") + foo_file.as_str();
 let actual = nu!(cwd: dirs.test(), &cmd);
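The last three hunks all make the right-hand side of the string concatenation an explicit `&str`, matching the "correct string addition with str" commits. A hedged illustration of the pattern being standardized on (variable names are made up); both forms target std's `impl Add<&str> for String`:

fn main() {
    let prefix = String::from("source ");
    let foo_file = String::from("foo.nu");

    // After this commit the call sites spell the &str conversion out explicitly.
    let cmd = prefix + foo_file.as_str();

    assert_eq!(cmd, "source foo.nu");
}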