mirror of
https://github.com/nushell/nushell.git
synced 2024-11-25 09:53:43 +01:00
Polars upgrade 0.23 (#6303)
* more lazy expressions * upgrade polars and correct functions * arg-where example * cargo clippy * restore modified filter files * correct string addition with str * correct string addition with str * correct message in test
This commit is contained in:
parent
ff6868b329
commit
ae64c58f59
77
Cargo.lock
generated
77
Cargo.lock
generated
@ -130,9 +130,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||
|
||||
[[package]]
|
||||
name = "arrow-format"
|
||||
version = "0.6.0"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd"
|
||||
checksum = "8df5d25bc6d676271277120c41ef28760fe0a9f070677a58db621c0f983f9c20"
|
||||
dependencies = [
|
||||
"planus",
|
||||
"serde",
|
||||
@ -140,16 +140,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.12.0"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5feafd6df4e3f577529e6aa2b9b7cdb3c9fe8e8f66ebc8dc29abbe71a7e968f0"
|
||||
checksum = "afc54f0b14083abaf6bc71cf1aeccd7831a24b1e29d07683ba9a4a0f6c5d9326"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-format",
|
||||
"base64",
|
||||
"bytemuck",
|
||||
"chrono",
|
||||
"dyn-clone",
|
||||
"either",
|
||||
"fallible-streaming-iterator",
|
||||
"foreign_vec",
|
||||
"futures",
|
||||
"hash_hasher",
|
||||
"indexmap",
|
||||
@ -1024,6 +1027,12 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541"
|
||||
|
||||
[[package]]
|
||||
name = "dyn-clone"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d07a982d1fb29db01e5a59b1918e03da4df7297eaeee7686ac45542fd4e59c8"
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.6.2"
|
||||
@ -1204,6 +1213,12 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "foreign_vec"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.0.1"
|
||||
@ -3172,9 +3187,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet2"
|
||||
version = "0.13.2"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73fd2690ad041f9296876daef1f2706f6347073bdbcc719090887f1691e4a09d"
|
||||
checksum = "33e434af3293ba384075a56d4b400ce659868ca7823142194ef204f01ab35e50"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"bitpacking",
|
||||
@ -3381,18 +3396,18 @@ checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
|
||||
[[package]]
|
||||
name = "planus"
|
||||
version = "0.2.0"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bffebaf174d6cad46a5f0f1bb1c45c6eb509571688bcb18dfab217f3c9f9b151"
|
||||
checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f"
|
||||
dependencies = [
|
||||
"array-init-cursor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.22.8"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d175c67e80ceaef7219258cfc3a8686531d9510875b0cefa25404e5b80a7933"
|
||||
checksum = "a75b1077fda63c0f67acc1cdc8586e7afce419be1e85bf7dfa8935e0e266d6b3"
|
||||
dependencies = [
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
@ -3403,9 +3418,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f66c7d3da2c10a09131294dbe7802fac792f570be639dc6ebf207bfc3e144287"
|
||||
checksum = "f7b28f858b252436550679609a23be34d62705faf783887f172f845eb58bcb8b"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"hashbrown",
|
||||
@ -3416,13 +3431,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7f15f443a90d5367c4fbbb151e203f03b5b96055c8b928c6bc30655a3644f13"
|
||||
checksum = "eeaec1ca3ac4829ca24b33743adeeb323a43b5a85515bfce20c2c81799c82790"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow2",
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"comfy-table",
|
||||
"hashbrown",
|
||||
@ -3437,14 +3453,15 @@ dependencies = [
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smartstring",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "058d0a847ce5009b974c69ec878ed416e306436f21b626543019f738cee12315"
|
||||
checksum = "51405e46f93e306a3c9280c60ba1101c662e8a6dab33344680d31c3161045f1c"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@ -3470,11 +3487,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dad86a4ce7e32540ff12089bce6f77270fd133a5b263328a92be61defdd6b151"
|
||||
checksum = "1340af778bc8124180d8ca1a566f076a5339566a207a42130796048b087fe977"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bitflags",
|
||||
"glob",
|
||||
"parking_lot",
|
||||
"polars-arrow",
|
||||
@ -3489,9 +3507,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-ops"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "030ecd473be113cd0264f1bc19de39a844fa12fa565db9dc52c859cbc292cf04"
|
||||
checksum = "4a1812e5d5e589d5bd23f8d89dcd8bd4508082c50d055b8ff5fafb6f2a519c9a"
|
||||
dependencies = [
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
@ -3499,9 +3517,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-time"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94047b20d2da3bcc55c421be187a0c6f316cf1eea7fe7ed7347c1160a32d017c"
|
||||
checksum = "fc4ebe97d601a4b443337df71d0b7e673fce953654871c3311850ea394d48297"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"lexical",
|
||||
@ -3513,9 +3531,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.22.7"
|
||||
version = "0.23.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcd3d0238462d5d9f7fbeaaea46e73ed4d58f6fae8b70d53cbe51d7538cc43f5"
|
||||
checksum = "4ea836afadcddee3f1a513dae7624f6d7d0d64abb129063ec7476b8347c8725b"
|
||||
dependencies = [
|
||||
"parking_lot",
|
||||
"rayon",
|
||||
@ -4479,6 +4497,17 @@ version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1"
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"static_assertions",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smawk"
|
||||
version = "0.3.1"
|
||||
|
@ -99,15 +99,14 @@ version = "2.1.3"
|
||||
optional = true
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.22.8"
|
||||
# path = "../../../../polars/polars"
|
||||
version = "0.23.2"
|
||||
optional = true
|
||||
features = [
|
||||
"default", "to_dummies", "parquet", "json", "serde", "serde-lazy",
|
||||
"object", "checked_arithmetic", "strings", "cum_agg", "is_in",
|
||||
"rolling_window", "strings", "rows", "random",
|
||||
"dtype-datetime", "dtype-struct", "lazy", "cross_join",
|
||||
"dynamic_groupby", "dtype-categorical", "concat_str"
|
||||
"dynamic_groupby", "dtype-categorical", "concat_str", "arg_where"
|
||||
]
|
||||
|
||||
[target.'cfg(windows)'.dependencies.windows]
|
||||
|
94
crates/nu-command/src/dataframe/eager/columns.rs
Normal file
94
crates/nu-command/src/dataframe/eager/columns.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use super::super::values::NuDataFrame;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ColumnsDF;
|
||||
|
||||
impl Command for ColumnsDF {
|
||||
fn name(&self) -> &str {
|
||||
"columns"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Show dataframe columns"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Any)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Dataframe columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | into df | columns",
|
||||
result: Some(Value::List {
|
||||
vals: vec![
|
||||
Value::String {
|
||||
val: "a".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
Value::String {
|
||||
val: "b".into(),
|
||||
span: Span::test_data(),
|
||||
},
|
||||
],
|
||||
span: Span::test_data(),
|
||||
}),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_collect)]
|
||||
fn command(
|
||||
_engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| Value::String {
|
||||
val: v.to_string(),
|
||||
span: call.head,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let names = Value::List {
|
||||
vals: names,
|
||||
span: call.head,
|
||||
};
|
||||
|
||||
Ok(PipelineData::Value(names, None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(ColumnsDF {})])
|
||||
}
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
mod append;
|
||||
mod columns;
|
||||
mod describe;
|
||||
mod drop;
|
||||
mod drop_duplicates;
|
||||
@ -26,6 +27,7 @@ mod with_column;
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
pub use append::AppendDF;
|
||||
pub use columns::ColumnsDF;
|
||||
pub use describe::DescribeDF;
|
||||
pub use drop::DropDF;
|
||||
pub use drop_duplicates::DropDuplicates;
|
||||
@ -63,6 +65,7 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
AppendDF,
|
||||
ColumnsDF,
|
||||
DataTypes,
|
||||
DescribeDF,
|
||||
DropDF,
|
||||
|
76
crates/nu-command/src/dataframe/expressions/arg_where.rs
Normal file
76
crates/nu-command/src/dataframe/expressions/arg_where.rs
Normal file
@ -0,0 +1,76 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::arg_where;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprArgWhere;
|
||||
|
||||
impl Command for ExprArgWhere {
|
||||
fn name(&self) -> &str {
|
||||
"arg-where"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an expression that returns the arguments where expression is true"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("column name", SyntaxShape::Any, "Expression to evaluate")
|
||||
.input_type(Type::Any)
|
||||
.output_type(Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Return a dataframe where the value match the expression",
|
||||
example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
|
||||
$df | select (arg-where ((col b) >= 2) | as b_arg)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"b_arg".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
let expr = NuExpression::try_from_value(value)?;
|
||||
let expr: NuExpression = arg_where(expr.into_polars()).into();
|
||||
|
||||
Ok(PipelineData::Value(expr.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::expressions::ExprAlias;
|
||||
use crate::dataframe::lazy::LazySelect;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![
|
||||
Box::new(ExprArgWhere {}),
|
||||
Box::new(ExprAlias {}),
|
||||
Box::new(LazySelect {}),
|
||||
])
|
||||
}
|
||||
}
|
110
crates/nu-command/src/dataframe/expressions/is_in.rs
Normal file
110
crates/nu-command/src/dataframe/expressions/is_in.rs
Normal file
@ -0,0 +1,110 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression};
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::{lit, DataType};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExprIsIn;
|
||||
|
||||
impl Command for ExprIsIn {
|
||||
fn name(&self) -> &str {
|
||||
"is-in"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates an is-in expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"list",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Any)),
|
||||
"List to check if values are in",
|
||||
)
|
||||
.input_type(Type::Custom("expression".into()))
|
||||
.output_type(Type::Custom("expression".into()))
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates a is-in expression",
|
||||
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | into df);
|
||||
$df | with-column (col a | is-in [one two] | as a_in)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
Value::test_string("one"),
|
||||
Value::test_string("two"),
|
||||
Value::test_string("three"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
|
||||
),
|
||||
Column::new(
|
||||
"a_in".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let list: Vec<Value> = call.req(engine_state, stack, 0)?;
|
||||
let expr = NuExpression::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let values = NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)])?;
|
||||
let list = values.as_series(call.head)?;
|
||||
|
||||
if matches!(list.dtype(), DataType::Object(..)) {
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"Cannot use a mixed list as argument".into(),
|
||||
call.head,
|
||||
));
|
||||
}
|
||||
|
||||
let expr: NuExpression = expr.into_polars().is_in(lit(list)).into();
|
||||
Ok(PipelineData::Value(expr.into_value(call.head), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
use crate::dataframe::eager::WithColumn;
|
||||
use crate::dataframe::expressions::alias::ExprAlias;
|
||||
use crate::dataframe::expressions::col::ExprCol;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![
|
||||
Box::new(ExprIsIn {}),
|
||||
Box::new(ExprAlias {}),
|
||||
Box::new(ExprCol {}),
|
||||
Box::new(WithColumn {}),
|
||||
])
|
||||
}
|
||||
}
|
@ -1,8 +1,10 @@
|
||||
mod alias;
|
||||
mod arg_where;
|
||||
mod as_nu;
|
||||
mod col;
|
||||
mod concat_str;
|
||||
mod expressions_macro;
|
||||
mod is_in;
|
||||
mod lit;
|
||||
mod otherwise;
|
||||
mod quantile;
|
||||
@ -11,10 +13,12 @@ mod when;
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
pub(crate) use crate::dataframe::expressions::alias::ExprAlias;
|
||||
use crate::dataframe::expressions::arg_where::ExprArgWhere;
|
||||
use crate::dataframe::expressions::as_nu::ExprAsNu;
|
||||
pub(super) use crate::dataframe::expressions::col::ExprCol;
|
||||
pub(super) use crate::dataframe::expressions::concat_str::ExprConcatStr;
|
||||
pub(crate) use crate::dataframe::expressions::expressions_macro::*;
|
||||
pub(super) use crate::dataframe::expressions::is_in::ExprIsIn;
|
||||
pub(super) use crate::dataframe::expressions::lit::ExprLit;
|
||||
pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise;
|
||||
pub(super) use crate::dataframe::expressions::quantile::ExprQuantile;
|
||||
@ -33,6 +37,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
|
||||
// Dataframe commands
|
||||
bind_command!(
|
||||
ExprAlias,
|
||||
ExprArgWhere,
|
||||
ExprCol,
|
||||
ExprConcatStr,
|
||||
ExprCount,
|
||||
@ -49,6 +54,7 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) {
|
||||
ExprFirst,
|
||||
ExprLast,
|
||||
ExprNUnique,
|
||||
ExprIsIn,
|
||||
ExprIsNotNull,
|
||||
ExprIsNull,
|
||||
ExprNot,
|
||||
|
84
crates/nu-command/src/dataframe/lazy/filter.rs
Normal file
84
crates/nu-command/src/dataframe/lazy/filter.rs
Normal file
@ -0,0 +1,84 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazyFilter;
|
||||
|
||||
impl Command for LazyFilter {
|
||||
fn name(&self) -> &str {
|
||||
"filter"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Filter dataframe based in expression"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"filter expression",
|
||||
SyntaxShape::Any,
|
||||
"Expression that define the column selection",
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Filter dataframe using an expression",
|
||||
example: "[[a b]; [6 2] [4 2] [2 2]] | into df | filter ((col a) >= 4)",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(6), Value::test_int(4)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
let expression = NuExpression::try_from_value(value)?;
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.into_polars().filter(expression.into_polars()),
|
||||
);
|
||||
|
||||
Ok(PipelineData::Value(lazy.into_value(call.head)?, None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(LazyFilter {})])
|
||||
}
|
||||
}
|
@ -3,6 +3,7 @@ mod collect;
|
||||
mod fetch;
|
||||
mod fill_na;
|
||||
mod fill_null;
|
||||
mod filter;
|
||||
pub mod groupby;
|
||||
mod join;
|
||||
mod macro_commands;
|
||||
@ -13,17 +14,17 @@ mod to_lazy;
|
||||
|
||||
use nu_protocol::engine::StateWorkingSet;
|
||||
|
||||
pub(crate) use crate::dataframe::lazy::macro_commands::*;
|
||||
|
||||
use crate::dataframe::lazy::aggregate::LazyAggregate;
|
||||
pub use crate::dataframe::lazy::collect::LazyCollect;
|
||||
use crate::dataframe::lazy::fetch::LazyFetch;
|
||||
use crate::dataframe::lazy::fill_na::LazyFillNA;
|
||||
use crate::dataframe::lazy::fill_null::LazyFillNull;
|
||||
use crate::dataframe::lazy::filter::LazyFilter;
|
||||
use crate::dataframe::lazy::groupby::ToLazyGroupBy;
|
||||
use crate::dataframe::lazy::join::LazyJoin;
|
||||
pub(crate) use crate::dataframe::lazy::macro_commands::*;
|
||||
use crate::dataframe::lazy::quantile::LazyQuantile;
|
||||
use crate::dataframe::lazy::select::LazySelect;
|
||||
pub(crate) use crate::dataframe::lazy::select::LazySelect;
|
||||
use crate::dataframe::lazy::sort_by_expr::LazySortBy;
|
||||
pub use crate::dataframe::lazy::to_lazy::ToLazyFrame;
|
||||
|
||||
@ -45,6 +46,7 @@ pub fn add_lazy_decls(working_set: &mut StateWorkingSet) {
|
||||
LazyFetch,
|
||||
LazyFillNA,
|
||||
LazyFillNull,
|
||||
LazyFilter,
|
||||
LazyJoin,
|
||||
LazyQuantile,
|
||||
LazyMax,
|
||||
|
@ -6,8 +6,6 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::Expr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySelect;
|
||||
|
||||
@ -61,17 +59,6 @@ impl Command for LazySelect {
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
|
||||
if expressions
|
||||
.iter()
|
||||
.any(|expr| !matches!(expr, Expr::Column(..)))
|
||||
{
|
||||
let value: Value = call.req(engine_state, stack, 0)?;
|
||||
return Err(ShellError::IncompatibleParametersSingle(
|
||||
"Expected only Col expressions".into(),
|
||||
value.span()?,
|
||||
));
|
||||
}
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions));
|
||||
|
||||
|
@ -32,6 +32,11 @@ impl Command for LazySortBy {
|
||||
"Reverse sorting. Default is false",
|
||||
Some('r'),
|
||||
)
|
||||
.switch(
|
||||
"nulls-last",
|
||||
"nulls are shown last in the dataframe",
|
||||
Some('n'),
|
||||
)
|
||||
.input_type(Type::Custom("dataframe".into()))
|
||||
.output_type(Type::Custom("dataframe".into()))
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
@ -102,6 +107,7 @@ impl Command for LazySortBy {
|
||||
span: call.head,
|
||||
};
|
||||
let expressions = NuExpression::extract_exprs(value)?;
|
||||
let nulls_last = call.has_flag("nulls-last");
|
||||
|
||||
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
|
||||
let reverse = match reverse {
|
||||
@ -128,7 +134,8 @@ impl Command for LazySortBy {
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.into_polars().sort_by_exprs(&expressions, reverse),
|
||||
lazy.into_polars()
|
||||
.sort_by_exprs(&expressions, reverse, nulls_last),
|
||||
);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
|
@ -35,7 +35,7 @@ impl Command for GetWeekDay {
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(2)],
|
||||
vec![Value::test_int(1), Value::test_int(1)],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
|
@ -5,7 +5,7 @@ use nu_protocol::{
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use polars::prelude::{arg_where, col, IntoLazy};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ArgTrue;
|
||||
@ -59,23 +59,41 @@ fn command(
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let columns = df.as_ref().get_column_names();
|
||||
if columns.len() > 1 {
|
||||
return Err(ShellError::GenericError(
|
||||
"Error using as series".into(),
|
||||
"dataframe has more than one column".into(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
));
|
||||
}
|
||||
|
||||
let series = df.as_series(call.head)?;
|
||||
let bool = series.bool().map_err(|_| {
|
||||
match columns.first() {
|
||||
Some(column) => {
|
||||
let expression = arg_where(col(column).eq(true)).alias("arg_true");
|
||||
let res = df
|
||||
.as_ref()
|
||||
.clone()
|
||||
.lazy()
|
||||
.select(&[expression])
|
||||
.collect()
|
||||
.map_err(|err| {
|
||||
ShellError::GenericError(
|
||||
"Error converting to bool".into(),
|
||||
"all-false only works with series of type bool".into(),
|
||||
"Error creating index column".into(),
|
||||
err.to_string(),
|
||||
Some(call.head),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = bool.arg_true().into_series();
|
||||
res.rename("arg_true");
|
||||
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
let value = NuDataFrame::dataframe_into_value(res, call.head);
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -146,7 +146,7 @@ fn command(
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::Float { val, span } => {
|
||||
let chunked = series.as_ref().f64().map_err(|e| {
|
||||
let chunked = series.f64().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to f64".into(),
|
||||
e.to_string(),
|
||||
@ -169,7 +169,7 @@ fn command(
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::String { val, span } => {
|
||||
let chunked = series.as_ref().utf8().map_err(|e| {
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to string".into(),
|
||||
e.to_string(),
|
||||
|
@ -27,7 +27,8 @@ impl Command for IsDuplicated {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask indicating duplicated values",
|
||||
example: "[5 6 6 6 8 8 8] | into df | is-duplicated",
|
||||
result: Some(
|
||||
@ -46,7 +47,26 @@ impl Command for IsDuplicated {
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
},
|
||||
Example {
|
||||
description: "Create mask indicating duplicated rows in a dataframe",
|
||||
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-duplicated",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(true),
|
||||
Value::test_bool(false),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -69,7 +89,7 @@ fn command(
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut res = df
|
||||
.as_series(call.head)?
|
||||
.as_ref()
|
||||
.is_duplicated()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
@ -84,7 +104,7 @@ fn command(
|
||||
|
||||
res.rename("is_duplicated");
|
||||
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,8 @@ impl Command for IsUnique {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
vec![
|
||||
Example {
|
||||
description: "Create mask indicating unique values",
|
||||
example: "[5 6 6 6 8 8 8] | into df | is-unique",
|
||||
result: Some(
|
||||
@ -46,7 +47,26 @@ impl Command for IsUnique {
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
},
|
||||
Example {
|
||||
description: "Create mask indicating duplicated rows in a dataframe",
|
||||
example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | into df | is-unique",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(false),
|
||||
Value::test_bool(true),
|
||||
],
|
||||
)])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
@ -68,7 +88,10 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
let mut res = df.as_series(call.head)?.is_unique().map_err(|e| {
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.is_unique()
|
||||
.map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error finding unique values".into(),
|
||||
e.to_string(),
|
||||
@ -76,10 +99,12 @@ fn command(
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
})?;
|
||||
})?
|
||||
.into_series();
|
||||
|
||||
res.rename("is_unique");
|
||||
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
NuDataFrame::try_from_series(vec![res], call.head)
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
|
@ -129,7 +129,7 @@ fn command(
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::Float { val, span } => {
|
||||
let chunked = series.as_ref().f64().map_err(|e| {
|
||||
let chunked = series.f64().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to f64".into(),
|
||||
e.to_string(),
|
||||
@ -152,7 +152,7 @@ fn command(
|
||||
NuDataFrame::try_from_series(vec![res.into_series()], call.head)
|
||||
}
|
||||
Value::String { val, span } => {
|
||||
let chunked = series.as_ref().utf8().map_err(|e| {
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error casting to string".into(),
|
||||
e.to_string(),
|
||||
|
@ -6,6 +6,8 @@ use nu_protocol::{
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
|
||||
use polars::prelude::SeriesMethods;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ValueCount;
|
||||
|
||||
@ -66,7 +68,7 @@ fn command(
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series.value_counts(false).map_err(|e| {
|
||||
let res = series.value_counts(false, false).map_err(|e| {
|
||||
ShellError::GenericError(
|
||||
"Error calculating value counts values".into(),
|
||||
e.to_string(),
|
||||
|
@ -190,7 +190,7 @@ fn find_with_regex(
|
||||
(true, true, true) => "(?ims)",
|
||||
};
|
||||
|
||||
let regex = flags.to_string() + ®ex;
|
||||
let regex = flags.to_string() + regex.as_str();
|
||||
|
||||
let re = Regex::new(regex.as_str())
|
||||
.map_err(|e| ShellError::UnsupportedInput(format!("incorrect regex: {}", e), span))?;
|
||||
|
@ -199,7 +199,7 @@ impl Iterator for RawStreamLinesAdapter {
|
||||
if !self.incomplete_line.is_empty() {
|
||||
if let Some(first) = lines.first() {
|
||||
let new_incomplete_line =
|
||||
self.incomplete_line.to_string() + first;
|
||||
self.incomplete_line.to_string() + first.as_str();
|
||||
lines.splice(0..1, vec![new_incomplete_line]);
|
||||
self.incomplete_line = String::new();
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ fn convert_yaml_value_to_nu_value(v: &serde_yaml::Value, span: Span) -> Result<V
|
||||
.and_then(|e| match e {
|
||||
(serde_yaml::Value::String(s), serde_yaml::Value::Null) => {
|
||||
Some(Value::String {
|
||||
val: "{{ ".to_owned() + s + " }}",
|
||||
val: "{{ ".to_owned() + s.as_str() + " }}",
|
||||
span,
|
||||
})
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ fn try_source_foo_with_double_quotes_in(testdir: &str, playdir: &str) {
|
||||
sandbox.mkdir(&testdir);
|
||||
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
|
||||
|
||||
let cmd = String::from("source ") + r#"""# + &foo_file + r#"""#;
|
||||
let cmd = String::from("source ") + r#"""# + foo_file.as_str() + r#"""#;
|
||||
|
||||
let actual = nu!(cwd: dirs.test(), &cmd);
|
||||
|
||||
@ -76,7 +76,7 @@ fn try_source_foo_with_single_quotes_in(testdir: &str, playdir: &str) {
|
||||
sandbox.mkdir(&testdir);
|
||||
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
|
||||
|
||||
let cmd = String::from("source ") + r#"'"# + &foo_file + r#"'"#;
|
||||
let cmd = String::from("source ") + r#"'"# + foo_file.as_str() + r#"'"#;
|
||||
|
||||
let actual = nu!(cwd: dirs.test(), &cmd);
|
||||
|
||||
@ -93,7 +93,7 @@ fn try_source_foo_without_quotes_in(testdir: &str, playdir: &str) {
|
||||
sandbox.mkdir(&testdir);
|
||||
sandbox.with_files(vec![FileWithContent(&foo_file, "echo foo")]);
|
||||
|
||||
let cmd = String::from("source ") + &foo_file;
|
||||
let cmd = String::from("source ") + foo_file.as_str();
|
||||
|
||||
let actual = nu!(cwd: dirs.test(), &cmd);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user