mirror of
https://github.com/nushell/nushell.git
synced 2024-12-22 15:13:01 +01:00
Polars 0.41 Upgrade (#13238)
# Description Upgrading to Polars 0.41 # User-Facing Changes * `polars melt` has been renamed to `polars unpivot` to match the change in the polars API. Additionally, it now supports lazy dataframes. Introduced a `--streamable` option to use the polars streaming engine for lazy frames. * The parameter `outer` has been replaced with `full` in `polars join` to match polars change. * `polars value-count` now supports the column (rename count column), parallelize (multithread), sort, and normalize options. The list of polars changes can be found [here](https://github.com/pola-rs/polars/releases/tag/rs-0.41.2)
This commit is contained in:
parent
a71732ba12
commit
720b4cbd01
108
Cargo.lock
generated
108
Cargo.lock
generated
@ -3442,7 +3442,7 @@ dependencies = [
|
||||
"polars-plan",
|
||||
"polars-utils",
|
||||
"serde",
|
||||
"sqlparser 0.47.0",
|
||||
"sqlparser",
|
||||
"tempfile",
|
||||
"typetag",
|
||||
"uuid",
|
||||
@ -4052,9 +4052,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442"
|
||||
checksum = "ce49e10a756f68eb99c102c6b2a0cbc0c583a0fa7263536ad0913d94be878d2d"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"polars-arrow",
|
||||
@ -4072,9 +4072,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e"
|
||||
checksum = "b436f83f62e864f0d91871e26528f2c5552c7cf07c8d77547f1b8e3fde22bd27"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"atoi",
|
||||
@ -4120,9 +4120,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-compute"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0"
|
||||
checksum = "f6758f834f07e622a2f859bebb542b2b7f8879b8704dbb2b2bbab460ddcdca4b"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"either",
|
||||
@ -4136,9 +4136,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a"
|
||||
checksum = "7ed262e9bdda15a12a9bfcfc9200bec5253335633dbd86cf5b94fda0194244b3"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bitflags 2.5.0",
|
||||
@ -4170,9 +4170,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-error"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a"
|
||||
checksum = "53e1707a17475ba5e74c349154b415e3148a1a275e395965427971b5e53ad621"
|
||||
dependencies = [
|
||||
"avro-schema",
|
||||
"polars-arrow-format",
|
||||
@ -4183,9 +4183,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-expr"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484"
|
||||
checksum = "31a9688d5842e7a7fbad88e67a174778794a91d97d3bba1b3c09dd1656fee3b2"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bitflags 2.5.0",
|
||||
@ -4203,9 +4203,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e"
|
||||
checksum = "18798dacd94fb9263f65f63f0feab0908675422646d6f7fc37043b85ff6dca35"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"async-trait",
|
||||
@ -4244,9 +4244,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-json"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901"
|
||||
checksum = "044ea319f667efbf8007c4c38171c2956e0e7f9b078eb66e31e82f80d1e14b51"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"chrono",
|
||||
@ -4265,19 +4265,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5"
|
||||
checksum = "74a11994c2211f2e99d9ac31776fd7c2c0607d5fe62d5b5db9e396f7d663f3d5"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bitflags 2.5.0",
|
||||
"glob",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-expr",
|
||||
"polars-io",
|
||||
"polars-json",
|
||||
"polars-mem-engine",
|
||||
"polars-ops",
|
||||
"polars-pipe",
|
||||
"polars-plan",
|
||||
@ -4289,10 +4291,29 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-ops"
|
||||
version = "0.40.0"
|
||||
name = "polars-mem-engine"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58"
|
||||
checksum = "5acd5fde6fadaddfcae3227ec5b64121007928f8e68870c80653438e20c1c587"
|
||||
dependencies = [
|
||||
"polars-arrow",
|
||||
"polars-core",
|
||||
"polars-error",
|
||||
"polars-expr",
|
||||
"polars-io",
|
||||
"polars-json",
|
||||
"polars-ops",
|
||||
"polars-plan",
|
||||
"polars-time",
|
||||
"polars-utils",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-ops"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4170c59e974727941edfb722f6d430ed623be9e7f30581ee00832c907f1b9fd"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"argminmax",
|
||||
@ -4326,9 +4347,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-parquet"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8"
|
||||
checksum = "c684638c36c60c691d707d414249fe8af4a19a35a39d418464b140fe23732e5d"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"async-stream",
|
||||
@ -4341,9 +4362,11 @@ dependencies = [
|
||||
"num-traits",
|
||||
"parquet-format-safe",
|
||||
"polars-arrow",
|
||||
"polars-compute",
|
||||
"polars-error",
|
||||
"polars-utils",
|
||||
"seq-macro",
|
||||
"serde",
|
||||
"simdutf8",
|
||||
"snap",
|
||||
"streaming-decompression",
|
||||
@ -4352,9 +4375,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-pipe"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436"
|
||||
checksum = "832af9fbebc4c074d95fb19e1ef9e1bf37c343641238c2476febff296a7028ea"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-queue",
|
||||
@ -4378,9 +4401,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-plan"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90"
|
||||
checksum = "801390ea815c05c9cf8337f3148090c9c10c9595a839fa0706b77cc2405b4466"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bytemuck",
|
||||
@ -4408,9 +4431,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-row"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40"
|
||||
checksum = "dee955e91b605fc91db4d0a8ea02609d3a09ff79256d905214a2a6f758cd6f7b"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"polars-arrow",
|
||||
@ -4420,9 +4443,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-sql"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9"
|
||||
checksum = "d89c00a4b399501d5bd478e8e8022b9391047fe8570324ecba20c4e4833c0e87"
|
||||
dependencies = [
|
||||
"hex",
|
||||
"once_cell",
|
||||
@ -4430,18 +4453,20 @@ dependencies = [
|
||||
"polars-core",
|
||||
"polars-error",
|
||||
"polars-lazy",
|
||||
"polars-ops",
|
||||
"polars-plan",
|
||||
"polars-time",
|
||||
"rand",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sqlparser 0.39.0",
|
||||
"sqlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-time"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97"
|
||||
checksum = "9689b3aff99d64befe300495528bdc44c36d2656c3a8b242a790d4f43df027fc"
|
||||
dependencies = [
|
||||
"atoi",
|
||||
"bytemuck",
|
||||
@ -4461,9 +4486,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-utils"
|
||||
version = "0.40.0"
|
||||
version = "0.41.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c"
|
||||
checksum = "12081e346983a91e26f395597e1d53dea1b4ecd694653aee1cc402d2fae01f04"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"bytemuck",
|
||||
@ -5614,15 +5639,6 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.39.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.47.0"
|
||||
|
@ -31,11 +31,11 @@ mimalloc = { version = "0.1.42" }
|
||||
num = {version = "0.4"}
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
sqlparser = { version = "0.47"}
|
||||
polars-io = { version = "0.40", features = ["avro"]}
|
||||
polars-arrow = { version = "0.40"}
|
||||
polars-ops = { version = "0.40"}
|
||||
polars-plan = { version = "0.40", features = ["regex"]}
|
||||
polars-utils = { version = "0.40"}
|
||||
polars-io = { version = "0.41", features = ["avro"]}
|
||||
polars-arrow = { version = "0.41"}
|
||||
polars-ops = { version = "0.41"}
|
||||
polars-plan = { version = "0.41", features = ["regex"]}
|
||||
polars-utils = { version = "0.41"}
|
||||
typetag = "0.2"
|
||||
env_logger = "0.11.3"
|
||||
log.workspace = true
|
||||
@ -73,7 +73,7 @@ features = [
|
||||
"to_dummies",
|
||||
]
|
||||
optional = false
|
||||
version = "0.40"
|
||||
version = "0.41"
|
||||
|
||||
[dev-dependencies]
|
||||
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.95.1" }
|
||||
|
@ -1,253 +0,0 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeltDF;
|
||||
|
||||
impl PluginCommand for MeltDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars melt"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating melt".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let res = NuDataFrame::new(false, res);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "empty column list".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df
|
||||
.column(w[0].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let r_series = df
|
||||
.column(w[1].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "found different column types in list".into(),
|
||||
span: Some(col_span),
|
||||
help: Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&MeltDF)
|
||||
}
|
||||
}
|
@ -9,7 +9,6 @@ mod filter_with;
|
||||
mod first;
|
||||
mod get;
|
||||
mod last;
|
||||
mod melt;
|
||||
mod open;
|
||||
mod query_df;
|
||||
mod rename;
|
||||
@ -28,6 +27,7 @@ mod to_df;
|
||||
mod to_json_lines;
|
||||
mod to_nu;
|
||||
mod to_parquet;
|
||||
mod unpivot;
|
||||
mod with_column;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
@ -44,7 +44,6 @@ pub use filter_with::FilterWith;
|
||||
pub use first::FirstDF;
|
||||
pub use get::GetDF;
|
||||
pub use last::LastDF;
|
||||
pub use melt::MeltDF;
|
||||
use nu_plugin::PluginCommand;
|
||||
pub use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
@ -62,6 +61,7 @@ pub use to_df::ToDataFrame;
|
||||
pub use to_json_lines::ToJsonLines;
|
||||
pub use to_nu::ToNu;
|
||||
pub use to_parquet::ToParquet;
|
||||
pub use unpivot::UnpivotDF;
|
||||
pub use with_column::WithColumn;
|
||||
|
||||
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
@ -76,7 +76,7 @@ pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugi
|
||||
Box::new(FilterWith),
|
||||
Box::new(GetDF),
|
||||
Box::new(OpenDataFrame),
|
||||
Box::new(MeltDF),
|
||||
Box::new(UnpivotDF),
|
||||
Box::new(Summary),
|
||||
Box::new(FirstDF),
|
||||
Box::new(LastDF),
|
||||
|
@ -16,6 +16,7 @@ use nu_protocol::{
|
||||
use std::{
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
num::NonZeroUsize,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
@ -376,9 +377,13 @@ fn from_jsonl(
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
let infer_schema: usize = call
|
||||
let infer_schema: NonZeroUsize = call
|
||||
.get_flag("infer-schema")?
|
||||
.unwrap_or(DEFAULT_INFER_SCHEMA);
|
||||
.and_then(NonZeroUsize::new)
|
||||
.unwrap_or(
|
||||
NonZeroUsize::new(DEFAULT_INFER_SCHEMA)
|
||||
.expect("The default infer-schema should be non zero"),
|
||||
);
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
@ -528,7 +533,7 @@ fn from_csv(
|
||||
.with_infer_schema_length(Some(infer_schema))
|
||||
.with_skip_rows(skip_rows.unwrap_or_default())
|
||||
.with_schema(maybe_schema.map(|s| s.into()))
|
||||
.with_columns(columns.map(Arc::new))
|
||||
.with_columns(columns.map(|v| Arc::from(v.into_boxed_slice())))
|
||||
.map_parse_options(|options| {
|
||||
options
|
||||
.with_separator(
|
||||
|
@ -70,7 +70,7 @@ fn command(
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
PolarsPluginObject::NuLazyFrame(mut lazy) => {
|
||||
let schema = lazy.schema()?;
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
|
358
crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs
Normal file
358
crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs
Normal file
@ -0,0 +1,358 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::frame::explode::UnpivotArgs;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string,
|
||||
values::{CustomValueSupport, NuLazyFrame, PolarsPluginObject},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UnpivotDF;
|
||||
|
||||
impl PluginCommand for UnpivotDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars unpivot"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names for unpivoting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.switch(
|
||||
"streamable",
|
||||
"Whether or not to use the polars streaming engine. Only valid for lazy dataframes",
|
||||
Some('s'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "unpivot on an eager dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "unpivot on a lazy dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -c [b c] -v [a d] | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Must be a dataframe or lazy dataframe".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.unpivot(&val_col_string, &id_col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating unpivot".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let res = NuDataFrame::new(false, res);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let (id_col_string, _id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, _val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let value_name: Option<String> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<String> = call.get_flag("variable-name")?;
|
||||
|
||||
let streamable = call.has_flag("streamable")?;
|
||||
|
||||
let unpivot_args = UnpivotArgs {
|
||||
on: val_col_string.iter().map(Into::into).collect(),
|
||||
index: id_col_string.iter().map(Into::into).collect(),
|
||||
value_name: value_name.map(Into::into),
|
||||
variable_name: variable_name.map(Into::into),
|
||||
streamable,
|
||||
};
|
||||
|
||||
let polars_df = df.to_polars().unpivot(unpivot_args);
|
||||
|
||||
let res = NuLazyFrame::new(false, polars_df);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "empty column list".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df
|
||||
.column(w[0].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let r_series = df
|
||||
.column(w[1].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "found different column types in list".into(),
|
||||
span: Some(col_span),
|
||||
help: Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&UnpivotDF)
|
||||
}
|
||||
}
|
@ -196,7 +196,8 @@ fn get_col_name(expr: &Expr) -> Option<String> {
|
||||
| Expr::Nth(_)
|
||||
| Expr::SubPlan(_, _)
|
||||
| Expr::IndexColumn(_)
|
||||
| Expr::Selector(_) => None,
|
||||
| Expr::Selector(_)
|
||||
| Expr::Field(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,11 +148,11 @@ fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
mut lazy: NuLazyFrame,
|
||||
expressions: Vec<Expr>,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let group_by = lazy.to_polars().group_by(expressions);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema().clone()?);
|
||||
group_by.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ impl PluginCommand for LazyJoin {
|
||||
Some('i'),
|
||||
)
|
||||
.switch("left", "left join between lazyframes", Some('l'))
|
||||
.switch("outer", "outer join between lazyframes", Some('o'))
|
||||
.switch("full", "full join between lazyframes", Some('f'))
|
||||
.switch("cross", "cross join between lazyframes", Some('c'))
|
||||
.named(
|
||||
"suffix",
|
||||
@ -183,13 +183,13 @@ impl PluginCommand for LazyJoin {
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let left = call.has_flag("left")?;
|
||||
let outer = call.has_flag("outer")?;
|
||||
let full = call.has_flag("full")?;
|
||||
let cross = call.has_flag("cross")?;
|
||||
|
||||
let how = if left {
|
||||
JoinType::Left
|
||||
} else if outer {
|
||||
JoinType::Outer
|
||||
} else if full {
|
||||
JoinType::Full
|
||||
} else if cross {
|
||||
JoinType::Cross
|
||||
} else {
|
||||
|
@ -140,7 +140,7 @@ impl PluginCommand for LazySortBy {
|
||||
|
||||
let sort_options = SortMultipleOptions {
|
||||
descending: reverse,
|
||||
nulls_last,
|
||||
nulls_last: vec![nulls_last],
|
||||
multithreaded: true,
|
||||
maintain_order,
|
||||
};
|
||||
|
@ -7,7 +7,10 @@ use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
use polars::{
|
||||
chunked_array::cast::CastOptions,
|
||||
prelude::{ChunkSet, DataType, IntoSeries},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SetWithIndex;
|
||||
@ -96,7 +99,7 @@ fn command(
|
||||
let casted = match indices.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
|
||||
.as_ref()
|
||||
.cast(&DataType::UInt32)
|
||||
.cast(&DataType::UInt32, CastOptions::default())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting indices".into(),
|
||||
msg: e.to_string(),
|
||||
|
@ -4,7 +4,8 @@ use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use polars::prelude::SeriesMethods;
|
||||
@ -25,6 +26,24 @@ impl PluginCommand for ValueCount {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"column",
|
||||
SyntaxShape::String,
|
||||
"Provide a custom name for the coutn column",
|
||||
Some('c'),
|
||||
)
|
||||
.switch("sort", "Whether or not values should be sorted", Some('s'))
|
||||
.switch(
|
||||
"parallel",
|
||||
"Use multiple threads when processing",
|
||||
Some('p'),
|
||||
)
|
||||
.named(
|
||||
"normalize",
|
||||
SyntaxShape::String,
|
||||
"Normalize the counts",
|
||||
Some('n'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
@ -73,11 +92,15 @@ fn command(
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let column = call.get_flag("column")?.unwrap_or("count".to_string());
|
||||
let parallel = call.has_flag("parallel")?;
|
||||
let sort = call.has_flag("sort")?;
|
||||
let normalize = call.has_flag("normalize")?;
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series
|
||||
.value_counts(false, false)
|
||||
.value_counts(sort, parallel, column, normalize)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating value counts values".into(),
|
||||
msg: e.to_string(),
|
||||
|
@ -41,19 +41,37 @@ pub(super) fn compute_between_series(
|
||||
let operation_span = Span::merge(left.span(), right.span());
|
||||
match operator.item {
|
||||
Operator::Math(Math::Plus) => {
|
||||
let mut res = lhs + rhs;
|
||||
let mut res = (lhs + rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Addition error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("sum_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
}
|
||||
Operator::Math(Math::Minus) => {
|
||||
let mut res = lhs - rhs;
|
||||
let mut res = (lhs - rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Subtraction error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("sub_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
}
|
||||
Operator::Math(Math::Multiply) => {
|
||||
let mut res = lhs * rhs;
|
||||
let mut res = (lhs * rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Multiplication error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("mul_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
|
@ -1,7 +1,10 @@
|
||||
mod custom_value;
|
||||
|
||||
use nu_protocol::{record, ShellError, Span, Value};
|
||||
use polars::prelude::{col, AggExpr, Expr, Literal};
|
||||
use polars::{
|
||||
chunked_array::cast::CastOptions,
|
||||
prelude::{col, AggExpr, Expr, Literal},
|
||||
};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
@ -269,15 +272,23 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
Expr::Cast {
|
||||
expr,
|
||||
data_type,
|
||||
strict,
|
||||
} => Ok(Value::record(
|
||||
record! {
|
||||
"expr" => expr_to_value(expr.as_ref(), span)?,
|
||||
"dtype" => Value::string(format!("{data_type:?}"), span),
|
||||
"strict" => Value::bool(*strict, span),
|
||||
},
|
||||
span,
|
||||
)),
|
||||
options,
|
||||
} => {
|
||||
let cast_option_str = match options {
|
||||
CastOptions::Strict => "STRICT",
|
||||
CastOptions::NonStrict => "NON_STRICT",
|
||||
CastOptions::Overflowing => "OVERFLOWING",
|
||||
};
|
||||
|
||||
Ok(Value::record(
|
||||
record! {
|
||||
"expr" => expr_to_value(expr.as_ref(), span)?,
|
||||
"dtype" => Value::string(format!("{data_type:?}"), span),
|
||||
"cast_options" => Value::string(cast_option_str, span)
|
||||
},
|
||||
span,
|
||||
))
|
||||
}
|
||||
Expr::Gather {
|
||||
expr,
|
||||
idx,
|
||||
@ -388,6 +399,7 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
Expr::Window {
|
||||
function,
|
||||
partition_by,
|
||||
order_by,
|
||||
options,
|
||||
} => {
|
||||
let partition_by: Result<Vec<Value>, ShellError> = partition_by
|
||||
@ -399,6 +411,23 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
record! {
|
||||
"function" => expr_to_value(function, span)?,
|
||||
"partition_by" => Value::list(partition_by?, span),
|
||||
"order_by" => {
|
||||
if let Some((order_expr, sort_options)) = order_by {
|
||||
Value::record(record! {
|
||||
"expr" => expr_to_value(order_expr.as_ref(), span)?,
|
||||
"sort_options" => {
|
||||
Value::record(record!(
|
||||
"descending" => Value::bool(sort_options.descending, span),
|
||||
"nulls_last"=> Value::bool(sort_options.nulls_last, span),
|
||||
"multithreaded"=> Value::bool(sort_options.multithreaded, span),
|
||||
"maintain_order"=> Value::bool(sort_options.maintain_order, span),
|
||||
), span)
|
||||
}
|
||||
}, span)
|
||||
} else {
|
||||
Value::nothing(span)
|
||||
}
|
||||
},
|
||||
"options" => Value::string(format!("{options:?}"), span),
|
||||
},
|
||||
span,
|
||||
@ -424,6 +453,18 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
msg_span: span,
|
||||
input_span: Span::unknown(),
|
||||
}),
|
||||
Expr::Field(column_name) => {
|
||||
let fields: Vec<Value> = column_name
|
||||
.iter()
|
||||
.map(|s| Value::string(s.to_string(), span))
|
||||
.collect();
|
||||
Ok(Value::record(
|
||||
record!(
|
||||
"fields" => Value::list(fields, span)
|
||||
),
|
||||
span,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,14 +77,17 @@ impl NuLazyFrame {
|
||||
Self::new(self.from_eager, new_frame)
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> Result<NuSchema, ShellError> {
|
||||
let internal_schema = self.lazy.schema().map_err(|e| ShellError::GenericError {
|
||||
error: "Error getting schema from lazy frame".into(),
|
||||
msg: e.to_string(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
pub fn schema(&mut self) -> Result<NuSchema, ShellError> {
|
||||
let internal_schema =
|
||||
Arc::make_mut(&mut self.lazy)
|
||||
.schema()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error getting schema from lazy frame".into(),
|
||||
msg: e.to_string(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
Ok(internal_schema.into())
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user