mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 06:30:08 +02:00
Polars 0.41 Upgrade (#13238)
# Description Upgrading to Polars 0.41 # User-Facing Changes * `polars melt` has been renamed to `polars unpivot` to match the change in the polars API. Additionally, it now supports lazy dataframes. Introduced a `--streamable` option to use the polars streaming engine for lazy frames. * The parameter `outer` has been replaced with `full` in `polars join` to match polars change. * `polars value-count` now supports the column (rename count column), parallelize (multithread), sort, and normalize options. The list of polars changes can be found [here](https://github.com/pola-rs/polars/releases/tag/rs-0.41.2)
This commit is contained in:
@ -31,11 +31,11 @@ mimalloc = { version = "0.1.42" }
|
||||
num = {version = "0.4"}
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
sqlparser = { version = "0.47"}
|
||||
polars-io = { version = "0.40", features = ["avro"]}
|
||||
polars-arrow = { version = "0.40"}
|
||||
polars-ops = { version = "0.40"}
|
||||
polars-plan = { version = "0.40", features = ["regex"]}
|
||||
polars-utils = { version = "0.40"}
|
||||
polars-io = { version = "0.41", features = ["avro"]}
|
||||
polars-arrow = { version = "0.41"}
|
||||
polars-ops = { version = "0.41"}
|
||||
polars-plan = { version = "0.41", features = ["regex"]}
|
||||
polars-utils = { version = "0.41"}
|
||||
typetag = "0.2"
|
||||
env_logger = "0.11.3"
|
||||
log.workspace = true
|
||||
@ -73,7 +73,7 @@ features = [
|
||||
"to_dummies",
|
||||
]
|
||||
optional = false
|
||||
version = "0.40"
|
||||
version = "0.41"
|
||||
|
||||
[dev-dependencies]
|
||||
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.95.1" }
|
||||
|
@ -1,253 +0,0 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string, values::CustomValueSupport, PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeltDF;
|
||||
|
||||
impl PluginCommand for MeltDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars melt"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars melt -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating melt".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let res = NuDataFrame::new(false, res);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "empty column list".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df
|
||||
.column(w[0].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let r_series = df
|
||||
.column(w[1].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "found different column types in list".into(),
|
||||
span: Some(col_span),
|
||||
help: Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&MeltDF)
|
||||
}
|
||||
}
|
@ -9,7 +9,6 @@ mod filter_with;
|
||||
mod first;
|
||||
mod get;
|
||||
mod last;
|
||||
mod melt;
|
||||
mod open;
|
||||
mod query_df;
|
||||
mod rename;
|
||||
@ -28,6 +27,7 @@ mod to_df;
|
||||
mod to_json_lines;
|
||||
mod to_nu;
|
||||
mod to_parquet;
|
||||
mod unpivot;
|
||||
mod with_column;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
@ -44,7 +44,6 @@ pub use filter_with::FilterWith;
|
||||
pub use first::FirstDF;
|
||||
pub use get::GetDF;
|
||||
pub use last::LastDF;
|
||||
pub use melt::MeltDF;
|
||||
use nu_plugin::PluginCommand;
|
||||
pub use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
@ -62,6 +61,7 @@ pub use to_df::ToDataFrame;
|
||||
pub use to_json_lines::ToJsonLines;
|
||||
pub use to_nu::ToNu;
|
||||
pub use to_parquet::ToParquet;
|
||||
pub use unpivot::UnpivotDF;
|
||||
pub use with_column::WithColumn;
|
||||
|
||||
pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
@ -76,7 +76,7 @@ pub(crate) fn eager_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugi
|
||||
Box::new(FilterWith),
|
||||
Box::new(GetDF),
|
||||
Box::new(OpenDataFrame),
|
||||
Box::new(MeltDF),
|
||||
Box::new(UnpivotDF),
|
||||
Box::new(Summary),
|
||||
Box::new(FirstDF),
|
||||
Box::new(LastDF),
|
||||
|
@ -16,6 +16,7 @@ use nu_protocol::{
|
||||
use std::{
|
||||
fs::File,
|
||||
io::BufReader,
|
||||
num::NonZeroUsize,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
@ -376,9 +377,13 @@ fn from_jsonl(
|
||||
file_path: &Path,
|
||||
file_span: Span,
|
||||
) -> Result<Value, ShellError> {
|
||||
let infer_schema: usize = call
|
||||
let infer_schema: NonZeroUsize = call
|
||||
.get_flag("infer-schema")?
|
||||
.unwrap_or(DEFAULT_INFER_SCHEMA);
|
||||
.and_then(NonZeroUsize::new)
|
||||
.unwrap_or(
|
||||
NonZeroUsize::new(DEFAULT_INFER_SCHEMA)
|
||||
.expect("The default infer-schema should be non zero"),
|
||||
);
|
||||
let maybe_schema = call
|
||||
.get_flag("schema")?
|
||||
.map(|schema| NuSchema::try_from(&schema))
|
||||
@ -528,7 +533,7 @@ fn from_csv(
|
||||
.with_infer_schema_length(Some(infer_schema))
|
||||
.with_skip_rows(skip_rows.unwrap_or_default())
|
||||
.with_schema(maybe_schema.map(|s| s.into()))
|
||||
.with_columns(columns.map(Arc::new))
|
||||
.with_columns(columns.map(|v| Arc::from(v.into_boxed_slice())))
|
||||
.map_parse_options(|options| {
|
||||
options
|
||||
.with_separator(
|
||||
|
@ -70,7 +70,7 @@ fn command(
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
}
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
PolarsPluginObject::NuLazyFrame(mut lazy) => {
|
||||
let schema = lazy.schema()?;
|
||||
let value: Value = schema.into();
|
||||
Ok(PipelineData::Value(value, None))
|
||||
|
358
crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs
Normal file
358
crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs
Normal file
@ -0,0 +1,358 @@
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::frame::explode::UnpivotArgs;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::utils::convert_columns_string,
|
||||
values::{CustomValueSupport, NuLazyFrame, PolarsPluginObject},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UnpivotDF;
|
||||
|
||||
impl PluginCommand for UnpivotDF {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars unpivot"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names for unpivoting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table(vec![]),
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.switch(
|
||||
"streamable",
|
||||
"Whether or not to use the polars streaming engine. Only valid for lazy dataframes",
|
||||
Some('s'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "unpivot on an eager dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "unpivot on a lazy dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -c [b c] -v [a d] | polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
], None)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
match PolarsPluginObject::try_from_pipeline(plugin, input, call.head)? {
|
||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df),
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Must be a dataframe or lazy dataframe".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.unpivot(&val_col_string, &id_col_string)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating unpivot".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error renaming column".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(name.span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
}
|
||||
|
||||
let res = NuDataFrame::new(false, res);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuLazyFrame,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value");
|
||||
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value");
|
||||
|
||||
let (id_col_string, _id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, _val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let value_name: Option<String> = call.get_flag("value-name")?;
|
||||
let variable_name: Option<String> = call.get_flag("variable-name")?;
|
||||
|
||||
let streamable = call.has_flag("streamable")?;
|
||||
|
||||
let unpivot_args = UnpivotArgs {
|
||||
on: val_col_string.iter().map(Into::into).collect(),
|
||||
index: id_col_string.iter().map(Into::into).collect(),
|
||||
value_name: value_name.map(Into::into),
|
||||
variable_name: variable_name.map(Into::into),
|
||||
streamable,
|
||||
};
|
||||
|
||||
let polars_df = df.to_polars().unpivot(unpivot_args);
|
||||
|
||||
let res = NuLazyFrame::new(false, polars_df);
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "empty column list".into(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df
|
||||
.column(w[0].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
let r_series = df
|
||||
.column(w[1].as_ref())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error selecting columns".into(),
|
||||
msg: e.to_string(),
|
||||
span: Some(col_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Merge error".into(),
|
||||
msg: "found different column types in list".into(),
|
||||
span: Some(col_span),
|
||||
help: Some(format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
)),
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), ShellError> {
|
||||
test_polars_plugin_command(&UnpivotDF)
|
||||
}
|
||||
}
|
@ -196,7 +196,8 @@ fn get_col_name(expr: &Expr) -> Option<String> {
|
||||
| Expr::Nth(_)
|
||||
| Expr::SubPlan(_, _)
|
||||
| Expr::IndexColumn(_)
|
||||
| Expr::Selector(_) => None,
|
||||
| Expr::Selector(_)
|
||||
| Expr::Field(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,11 +148,11 @@ fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
mut lazy: NuLazyFrame,
|
||||
expressions: Vec<Expr>,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let group_by = lazy.to_polars().group_by(expressions);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema()?);
|
||||
let group_by = NuLazyGroupBy::new(group_by, lazy.from_eager, lazy.schema().clone()?);
|
||||
group_by.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ impl PluginCommand for LazyJoin {
|
||||
Some('i'),
|
||||
)
|
||||
.switch("left", "left join between lazyframes", Some('l'))
|
||||
.switch("outer", "outer join between lazyframes", Some('o'))
|
||||
.switch("full", "full join between lazyframes", Some('f'))
|
||||
.switch("cross", "cross join between lazyframes", Some('c'))
|
||||
.named(
|
||||
"suffix",
|
||||
@ -183,13 +183,13 @@ impl PluginCommand for LazyJoin {
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let left = call.has_flag("left")?;
|
||||
let outer = call.has_flag("outer")?;
|
||||
let full = call.has_flag("full")?;
|
||||
let cross = call.has_flag("cross")?;
|
||||
|
||||
let how = if left {
|
||||
JoinType::Left
|
||||
} else if outer {
|
||||
JoinType::Outer
|
||||
} else if full {
|
||||
JoinType::Full
|
||||
} else if cross {
|
||||
JoinType::Cross
|
||||
} else {
|
||||
|
@ -140,7 +140,7 @@ impl PluginCommand for LazySortBy {
|
||||
|
||||
let sort_options = SortMultipleOptions {
|
||||
descending: reverse,
|
||||
nulls_last,
|
||||
nulls_last: vec![nulls_last],
|
||||
multithreaded: true,
|
||||
maintain_order,
|
||||
};
|
||||
|
@ -7,7 +7,10 @@ use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
use polars::{
|
||||
chunked_array::cast::CastOptions,
|
||||
prelude::{ChunkSet, DataType, IntoSeries},
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SetWithIndex;
|
||||
@ -96,7 +99,7 @@ fn command(
|
||||
let casted = match indices.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
|
||||
.as_ref()
|
||||
.cast(&DataType::UInt32)
|
||||
.cast(&DataType::UInt32, CastOptions::default())
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting indices".into(),
|
||||
msg: e.to_string(),
|
||||
|
@ -4,7 +4,8 @@ use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use polars::prelude::SeriesMethods;
|
||||
@ -25,6 +26,24 @@ impl PluginCommand for ValueCount {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.named(
|
||||
"column",
|
||||
SyntaxShape::String,
|
||||
"Provide a custom name for the coutn column",
|
||||
Some('c'),
|
||||
)
|
||||
.switch("sort", "Whether or not values should be sorted", Some('s'))
|
||||
.switch(
|
||||
"parallel",
|
||||
"Use multiple threads when processing",
|
||||
Some('p'),
|
||||
)
|
||||
.named(
|
||||
"normalize",
|
||||
SyntaxShape::String,
|
||||
"Normalize the counts",
|
||||
Some('n'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
@ -73,11 +92,15 @@ fn command(
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let column = call.get_flag("column")?.unwrap_or("count".to_string());
|
||||
let parallel = call.has_flag("parallel")?;
|
||||
let sort = call.has_flag("sort")?;
|
||||
let normalize = call.has_flag("normalize")?;
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
|
||||
let res = series
|
||||
.value_counts(false, false)
|
||||
.value_counts(sort, parallel, column, normalize)
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error calculating value counts values".into(),
|
||||
msg: e.to_string(),
|
||||
|
@ -41,19 +41,37 @@ pub(super) fn compute_between_series(
|
||||
let operation_span = Span::merge(left.span(), right.span());
|
||||
match operator.item {
|
||||
Operator::Math(Math::Plus) => {
|
||||
let mut res = lhs + rhs;
|
||||
let mut res = (lhs + rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Addition error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("sum_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
}
|
||||
Operator::Math(Math::Minus) => {
|
||||
let mut res = lhs - rhs;
|
||||
let mut res = (lhs - rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Subtraction error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("sub_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
}
|
||||
Operator::Math(Math::Multiply) => {
|
||||
let mut res = lhs * rhs;
|
||||
let mut res = (lhs * rhs).map_err(|e| ShellError::GenericError {
|
||||
error: format!("Multiplication error: {e}"),
|
||||
msg: "".into(),
|
||||
span: Some(operation_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
let name = format!("mul_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::try_from_series(res, operation_span)
|
||||
|
@ -1,7 +1,10 @@
|
||||
mod custom_value;
|
||||
|
||||
use nu_protocol::{record, ShellError, Span, Value};
|
||||
use polars::prelude::{col, AggExpr, Expr, Literal};
|
||||
use polars::{
|
||||
chunked_array::cast::CastOptions,
|
||||
prelude::{col, AggExpr, Expr, Literal},
|
||||
};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
@ -269,15 +272,23 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
Expr::Cast {
|
||||
expr,
|
||||
data_type,
|
||||
strict,
|
||||
} => Ok(Value::record(
|
||||
record! {
|
||||
"expr" => expr_to_value(expr.as_ref(), span)?,
|
||||
"dtype" => Value::string(format!("{data_type:?}"), span),
|
||||
"strict" => Value::bool(*strict, span),
|
||||
},
|
||||
span,
|
||||
)),
|
||||
options,
|
||||
} => {
|
||||
let cast_option_str = match options {
|
||||
CastOptions::Strict => "STRICT",
|
||||
CastOptions::NonStrict => "NON_STRICT",
|
||||
CastOptions::Overflowing => "OVERFLOWING",
|
||||
};
|
||||
|
||||
Ok(Value::record(
|
||||
record! {
|
||||
"expr" => expr_to_value(expr.as_ref(), span)?,
|
||||
"dtype" => Value::string(format!("{data_type:?}"), span),
|
||||
"cast_options" => Value::string(cast_option_str, span)
|
||||
},
|
||||
span,
|
||||
))
|
||||
}
|
||||
Expr::Gather {
|
||||
expr,
|
||||
idx,
|
||||
@ -388,6 +399,7 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
Expr::Window {
|
||||
function,
|
||||
partition_by,
|
||||
order_by,
|
||||
options,
|
||||
} => {
|
||||
let partition_by: Result<Vec<Value>, ShellError> = partition_by
|
||||
@ -399,6 +411,23 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
record! {
|
||||
"function" => expr_to_value(function, span)?,
|
||||
"partition_by" => Value::list(partition_by?, span),
|
||||
"order_by" => {
|
||||
if let Some((order_expr, sort_options)) = order_by {
|
||||
Value::record(record! {
|
||||
"expr" => expr_to_value(order_expr.as_ref(), span)?,
|
||||
"sort_options" => {
|
||||
Value::record(record!(
|
||||
"descending" => Value::bool(sort_options.descending, span),
|
||||
"nulls_last"=> Value::bool(sort_options.nulls_last, span),
|
||||
"multithreaded"=> Value::bool(sort_options.multithreaded, span),
|
||||
"maintain_order"=> Value::bool(sort_options.maintain_order, span),
|
||||
), span)
|
||||
}
|
||||
}, span)
|
||||
} else {
|
||||
Value::nothing(span)
|
||||
}
|
||||
},
|
||||
"options" => Value::string(format!("{options:?}"), span),
|
||||
},
|
||||
span,
|
||||
@ -424,6 +453,18 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
msg_span: span,
|
||||
input_span: Span::unknown(),
|
||||
}),
|
||||
Expr::Field(column_name) => {
|
||||
let fields: Vec<Value> = column_name
|
||||
.iter()
|
||||
.map(|s| Value::string(s.to_string(), span))
|
||||
.collect();
|
||||
Ok(Value::record(
|
||||
record!(
|
||||
"fields" => Value::list(fields, span)
|
||||
),
|
||||
span,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -77,14 +77,17 @@ impl NuLazyFrame {
|
||||
Self::new(self.from_eager, new_frame)
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> Result<NuSchema, ShellError> {
|
||||
let internal_schema = self.lazy.schema().map_err(|e| ShellError::GenericError {
|
||||
error: "Error getting schema from lazy frame".into(),
|
||||
msg: e.to_string(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
pub fn schema(&mut self) -> Result<NuSchema, ShellError> {
|
||||
let internal_schema =
|
||||
Arc::make_mut(&mut self.lazy)
|
||||
.schema()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error getting schema from lazy frame".into(),
|
||||
msg: e.to_string(),
|
||||
span: None,
|
||||
help: None,
|
||||
inner: vec![],
|
||||
})?;
|
||||
Ok(internal_schema.into())
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user