Make polars unpivot consistent with polars pivot (#13335)

# Description
Makes `polars unpivot` use the same arguments as `polars pivot` and
makes it consistent with the polars Rust API. Additionally, support for
the polars streaming engine has been exposed on eager dataframes.
Previously, it only worked with lazy dataframes.


# User-Facing Changes
* `polars unpivot` argument `--columns`|`-c` has been renamed to
`--index`|`-i`
* `polars unpivot` argument `--values`|`-v` has been renamed to
`--on`|`-o`
* `polars unpivot` short argument for `--streamable` is now `-t` to make
it consistent with `polars pivot`. It was made `-t` for `polars pivot`
because `-s` is already taken there as the short form of `--short`.
This commit is contained in:
Jack Wright 2024-07-10 14:36:38 -07:00 committed by GitHub
parent 0178295363
commit b68c7cf3fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -30,16 +30,16 @@ impl PluginCommand for UnpivotDF {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.required_named( .required_named(
"columns", "index",
SyntaxShape::Table(vec![]), SyntaxShape::Table(vec![]),
"column names for unpivoting", "column names for unpivoting",
Some('c'), Some('i'),
) )
.required_named( .required_named(
"values", "on",
SyntaxShape::Table(vec![]), SyntaxShape::Table(vec![]),
"column names used as value columns", "column names used as value columns",
Some('v'), Some('o'),
) )
.named( .named(
"variable-name", "variable-name",
@ -60,7 +60,7 @@ impl PluginCommand for UnpivotDF {
.switch( .switch(
"streamable", "streamable",
"Whether or not to use the polars streaming engine. Only valid for lazy dataframes", "Whether or not to use the polars streaming engine. Only valid for lazy dataframes",
Some('s'), Some('t'),
) )
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -70,7 +70,7 @@ impl PluginCommand for UnpivotDF {
Example { Example {
description: "unpivot on an eager dataframe", description: "unpivot on an eager dataframe",
example: example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -c [b c] -v [a d]", "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -i [b c] -o [a d]",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
@ -125,7 +125,7 @@ impl PluginCommand for UnpivotDF {
Example { Example {
description: "unpivot on a lazy dataframe", description: "unpivot on a lazy dataframe",
example: example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -c [b c] -v [a d] | polars collect", "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -i [b c] -o [a d] | polars collect",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
@ -208,21 +208,31 @@ fn command_eager(
call: &EvaluatedCall, call: &EvaluatedCall,
df: NuDataFrame, df: NuDataFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value"); let index_col: Vec<Value> = call.get_flag("index")?.expect("required value");
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value"); let on_col: Vec<Value> = call.get_flag("on")?.expect("required value");
let value_name: Option<Spanned<String>> = call.get_flag("value-name")?; let value_name: Option<Spanned<String>> = call.get_flag("value-name")?;
let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?; let variable_name: Option<Spanned<String>> = call.get_flag("variable-name")?;
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; let (index_col_string, index_col_span) = convert_columns_string(index_col, call.head)?;
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; let (on_col_string, on_col_span) = convert_columns_string(on_col, call.head)?;
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; check_column_datatypes(df.as_ref(), &index_col_string, index_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; check_column_datatypes(df.as_ref(), &on_col_string, on_col_span)?;
let mut res = df let streamable = call.has_flag("streamable")?;
let args = UnpivotArgs {
on: on_col_string.iter().map(Into::into).collect(),
index: index_col_string.iter().map(Into::into).collect(),
variable_name: variable_name.map(|s| s.item.into()),
value_name: value_name.map(|s| s.item.into()),
streamable,
};
let res = df
.as_ref() .as_ref()
.unpivot(&val_col_string, &id_col_string) .unpivot2(args)
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "Error calculating unpivot".into(), error: "Error calculating unpivot".into(),
msg: e.to_string(), msg: e.to_string(),
@ -231,28 +241,6 @@ fn command_eager(
inner: vec![], inner: vec![],
})?; })?;
if let Some(name) = &variable_name {
res.rename("variable", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
if let Some(name) = &value_name {
res.rename("value", &name.item)
.map_err(|e| ShellError::GenericError {
error: "Error renaming column".into(),
msg: e.to_string(),
span: Some(name.span),
help: None,
inner: vec![],
})?;
}
let res = NuDataFrame::new(false, res); let res = NuDataFrame::new(false, res);
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }
@ -263,11 +251,11 @@ fn command_lazy(
call: &EvaluatedCall, call: &EvaluatedCall,
df: NuLazyFrame, df: NuLazyFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let id_col: Vec<Value> = call.get_flag("columns")?.expect("required value"); let index_col: Vec<Value> = call.get_flag("index")?.expect("required value");
let val_col: Vec<Value> = call.get_flag("values")?.expect("required value"); let on_col: Vec<Value> = call.get_flag("on")?.expect("required value");
let (id_col_string, _id_col_span) = convert_columns_string(id_col, call.head)?; let (index_col_string, _index_col_span) = convert_columns_string(index_col, call.head)?;
let (val_col_string, _val_col_span) = convert_columns_string(val_col, call.head)?; let (on_col_string, _on_col_span) = convert_columns_string(on_col, call.head)?;
let value_name: Option<String> = call.get_flag("value-name")?; let value_name: Option<String> = call.get_flag("value-name")?;
let variable_name: Option<String> = call.get_flag("variable-name")?; let variable_name: Option<String> = call.get_flag("variable-name")?;
@ -275,8 +263,8 @@ fn command_lazy(
let streamable = call.has_flag("streamable")?; let streamable = call.has_flag("streamable")?;
let unpivot_args = UnpivotArgs { let unpivot_args = UnpivotArgs {
on: val_col_string.iter().map(Into::into).collect(), on: on_col_string.iter().map(Into::into).collect(),
index: id_col_string.iter().map(Into::into).collect(), index: index_col_string.iter().map(Into::into).collect(),
value_name: value_name.map(Into::into), value_name: value_name.map(Into::into),
variable_name: variable_name.map(Into::into), variable_name: variable_name.map(Into::into),
streamable, streamable,