mirror of
https://github.com/nushell/nushell.git
synced 2025-05-08 20:14:26 +02:00
feat(polars): introduce new polars replace
This commit is contained in:
parent
f264f656ff
commit
82cf13d5f8
@ -82,6 +82,7 @@ features = [
|
||||
"parquet",
|
||||
"pivot",
|
||||
"random",
|
||||
"replace",
|
||||
"rolling_window",
|
||||
"rows",
|
||||
"round_series",
|
||||
|
@ -40,6 +40,7 @@ mod unnest;
|
||||
mod unpivot;
|
||||
mod with_column;
|
||||
use filter::LazyFilter;
|
||||
mod replace;
|
||||
mod shift;
|
||||
mod unique;
|
||||
|
||||
@ -68,6 +69,7 @@ pub use last::LastDF;
|
||||
pub use lit::ExprLit;
|
||||
use query_df::QueryDf;
|
||||
pub use rename::RenameDF;
|
||||
pub use replace::Replace;
|
||||
pub use sample::SampleDF;
|
||||
pub use shift::Shift;
|
||||
pub use slice::SliceDF;
|
||||
@ -114,6 +116,7 @@ pub(crate) fn data_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
|
||||
Box::new(select::LazySelect),
|
||||
Box::new(LazySortBy),
|
||||
Box::new(LazyFilter),
|
||||
Box::new(Replace),
|
||||
Box::new(Shift),
|
||||
Box::new(struct_json_encode::StructJsonEncode),
|
||||
Box::new(qcut::QCutSeries),
|
||||
|
335
crates/nu_plugin_polars/src/dataframe/command/data/replace.rs
Normal file
335
crates/nu_plugin_polars/src/dataframe/command/data/replace.rs
Normal file
@ -0,0 +1,335 @@
|
||||
use crate::{
|
||||
values::{str_to_dtype, CustomValueSupport, NuDataFrame, NuExpression},
|
||||
PolarsPlugin,
|
||||
};
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use polars::{df, prelude::*};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Replace;
|
||||
|
||||
impl PluginCommand for Replace {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars replace"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Create an expression that replaces old values with new values"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"old",
|
||||
SyntaxShape::OneOf(vec![SyntaxShape::Record(vec![]), SyntaxShape::List(Box::new(SyntaxShape::Any))]),
|
||||
"Values to be replaced",
|
||||
)
|
||||
.optional(
|
||||
"new",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::Any)),
|
||||
"Values to replace by",
|
||||
)
|
||||
.switch(
|
||||
"strict",
|
||||
"Require that all values must be replaced or throw an error (ignored if `old` or `new` are expressions).",
|
||||
Some('s'),
|
||||
)
|
||||
.named(
|
||||
"default",
|
||||
SyntaxShape::Any,
|
||||
"Set values that were not replaced to this value. If no default is specified, (default), an error is raised if any values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals.",
|
||||
Some('d'),
|
||||
)
|
||||
.named(
|
||||
"return-dtype",
|
||||
SyntaxShape::String,
|
||||
"Data type of the resulting expression. If set to `null` (default), the data type is determined automatically based on the other inputs.",
|
||||
Some('t'),
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Replace column with different values of same type",
|
||||
example: "[[a]; [1] [1] [2] [2]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace [1 2] [10 20])
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => [10, 10, 20, 20])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Replace column with different values of another type",
|
||||
example: "[[a]; [1] [1] [2] [2]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace [1 2] [a b] --strict)
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => ["a", "a", "b", "b"])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Replace column with different values based on expressions (cannot be used with strict)",
|
||||
example: "[[a]; [1] [1] [2] [2]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace [(polars col a | polars max)] [(polars col a | polars max | $in + 5)])
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => [1, 1, 7, 7])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Replace column with different values based on expressions with default",
|
||||
example: "[[a]; [1] [1] [2] [3]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace [1] [10] --default (polars col a | polars max | $in * 100) --strict)
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => [10, 10, 300, 300])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Replace column with different values based on expressions with default",
|
||||
example: "[[a]; [1] [1] [2] [3]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace [1] [10] --default (polars col a | polars max | $in * 100) --strict --return-dtype str)
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => ["10", "10", "300", "300"])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Replace column with different values using a record",
|
||||
example: "[[a]; [1] [1] [2] [2]]
|
||||
| polars into-df
|
||||
| polars select (polars col a | polars replace {1: a, 2: b} --strict --return-dtype str)
|
||||
| polars collect",
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!("a" => ["a", "a", "b", "b"])
|
||||
.expect("simple df for test should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec!["replace"]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let (old_vals, new_vals) = match (call.req(0)?, call.opt::<Value>(1)?) {
|
||||
(Value::Record { val, .. }, None) => val
|
||||
.iter()
|
||||
.map(|(key, value)| (Value::string(key, call.head), value.clone()))
|
||||
.collect::<Vec<(Value, Value)>>()
|
||||
.into_iter()
|
||||
.unzip(),
|
||||
(Value::List { vals: old_vals, .. }, Some(Value::List { vals: new_vals, .. })) => {
|
||||
(old_vals, new_vals)
|
||||
}
|
||||
(_, _) => {
|
||||
return Err(LabeledError::from(ShellError::GenericError {
|
||||
error: "Invalid arguments".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: Some("`old` must be either a record or list. If `old` is a record, then `new` must not be specified. Otherwise, `new` must also be a list".into()),
|
||||
inner: vec![],
|
||||
}));
|
||||
}
|
||||
};
|
||||
// let new_vals: Vec<Value> = call.req(1)?;
|
||||
|
||||
let old = values_to_expr(plugin, call.head, old_vals)?;
|
||||
let new = values_to_expr(plugin, call.head, new_vals)?;
|
||||
|
||||
let strict = call.has_flag("strict")?;
|
||||
let return_dtype = match call.get_flag::<String>("return-dtype")? {
|
||||
Some(dtype) => {
|
||||
if !strict {
|
||||
return Err(LabeledError::from(ShellError::GenericError {
|
||||
error: "`return-dtype` may only be used with `strict`".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}));
|
||||
}
|
||||
Some(str_to_dtype(&dtype, call.head)?)
|
||||
}
|
||||
|
||||
None => None,
|
||||
};
|
||||
|
||||
let default = match call.get_flag::<Value>("default")? {
|
||||
Some(default) => {
|
||||
if !strict {
|
||||
return Err(LabeledError::from(ShellError::GenericError {
|
||||
error: "`default` may only be used with `strict`".into(),
|
||||
msg: "".into(),
|
||||
span: Some(call.head),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}));
|
||||
}
|
||||
Some(values_to_expr(plugin, call.head, vec![default])?)
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let expr = NuExpression::try_from_pipeline(plugin, input, call.head)?;
|
||||
|
||||
let expr: NuExpression = if strict {
|
||||
expr.into_polars()
|
||||
.replace_strict(old, new, default, return_dtype)
|
||||
.into()
|
||||
} else {
|
||||
expr.into_polars().replace(old, new).into()
|
||||
};
|
||||
|
||||
expr.to_pipeline_data(plugin, engine, call.head)
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
fn values_to_expr(
|
||||
plugin: &PolarsPlugin,
|
||||
span: Span,
|
||||
values: Vec<Value>,
|
||||
) -> Result<Expr, ShellError> {
|
||||
match values.first() {
|
||||
Some(Value::Int { .. }) => {
|
||||
let series_values = values
|
||||
.into_iter()
|
||||
.filter_map(|v| match v {
|
||||
Value::Int { val, .. } => Some(val),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<i64>>();
|
||||
Ok(lit(Series::new("old".into(), &series_values)))
|
||||
}
|
||||
|
||||
Some(Value::Bool { .. }) => {
|
||||
let series_values = values
|
||||
.into_iter()
|
||||
.filter_map(|v| match v {
|
||||
Value::Bool { val, .. } => Some(val),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<bool>>();
|
||||
Ok(lit(Series::new("old".into(), &series_values)))
|
||||
}
|
||||
|
||||
Some(Value::Float { .. }) => {
|
||||
let series_values = values
|
||||
.into_iter()
|
||||
.filter_map(|v| match v {
|
||||
Value::Float { val, .. } => Some(val),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<f64>>();
|
||||
Ok(lit(Series::new("old".into(), &series_values)))
|
||||
}
|
||||
|
||||
Some(Value::String { .. }) => {
|
||||
let series_values = values
|
||||
.into_iter()
|
||||
.filter_map(|v| match v {
|
||||
Value::String { val, .. } => Some(val),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<String>>();
|
||||
Ok(lit(Series::new("old".into(), &series_values)))
|
||||
}
|
||||
|
||||
Some(Value::Custom { .. }) => {
|
||||
if values.len() > 1 {
|
||||
return Err(ShellError::GenericError {
|
||||
error: "Multiple expressions to be replaced is not supported".into(),
|
||||
msg: "".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
NuExpression::try_from_value(
|
||||
plugin,
|
||||
values
|
||||
.first()
|
||||
.expect("Presence of first element is enforced at argument parsing."),
|
||||
)
|
||||
.map(|expr| expr.into_polars())
|
||||
}
|
||||
|
||||
x @ Some(_) => Err(ShellError::GenericError {
|
||||
error: "Cannot convert input to expression".into(),
|
||||
msg: "".into(),
|
||||
span: Some(span),
|
||||
help: Some(format!("Unexpected type: {x:?}")),
|
||||
inner: vec![],
|
||||
}),
|
||||
|
||||
None => Err(ShellError::GenericError {
|
||||
error: "Missing input values".into(),
|
||||
msg: "".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test::test_polars_plugin_command;
|
||||
|
||||
#[test]
|
||||
fn test_examples() -> Result<(), nu_protocol::ShellError> {
|
||||
test_polars_plugin_command(&Replace)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user