mirror of
https://github.com/nushell/nushell.git
synced 2025-04-25 21:58:18 +02:00
polars: add new command `polars over` (#15551)
<!--
if this PR closes one or more issues, you can automatically link the PR with
them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes.

Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience.
-->

Introducing a basic implementation of the polars expression for window functions: `over` (https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.over.html). Note that this PR only implements the default values for the sorting and `mapping_strategy` parameters. Implementations for other values for these parameters may be added in a future PR, as the demand arises.

```nushell
# Compute expression over an aggregation window
> [[a b]; [x 2] [x 4] [y 6] [y 4]]
    | polars into-lazy
    | polars select a (polars col b | polars cumulative sum | polars over a | polars as cum_b)
    | polars collect
╭───┬───┬───────╮
│ # │ a │ cum_b │
├───┼───┼───────┤
│ 0 │ x │     2 │
│ 1 │ x │     6 │
│ 2 │ y │     6 │
│ 3 │ y │    10 │
╰───┴───┴───────╯

# Compute expression over an aggregation window where partitions are defined by expressions
> [[a b]; [x 2] [X 4] [Y 6] [y 4]]
    | polars into-lazy
    | polars select a (polars col b | polars cumulative sum | polars over (polars col a | polars lowercase) | polars as cum_b)
    | polars collect
╭───┬───┬───────╮
│ # │ a │ cum_b │
├───┼───┼───────┤
│ 0 │ x │     2 │
│ 1 │ X │     6 │
│ 2 │ Y │     6 │
│ 3 │ y │    10 │
╰───┴───┴───────╯
```

# User-Facing Changes
<!--
List of all changes that impact the user experience here. This helps us keep track of breaking changes.
-->

No breaking changes. This PR seeks to add a new command only.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu  # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

Example tests are included.

# After Submitting
<!--
If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
parent
d31b7024d8
commit
ceaa0f9375
@ -10,6 +10,7 @@ mod median;
|
||||
mod min;
|
||||
mod n_null;
|
||||
mod n_unique;
|
||||
mod over;
|
||||
mod quantile;
|
||||
mod rolling;
|
||||
mod std;
|
||||
@ -30,6 +31,7 @@ use mean::ExprMean;
|
||||
use min::ExprMin;
|
||||
pub use n_null::NNull;
|
||||
pub use n_unique::NUnique;
|
||||
pub use over::Over;
|
||||
pub use rolling::Rolling;
|
||||
use std::ExprStd;
|
||||
pub use sum::ExprSum;
|
||||
@ -52,6 +54,7 @@ pub(crate) fn aggregation_commands() -> Vec<Box<dyn PluginCommand<Plugin = Polar
|
||||
Box::new(median::LazyMedian),
|
||||
Box::new(quantile::LazyQuantile),
|
||||
Box::new(groupby::ToLazyGroupBy),
|
||||
Box::new(Over),
|
||||
Box::new(Rolling),
|
||||
Box::new(ValueCount),
|
||||
Box::new(NNull),
|
||||
|
@ -0,0 +1,119 @@
|
||||
use crate::{
|
||||
dataframe::values::{NuDataFrame, NuExpression},
|
||||
values::{cant_convert_err, CustomValueSupport, PolarsPluginObject, PolarsPluginType},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::df;
|
||||
|
||||
/// The `polars over` plugin command.
///
/// A stateless unit type; all behaviour lives in its `PluginCommand`
/// implementation. `Debug` is derived alongside `Clone` so the command can be
/// printed in diagnostics and embedded in other `Debug`-deriving types.
#[derive(Clone, Debug)]
pub struct Over;
|
||||
|
||||
impl PluginCommand for Over {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars over"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Compute expressions over a window group defined by partition expressions."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.rest(
|
||||
"partition by expressions",
|
||||
SyntaxShape::Any,
|
||||
"Expression(s) that define the partition window",
|
||||
)
|
||||
.input_output_type(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)
|
||||
.category(Category::Custom("lazyframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
description: "Compute expression over an aggregation window",
|
||||
example: r#"[[a b]; [x 2] [x 4] [y 6] [y 4]]
|
||||
| polars into-lazy
|
||||
| polars select a (polars col b | polars cumulative sum | polars over a | polars as cum_b)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &["x", "x", "y", "y"],
|
||||
"cum_b" => &[2, 6, 6, 10]
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Compute expression over an aggregation window where partitions are defined by expressions",
|
||||
example: r#"[[a b]; [x 2] [X 4] [Y 6] [y 4]]
|
||||
| polars into-lazy
|
||||
| polars select a (polars col b | polars cumulative sum | polars over (polars col a | polars lowercase) | polars as cum_b)
|
||||
| polars collect"#,
|
||||
result: Some(
|
||||
NuDataFrame::from(
|
||||
df!(
|
||||
"a" => &["x", "X", "Y", "y"],
|
||||
"cum_b" => &[2, 6, 6, 10]
|
||||
)
|
||||
.expect("should not fail"),
|
||||
)
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
let vals: Vec<Value> = call.rest(0)?;
|
||||
let expr_value = Value::list(vals, call.head);
|
||||
let expressions = NuExpression::extract_exprs(plugin, expr_value)?;
|
||||
|
||||
let input_value = input.into_value(call.head)?;
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &input_value)? {
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let expr: NuExpression = expr
|
||||
.into_polars()
|
||||
.over_with_options(expressions, None, Default::default())
|
||||
.into();
|
||||
expr.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&input_value,
|
||||
&[PolarsPluginType::NuExpression],
|
||||
)),
|
||||
}
|
||||
.map_err(LabeledError::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::Over;
    use crate::test::test_polars_plugin_command;
    use nu_protocol::ShellError;

    /// Hands the `Over` command to the crate's `test_polars_plugin_command`
    /// helper and propagates whatever result it reports.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let command = Over;
        test_polars_plugin_command(&command)
    }
}
|
Loading…
Reference in New Issue
Block a user