mirror of
https://github.com/nushell/nushell.git
synced 2025-05-29 14:21:45 +02:00
polars: expand `polars col` to handle multiple columns and by types (#15570)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> This PR seeks to expand `polars col` functionality to allow selecting multiple columns and columns by type, which is particularly useful when piping to subsequent expressions that should be applied to each column selected (e.g., `polars col int --type | polars sum` as a shorthand for `[(polars col a | polars sum), (polars col b | polars sum)]`). See examples below. ```nushell # Select multiple columns (cannot be used with asterisk wildcard) > [[a b c]; [x 1 1.1] [y 2 2.2] [z 3 3.3]] | polars into-df | polars select (polars col b c | polars sum) | polars collect ╭───┬───┬──────╮ │ # │ b │ c │ ├───┼───┼──────┤ │ 0 │ 6 │ 6.60 │ ╰───┴───┴──────╯ # Select multiple columns by types (cannot be used with asterisk wildcard) > [[a b c]; [x o 1.1] [y p 2.2] [z q 3.3]] | polars into-df | polars select (polars col str f64 --type | polars max) | polars collect ╭───┬───┬───┬──────╮ │ # │ a │ b │ c │ ├───┼───┼───┼──────┤ │ 0 │ z │ q │ 3.30 │ ╰───┴───┴───┴──────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> No breaking changes. Users have the additional capability to select multiple columns in `polars col`. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Examples have been added to `polars col`. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
d273ce89df
commit
0e9927ea4d
@ -1,14 +1,14 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
dataframe::values::NuExpression,
|
dataframe::values::NuExpression,
|
||||||
values::{Column, CustomValueSupport, NuDataFrame},
|
values::{str_to_dtype, Column, CustomValueSupport, NuDataFrame},
|
||||||
PolarsPlugin,
|
PolarsPlugin,
|
||||||
};
|
};
|
||||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
record, Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type,
|
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
|
||||||
Value,
|
SyntaxShape, Type, Value,
|
||||||
};
|
};
|
||||||
use polars::prelude::col;
|
use polars::prelude::DataType;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ExprCol;
|
pub struct ExprCol;
|
||||||
@ -31,6 +31,12 @@ impl PluginCommand for ExprCol {
|
|||||||
SyntaxShape::String,
|
SyntaxShape::String,
|
||||||
"Name of column to be used. '*' can be used for all columns.",
|
"Name of column to be used. '*' can be used for all columns.",
|
||||||
)
|
)
|
||||||
|
.rest(
|
||||||
|
"more columns",
|
||||||
|
SyntaxShape::String,
|
||||||
|
"Additional columns to be used. Cannot be '*'",
|
||||||
|
)
|
||||||
|
.switch("type", "Treat column names as type names", Some('t'))
|
||||||
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
.input_output_type(Type::Any, Type::Custom("expression".into()))
|
||||||
.category(Category::Custom("expression".into()))
|
.category(Category::Custom("expression".into()))
|
||||||
}
|
}
|
||||||
@ -57,6 +63,31 @@ impl PluginCommand for ExprCol {
|
|||||||
.into_value(Span::test_data()),
|
.into_value(Span::test_data()),
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
Example {
|
||||||
|
description: "Select multiple columns (cannot be used with asterisk wildcard)",
|
||||||
|
example: "[[a b c]; [x 1 1.1] [y 2 2.2] [z 3 3.3]] | polars into-df | polars select (polars col b c | polars sum) | polars collect",
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::try_from_columns(vec![
|
||||||
|
Column::new("b".to_string(), vec![Value::test_int(6)]),
|
||||||
|
Column::new("c".to_string(), vec![Value::test_float(6.6)]),
|
||||||
|
],None)
|
||||||
|
.expect("should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Select multiple columns by types (cannot be used with asterisk wildcard)",
|
||||||
|
example: "[[a b c]; [x o 1.1] [y p 2.2] [z q 3.3]] | polars into-df | polars select (polars col str f64 --type | polars max) | polars collect",
|
||||||
|
result: Some(
|
||||||
|
NuDataFrame::try_from_columns(vec![
|
||||||
|
Column::new("a".to_string(), vec![Value::test_string("z")]),
|
||||||
|
Column::new("b".to_string(), vec![Value::test_string("q")]),
|
||||||
|
Column::new("c".to_string(), vec![Value::test_float(3.3)]),
|
||||||
|
],None)
|
||||||
|
.expect("should not fail")
|
||||||
|
.into_value(Span::test_data()),
|
||||||
|
),
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,8 +102,27 @@ impl PluginCommand for ExprCol {
|
|||||||
call: &EvaluatedCall,
|
call: &EvaluatedCall,
|
||||||
_input: PipelineData,
|
_input: PipelineData,
|
||||||
) -> Result<PipelineData, LabeledError> {
|
) -> Result<PipelineData, LabeledError> {
|
||||||
let name: String = call.req(0)?;
|
let mut names: Vec<String> = vec![call.req(0)?];
|
||||||
let expr: NuExpression = col(name.as_str()).into();
|
names.extend(call.rest(1)?);
|
||||||
|
|
||||||
|
let as_type = call.has_flag("type")?;
|
||||||
|
|
||||||
|
let expr: NuExpression = match as_type {
|
||||||
|
false => match names.as_slice() {
|
||||||
|
[single] => polars::prelude::col(single).into(),
|
||||||
|
_ => polars::prelude::cols(&names).into(),
|
||||||
|
},
|
||||||
|
true => {
|
||||||
|
let dtypes = names
|
||||||
|
.iter()
|
||||||
|
.map(|n| str_to_dtype(n, call.head))
|
||||||
|
.collect::<Result<Vec<DataType>, ShellError>>()
|
||||||
|
.map_err(LabeledError::from)?;
|
||||||
|
|
||||||
|
polars::prelude::dtype_cols(dtypes).into()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
expr.to_pipeline_data(plugin, engine, call.head)
|
expr.to_pipeline_data(plugin, engine, call.head)
|
||||||
.map_err(LabeledError::from)
|
.map_err(LabeledError::from)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user