mirror of
https://github.com/nushell/nushell.git
synced 2025-05-02 09:04:30 +02:00
feat(polars): enable as_date and as_datetime to handle expressions as inputs (#15590)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> This PR is a follow-up to the previous PR #15557 and part of a wider campaign to enable certain polars commands that only operated on the entire dataframe to also operate on expressions. Here, we enable two commands `polars as-date` and `polars as-datetime` to receive expressions as inputs so that they may be used on specific columns in a dataframe with multiple columns of different types. See examples below. ```nushell > [[a b]; ["2025-04-01" 1] ["2025-04-02" 2] ["2025-04-03" 3]] | polars into-df | polars select (polars col a | polars as-date %Y-%m-%d) b | polars collect ╭───┬───────────────────────┬───╮ │ # │ a │ b │ ├───┼───────────────────────┼───┤ │ 0 │ 04/01/2025 12:00:00AM │ 1 │ │ 1 │ 04/02/2025 12:00:00AM │ 2 │ │ 2 │ 04/03/2025 12:00:00AM │ 3 │ ╰───┴───────────────────────┴───╯ > seq date -b 2025-04-01 --periods 4 --increment 25min -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars select (polars col 0 | polars as-datetime "%Y-%m-%d %H:%M:%S") | polars collect ╭───┬───────────────────────╮ │ # │ 0 │ ├───┼───────────────────────┤ │ 0 │ 04/01/2025 12:00:00AM │ │ 1 │ 04/01/2025 12:25:00AM │ │ 2 │ 04/01/2025 12:50:00AM │ │ 3 │ 04/01/2025 01:15:00AM │ ╰───┴───────────────────────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. 
This helps us keep track of breaking changes. --> No breaking changes. Users have the additional option to use `polars as-date` and `polars as-datetime` in expressions that operate on specific columns. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Examples have been added to `polars as-date` and `polars as-datetime`. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
95998bdd53
commit
a2dc3e3b33
@ -1,13 +1,19 @@
|
||||
use crate::{values::CustomValueSupport, PolarsPlugin};
|
||||
|
||||
use super::super::super::values::NuDataFrame;
|
||||
use crate::{
|
||||
values::{
|
||||
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
|
||||
NuSchema, PolarsPluginObject, PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use chrono::DateTime;
|
||||
use std::sync::Arc;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
|
||||
SyntaxShape, Type, Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, StringMethods};
|
||||
use polars::prelude::{col, DataType, Field, IntoSeries, Schema, StringMethods, StrptimeOptions};
|
||||
|
||||
/// Command type for `polars as-date`; `PluginCommand` is implemented for it
/// further down in this file. Carries no state of its own.
#[derive(Clone)]
|
||||
pub struct AsDate;
|
||||
@ -34,10 +40,16 @@ impl PluginCommand for AsDate {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formatting date string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
])
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
@ -46,12 +58,110 @@ impl PluginCommand for AsDate {
|
||||
Example {
|
||||
description: "Converts string to date",
|
||||
example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
|
||||
result: None, // help is needed on how to provide results
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"date".to_string(),
|
||||
vec![
|
||||
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
|
||||
// We therefore force the type to be DataType::Date in the schema
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new("date".into(), DataType::Date),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to date",
|
||||
example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-df | polars as-date "%Y-%m-%d" --not-exact"#,
|
||||
result: None,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"date".to_string(),
|
||||
vec![
|
||||
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
|
||||
// We therefore force the type to be DataType::Date in the schema
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new("date".into(), DataType::Date),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to date in an expression",
|
||||
example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-lazy | polars select (polars col 0 | polars as-date "%Y-%m-%d" --not-exact)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"date".to_string(),
|
||||
vec![
|
||||
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
|
||||
// We therefore force the type to be DataType::Date in the schema
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new("date".into(), DataType::Date),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Output is of date type",
|
||||
@ -85,8 +195,61 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(0)?;
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
let value = input.into_value(call.head)?;
|
||||
|
||||
let options = StrptimeOptions {
|
||||
format: Some(format.into()),
|
||||
strict: true,
|
||||
exact: !not_exact,
|
||||
cache: Default::default(),
|
||||
};
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy, options),
|
||||
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df, options),
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let res: NuExpression = expr.into_polars().str().to_date(options).into();
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
options: StrptimeOptions,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
NuLazyFrame::new(
|
||||
false,
|
||||
lazy.to_polars().select([col("*").str().to_date(options)]),
|
||||
)
|
||||
.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
options: StrptimeOptions,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format = if let Some(format) = options.format {
|
||||
format.to_string()
|
||||
} else {
|
||||
unreachable!("`format` will never be None")
|
||||
};
|
||||
let not_exact = !options.exact;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
let casted = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
|
@ -1,15 +1,22 @@
|
||||
use crate::{values::CustomValueSupport, PolarsPlugin};
|
||||
use crate::{
|
||||
values::{
|
||||
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
|
||||
NuSchema, PolarsPluginObject, PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use chrono::DateTime;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame, NuSchema};
|
||||
|
||||
use chrono::DateTime;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
|
||||
use polars::prelude::{
|
||||
col, DataType, Expr, Field, IntoSeries, LiteralValue, PlSmallStr, Schema, StringMethods,
|
||||
StrptimeOptions, TimeUnit,
|
||||
};
|
||||
|
||||
/// Command type for `polars as-datetime`; `PluginCommand` is implemented for
/// it further down in this file. Carries no state of its own.
#[derive(Clone)]
|
||||
pub struct AsDateTime;
|
||||
@ -42,14 +49,30 @@ impl PluginCommand for AsDateTime {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![
|
||||
(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
),
|
||||
(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
),
|
||||
])
|
||||
.required("format", SyntaxShape::String, "formatting date time string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware). Ignored if input is an expression.", None)
|
||||
.named(
|
||||
"ambiguous",
|
||||
SyntaxShape::OneOf(vec![SyntaxShape::String, SyntaxShape::Nothing]),
|
||||
r#"Determine how to deal with ambiguous datetimes:
|
||||
`raise` (default): raise error
|
||||
`earliest`: use the earliest datetime
|
||||
`latest`: use the latest datetime
|
||||
`null`: set to null
|
||||
Used only when input is a lazyframe or expression and ignored otherwise"#,
|
||||
Some('a'),
|
||||
) .category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
@ -158,6 +181,63 @@ impl PluginCommand for AsDateTime {
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime using the `--not-exact` flag even with excessive symbols in an expression",
|
||||
example: r#"["2025-11-02 00:00:00", "2025-11-02 01:00:00", "2025-11-02 02:00:00", "2025-11-02 03:00:00"] | polars into-df | polars select (polars col 0 | polars as-datetime "%Y-%m-%d %H:%M:%S")"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-11-02 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-11-02 01:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-11-02 02:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-11-02 03:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
)],
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
None
|
||||
),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
@ -182,7 +262,98 @@ fn command(
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
let tz_aware = !call.has_flag("naive")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let value = input.into_value(call.head)?;
|
||||
|
||||
let options = StrptimeOptions {
|
||||
format: Some(format.into()),
|
||||
strict: true,
|
||||
exact: !not_exact,
|
||||
cache: Default::default(),
|
||||
};
|
||||
|
||||
let ambiguous = match call.get_flag::<Value>("ambiguous")? {
|
||||
Some(Value::String { val, internal_span }) => match val.as_str() {
|
||||
"raise" | "earliest" | "latest" => Ok(val),
|
||||
_ => Err(ShellError::GenericError {
|
||||
error: "Invalid argument value".into(),
|
||||
msg: "`ambiguous` must be one of raise, earliest, latest, or null".into(),
|
||||
span: Some(internal_span),
|
||||
help: None,
|
||||
inner: vec![],
|
||||
}),
|
||||
},
|
||||
Some(Value::Nothing { .. }) => Ok("null".into()),
|
||||
Some(_) => unreachable!("Argument only accepts string or null."),
|
||||
None => Ok("raise".into()),
|
||||
}
|
||||
.map_err(LabeledError::from)?;
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuLazyFrame(lazy) => {
|
||||
command_lazy(plugin, engine, call, lazy, options, ambiguous)
|
||||
}
|
||||
PolarsPluginObject::NuDataFrame(df) => {
|
||||
command_eager(plugin, engine, call, df, options, tz_aware)
|
||||
}
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let res: NuExpression = expr
|
||||
.into_polars()
|
||||
.str()
|
||||
.to_datetime(
|
||||
None,
|
||||
None,
|
||||
options,
|
||||
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))),
|
||||
)
|
||||
.into();
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
_ => Err(cant_convert_err(
|
||||
&value,
|
||||
&[
|
||||
PolarsPluginType::NuDataFrame,
|
||||
PolarsPluginType::NuLazyFrame,
|
||||
PolarsPluginType::NuExpression,
|
||||
],
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn command_lazy(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
lazy: NuLazyFrame,
|
||||
options: StrptimeOptions,
|
||||
ambiguous: String,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
NuLazyFrame::new(
|
||||
false,
|
||||
lazy.to_polars().select([col("*").str().to_datetime(
|
||||
None,
|
||||
None,
|
||||
options,
|
||||
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))),
|
||||
)]),
|
||||
)
|
||||
.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
|
||||
fn command_eager(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
df: NuDataFrame,
|
||||
options: StrptimeOptions,
|
||||
tz_aware: bool,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format = if let Some(format) = options.format {
|
||||
format.to_string()
|
||||
} else {
|
||||
unreachable!("`format` will never be None")
|
||||
};
|
||||
let not_exact = !options.exact;
|
||||
|
||||
let series = df.as_series(call.head)?;
|
||||
let casted = series.str().map_err(|e| ShellError::GenericError {
|
||||
error: "Error casting to string".into(),
|
||||
|
Loading…
Reference in New Issue
Block a user