feat(polars): enable as_date and as_datetime to handle expressions as inputs (#15590)

<!--
If this PR closes one or more issues, you can automatically link the PR to
them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR is a follow-up to the previous PR #15557 and part of a wider
campaign to enable certain polars commands that only operated on the
entire dataframe to also operate on expressions. Here, we enable two
commands `polars as-date` and `polars as-datetime` to receive
expressions as inputs so that they may be used on specific columns in a
dataframe with multiple columns of different types. See examples below.

```nushell
> [[a b]; ["2025-04-01" 1] ["2025-04-02" 2] ["2025-04-03" 3]] | polars into-df | polars select (polars col a | polars as-date %Y-%m-%d) b | polars collect
╭───┬───────────────────────┬───╮
│ # │           a           │ b │
├───┼───────────────────────┼───┤
│ 0 │ 04/01/2025 12:00:00AM │ 1 │
│ 1 │ 04/02/2025 12:00:00AM │ 2 │
│ 2 │ 04/03/2025 12:00:00AM │ 3 │
╰───┴───────────────────────┴───╯

> seq date -b 2025-04-01 --periods 4 --increment 25min -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars select (polars col 0 | polars as-datetime "%Y-%m-%d %H:%M:%S") | polars collect
╭───┬───────────────────────╮
│ # │           0           │
├───┼───────────────────────┤
│ 0 │ 04/01/2025 12:00:00AM │
│ 1 │ 04/01/2025 12:25:00AM │
│ 2 │ 04/01/2025 12:50:00AM │
│ 3 │ 04/01/2025 01:15:00AM │
╰───┴───────────────────────╯

``` 

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. Users have the additional option to use `polars
as-date` and `polars as-datetime` in expressions that operate on
specific columns.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->
Examples have been added to `polars as-date` and `polars as-datetime`.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-04-18 16:50:36 -04:00 committed by GitHub
parent 95998bdd53
commit a2dc3e3b33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 357 additions and 23 deletions

View File

@ -1,13 +1,19 @@
use crate::{values::CustomValueSupport, PolarsPlugin};
use super::super::super::values::NuDataFrame;
use crate::{
values::{
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
NuSchema, PolarsPluginObject, PolarsPluginType,
},
PolarsPlugin,
};
use chrono::DateTime;
use std::sync::Arc;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
SyntaxShape, Type, Value,
};
use polars::prelude::{IntoSeries, StringMethods};
use polars::prelude::{col, DataType, Field, IntoSeries, Schema, StringMethods, StrptimeOptions};
#[derive(Clone)]
pub struct AsDate;
@ -34,10 +40,16 @@ impl PluginCommand for AsDate {
Signature::build(self.name())
.required("format", SyntaxShape::String, "formatting date string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.input_output_type(
.input_output_types(vec![
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
),
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
])
.category(Category::Custom("dataframe".into()))
}
@ -46,12 +58,110 @@ impl PluginCommand for AsDate {
Example {
description: "Converts string to date",
example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
result: None, // help is needed on how to provide results
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"date".to_string(),
vec![
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
// We therefore force the type to be DataType::Date in the schema
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
Field::new("date".into(), DataType::Date),
])))),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Converts string to date",
example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-df | polars as-date "%Y-%m-%d" --not-exact"#,
result: None,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"date".to_string(),
vec![
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
// We therefore force the type to be DataType::Date in the schema
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
Field::new("date".into(), DataType::Date),
])))),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Converts string to date in an expression",
example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-lazy | polars select (polars col 0 | polars as-date "%Y-%m-%d" --not-exact)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"date".to_string(),
vec![
// Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
// We therefore force the type to be DataType::Date in the schema
Value::date(
DateTime::parse_from_str(
"2021-12-30 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2021-12-31 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
Field::new("date".into(), DataType::Date),
])))),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Output is of date type",
@ -85,8 +195,61 @@ fn command(
) -> Result<PipelineData, ShellError> {
let format: String = call.req(0)?;
let not_exact = call.has_flag("not-exact")?;
let value = input.into_value(call.head)?;
let options = StrptimeOptions {
format: Some(format.into()),
strict: true,
exact: !not_exact,
cache: Default::default(),
};
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy, options),
PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df, options),
PolarsPluginObject::NuExpression(expr) => {
let res: NuExpression = expr.into_polars().str().to_date(options).into();
res.to_pipeline_data(plugin, engine, call.head)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
}
/// Parses every column of a lazy frame as dates and hands the result back to the engine.
///
/// `options` carries the strptime settings (format string, strictness, exact matching)
/// assembled by the caller. The wildcard selector `col("*")` means the conversion is
/// applied to all columns of the frame rather than to a single named one.
///
/// Returns whatever `to_pipeline_data` produces for the new lazy frame, including any
/// `ShellError` it reports.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
    options: StrptimeOptions,
) -> Result<PipelineData, ShellError> {
    let frame = lazy.to_polars();
    let as_dates = col("*").str().to_date(options);
    let parsed = frame.select([as_dates]);

    // NOTE(review): the leading `false` is presumably the "built from an eager frame"
    // flag of `NuLazyFrame::new` — confirm against its definition.
    NuLazyFrame::new(false, parsed).to_pipeline_data(plugin, engine, call.head)
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
options: StrptimeOptions,
) -> Result<PipelineData, ShellError> {
let format = if let Some(format) = options.format {
format.to_string()
} else {
unreachable!("`format` will never be None")
};
let not_exact = !options.exact;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),

View File

@ -1,15 +1,22 @@
use crate::{values::CustomValueSupport, PolarsPlugin};
use crate::{
values::{
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
NuSchema, PolarsPluginObject, PolarsPluginType,
},
PolarsPlugin,
};
use chrono::DateTime;
use std::sync::Arc;
use super::super::super::values::{Column, NuDataFrame, NuSchema};
use chrono::DateTime;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
use polars::prelude::{
col, DataType, Expr, Field, IntoSeries, LiteralValue, PlSmallStr, Schema, StringMethods,
StrptimeOptions, TimeUnit,
};
#[derive(Clone)]
pub struct AsDateTime;
@ -42,14 +49,30 @@ impl PluginCommand for AsDateTime {
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
),
])
.required("format", SyntaxShape::String, "formatting date time string")
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware). Ignored if input is an expression.", None)
.named(
"ambiguous",
SyntaxShape::OneOf(vec![SyntaxShape::String, SyntaxShape::Nothing]),
r#"Determine how to deal with ambiguous datetimes:
`raise` (default): raise error
`earliest`: use the earliest datetime
`latest`: use the latest datetime
`null`: set to null
Used only when input is a lazyframe or expression and ignored otherwise"#,
Some('a'),
) .category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
@ -158,6 +181,63 @@ impl PluginCommand for AsDateTime {
.into_value(Span::test_data()),
),
},
Example {
description: "Converts string to datetime using the `--not-exact` flag even with excessive symbols in an expression",
example: r#"["2025-11-02 00:00:00", "2025-11-02 01:00:00", "2025-11-02 02:00:00", "2025-11-02 03:00:00"] | polars into-df | polars select (polars col 0 | polars as-datetime "%Y-%m-%d %H:%M:%S")"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2025-11-02 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-11-02 01:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-11-02 02:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-11-02 03:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
)],
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
Field::new(
"datetime".into(),
DataType::Datetime(
TimeUnit::Nanoseconds,
None
),
),
])))),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
]
}
@ -182,7 +262,98 @@ fn command(
let not_exact = call.has_flag("not-exact")?;
let tz_aware = !call.has_flag("naive")?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let value = input.into_value(call.head)?;
let options = StrptimeOptions {
format: Some(format.into()),
strict: true,
exact: !not_exact,
cache: Default::default(),
};
let ambiguous = match call.get_flag::<Value>("ambiguous")? {
Some(Value::String { val, internal_span }) => match val.as_str() {
"raise" | "earliest" | "latest" => Ok(val),
_ => Err(ShellError::GenericError {
error: "Invalid argument value".into(),
msg: "`ambiguous` must be one of raise, earliest, latest, or null".into(),
span: Some(internal_span),
help: None,
inner: vec![],
}),
},
Some(Value::Nothing { .. }) => Ok("null".into()),
Some(_) => unreachable!("Argument only accepts string or null."),
None => Ok("raise".into()),
}
.map_err(LabeledError::from)?;
match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuLazyFrame(lazy) => {
command_lazy(plugin, engine, call, lazy, options, ambiguous)
}
PolarsPluginObject::NuDataFrame(df) => {
command_eager(plugin, engine, call, df, options, tz_aware)
}
PolarsPluginObject::NuExpression(expr) => {
let res: NuExpression = expr
.into_polars()
.str()
.to_datetime(
None,
None,
options,
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))),
)
.into();
res.to_pipeline_data(plugin, engine, call.head)
}
_ => Err(cant_convert_err(
&value,
&[
PolarsPluginType::NuDataFrame,
PolarsPluginType::NuLazyFrame,
PolarsPluginType::NuExpression,
],
)),
}
}
/// Parses every column of a lazy frame as datetimes and hands the result back to the engine.
///
/// `options` carries the strptime settings (format string, strictness, exact matching)
/// assembled by the caller. `ambiguous` is the name of the policy for ambiguous
/// datetimes (`raise`, `earliest`, `latest` or `null`); it is forwarded to polars as a
/// string literal expression. The wildcard selector `col("*")` means the conversion is
/// applied to all columns of the frame rather than to a single named one.
///
/// Returns whatever `to_pipeline_data` produces for the new lazy frame, including any
/// `ShellError` it reports.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
    options: StrptimeOptions,
    ambiguous: String,
) -> Result<PipelineData, ShellError> {
    let frame = lazy.to_polars();
    let ambiguity_policy = Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous)));

    // NOTE(review): the two `None` arguments are presumably the time unit and time zone,
    // left for polars to choose — confirm against `to_datetime`'s signature.
    let as_datetimes = col("*")
        .str()
        .to_datetime(None, None, options, ambiguity_policy);
    let parsed = frame.select([as_datetimes]);

    // NOTE(review): the leading `false` is presumably the "built from an eager frame"
    // flag of `NuLazyFrame::new` — confirm against its definition.
    NuLazyFrame::new(false, parsed).to_pipeline_data(plugin, engine, call.head)
}
fn command_eager(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
options: StrptimeOptions,
tz_aware: bool,
) -> Result<PipelineData, ShellError> {
let format = if let Some(format) = options.format {
format.to_string()
} else {
unreachable!("`format` will never be None")
};
let not_exact = !options.exact;
let series = df.as_series(call.head)?;
let casted = series.str().map_err(|e| ShellError::GenericError {
error: "Error casting to string".into(),