feat(polars): add polars truncate for rounding datetimes (#15582)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
This PR directly ports the polars function `polars.Expr.dt.truncate`
(https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html),
which rounds each datetime down to the start of a period of arbitrarily
specified length. This function is particularly useful when truncating to
variable-length periods such as months or quarters. See below for examples.

```nushell
#  Truncate a series of dates by period length
  > seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated) | polars collect
  ╭───┬───────────────────────┬───────────────────────╮
  │ # │       datetime        │       truncated       │
  ├───┼───────────────────────┼───────────────────────┤
  │ 0 │ 01/01/2025 12:00:00AM │ 12/30/2024 04:49:00PM │
  │ 1 │ 02/12/2025 12:00:00AM │ 02/08/2025 09:45:00PM │
  │ 2 │ 03/26/2025 12:00:00AM │ 03/21/2025 02:41:00AM │
  │ 3 │ 05/07/2025 12:00:00AM │ 05/05/2025 08:14:00AM │
  ╰───┴───────────────────────┴───────────────────────╯

#  Truncate based on period length measured in quarters and months
> seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 1q5mo | polars as truncated) | polars collect
╭───┬───────────────────────┬───────────────────────╮
│ # │       datetime        │       truncated       │
├───┼───────────────────────┼───────────────────────┤
│ 0 │ 01/01/2025 12:00:00AM │ 09/01/2024 12:00:00AM │
│ 1 │ 02/12/2025 12:00:00AM │ 09/01/2024 12:00:00AM │
│ 2 │ 03/26/2025 12:00:00AM │ 09/01/2024 12:00:00AM │
│ 3 │ 05/07/2025 12:00:00AM │ 05/01/2025 12:00:00AM │
╰───┴───────────────────────┴───────────────────────╯

```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No breaking changes. This PR introduces a new command, `polars truncate`.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
An example test was added.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-04-16 17:17:49 -04:00 committed by GitHub
parent eff063822a
commit 669b44ad7d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 217 additions and 3 deletions

View File

@ -14,6 +14,7 @@ mod get_weekday;
mod get_year; mod get_year;
mod replace_time_zone; mod replace_time_zone;
mod strftime; mod strftime;
mod truncate;
use crate::PolarsPlugin; use crate::PolarsPlugin;
use nu_plugin::PluginCommand; use nu_plugin::PluginCommand;
@ -34,12 +35,14 @@ pub use get_weekday::GetWeekDay;
pub use get_year::GetYear; pub use get_year::GetYear;
pub use replace_time_zone::ReplaceTimeZone; pub use replace_time_zone::ReplaceTimeZone;
pub use strftime::StrFTime; pub use strftime::StrFTime;
pub use truncate::Truncate;
pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> { pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
vec![ vec![
Box::new(ExprDatePart),
Box::new(AsDate), Box::new(AsDate),
Box::new(AsDateTime), Box::new(AsDateTime),
Box::new(ConvertTimeZone),
Box::new(ExprDatePart),
Box::new(GetDay), Box::new(GetDay),
Box::new(GetHour), Box::new(GetHour),
Box::new(GetMinute), Box::new(GetMinute),
@ -50,8 +53,8 @@ pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPl
Box::new(GetWeek), Box::new(GetWeek),
Box::new(GetWeekDay), Box::new(GetWeekDay),
Box::new(GetYear), Box::new(GetYear),
Box::new(StrFTime),
Box::new(ReplaceTimeZone), Box::new(ReplaceTimeZone),
Box::new(ConvertTimeZone), Box::new(StrFTime),
Box::new(Truncate),
] ]
} }

View File

@ -0,0 +1,211 @@
use crate::{
values::{
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuSchema,
PolarsPluginObject, PolarsPluginType,
},
PolarsPlugin,
};
use std::sync::Arc;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use chrono::DateTime;
use polars::prelude::{DataType, Expr, Field, LiteralValue, PlSmallStr, Schema, TimeUnit};
/// Marker type for the `polars truncate` plugin command.
///
/// It carries no state; all behavior lives in its `PluginCommand` implementation.
#[derive(Clone)]
pub struct Truncate;
impl PluginCommand for Truncate {
type Plugin = PolarsPlugin;
fn name(&self) -> &str {
"polars truncate"
}
fn description(&self) -> &str {
"Divide the date/datetime range into buckets."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.input_output_types(vec![(
Type::Custom("expression".into()),
Type::Custom("expression".into()),
)])
.required(
"every",
SyntaxShape::OneOf(vec![SyntaxShape::Duration, SyntaxShape::String]),
"Period length for every interval (can be duration or str)",
)
.category(Category::Custom("expression".into()))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Truncate a series of dates by period length",
example: r#"seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated)"#,
result: Some(
NuDataFrame::try_from_columns(
vec![
Column::new(
"datetime".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2025-01-01 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-02-12 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-03-26 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-05-07 00:00:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
),
Column::new(
"truncated".to_string(),
vec![
Value::date(
DateTime::parse_from_str(
"2024-12-30 16:49:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-02-08 21:45:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-03-21 02:41:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
Value::date(
DateTime::parse_from_str(
"2025-05-05 08:14:00 +0000",
"%Y-%m-%d %H:%M:%S %z",
)
.expect("date calculation should not fail in test"),
Span::test_data(),
),
],
),
],
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
Field::new(
"datetime".into(),
DataType::Datetime(TimeUnit::Nanoseconds, None),
),
Field::new(
"truncated".into(),
DataType::Datetime(TimeUnit::Nanoseconds, None),
),
])))),
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
}]
}
fn run(
&self,
plugin: &Self::Plugin,
engine: &EngineInterface,
call: &EvaluatedCall,
input: PipelineData,
) -> Result<PipelineData, LabeledError> {
command(plugin, engine, call, input).map_err(LabeledError::from)
}
fn extra_description(&self) -> &str {
r#"Each date/datetime is mapped to the start of its bucket using the corresponding local datetime. Note that weekly buckets start on Monday. Ambiguous results are localised using the DST offset of the original timestamp - for example, truncating '2022-11-06 01:30:00 CST' by '1h' results in '2022-11-06 01:00:00 CST', whereas truncating '2022-11-06 01:30:00 CDT' by '1h' results in '2022-11-06 01:00:00 CDT'.
See Notes in documentation for full list of compatible string values for `every`: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html"#
}
fn search_terms(&self) -> Vec<&str> {
vec![]
}
}
/// Builds a truncated-datetime expression from the expression on the pipeline.
///
/// The first positional argument (`every`) is read as either a duration or a
/// string and forwarded to polars' `dt().truncate(..)` as a string literal.
///
/// # Errors
///
/// * Propagates any error from collecting the pipeline input or reading the
///   positional argument.
/// * `ShellError::IncompatibleParametersSingle` when `every` is neither a
///   duration nor a string; the error span is the argument itself.
/// * The error built by `cant_convert_err` when the pipeline input is not a
///   polars expression.
fn command(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let value = input.into_value(call.head)?;

    let every = match call.req(0)? {
        // handle Value::Duration input for maximum compatibility
        // duration types are always stored as nanoseconds
        Value::Duration { val, .. } => format!("{val}ns"),
        // The matched value is owned, so the string can be moved out directly.
        Value::String { val, .. } => val,
        other => {
            return Err(ShellError::IncompatibleParametersSingle {
                msg: format!("Expected duration or str type but got {}", other.get_type()),
                // Highlight the offending argument rather than the pipeline input.
                span: other.span(),
            });
        }
    };

    match PolarsPluginObject::try_from_value(plugin, &value)? {
        PolarsPluginObject::NuExpression(expr) => {
            let res: NuExpression = expr
                .into_polars()
                .dt()
                .truncate(Expr::Literal(LiteralValue::String(
                    PlSmallStr::from_string(every),
                )))
                .into();
            res.to_pipeline_data(plugin, engine, call.head)
        }
        _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
    }
}
// Unit tests for the `polars truncate` command.
#[cfg(test)]
mod test {
use super::*;
use crate::test::test_polars_plugin_command_with_decls;
use nu_command::SeqDate;
// Runs the examples declared by `Truncate` through the plugin test helper.
// `SeqDate` is passed as an extra declaration; presumably this is needed
// because the example pipeline starts with `seq date`.
#[test]
fn test_examples() -> Result<(), ShellError> {
test_polars_plugin_command_with_decls(&Truncate, vec![Box::new(SeqDate)])
}
}