mirror of
https://github.com/nushell/nushell.git
synced 2025-04-28 07:08:20 +02:00
feat(polars): add polars truncate
for rounding datetimes (#15582)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> This PR directly ports the polars function `polars.Expr.dt.truncate` (https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html), which rounds a datetime to an arbitrarily specified period length. This function is particularly useful when rounding to variable period lengths such as months or quarters. See below for examples. 
```nushell # Truncate a series of dates by period length > seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated) | polars collect ╭───┬───────────────────────┬───────────────────────╮ │ # │ datetime │ truncated │ ├───┼───────────────────────┼───────────────────────┤ │ 0 │ 01/01/2025 12:00:00AM │ 12/30/2024 04:49:00PM │ │ 1 │ 02/12/2025 12:00:00AM │ 02/08/2025 09:45:00PM │ │ 2 │ 03/26/2025 12:00:00AM │ 03/21/2025 02:41:00AM │ │ 3 │ 05/07/2025 12:00:00AM │ 05/05/2025 08:14:00AM │ ╰───┴───────────────────────┴───────────────────────╯ # Truncate based on period length measured in quarters and months > seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 1q5mo | polars as truncated) | polars collect ╭───┬───────────────────────┬───────────────────────╮ │ # │ datetime │ truncated │ ├───┼───────────────────────┼───────────────────────┤ │ 0 │ 01/01/2025 12:00:00AM │ 09/01/2024 12:00:00AM │ │ 1 │ 02/12/2025 12:00:00AM │ 09/01/2024 12:00:00AM │ │ 2 │ 03/26/2025 12:00:00AM │ 09/01/2024 12:00:00AM │ │ 3 │ 05/07/2025 12:00:00AM │ 05/01/2025 12:00:00AM │ ╰───┴───────────────────────┴───────────────────────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> No breaking changes. This PR introduces a new command `polars truncate` # Tests + Formatting <!-- Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Example test was added. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
eff063822a
commit
669b44ad7d
@ -14,6 +14,7 @@ mod get_weekday;
|
||||
mod get_year;
|
||||
mod replace_time_zone;
|
||||
mod strftime;
|
||||
mod truncate;
|
||||
|
||||
use crate::PolarsPlugin;
|
||||
use nu_plugin::PluginCommand;
|
||||
@ -34,12 +35,14 @@ pub use get_weekday::GetWeekDay;
|
||||
pub use get_year::GetYear;
|
||||
pub use replace_time_zone::ReplaceTimeZone;
|
||||
pub use strftime::StrFTime;
|
||||
pub use truncate::Truncate;
|
||||
|
||||
pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin>>> {
|
||||
vec![
|
||||
Box::new(ExprDatePart),
|
||||
Box::new(AsDate),
|
||||
Box::new(AsDateTime),
|
||||
Box::new(ConvertTimeZone),
|
||||
Box::new(ExprDatePart),
|
||||
Box::new(GetDay),
|
||||
Box::new(GetHour),
|
||||
Box::new(GetMinute),
|
||||
@ -50,8 +53,8 @@ pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPl
|
||||
Box::new(GetWeek),
|
||||
Box::new(GetWeekDay),
|
||||
Box::new(GetYear),
|
||||
Box::new(StrFTime),
|
||||
Box::new(ReplaceTimeZone),
|
||||
Box::new(ConvertTimeZone),
|
||||
Box::new(StrFTime),
|
||||
Box::new(Truncate),
|
||||
]
|
||||
}
|
||||
|
@ -0,0 +1,211 @@
|
||||
use crate::{
|
||||
values::{
|
||||
cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuSchema,
|
||||
PolarsPluginObject, PolarsPluginType,
|
||||
},
|
||||
PolarsPlugin,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
|
||||
use chrono::DateTime;
|
||||
use polars::prelude::{DataType, Expr, Field, LiteralValue, PlSmallStr, Schema, TimeUnit};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Truncate;
|
||||
|
||||
impl PluginCommand for Truncate {
|
||||
type Plugin = PolarsPlugin;
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"polars truncate"
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Divide the date/datetime range into buckets."
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.input_output_types(vec![(
|
||||
Type::Custom("expression".into()),
|
||||
Type::Custom("expression".into()),
|
||||
)])
|
||||
.required(
|
||||
"every",
|
||||
SyntaxShape::OneOf(vec![SyntaxShape::Duration, SyntaxShape::String]),
|
||||
"Period length for every interval (can be duration or str)",
|
||||
)
|
||||
.category(Category::Custom("expression".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Truncate a series of dates by period length",
|
||||
example: r#"seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated)"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"datetime".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-01-01 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-02-12 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-03-26 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-05-07 00:00:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"truncated".to_string(),
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2024-12-30 16:49:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-02-08 21:45:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-03-21 02:41:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2025-05-05 08:14:00 +0000",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
Span::test_data(),
|
||||
),
|
||||
],
|
||||
),
|
||||
],
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(TimeUnit::Nanoseconds, None),
|
||||
),
|
||||
Field::new(
|
||||
"truncated".into(),
|
||||
DataType::Datetime(TimeUnit::Nanoseconds, None),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
plugin: &Self::Plugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, LabeledError> {
|
||||
command(plugin, engine, call, input).map_err(LabeledError::from)
|
||||
}
|
||||
|
||||
fn extra_description(&self) -> &str {
|
||||
r#"Each date/datetime is mapped to the start of its bucket using the corresponding local datetime. Note that weekly buckets start on Monday. Ambiguous results are localised using the DST offset of the original timestamp - for example, truncating '2022-11-06 01:30:00 CST' by '1h' results in '2022-11-06 01:00:00 CST', whereas truncating '2022-11-06 01:30:00 CDT' by '1h' results in '2022-11-06 01:00:00 CDT'.
|
||||
|
||||
See Notes in documentation for full list of compatible string values for `every`: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html"#
|
||||
}
|
||||
|
||||
fn search_terms(&self) -> Vec<&str> {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
plugin: &PolarsPlugin,
|
||||
engine: &EngineInterface,
|
||||
call: &EvaluatedCall,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let value = input.into_value(call.head)?;
|
||||
|
||||
let every = match call.req(0)? {
|
||||
// handle Value::Duration input for maximum compatibility
|
||||
// duration types are always stored as nanoseconds
|
||||
Value::Duration { val, .. } => Ok(format!("{val}ns")),
|
||||
Value::String { val, .. } => Ok(val.clone()),
|
||||
x => Err(ShellError::IncompatibleParametersSingle {
|
||||
msg: format!("Expected duration or str type but got {}", x.get_type()),
|
||||
span: value.span(),
|
||||
}),
|
||||
}?;
|
||||
|
||||
match PolarsPluginObject::try_from_value(plugin, &value)? {
|
||||
PolarsPluginObject::NuExpression(expr) => {
|
||||
let res: NuExpression = expr
|
||||
.into_polars()
|
||||
.dt()
|
||||
.truncate(Expr::Literal(LiteralValue::String(
|
||||
PlSmallStr::from_string(every),
|
||||
)))
|
||||
.into();
|
||||
res.to_pipeline_data(plugin, engine, call.head)
|
||||
}
|
||||
_ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    use crate::test::test_polars_plugin_command_with_decls;
    use nu_command::SeqDate;

    /// Runs the command's examples through the plugin test harness. `SeqDate` is supplied as an
    /// extra declaration because the example pipeline starts with `seq date`.
    #[test]
    fn test_examples() -> Result<(), ShellError> {
        let truncate_cmd = Truncate;
        test_polars_plugin_command_with_decls(&truncate_cmd, vec![Box::new(SeqDate)])
    }
}
|
Loading…
Reference in New Issue
Block a user