From 669b44ad7de1de24efa4771b69481eef8dab52ef Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Wed, 16 Apr 2025 17:17:49 -0400 Subject: [PATCH] feat(polars): add `polars truncate` for rounding datetimes (#15582) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR directly ports the polars function `polars.Expr.dt.truncate` (https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html), which rounds a datetime to an arbitrarily specified period length. This function is particularly useful when rounding to variable period lengths such as months or quarters. See below for examples. ```nushell # Truncate a series of dates by period length > seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated) | polars collect ╭───┬───────────────────────┬───────────────────────╮ │ # │ datetime │ truncated │ ├───┼───────────────────────┼───────────────────────┤ │ 0 │ 01/01/2025 12:00:00AM │ 12/30/2024 04:49:00PM │ │ 1 │ 02/12/2025 12:00:00AM │ 02/08/2025 09:45:00PM │ │ 2 │ 03/26/2025 12:00:00AM │ 03/21/2025 02:41:00AM │ │ 3 │ 05/07/2025 12:00:00AM │ 05/05/2025 08:14:00AM │ ╰───┴───────────────────────┴───────────────────────╯ # Truncate based on period length measured in quarters and months > seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 1q5mo | polars as truncated) | polars collect ╭───┬───────────────────────┬───────────────────────╮ │ # │ datetime │ truncated │ ├───┼───────────────────────┼───────────────────────┤ │ 0 │ 01/01/2025 12:00:00AM │ 09/01/2024 12:00:00AM │ │ 1 │ 02/12/2025 12:00:00AM │ 09/01/2024 12:00:00AM │ │ 2 │ 03/26/2025 
12:00:00AM │ 09/01/2024 12:00:00AM │ │ 3 │ 05/07/2025 12:00:00AM │ 05/01/2025 12:00:00AM │ ╰───┴───────────────────────┴───────────────────────╯ ``` # User-Facing Changes No breaking changes. This PR introduces a new command `polars truncate` # Tests + Formatting Example test was added. # After Submitting --- .../src/dataframe/command/datetime/mod.rs | 9 +- .../dataframe/command/datetime/truncate.rs | 211 ++++++++++++++++++ 2 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 crates/nu_plugin_polars/src/dataframe/command/datetime/truncate.rs diff --git a/crates/nu_plugin_polars/src/dataframe/command/datetime/mod.rs b/crates/nu_plugin_polars/src/dataframe/command/datetime/mod.rs index 80c997f6ff..496711f2db 100644 --- a/crates/nu_plugin_polars/src/dataframe/command/datetime/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/command/datetime/mod.rs @@ -14,6 +14,7 @@ mod get_weekday; mod get_year; mod replace_time_zone; mod strftime; +mod truncate; use crate::PolarsPlugin; use nu_plugin::PluginCommand; @@ -34,12 +35,14 @@ pub use get_weekday::GetWeekDay; pub use get_year::GetYear; pub use replace_time_zone::ReplaceTimeZone; pub use strftime::StrFTime; +pub use truncate::Truncate; pub(crate) fn datetime_commands() -> Vec>> { vec![ - Box::new(ExprDatePart), Box::new(AsDate), Box::new(AsDateTime), + Box::new(ConvertTimeZone), + Box::new(ExprDatePart), Box::new(GetDay), Box::new(GetHour), Box::new(GetMinute), @@ -50,8 +53,8 @@ pub(crate) fn datetime_commands() -> Vec &str { + "polars truncate" + } + + fn description(&self) -> &str { + "Divide the date/datetime range into buckets." 
+ } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![( + Type::Custom("expression".into()), + Type::Custom("expression".into()), + )]) + .required( + "every", + SyntaxShape::OneOf(vec![SyntaxShape::Duration, SyntaxShape::String]), + "Period length for every interval (can be duration or str)", + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Truncate a series of dates by period length", + example: r#"seq date -b 2025-01-01 --periods 4 --increment 6wk -o "%Y-%m-%d %H:%M:%S" | polars into-df | polars as-datetime "%F %H:%M:%S" --naive | polars select datetime (polars col datetime | polars truncate 5d37m | polars as truncated)"#, + result: Some( + NuDataFrame::try_from_columns( + vec![ + Column::new( + "datetime".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2025-01-01 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2025-02-12 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2025-03-26 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2025-05-07 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + ], + ), + Column::new( + "truncated".to_string(), + vec![ + Value::date( + DateTime::parse_from_str( + "2024-12-30 16:49:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + Span::test_data(), + ), + Value::date( + DateTime::parse_from_str( + "2025-02-08 21:45:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in 
test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-03-21 02:41:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-05-05 08:14:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                            ],
+                        ),
+                    ],
+                    Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
+                        Field::new(
+                            "datetime".into(),
+                            DataType::Datetime(TimeUnit::Nanoseconds, None),
+                        ),
+                        Field::new(
+                            "truncated".into(),
+                            DataType::Datetime(TimeUnit::Nanoseconds, None),
+                        ),
+                    ])))),
+                )
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
+            ),
+        }]
+    }
+
+    fn run(
+        &self,
+        plugin: &Self::Plugin,
+        engine: &EngineInterface,
+        call: &EvaluatedCall,
+        input: PipelineData,
+    ) -> Result<PipelineData, LabeledError> {
+        command(plugin, engine, call, input).map_err(LabeledError::from)
+    }
+
+    fn extra_description(&self) -> &str {
+        r#"Each date/datetime is mapped to the start of its bucket using the corresponding local datetime. Note that weekly buckets start on Monday. Ambiguous results are localised using the DST offset of the original timestamp - for example, truncating '2022-11-06 01:30:00 CST' by '1h' results in '2022-11-06 01:00:00 CST', whereas truncating '2022-11-06 01:30:00 CDT' by '1h' results in '2022-11-06 01:00:00 CDT'.
+
+        See Notes in documentation for full list of compatible string values for `every`: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.truncate.html"#
+    }
+
+    fn search_terms(&self) -> Vec<&str> {
+        vec![]
+    }
+}
+
+/// Wraps the piped-in polars expression in `dt().truncate(every)`.
+///
+/// `every` is positional argument 0. A nu duration is rendered as `"<n>ns"`;
+/// a string is forwarded unchanged as the polars period string. Any other
+/// argument type, or a pipeline input that is not a polars expression,
+/// produces a `ShellError`.
+fn command(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let value = input.into_value(call.head)?;
+
+    let every = match call.req(0)? {
+        // Accept a nu duration for convenience: its payload is a nanosecond
+        // count, so it is rendered as an "<n>ns" period string.
+        Value::Duration { val, .. } => Ok(format!("{val}ns")),
+        Value::String { val, .. } => Ok(val),
+        x => Err(ShellError::IncompatibleParametersSingle {
+            msg: format!("Expected duration or str type but got {}", x.get_type()),
+            // Label the offending argument itself, not the pipeline input.
+            span: x.span(),
+        }),
+    }?;
+
+    match PolarsPluginObject::try_from_value(plugin, &value)? {
+        PolarsPluginObject::NuExpression(expr) => {
+            let res: NuExpression = expr
+                .into_polars()
+                .dt()
+                .truncate(Expr::Literal(LiteralValue::String(
+                    PlSmallStr::from_string(every),
+                )))
+                .into();
+            res.to_pipeline_data(plugin, engine, call.head)
+        }
+        _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::test::test_polars_plugin_command_with_decls;
+    use nu_command::SeqDate;
+
+    #[test]
+    fn test_examples() -> Result<(), ShellError> {
+        test_polars_plugin_command_with_decls(&Truncate, vec![Box::new(SeqDate)])
+    }
+}