From 715b0d90a90613a870a4dcf7ff87f811f6e6522e Mon Sep 17 00:00:00 2001 From: pyz4 <42039243+pyz4@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:45:36 -0400 Subject: [PATCH] fix(polars): conversion from nanoseconds to time_units in Datetime and Duration parsing (#15637) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description The current implementation improperly inverts the conversion from nanoseconds to the specified time units, resulting in nonsensical Datetime and Duration parsing and integer overflows when the specified time unit is not nanoseconds. This PR seeks to correct this conversion by changing the multiplication to an integer division. Below are examples highlighting the current and proposed implementations. ## Current Implementation Specifying a different time unit incorrectly changes the returned value. ```nushell > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 06/27/2035 11:22:33PM │ <-- changing the time unit should not change the actual value > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬────────────────╮ │ # │ a │ ├───┼────────────────┤ │ 0 │ 86400000000000 │ ╰───┴────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬──────────────────────╮ │ # │ a │ ├───┼──────────────────────┤ │ 0 │ -5833720368547758080 │ <-- i64 overflow ╰───┴──────────────────────╯ ``` ## Proposed Implementation ```nushell > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ ╰───┴───────────────────────╯ > [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime"} ╭───┬───────────────────────╮ │ # │ a │ ├───┼───────────────────────┤ │ 0 │ 04/01/2024 12:00:00AM │ ╰───┴───────────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬────────────────╮ │ # │ a │ ├───┼────────────────┤ │ 0 │ 86400000000000 │ ╰───┴────────────────╯ > [[a]; [1day]] | polars into-df --schema {a: "duration"} ╭───┬──────────╮ │ # │ a │ ├───┼──────────┤ │ 0 │ 86400000 │ ╰───┴──────────╯ ``` # User-Facing Changes No user-facing breaking change. Developer breaking change: to mitigate the silent overflow in nanoseconds conversion functions `nanos_from_timeunit` and `nanos_to_timeunit` (new), the function signatures were changed from `i64` to `Result`. # Tests + Formatting No additional examples were added, but I'd be happy to add a few if needed. The covering tests just didn't fit well into any examples. # After Submitting --- .../values/nu_dataframe/conversion.rs | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs index 2ba8526794..7c32ec3e62 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/conversion.rs @@ -440,8 +440,8 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result Result { // If there is a timezone specified, make sure // the value is converted to it - Ok(tz - .parse::() + tz.parse::() .map(|tz| val.with_timezone(&tz)) .map_err(|e| ShellError::GenericError { error: "Error parsing timezone".into(), @@ -500,11 +499,13 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result Ok(val + (None, Value::Date { val, .. }) => val .timestamp_nanos_opt() - .map(|nanos| nanos_from_timeunit(nanos, *tu))), + .map(|nanos| nanos_to_timeunit(nanos, *tu)) + .transpose(), _ => Ok(None), } @@ -1160,7 +1161,7 @@ fn series_to_values( .map(|v| match v { Some(a) => { // elapsed time in nano/micro/milliseconds since 1970-01-01 - let nanos = nanos_from_timeunit(a, *time_unit); + let nanos = nanos_from_timeunit(a, *time_unit)?; let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; Ok(Value::date(datetime, span)) } @@ -1278,7 +1279,7 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { - let nanos = nanos_from_timeunit(*a, *time_unit); + let nanos = nanos_from_timeunit(*a, *time_unit)?; datetime_from_epoch_nanos(nanos, &tz.cloned(), span) .map(|datetime| Value::date(datetime, span)) } @@ -1365,12 +1366,35 @@ fn nanos_per_day(days: i32) -> i64 { days as i64 * NANOS_PER_DAY } -fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { - a * match time_unit { +fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> Result { + a.checked_mul(match time_unit { TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds TimeUnit::Nanoseconds => 1, // Already in nanoseconds - } + }) + .ok_or_else(|| ShellError::GenericError { + error: format!("Converting from {time_unit} to nanoseconds caused an overflow"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) +} + +fn nanos_to_timeunit(a: i64, time_unit: TimeUnit) -> Result { + // integer division (rounds to 0) + a.checked_div(match time_unit { + TimeUnit::Microseconds => 1_000i64, // Convert microseconds to nanoseconds + TimeUnit::Milliseconds => 1_000_000i64, // Convert milliseconds to nanoseconds + TimeUnit::Nanoseconds => 1i64, // Already in nanoseconds + }) + .ok_or_else(|| ShellError::GenericError { + error: format!("Converting from nanoseconds to {time_unit} caused an overflow"), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) } fn datetime_from_epoch_nanos(