fix(polars): conversion from nanoseconds to time_units in Datetime and Duration parsing (#15637)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->
The current implementation improperly inverts the conversion from
nanoseconds to the specified time units, resulting in nonsensical
Datetime and Duration parsing and integer overflows when the specified
time unit is not nanoseconds. This PR seeks to correct this conversion
by changing the multiplication to an integer division. Below are
examples highlighting the current and proposed implementations.

## Current Implementation
Specifying a different time unit incorrectly changes the returned value.
```nushell
> [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime<ns,UTC>"}
╭───┬───────────────────────╮
│ # │           a           │
├───┼───────────────────────┤
│ 0 │ 04/01/2024 12:00:00AM │
╰───┴───────────────────────╯

> [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime<ms,UTC>"}
╭───┬───────────────────────╮
│ # │           a           │
├───┼───────────────────────┤
│ 0 │ 06/27/2035 11:22:33PM │ <-- changing the time unit should not change the actual value
╰───┴───────────────────────╯

> [[a]; [1day]] | polars into-df --schema {a: "duration<ns>"}
╭───┬────────────────╮
│ # │       a        │
├───┼────────────────┤
│ 0 │ 86400000000000 │
╰───┴────────────────╯

> [[a]; [1day]] | polars into-df --schema {a: "duration<ms>"}
╭───┬──────────────────────╮
│ # │          a           │
├───┼──────────────────────┤
│ 0 │ -5833720368547758080 │ <-- i64 overflow
╰───┴──────────────────────╯

```

## Proposed Implementation
```nushell
> [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime<ns,UTC>"}
╭───┬───────────────────────╮
│ # │           a           │
├───┼───────────────────────┤
│ 0 │ 04/01/2024 12:00:00AM │
╰───┴───────────────────────╯

> [[a]; [2024-04-01]] | polars into-df --schema {a: "datetime<ms,UTC>"}
╭───┬───────────────────────╮
│ # │           a           │
├───┼───────────────────────┤
│ 0 │ 04/01/2024 12:00:00AM │
╰───┴───────────────────────╯

> [[a]; [1day]] | polars into-df --schema {a: "duration<ns>"}
╭───┬────────────────╮
│ # │       a        │
├───┼────────────────┤
│ 0 │ 86400000000000 │
╰───┴────────────────╯

> [[a]; [1day]] | polars into-df --schema {a: "duration<ms>"}
╭───┬──────────╮
│ # │    a     │
├───┼──────────┤
│ 0 │ 86400000 │
╰───┴──────────╯
```

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
No user-facing breaking change.

Developer breaking change: to surface overflow as an error instead of
silently wrapping, the return type of `nanos_from_timeunit` was changed
from `i64` to `Result<i64, ShellError>`. The new `nanos_to_timeunit`
function returns `Result<i64, ShellError>` as well.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->
No additional examples were added, but I'd be happy to add a few if
needed. The covering tests just didn't fit well into any examples.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
pyz4 2025-04-24 17:45:36 -04:00 committed by GitHub
parent 05c36d1bc7
commit 715b0d90a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -440,8 +440,8 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
.iter()
.map(|v| {
value_to_option(v, |v| {
v.as_duration().map(|v| nanos_from_timeunit(v, *time_unit))
})
v.as_duration().map(|v| nanos_to_timeunit(v, *time_unit))
}?)
})
.collect();
Ok(Series::new(name, series_values?))
@ -489,8 +489,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
(Some(tz), Value::Date { val, .. }) => {
// If there is a timezone specified, make sure
// the value is converted to it
Ok(tz
.parse::<Tz>()
tz.parse::<Tz>()
.map(|tz| val.with_timezone(&tz))
.map_err(|e| ShellError::GenericError {
error: "Error parsing timezone".into(),
@ -500,11 +499,13 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
inner: vec![],
})?
.timestamp_nanos_opt()
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
.map(|nanos| nanos_to_timeunit(nanos, *tu))
.transpose()
}
(None, Value::Date { val, .. }) => Ok(val
(None, Value::Date { val, .. }) => val
.timestamp_nanos_opt()
.map(|nanos| nanos_from_timeunit(nanos, *tu))),
.map(|nanos| nanos_to_timeunit(nanos, *tu))
.transpose(),
_ => Ok(None),
}
@ -1160,7 +1161,7 @@ fn series_to_values(
.map(|v| match v {
Some(a) => {
// elapsed time in nano/micro/milliseconds since 1970-01-01
let nanos = nanos_from_timeunit(a, *time_unit);
let nanos = nanos_from_timeunit(a, *time_unit)?;
let datetime = datetime_from_epoch_nanos(nanos, tz, span)?;
Ok(Value::date(datetime, span))
}
@ -1278,7 +1279,7 @@ fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result<Value, ShellEr
.map(|datetime| Value::date(datetime, span))
}
AnyValue::Datetime(a, time_unit, tz) => {
let nanos = nanos_from_timeunit(*a, *time_unit);
let nanos = nanos_from_timeunit(*a, *time_unit)?;
datetime_from_epoch_nanos(nanos, &tz.cloned(), span)
.map(|datetime| Value::date(datetime, span))
}
@ -1365,12 +1366,35 @@ fn nanos_per_day(days: i32) -> i64 {
days as i64 * NANOS_PER_DAY
}
fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 {
a * match time_unit {
/// Converts `a`, a count expressed in `time_unit`, into nanoseconds.
///
/// The value is multiplied by the number of nanoseconds in one `time_unit`
/// using a checked multiplication.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` when the multiplication overflows
/// `i64`, rather than producing a wrapped value.
fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> Result<i64, ShellError> {
a.checked_mul(match time_unit {
TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds
TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds
TimeUnit::Nanoseconds => 1, // Already in nanoseconds
}
})
.ok_or_else(|| ShellError::GenericError {
error: format!("Converting from {time_unit} to nanoseconds caused an overflow"),
msg: "".into(),
span: None,
help: None,
inner: vec![],
})
}
/// Converts `a`, a count of nanoseconds, into the given `time_unit`.
///
/// The conversion is an integer division by the number of nanoseconds in one
/// `time_unit`, so any sub-unit remainder is truncated toward zero.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` if the checked division yields `None`.
fn nanos_to_timeunit(a: i64, time_unit: TimeUnit) -> Result<i64, ShellError> {
    // Number of nanoseconds contained in a single `time_unit`.
    let nanos_per_unit: i64 = match time_unit {
        TimeUnit::Nanoseconds => 1,
        TimeUnit::Microseconds => 1_000,
        TimeUnit::Milliseconds => 1_000_000,
    };

    match a.checked_div(nanos_per_unit) {
        Some(converted) => Ok(converted),
        None => Err(ShellError::GenericError {
            error: format!("Converting from nanoseconds to {time_unit} caused an overflow"),
            msg: "".into(),
            span: None,
            help: None,
            inner: vec![],
        }),
    }
}
fn datetime_from_epoch_nanos(