mirror of
https://github.com/nushell/nushell.git
synced 2025-05-29 14:21:45 +02:00
FIX polars as-datetime
: ignores timezone information on conversion (#15490)
# Description This PR seeks to fix an error in `polars as-datetime` where timezone information is entirely ignored. This behavior raises a host of silent errors when dealing with datetime conversions (see example below). ## Current Implementation Timezones are entirely ignored and datetimes with different timezones are converted to the same naive datetimes even when the user specifically indicates that the timezone should be parsed. For example, "2021-12-30 00:00:00 +0000" and "2021-12-30 00:00:00 -0400" will both be parsed to "2021-12-30 00:00:00" even when the format string specifically includes "%z". ``` $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 12:00:00AM │ <-- Same datetime even though the first is +0000 and second is -0400 ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" | polars schema ╭──────────┬──────────────╮ │ datetime │ datetime<ns> │ ╰──────────┴──────────────╯ ``` ## New Implementation Datetimes are converted to UTC and timezone information is retained. ``` $ "2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 04:00:00AM │ <-- Converted to UTC ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" | polars schema ╭──────────┬───────────────────╮ │ datetime │ datetime<ns, UTC> │ ╰──────────┴───────────────────╯ ``` The user may intentionally ignore timezone information by setting the `--naive` flag. ``` $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" --naive ╭───┬───────────────────────╮ │ # │ datetime │ ├───┼───────────────────────┤ │ 0 │ 12/30/2021 12:00:00AM │ │ 1 │ 12/30/2021 12:00:00AM │ <-- the -0400 offset is ignored when --naive is set ╰───┴───────────────────────╯ $ ["2021-12-30 00:00:00 +0000" "2021-12-30 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z" --naive | polars schema ╭──────────┬──────────────╮ │ datetime │ datetime<ns> │ ╰──────────┴──────────────╯ ``` # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> `polars as-datetime` will now account for timezone information and return type `datetime<ns,UTC>` rather than `datetime<ns>` by default. The user can replicate the previous behavior by setting `--naive`. # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> Tests that incorporated `polars as-datetime` had to be tweaked to include `--naive` flag to replicate previous behavior. # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
237a685605
commit
7ca2a6f8ac
@ -39,14 +39,14 @@ impl PluginCommand for ToRepr {
|
||||
result: Some(Value::string(
|
||||
r#"
|
||||
shape: (2, 2)
|
||||
┌─────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns] ┆ i64 │
|
||||
╞═════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 ┆ 4 │
|
||||
└─────────────────────┴─────┘"#
|
||||
┌─────────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns, UTC] ┆ i64 │
|
||||
╞═════════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 UTC ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 UTC ┆ 4 │
|
||||
└─────────────────────────┴─────┘"#
|
||||
.trim(),
|
||||
Span::test_data(),
|
||||
)),
|
||||
@ -54,18 +54,18 @@ shape: (2, 2)
|
||||
Example {
|
||||
description: "Shows lazy dataframe in repr format",
|
||||
example:
|
||||
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-df | polars into-lazy | polars into-repr",
|
||||
"[[a b]; [2025-01-01 2] [2025-01-02 4]] | polars into-lazy | polars into-repr",
|
||||
result: Some(Value::string(
|
||||
r#"
|
||||
shape: (2, 2)
|
||||
┌─────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns] ┆ i64 │
|
||||
╞═════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 ┆ 4 │
|
||||
└─────────────────────┴─────┘"#
|
||||
┌─────────────────────────┬─────┐
|
||||
│ a ┆ b │
|
||||
│ --- ┆ --- │
|
||||
│ datetime[ns, UTC] ┆ i64 │
|
||||
╞═════════════════════════╪═════╡
|
||||
│ 2025-01-01 00:00:00 UTC ┆ 2 │
|
||||
│ 2025-01-02 00:00:00 UTC ┆ 4 │
|
||||
└─────────────────────────┴─────┘"#
|
||||
.trim(),
|
||||
Span::test_data(),
|
||||
)),
|
||||
|
@ -1,6 +1,7 @@
|
||||
use crate::{values::CustomValueSupport, PolarsPlugin};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::super::super::values::{Column, NuDataFrame};
|
||||
use super::super::super::values::{Column, NuDataFrame, NuSchema};
|
||||
|
||||
use chrono::DateTime;
|
||||
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
|
||||
@ -8,7 +9,7 @@ use nu_protocol::{
|
||||
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
|
||||
Value,
|
||||
};
|
||||
use polars::prelude::{IntoSeries, StringMethods, TimeUnit};
|
||||
use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AsDateTime;
|
||||
@ -43,6 +44,7 @@ impl PluginCommand for AsDateTime {
|
||||
Signature::build(self.name())
|
||||
.required("format", SyntaxShape::String, "formatting date time string")
|
||||
.switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
|
||||
.switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
|
||||
.input_output_type(
|
||||
Type::Custom("dataframe".into()),
|
||||
Type::Custom("dataframe".into()),
|
||||
@ -54,7 +56,7 @@ impl PluginCommand for AsDateTime {
|
||||
vec![
|
||||
Example {
|
||||
description: "Converts string to datetime",
|
||||
example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S""#,
|
||||
example: r#"["2021-12-30 00:00:00 -0400" "2021-12-31 00:00:00 -0400"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S %z""#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -62,7 +64,7 @@ impl PluginCommand for AsDateTime {
|
||||
vec![
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-30 00:00:00 +0000",
|
||||
"2021-12-30 00:00:00 -0400",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
@ -70,7 +72,7 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
Value::date(
|
||||
DateTime::parse_from_str(
|
||||
"2021-12-31 00:00:00 +0000",
|
||||
"2021-12-31 00:00:00 -0400",
|
||||
"%Y-%m-%d %H:%M:%S %z",
|
||||
)
|
||||
.expect("date calculation should not fail in test"),
|
||||
@ -86,7 +88,7 @@ impl PluginCommand for AsDateTime {
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime with high resolutions",
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f""#,
|
||||
example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S.%9f" --naive"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -110,7 +112,15 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
None
|
||||
),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -118,7 +128,7 @@ impl PluginCommand for AsDateTime {
|
||||
},
|
||||
Example {
|
||||
description: "Converts string to datetime using the `--not-exact` flag even with excessive symbols",
|
||||
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact"#,
|
||||
example: r#"["2021-12-30 00:00:00 GMT+4"] | polars into-df | polars as-datetime "%Y-%m-%d %H:%M:%S" --not-exact --naive"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
@ -134,7 +144,15 @@ impl PluginCommand for AsDateTime {
|
||||
),
|
||||
],
|
||||
)],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
None
|
||||
),
|
||||
),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -162,6 +180,7 @@ fn command(
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let format: String = call.req(0)?;
|
||||
let not_exact = call.has_flag("not-exact")?;
|
||||
let tz_aware = !call.has_flag("naive")?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
|
||||
let series = df.as_series(call.head)?;
|
||||
@ -177,7 +196,7 @@ fn command(
|
||||
casted.as_datetime_not_exact(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
tz_aware,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
@ -186,7 +205,7 @@ fn command(
|
||||
Some(format.as_str()),
|
||||
TimeUnit::Nanoseconds,
|
||||
false,
|
||||
false,
|
||||
tz_aware,
|
||||
None,
|
||||
&Default::default(),
|
||||
)
|
||||
|
@ -1,7 +1,8 @@
|
||||
use crate::values::NuExpression;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
dataframe::values::{Column, NuDataFrame},
|
||||
dataframe::values::{Column, NuDataFrame, NuSchema},
|
||||
values::CustomValueSupport,
|
||||
PolarsPlugin,
|
||||
};
|
||||
@ -13,7 +14,7 @@ use nu_protocol::{
|
||||
};
|
||||
use polars::{
|
||||
datatypes::{DataType, TimeUnit},
|
||||
prelude::NamedFrom,
|
||||
prelude::{Field, NamedFrom, Schema},
|
||||
series::Series,
|
||||
};
|
||||
|
||||
@ -54,14 +55,20 @@ impl PluginCommand for ExprDatePart {
|
||||
vec![
|
||||
Example {
|
||||
description: "Creates an expression to capture the year date part",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive | polars with-column [(polars col datetime | polars datepart year | polars as datetime_year )]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("datetime".to_string(), vec![Value::test_date(dt)]),
|
||||
Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]),
|
||||
],
|
||||
None,
|
||||
Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
|
||||
Field::new(
|
||||
"datetime".into(),
|
||||
DataType::Datetime(TimeUnit::Nanoseconds, None),
|
||||
),
|
||||
Field::new("datetime_year".into(), DataType::Int64),
|
||||
])))),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
@ -69,7 +76,7 @@ impl PluginCommand for ExprDatePart {
|
||||
},
|
||||
Example {
|
||||
description: "Creates an expression to capture multiple date parts",
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" |
|
||||
example: r#"[["2021-12-30T01:02:03.123456789"]] | polars into-df | polars as-datetime "%Y-%m-%dT%H:%M:%S.%9f" --naive |
|
||||
polars with-column [ (polars col datetime | polars datepart year | polars as datetime_year ),
|
||||
(polars col datetime | polars datepart month | polars as datetime_month ),
|
||||
(polars col datetime | polars datepart day | polars as datetime_day ),
|
||||
|
@ -245,7 +245,10 @@ fn value_to_data_type(value: &Value) -> Option<DataType> {
|
||||
Value::Float { .. } => Some(DataType::Float64),
|
||||
Value::String { .. } => Some(DataType::String),
|
||||
Value::Bool { .. } => Some(DataType::Boolean),
|
||||
Value::Date { .. } => Some(DataType::Date),
|
||||
Value::Date { .. } => Some(DataType::Datetime(
|
||||
TimeUnit::Nanoseconds,
|
||||
Some(PlSmallStr::from_static("UTC")),
|
||||
)),
|
||||
Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)),
|
||||
Value::Filesize { .. } => Some(DataType::Int64),
|
||||
Value::Binary { .. } => Some(DataType::Binary),
|
||||
@ -447,24 +450,28 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::Date { val, .. } = &v {
|
||||
// If there is a timezone specified, make sure
|
||||
// the value is converted to it
|
||||
Ok(maybe_tz
|
||||
.as_ref()
|
||||
.map(|tz| tz.parse::<Tz>().map(|tz| val.with_timezone(&tz)))
|
||||
.transpose()
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error parsing timezone".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
inner: vec![],
|
||||
})?
|
||||
.and_then(|dt| dt.timestamp_nanos_opt())
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
|
||||
} else {
|
||||
Ok(None)
|
||||
match (maybe_tz, &v) {
|
||||
(Some(tz), Value::Date { val, .. }) => {
|
||||
// If there is a timezone specified, make sure
|
||||
// the value is converted to it
|
||||
Ok(tz
|
||||
.parse::<Tz>()
|
||||
.map(|tz| val.with_timezone(&tz))
|
||||
.map_err(|e| ShellError::GenericError {
|
||||
error: "Error parsing timezone".into(),
|
||||
msg: "".into(),
|
||||
span: None,
|
||||
help: Some(e.to_string()),
|
||||
inner: vec![],
|
||||
})?
|
||||
.timestamp_nanos_opt()
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu)))
|
||||
}
|
||||
(None, Value::Date { val, .. }) => Ok(val
|
||||
.timestamp_nanos_opt()
|
||||
.map(|nanos| nanos_from_timeunit(nanos, *tu))),
|
||||
|
||||
_ => Ok(None),
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<Option<i64>>, ShellError>>()?;
|
||||
|
Loading…
x
Reference in New Issue
Block a user