From 2ad0fcb377970fd02260618b297353b060c2fb8c Mon Sep 17 00:00:00 2001 From: Bob Hyman Date: Tue, 7 Mar 2023 18:02:15 -0500 Subject: [PATCH] Fix 8244 -- store timestamps with nanosecond resolution (consistently) (#8337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Fix for data ambiguity noted in #8244. Basic change is to use nanosecond resolution for unix timestamps (stored in type Int). Previously, a timestamp might have seconds, milliseconds or nanoseconds, but it turned out there were overlaps in data ranges between different resolutions, so there wasn't always a unique mapping back to date/time. Due to higher precision, the *range* of dates that timestamps can map to is restricted. Unix timestamps with seconds resolution and 64 bit storage can cover all dates from the Big Bang to eternity. Timestamps with seconds resolution and 32 bit storage can only represent dates from 1901-12-13 through 2038-01-19. The nanoseconds resolution and 64 bit storage used with this fix can represent dates from 1677-09-21T00:12:44 to 2262-04-11T23:47:16, something of a compromise. # User-Facing Changes _(List of all changes that impact the user experience here. This helps us keep track of breaking changes.)_ ## `<datetime> | into int` Converts to nanosecond resolution ```rust 〉date now | into int 1678084730502126846 ``` This is the number of non-leap nanoseconds after the unix epoch date: 1970-01-01T00:00:00+00:00. Conversion fails for dates outside the supported range: ```rust 〉1492-10-12 | into int Error: nu::shell::incorrect_value × Incorrect value. 
╭─[entry #51:1:1] 1 │ 1492-10-12 | into int · ────┬─── · ╰── DateTime out of range for timestamp: 1677-09-21T00:12:43Z to 2262-04-11T23:47:16 ╰──── ``` ## `<int> | into datetime` Can no longer fail or produce incorrect results for any 64-bit input: ```rust 〉0 | into datetime Thu, 01 Jan 1970 00:00:00 +0000 (53 years ago) 〉"7fffffffffffffff" | into int -r 16 | into datetime Fri, 11 Apr 2262 23:47:16 +0000 (in 239 years) 〉("7fffffffffffffff" | into int -r 16) * -1 | into datetime Tue, 21 Sep 1677 00:12:43 +0000 (345 years ago) ``` ## `<date> | date to-record` and `<date> | date to-table` Now both have a `nanosecond` field. ```rust 〉"7fffffffffffffff" | into int -r 16 | into datetime | date to-record ╭────────────┬───────────╮ │ year │ 2262 │ │ month │ 4 │ │ day │ 11 │ │ hour │ 23 │ │ minute │ 47 │ │ second │ 16 │ │ nanosecond │ 854775807 │ │ timezone │ +00:00 │ ╰────────────┴───────────╯ 〉"7fffffffffffffff" | into int -r 16 | into datetime | date to-table ╭───┬──────┬───────┬─────┬──────┬────────┬────────┬────────────┬──────────╮ │ # │ year │ month │ day │ hour │ minute │ second │ nanosecond │ timezone │ ├───┼──────┼───────┼─────┼──────┼────────┼────────┼────────────┼──────────┤ │ 0 │ 2262 │ 4 │ 11 │ 23 │ 47 │ 16 │ 854775807 │ +00:00 │ ╰───┴──────┴───────┴─────┴──────┴────────┴────────┴────────────┴──────────╯ ``` This change was not required by the original issue (#8244), but it is nice to be able to see the nanosecond bits that were present in Nushell's `date` type all along. # Tests + Formatting Don't forget to add tests that cover your changes. 
Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass # After Submitting If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. --- .../src/conversions/into/datetime.rs | 147 ++++++------------ crates/nu-command/src/conversions/into/int.rs | 94 ++++++++++- crates/nu-command/src/date/to_record.rs | 9 +- crates/nu-command/src/date/to_table.rs | 8 +- crates/nu-command/tests/commands/into_int.rs | 40 +++++ crates/nu-protocol/src/value/mod.rs | 9 ++ 6 files changed, 198 insertions(+), 109 deletions(-) diff --git a/crates/nu-command/src/conversions/into/datetime.rs b/crates/nu-command/src/conversions/into/datetime.rs index 89653a23a6..6a8d9955bb 100644 --- a/crates/nu-command/src/conversions/into/datetime.rs +++ b/crates/nu-command/src/conversions/into/datetime.rs @@ -1,6 +1,6 @@ use crate::input_handler::{operate, CmdArgument}; use crate::{generate_strftime_list, parse_date_from_string}; -use chrono::{DateTime, FixedOffset, Local, LocalResult, TimeZone, Utc}; +use chrono::{DateTime, FixedOffset, Local, TimeZone, Utc}; use nu_engine::CallExt; use nu_protocol::ast::Call; use nu_protocol::ast::CellPath; @@ -83,12 +83,12 @@ impl Command for SubCommand { .named( "format", SyntaxShape::String, - "Specify an expected format for parsing strings to datetimes. Use --list to see all possible options", + "Specify expected format of string input to parse to datetime. 
Use --list to see options", Some('f'), ) .switch( "list", - "Show all possible variables for use with the --format flag", + "Show all possible variables for use in --format flag", Some('l'), ) .rest( @@ -125,10 +125,12 @@ impl Command for SubCommand { span: zone.span, }), }; + let format_options = call .get_flag::(engine_state, stack, "format")? .as_ref() .map(|fmt| DatetimeFormat(fmt.to_string())); + let args = Arguments { format_options, zone_options, @@ -139,7 +141,7 @@ impl Command for SubCommand { } fn usage(&self) -> &str { - "Convert text into a datetime." + "Convert text or timestamp into a datetime." } fn search_terms(&self) -> Vec<&str> { @@ -147,53 +149,44 @@ impl Command for SubCommand { } fn examples(&self) -> Vec { - let example_result_1 = |secs: i64, nsecs: u32| match Utc.timestamp_opt(secs, nsecs) { - LocalResult::Single(dt) => Some(Value::Date { - val: dt.into(), + let example_result_1 = |nanos: i64| { + Some(Value::Date { + val: Utc.timestamp_nanos(nanos).into(), span: Span::test_data(), - }), - _ => panic!("datetime: help example is invalid"), - }; - let example_result_2 = |millis: i64| match Utc.timestamp_millis_opt(millis) { - LocalResult::Single(dt) => Some(Value::Date { - val: dt.into(), - span: Span::test_data(), - }), - _ => panic!("datetime: help example is invalid"), + }) }; vec![ Example { - description: "Convert to datetime", + description: "Convert any standard timestamp string to datetime", example: "'27.02.2021 1:55 pm +0000' | into datetime", - result: example_result_1(1614434100,0) + #[allow(clippy::inconsistent_digit_grouping)] + result: example_result_1(1614434100_000000000), }, Example { - description: "Convert to datetime", - example: "'2021-02-27T13:55:40+00:00' | into datetime", - result: example_result_1(1614434140, 0) + description: "Convert any standard timestamp string to datetime", + example: "'2021-02-27T13:55:40.2246+00:00' | into datetime", + #[allow(clippy::inconsistent_digit_grouping)] + result: 
example_result_1(1614434140_224600000), }, Example { - description: "Convert to datetime using a custom format", + description: + "Convert non-standard timestamp string to datetime using a custom format", example: "'20210227_135540+0000' | into datetime -f '%Y%m%d_%H%M%S%z'", - result: example_result_1(1614434140, 0) - - }, - Example { - description: "Convert timestamp (no larger than 8e+12) to a UTC datetime", - example: "1614434140 | into datetime", - result: example_result_1(1614434140, 0) + #[allow(clippy::inconsistent_digit_grouping)] + result: example_result_1(1614434140_000000000), }, Example { description: - "Convert timestamp (no larger than 8e+12) to datetime using a specified timezone offset (between -12 and 12)", - example: "1614434140 | into datetime -o +9", - result: None, + "Convert nanosecond-precision unix timestamp to a datetime with offset from UTC", + example: "1614434140123456789 | into datetime --offset -5", + #[allow(clippy::inconsistent_digit_grouping)] + result: example_result_1(1614434140_123456789), }, Example { - description: - "Convert a millisecond-precise timestamp", - example: "1656165681720 | into datetime", - result: example_result_2(1656165681720) + description: "Convert standard (seconds) unix timestamp to a UTC datetime", + example: "1614434140 * 1_000_000_000 | into datetime", + #[allow(clippy::inconsistent_digit_grouping)] + result: example_result_1(1614434140_000000000), }, ] } @@ -205,6 +198,9 @@ struct DatetimeFormat(String); fn action(input: &Value, args: &Arguments, head: Span) -> Value { let timezone = &args.zone_options; let dateformat = &args.format_options; + + const HOUR: i32 = 60 * 60; + // Check to see if input looks like a Unix timestamp (i.e. can it be parsed to an int?) let timestamp = match input { Value::Int { val, .. 
} => Ok(*val), @@ -224,66 +220,36 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { }; if let Ok(ts) = timestamp { - const TIMESTAMP_BOUND: i64 = 8.2e+12 as i64; - const HOUR: i32 = 3600; - - if ts.abs() > TIMESTAMP_BOUND { - return Value::Error { - error: ShellError::UnsupportedInput( - "timestamp is out of range; it should between -8e+12 and 8e+12".to_string(), - format!("timestamp is {ts:?}"), - head, - // Again, can safely unwrap this from here on - input.expect_span(), - ), - }; - } - macro_rules! match_datetime { ($expr:expr) => { match $expr { - LocalResult::Single(dt) => Value::Date { + dt => Value::Date { val: dt.into(), span: head, }, - _ => { - return Value::Error { - error: ShellError::UnsupportedInput( - "The given local datetime representation is invalid.".into(), - format!("timestamp is {:?}", ts), - head, - head, - ), - }; - } } }; } return match timezone { + // note all these `.timestamp_nanos()` could overflow if we didn't check range in ` | into int`. 
+ // default to UTC - None => { - // be able to convert chrono::Utc::now() - match ts.to_string().len() { - x if x > 13 => Value::Date { - val: Utc.timestamp_nanos(ts).into(), - span: head, - }, - x if x > 10 => match_datetime!(Utc.timestamp_millis_opt(ts)), - _ => match_datetime!(Utc.timestamp_opt(ts, 0)), - } - } + None => Value::Date { + val: Utc.timestamp_nanos(ts).into(), + span: head, + }, Some(Spanned { item, span }) => match item { - Zone::Utc => match_datetime!(Utc.timestamp_opt(ts, 0)), - Zone::Local => match_datetime!(Local.timestamp_opt(ts, 0)), + Zone::Utc => match_datetime!(Utc.timestamp_nanos(ts)), + Zone::Local => match_datetime!(Local.timestamp_nanos(ts)), Zone::East(i) => match FixedOffset::east_opt((*i as i32) * HOUR) { - Some(eastoffset) => match_datetime!(eastoffset.timestamp_opt(ts, 0)), + Some(eastoffset) => match_datetime!(eastoffset.timestamp_nanos(ts)), None => Value::Error { error: ShellError::DatetimeParseError(input.debug_value(), *span), }, }, Zone::West(i) => match FixedOffset::west_opt((*i as i32) * HOUR) { - Some(westoffset) => match_datetime!(westoffset.timestamp_opt(ts, 0)), + Some(westoffset) => match_datetime!(westoffset.timestamp_nanos(ts)), None => Value::Error { error: ShellError::DatetimeParseError(input.debug_value(), *span), }, @@ -297,7 +263,7 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { }, }, }; - } + }; // If input is not a timestamp, try parsing it as a string match input { @@ -386,7 +352,7 @@ mod tests { #[test] fn takes_timestamp_offset() { - let date_str = Value::test_string("1614434140"); + let date_str = Value::test_string("1614434140000000000"); let timezone_option = Some(Spanned { item: Zone::East(8), span: Span::test_data(), @@ -408,7 +374,7 @@ mod tests { #[test] fn takes_timestamp_offset_as_int() { - let date_int = Value::test_int(1614434140); + let date_int = Value::test_int(1614434140_000000000); let timezone_option = Some(Spanned { item: Zone::East(8), span: Span::test_data(), @@ 
-430,7 +396,7 @@ mod tests { #[test] fn takes_timestamp() { - let date_str = Value::test_string("1614434140"); + let date_str = Value::test_string("1614434140000000000"); let timezone_option = Some(Spanned { item: Zone::Local, span: Span::test_data(), @@ -451,7 +417,7 @@ mod tests { #[test] fn takes_timestamp_without_timezone() { - let date_str = Value::test_string("1614434140"); + let date_str = Value::test_string("1614434140000000000"); let args = Arguments { zone_options: None, format_options: None, @@ -467,23 +433,6 @@ mod tests { assert_eq!(actual, expected) } - #[test] - fn takes_invalid_timestamp() { - let date_str = Value::test_string("10440970000000"); - let timezone_option = Some(Spanned { - item: Zone::Utc, - span: Span::test_data(), - }); - let args = Arguments { - zone_options: timezone_option, - format_options: None, - cell_paths: None, - }; - let actual = action(&date_str, &args, Span::test_data()); - - assert_eq!(actual.get_type(), Error); - } - #[test] fn communicates_parsing_error_given_an_invalid_datetimelike_string() { let date_str = Value::test_string("16.11.1984 8:00 am Oops0000"); diff --git a/crates/nu-command/src/conversions/into/int.rs b/crates/nu-command/src/conversions/into/int.rs index 1f1862402e..fbc98ab006 100644 --- a/crates/nu-command/src/conversions/into/int.rs +++ b/crates/nu-command/src/conversions/into/int.rs @@ -1,3 +1,5 @@ +use chrono::{FixedOffset, TimeZone}; + use crate::input_handler::{operate, CmdArgument}; use nu_engine::CallExt; use nu_protocol::{ @@ -32,7 +34,7 @@ impl Command for SubCommand { (Type::String, Type::Int), (Type::Number, Type::Int), (Type::Bool, Type::Int), - // Unix timestamp in seconds + // Unix timestamp in nanoseconds (Type::Date, Type::Int), // TODO: Users should do this by dividing a Filesize by a Filesize explicitly (Type::Filesize, Type::Int), @@ -124,9 +126,9 @@ impl Command for SubCommand { }), }, Example { - description: "Convert date to integer (Unix timestamp)", - example: "2022-02-02 | into 
int", - result: Some(Value::test_int(1643760000)), + description: "Convert date to integer (Unix nanosecond timestamp)", + example: "1983-04-13T12:09:14.123456789-05:00 | into int", + result: Some(Value::test_int(419101754123456789)), }, Example { description: "Convert to integer from binary", @@ -217,10 +219,30 @@ fn action(input: &Value, args: &Arguments, span: Span) -> Value { Value::Int { val: 0, span } } } - Value::Date { val, .. } => Value::Int { - val: val.timestamp(), - span, - }, + Value::Date { val, .. } => { + if val + < &FixedOffset::east_opt(0) + .expect("constant") + .with_ymd_and_hms(1677, 9, 21, 0, 12, 44) + .unwrap() + || val + > &FixedOffset::east_opt(0) + .expect("constant") + .with_ymd_and_hms(2262, 4, 11, 23, 47, 16) + .unwrap() + { + Value::Error { + error: ShellError::IncorrectValue( + "DateTime out of range for timestamp: 1677-09-21T00:12:43Z to 2262-04-11T23:47:16".to_string(), + span), + } + } else { + Value::Int { + val: val.timestamp_nanos(), + span, + } + } + } Value::Duration { val, .. 
} => Value::Int { val: *val, span }, Value::Binary { val, span } => { use byteorder::{BigEndian, ByteOrder, LittleEndian}; @@ -381,6 +403,9 @@ fn int_from_string(a_string: &str, span: Span) -> Result { #[cfg(test)] mod test { + use chrono::{DateTime, FixedOffset}; + use rstest::rstest; + use super::Value; use super::*; use nu_protocol::Type::Error; @@ -455,4 +480,57 @@ mod test { assert_eq!(actual.get_type(), Error) } + + #[rstest] + #[case("2262-04-11T23:47:16+00:00", 0x7fffffff_ffffffff)] + #[case("1970-01-01T00:00:00+00:00", 0)] + #[case("1677-09-21T00:12:44+00:00", -0x7fffffff_ffffffff)] + fn datetime_to_int_values_that_work( + #[case] dt_in: DateTime, + #[case] int_expected: i64, + ) { + let s = Value::test_date(dt_in); + let actual = action( + &s, + &Arguments { + radix: 10, + cell_paths: None, + little_endian: false, + }, + Span::test_data(), + ); + // ignore fractional seconds -- I don't want to hard code test values that might vary due to leap nanoseconds. + let exp_truncated = (int_expected / 1_000_000_000) * 1_000_000_000; + assert_eq!(actual, Value::test_int(exp_truncated)); + } + + #[rstest] + #[case("2262-04-11T23:47:17+00:00", "DateTime out of range for timestamp")] + #[case("1677-09-21T00:12:43+00:00", "DateTime out of range for timestamp")] + fn datetime_to_int_values_that_fail( + #[case] dt_in: DateTime, + #[case] err_expected: &str, + ) { + let s = Value::test_date(dt_in); + let actual = action( + &s, + &Arguments { + radix: 10, + cell_paths: None, + little_endian: false, + }, + Span::test_data(), + ); + if let Value::Error { + error: ShellError::IncorrectValue(e, ..), + } = actual + { + assert!( + e.contains(err_expected), + "{e:?} doesn't contain {err_expected}" + ); + } else { + panic!("Unexpected actual value {actual:?}") + } + } } diff --git a/crates/nu-command/src/date/to_record.rs b/crates/nu-command/src/date/to_record.rs index d3e107ebd6..e63f9d4942 100644 --- a/crates/nu-command/src/date/to_record.rs +++ 
b/crates/nu-command/src/date/to_record.rs @@ -59,6 +59,7 @@ impl Command for SubCommand { "hour".into(), "minute".into(), "second".into(), + "nanosecond".into(), "timezone".into(), ]; let vals = vec![ @@ -68,6 +69,10 @@ impl Command for SubCommand { Value::Int { val: 22, span }, Value::Int { val: 10, span }, Value::Int { val: 57, span }, + Value::Int { + val: 123_000_000, + span, + }, Value::String { val: "+02:00".to_string(), span, @@ -89,7 +94,7 @@ impl Command for SubCommand { }, Example { description: "Convert a date string into a record.", - example: "'2020-04-12 22:10:57 +0200' | date to-record", + example: "'2020-04-12T22:10:57.123+02:00' | date to-record", result: example_result_1(), }, // TODO: This should work but does not; see https://github.com/nushell/nushell/issues/7032 @@ -110,6 +115,7 @@ fn parse_date_into_table(date: Result, Value>, head: Span) "hour".into(), "minute".into(), "second".into(), + "nanosecond".into(), "timezone".into(), ]; match date { @@ -121,6 +127,7 @@ fn parse_date_into_table(date: Result, Value>, head: Span) Value::int(x.hour() as i64, head), Value::int(x.minute() as i64, head), Value::int(x.second() as i64, head), + Value::int(x.nanosecond() as i64, head), Value::string(x.offset().to_string(), head), ]; Value::Record { diff --git a/crates/nu-command/src/date/to_table.rs b/crates/nu-command/src/date/to_table.rs index d7da306e6f..7e9711c944 100644 --- a/crates/nu-command/src/date/to_table.rs +++ b/crates/nu-command/src/date/to_table.rs @@ -59,6 +59,7 @@ impl Command for SubCommand { "hour".into(), "minute".into(), "second".into(), + "nanosecond".into(), "timezone".into(), ]; let vals = vec![ @@ -68,6 +69,7 @@ impl Command for SubCommand { Value::Int { val: 22, span }, Value::Int { val: 10, span }, Value::Int { val: 57, span }, + Value::Int { val: 789, span }, Value::String { val: "+02:00".to_string(), span, @@ -92,7 +94,9 @@ impl Command for SubCommand { }, Example { description: "Convert a given date into a table.", - example: 
"'2020-04-12 22:10:57 +0200' | date to-table", + //todo: resolve https://github.com/bspeice/dtparse/issues/40, which truncates nanosec bits + // for now, change the example to use date literal rather than string conversion, as workaround + example: "2020-04-12T22:10:57.000000789+02:00 | date to-table", result: example_result_1(), }, // TODO: This should work but does not; see https://github.com/nushell/nushell/issues/7032 @@ -113,6 +117,7 @@ fn parse_date_into_table(date: Result, Value>, head: Span) "hour".into(), "minute".into(), "second".into(), + "nanosecond".into(), "timezone".into(), ]; match date { @@ -124,6 +129,7 @@ fn parse_date_into_table(date: Result, Value>, head: Span) Value::int(x.hour() as i64, head), Value::int(x.minute() as i64, head), Value::int(x.second() as i64, head), + Value::int(x.nanosecond() as i64, head), Value::string(x.offset().to_string(), head), ]; Value::List { diff --git a/crates/nu-command/tests/commands/into_int.rs b/crates/nu-command/tests/commands/into_int.rs index 0220442a68..714ba09d93 100644 --- a/crates/nu-command/tests/commands/into_int.rs +++ b/crates/nu-command/tests/commands/into_int.rs @@ -1,3 +1,6 @@ +use chrono::{DateTime, FixedOffset, NaiveDate, TimeZone}; +use rstest::rstest; + use nu_test_support::{nu, pipeline}; #[test] @@ -47,3 +50,40 @@ fn into_int_binary() { assert!(actual.out.contains("16843009")); } + +#[test] +#[ignore] +fn into_int_datetime1() { + let dt = DateTime::parse_from_rfc3339("1983-04-13T12:09:14.123456789+00:00"); + eprintln!("dt debug {:?}", dt); + assert_eq!( + dt, + Ok(FixedOffset::east_opt(0) + .unwrap() + .from_local_datetime( + &NaiveDate::from_ymd_opt(1983, 4, 13) + .unwrap() + .and_hms_nano_opt(12, 9, 14, 123456789) + .unwrap() + ) + .unwrap()) + ); + + let dt_nano = dt.expect("foo").timestamp_nanos(); + assert_eq!(dt_nano % 1_000_000_000, 123456789); +} + +#[rstest] +#[case("1983-04-13T12:09:14.123456789-05:00", "419101754123456789")] // full precision 
+#[case("1983-04-13T12:09:14.456789-05:00", "419101754456789000")] // microsec +#[case("1983-04-13T12:09:14-05:00", "419101754000000000")] // sec +#[case("2052-04-13T12:09:14.123456789-05:00", "2596640954123456789")] // future date > 2038 epoch +#[case("1902-04-13T12:09:14.123456789-05:00", "-2137042245876543211")] // past date < 1970 +fn into_int_datetime(#[case] time_in: &str, #[case] int_out: &str) { + let actual = nu!( + cwd: ".", pipeline( + &format!(r#""{time_in}" | into datetime --format "%+" | into int"#) + )); + + assert_eq!(int_out, actual.out); +} diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 507c07d235..65db0a46cd 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -1632,6 +1632,15 @@ impl Value { span: Span::test_data(), } } + + /// Note: Only use this for test data, *not* live data, as it will point into unknown source + /// when used in errors. + pub fn test_date(val: DateTime) -> Value { + Value::Date { + val, + span: Span::test_data(), + } + } } impl Default for Value {