From 30bb090cd469dc4fa08fcce5bebab03b9c8f9bd8 Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Sun, 13 Mar 2022 13:53:13 +0000 Subject: [PATCH] str to datetime dfr (#4833) * str to datetime dfr * change description --- crates/nu-command/Cargo.toml | 3 +- .../src/dataframe/series/date/as_datetime.rs | 129 ++++++++++++++++++ .../src/dataframe/series/date/get_day.rs | 2 +- .../src/dataframe/series/date/get_hour.rs | 2 +- .../src/dataframe/series/date/get_minute.rs | 2 +- .../src/dataframe/series/date/get_month.rs | 2 +- .../dataframe/series/date/get_nanosecond.rs | 2 +- .../src/dataframe/series/date/get_ordinal.rs | 2 +- .../src/dataframe/series/date/get_second.rs | 2 +- .../src/dataframe/series/date/get_week.rs | 2 +- .../src/dataframe/series/date/get_weekday.rs | 2 +- .../src/dataframe/series/date/get_year.rs | 2 +- .../src/dataframe/series/date/mod.rs | 2 + crates/nu-command/src/dataframe/series/mod.rs | 1 + .../values/nu_dataframe/between_values.rs | 28 +++- 15 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 crates/nu-command/src/dataframe/series/date/as_datetime.rs diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 5d417237b..2098ffa64 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -90,7 +90,8 @@ optional = true features = [ "default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings", "cum_agg", "is_in", - "rolling_window", "strings", "rows", "random" + "rolling_window", "strings", "rows", "random", + "dtype-datetime" ] [features] diff --git a/crates/nu-command/src/dataframe/series/date/as_datetime.rs b/crates/nu-command/src/dataframe/series/date/as_datetime.rs new file mode 100644 index 000000000..486148342 --- /dev/null +++ b/crates/nu-command/src/dataframe/series/date/as_datetime.rs @@ -0,0 +1,129 @@ +use super::super::super::values::{Column, NuDataFrame}; + +use chrono::DateTime; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; +use polars::prelude::{IntoSeries, TimeUnit}; + +#[derive(Clone)] +pub struct AsDateTime; + +impl Command for AsDateTime { + fn name(&self) -> &str { + "dfr as-datetime" + } + + fn usage(&self) -> &str { + r#"Converts string to datetime. Format example: + "%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98 + "%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 + "%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01 + "%y%m%d %H:%M:%S" => 210319 23:58:50 + "%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98 + "%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 + "%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01 + "%Y%m%d %H:%M:%S" => 20210319 23:58:50 + "%FT%H:%M:%S" => 2019-04-18T02:45:55 + "%FT%H:%M:%S.%6f" => microseconds + "%FT%H:%M:%S.%9f" => nanoseconds"# + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required("format", SyntaxShape::String, "formating date string") + .switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n')) + .category(Category::Custom("dataframe".into())) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Converts string to datetime", + example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr to-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#, + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![ + Value::Date { + val: DateTime::parse_from_str( + "2021-12-30 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + span: Span::test_data(), + }, + Value::Date { + val: DateTime::parse_from_str( + "2021-12-31 00:00:00 +0000", + "%Y-%m-%d %H:%M:%S %z", + ) + .expect("date calculation should not fail in test"), + span: Span::test_data(), + }, + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + command(engine_state, stack, call, input) + } +} + +fn command( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, +) -> Result { + let format: String = call.req(engine_state, stack, 0)?; + let not_exact = call.has_flag("not-exact"); + + let df = NuDataFrame::try_from_pipeline(input, call.head)?; + let series = df.as_series(call.head)?; + let casted = series.utf8().map_err(|e| { + ShellError::SpannedLabeledError("Error casting to string".into(), e.to_string(), call.head) + })?; + + let res = if not_exact { + casted.as_datetime_not_exact(Some(format.as_str()), TimeUnit::Milliseconds) + } else { + casted.as_datetime(Some(format.as_str()), TimeUnit::Milliseconds) + }; + + let res = res + .map_err(|e| { + ShellError::SpannedLabeledError( + "Error creating datetime".into(), + e.to_string(), + call.head, + ) + })? + .into_series(); + + NuDataFrame::try_from_series(vec![res], call.head) + .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) +} + +#[cfg(test)] +mod test { + use super::super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(AsDateTime {})]) + } +} diff --git a/crates/nu-command/src/dataframe/series/date/get_day.rs b/crates/nu-command/src/dataframe/series/date/get_day.rs index fe840121a..b603b9853 100644 --- a/crates/nu-command/src/dataframe/series/date/get_day.rs +++ b/crates/nu-command/src/dataframe/series/date/get_day.rs @@ -70,7 +70,7 @@ fn command( let res = casted.day().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_hour.rs b/crates/nu-command/src/dataframe/series/date/get_hour.rs index 4b0ea98ee..f720b7013 100644 --- a/crates/nu-command/src/dataframe/series/date/get_hour.rs +++ b/crates/nu-command/src/dataframe/series/date/get_hour.rs @@ -70,7 +70,7 @@ fn command( let res = casted.hour().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_minute.rs b/crates/nu-command/src/dataframe/series/date/get_minute.rs index 161a85f46..baec27645 100644 --- a/crates/nu-command/src/dataframe/series/date/get_minute.rs +++ b/crates/nu-command/src/dataframe/series/date/get_minute.rs @@ -70,7 +70,7 @@ fn command( let res = casted.minute().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_month.rs b/crates/nu-command/src/dataframe/series/date/get_month.rs index a5f9f0fc9..14baaea9a 100644 --- a/crates/nu-command/src/dataframe/series/date/get_month.rs +++ b/crates/nu-command/src/dataframe/series/date/get_month.rs @@ -70,7 +70,7 @@ fn command( let res = casted.month().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs b/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs index 445398d49..4b9da9833 100644 --- a/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs +++ b/crates/nu-command/src/dataframe/series/date/get_nanosecond.rs @@ -70,7 +70,7 @@ fn command( let res = casted.nanosecond().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_ordinal.rs b/crates/nu-command/src/dataframe/series/date/get_ordinal.rs index 95b46c24a..ba4cf64e6 100644 --- a/crates/nu-command/src/dataframe/series/date/get_ordinal.rs +++ b/crates/nu-command/src/dataframe/series/date/get_ordinal.rs @@ -70,7 +70,7 @@ fn command( let res = casted.ordinal().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_second.rs b/crates/nu-command/src/dataframe/series/date/get_second.rs index ba5ecf8e4..6503d4e93 100644 --- a/crates/nu-command/src/dataframe/series/date/get_second.rs +++ b/crates/nu-command/src/dataframe/series/date/get_second.rs @@ -70,7 +70,7 @@ fn command( let res = casted.second().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_week.rs b/crates/nu-command/src/dataframe/series/date/get_week.rs index a671d3229..dfbd127a5 100644 --- a/crates/nu-command/src/dataframe/series/date/get_week.rs +++ b/crates/nu-command/src/dataframe/series/date/get_week.rs @@ -70,7 +70,7 @@ fn command( let res = casted.week().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_weekday.rs b/crates/nu-command/src/dataframe/series/date/get_weekday.rs index b4e370665..f084ae2a7 100644 --- a/crates/nu-command/src/dataframe/series/date/get_weekday.rs +++ b/crates/nu-command/src/dataframe/series/date/get_weekday.rs @@ -70,7 +70,7 @@ fn command( let res = casted.weekday().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/get_year.rs b/crates/nu-command/src/dataframe/series/date/get_year.rs index ed248c2af..706ff7044 100644 --- a/crates/nu-command/src/dataframe/series/date/get_year.rs +++ b/crates/nu-command/src/dataframe/series/date/get_year.rs @@ -70,7 +70,7 @@ fn command( let res = casted.year().into_series(); - NuDataFrame::try_from_series(vec![res.into_series()], call.head) + NuDataFrame::try_from_series(vec![res], call.head) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } diff --git a/crates/nu-command/src/dataframe/series/date/mod.rs b/crates/nu-command/src/dataframe/series/date/mod.rs index fbbb75f5d..b1c802958 100644 --- a/crates/nu-command/src/dataframe/series/date/mod.rs +++ b/crates/nu-command/src/dataframe/series/date/mod.rs @@ -1,3 +1,4 @@ +mod as_datetime; mod get_day; mod get_hour; mod get_minute; @@ -9,6 +10,7 @@ mod get_week; mod get_weekday; mod get_year; +pub use as_datetime::AsDateTime; pub use get_day::GetDay; pub use get_hour::GetHour; pub use get_minute::GetMinute; diff --git a/crates/nu-command/src/dataframe/series/mod.rs b/crates/nu-command/src/dataframe/series/mod.rs index fbe81ebcf..33edf7a8c 100644 --- a/crates/nu-command/src/dataframe/series/mod.rs +++ b/crates/nu-command/src/dataframe/series/mod.rs @@ -57,6 +57,7 @@ pub fn add_series_decls(working_set: &mut StateWorkingSet) { ArgSort, ArgTrue, ArgUnique, + AsDateTime, Concatenate, Contains, Cumulative, diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs index 4f1d7c812..e3cc45152 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/between_values.rs @@ -4,7 +4,7 @@ use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value}; use num::Zero; use polars::prelude::{ BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, - NumOpsDispatchChecked, PolarsError, Series, + NumOpsDispatchChecked, PolarsError, Series, TimeUnit, }; use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; @@ -270,6 +270,9 @@ pub(super) fn compute_series_single_value( let equal_pattern = format!("^{}$", val); contains_series_pat(&lhs, &equal_pattern, lhs_span) } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::equal, lhs_span) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -285,6 +288,12 @@ pub(super) fn compute_series_single_value( Value::Float { val, .. } => { compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span) } + Value::Date { val, .. } => compare_series_i64( + &lhs, + val.timestamp_millis(), + ChunkedArray::not_equal, + lhs_span, + ), _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -298,6 +307,9 @@ pub(super) fn compute_series_single_value( Value::Float { val, .. } => { compare_series_decimal(&lhs, *val, ChunkedArray::lt, lhs_span) } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt, lhs_span) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -311,6 +323,9 @@ pub(super) fn compute_series_single_value( Value::Float { val, .. } => { compare_series_decimal(&lhs, *val, ChunkedArray::lt_eq, lhs_span) } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt_eq, lhs_span) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -324,6 +339,9 @@ pub(super) fn compute_series_single_value( Value::Float { val, .. } => { compare_series_decimal(&lhs, *val, ChunkedArray::gt, lhs_span) } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt, lhs_span) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -337,6 +355,9 @@ pub(super) fn compute_series_single_value( Value::Float { val, .. } => { compare_series_decimal(&lhs, *val, ChunkedArray::gt_eq, lhs_span) } + Value::Date { val, .. } => { + compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt_eq, lhs_span) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type(), @@ -491,7 +512,10 @@ where F: Fn(&ChunkedArray, i64) -> ChunkedArray, { match series.dtype() { - DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { + DataType::UInt32 + | DataType::Int32 + | DataType::UInt64 + | DataType::Datetime(TimeUnit::Milliseconds, _) => { let to_i64 = series.cast(&DataType::Int64); match to_i64 {