diff --git a/crates/nu_plugin_polars/src/dataframe/command/datetime/as_date.rs b/crates/nu_plugin_polars/src/dataframe/command/datetime/as_date.rs
index aa36e44ece..eb9c056309 100644
--- a/crates/nu_plugin_polars/src/dataframe/command/datetime/as_date.rs
+++ b/crates/nu_plugin_polars/src/dataframe/command/datetime/as_date.rs
@@ -1,13 +1,19 @@
-use crate::{values::CustomValueSupport, PolarsPlugin};
-
-use super::super::super::values::NuDataFrame;
+use crate::{
+    values::{
+        cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
+        NuSchema, PolarsPluginObject, PolarsPluginType,
+    },
+    PolarsPlugin,
+};
+use chrono::DateTime;
+use std::sync::Arc;
 
 use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
 use nu_protocol::{
     record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span,
     SyntaxShape, Type, Value,
 };
-use polars::prelude::{IntoSeries, StringMethods};
+use polars::prelude::{col, DataType, Field, IntoSeries, Schema, StringMethods, StrptimeOptions};
 
 #[derive(Clone)]
 pub struct AsDate;
@@ -34,10 +40,16 @@ impl PluginCommand for AsDate {
         Signature::build(self.name())
             .required("format", SyntaxShape::String, "formatting date string")
             .switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
-            .input_output_type(
-                Type::Custom("dataframe".into()),
-                Type::Custom("dataframe".into()),
-            )
+            .input_output_types(vec![
+                (
+                    Type::Custom("dataframe".into()),
+                    Type::Custom("dataframe".into()),
+                ),
+                (
+                    Type::Custom("expression".into()),
+                    Type::Custom("expression".into()),
+                ),
+            ])
             .category(Category::Custom("dataframe".into()))
     }
 
@@ -46,12 +58,110 @@ impl PluginCommand for AsDate {
             Example {
                 description: "Converts string to date",
                 example: r#"["2021-12-30" "2021-12-31"] | polars into-df | polars as-date "%Y-%m-%d""#,
-                result: None, // help is needed on how to provide results
+                result: Some(
+                    NuDataFrame::try_from_columns(
+                        vec![Column::new(
+                            "date".to_string(),
+                            vec![
+                                // Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
+                                // We therefore force the type to be DataType::Date in the schema
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-30 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-31 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                            ],
+                        )],
+                        Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
+                            Field::new("date".into(), DataType::Date),
+                        ])))),
+                    )
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
             },
             Example {
                 description: "Converts string to date",
                 example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-df | polars as-date "%Y-%m-%d" --not-exact"#,
-                result: None,
+                result: Some(
+                    NuDataFrame::try_from_columns(
+                        vec![Column::new(
+                            "date".to_string(),
+                            vec![
+                                // Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
+                                // We therefore force the type to be DataType::Date in the schema
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-30 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-31 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                            ],
+                        )],
+                        Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
+                            Field::new("date".into(), DataType::Date),
+                        ])))),
+                    )
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
+            Example {
+                description: "Converts string to date in an expression",
+                example: r#"["2021-12-30" "2021-12-31 21:00:00"] | polars into-lazy | polars select (polars col 0 | polars as-date "%Y-%m-%d" --not-exact)"#,
+                result: Some(
+                    NuDataFrame::try_from_columns(
+                        vec![Column::new(
+                            "date".to_string(),
+                            vec![
+                                // Nushell's Value::date only maps to DataType::Datetime and not DataType::Date
+                                // We therefore force the type to be DataType::Date in the schema
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-30 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2021-12-31 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                            ],
+                        )],
+                        Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
+                            Field::new("date".into(), DataType::Date),
+                        ])))),
+                    )
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
             },
             Example {
                 description: "Output is of date type",
@@ -85,8 +195,61 @@ fn command(
 ) -> Result<PipelineData, ShellError> {
     let format: String = call.req(0)?;
     let not_exact = call.has_flag("not-exact")?;
+    let value = input.into_value(call.head)?;
+
+    let options = StrptimeOptions {
+        format: Some(format.into()),
+        strict: true,
+        exact: !not_exact,
+        cache: Default::default(),
+    };
+
+    match PolarsPluginObject::try_from_value(plugin, &value)? {
+        PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy, options),
+        PolarsPluginObject::NuDataFrame(df) => command_eager(plugin, engine, call, df, options),
+        PolarsPluginObject::NuExpression(expr) => {
+            let res: NuExpression = expr.into_polars().str().to_date(options).into();
+            res.to_pipeline_data(plugin, engine, call.head)
+        }
+        _ => Err(cant_convert_err(
+            &value,
+            &[
+                PolarsPluginType::NuDataFrame,
+                PolarsPluginType::NuLazyFrame,
+                PolarsPluginType::NuExpression,
+            ],
+        )),
+    }
+}
+
+fn command_lazy(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    lazy: NuLazyFrame,
+    options: StrptimeOptions,
+) -> Result<PipelineData, ShellError> {
+    NuLazyFrame::new(
+        false,
+        lazy.to_polars().select([col("*").str().to_date(options)]),
+    )
+    .to_pipeline_data(plugin, engine, call.head)
+}
+
+fn command_eager(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    df: NuDataFrame,
+    options: StrptimeOptions,
+) -> Result<PipelineData, ShellError> {
+    let format = if let Some(format) = options.format {
+        format.to_string()
+    } else {
+        unreachable!("`format` will never be None")
+    };
+    let not_exact = !options.exact;
 
-    let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
     let series = df.as_series(call.head)?;
     let casted = series.str().map_err(|e| ShellError::GenericError {
         error: "Error casting to string".into(),
diff --git a/crates/nu_plugin_polars/src/dataframe/command/datetime/as_datetime.rs b/crates/nu_plugin_polars/src/dataframe/command/datetime/as_datetime.rs
index 3a246cc58c..c06a349496 100644
--- a/crates/nu_plugin_polars/src/dataframe/command/datetime/as_datetime.rs
+++ b/crates/nu_plugin_polars/src/dataframe/command/datetime/as_datetime.rs
@@ -1,15 +1,22 @@
-use crate::{values::CustomValueSupport, PolarsPlugin};
+use crate::{
+    values::{
+        cant_convert_err, Column, CustomValueSupport, NuDataFrame, NuExpression, NuLazyFrame,
+        NuSchema, PolarsPluginObject, PolarsPluginType,
+    },
+    PolarsPlugin,
+};
+use chrono::DateTime;
 use std::sync::Arc;
 
-use super::super::super::values::{Column, NuDataFrame, NuSchema};
-
-use chrono::DateTime;
 use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
 use nu_protocol::{
     Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
     Value,
 };
-use polars::prelude::{DataType, Field, IntoSeries, Schema, StringMethods, TimeUnit};
+use polars::prelude::{
+    col, DataType, Expr, Field, IntoSeries, LiteralValue, PlSmallStr, Schema, StringMethods,
+    StrptimeOptions, TimeUnit,
+};
 
 #[derive(Clone)]
 pub struct AsDateTime;
@@ -42,14 +49,30 @@ impl PluginCommand for AsDateTime {
 
     fn signature(&self) -> Signature {
         Signature::build(self.name())
+            .input_output_types(vec![
+                (
+                    Type::Custom("dataframe".into()),
+                    Type::Custom("dataframe".into()),
+                ),
+                (
+                    Type::Custom("expression".into()),
+                    Type::Custom("expression".into()),
+                ),
+            ])
             .required("format", SyntaxShape::String, "formatting date time string")
             .switch("not-exact", "the format string may be contained in the date (e.g. foo-2021-01-01-bar could match 2021-01-01)", Some('n'))
-            .switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware)", None)
-            .input_output_type(
-                Type::Custom("dataframe".into()),
-                Type::Custom("dataframe".into()),
-            )
-            .category(Category::Custom("dataframe".into()))
+            .switch("naive", "the input datetimes should be parsed as naive (i.e., not timezone-aware). Ignored if input is an expression.", None)
+            .named(
+                "ambiguous",
+                SyntaxShape::OneOf(vec![SyntaxShape::String, SyntaxShape::Nothing]),
+                r#"Determine how to deal with ambiguous datetimes:
+                    `raise` (default): raise error
+                    `earliest`: use the earliest datetime
+                    `latest`: use the latest datetime
+                    `null`: set to null
+                    Used only when input is a lazyframe or expression and ignored otherwise"#,
+                Some('a'),
+            ) .category(Category::Custom("dataframe".into()))
     }
 
     fn examples(&self) -> Vec<Example> {
@@ -158,6 +181,63 @@ impl PluginCommand for AsDateTime {
                     .into_value(Span::test_data()),
                 ),
             },
+            Example {
+                description: "Converts string to datetime in an expression",
+                example: r#"["2025-11-02 00:00:00", "2025-11-02 01:00:00", "2025-11-02 02:00:00", "2025-11-02 03:00:00"] | polars into-df | polars select (polars col 0 | polars as-datetime "%Y-%m-%d %H:%M:%S")"#,
+                result: Some(
+                    NuDataFrame::try_from_columns(
+                        vec![Column::new(
+                            "datetime".to_string(),
+                            vec![
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-11-02 00:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-11-02 01:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-11-02 02:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                                Value::date(
+                                    DateTime::parse_from_str(
+                                        "2025-11-02 03:00:00 +0000",
+                                        "%Y-%m-%d %H:%M:%S %z",
+                                    )
+                                    .expect("date calculation should not fail in test"),
+                                    Span::test_data(),
+                                ),
+                            ],
+                        )],
+                        Some(NuSchema::new(Arc::new(Schema::from_iter(vec![
+                            Field::new(
+                                "datetime".into(),
+                                DataType::Datetime(
+                                    TimeUnit::Nanoseconds,
+                                    None
+                                ),
+                            ),
+                        ])))),
+                    )
+                    .expect("simple df for test should not fail")
+                    .into_value(Span::test_data()),
+                ),
+            },
         ]
     }
 
@@ -182,7 +262,98 @@ fn command(
     let not_exact = call.has_flag("not-exact")?;
     let tz_aware = !call.has_flag("naive")?;
 
-    let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
+    let value = input.into_value(call.head)?;
+
+    let options = StrptimeOptions {
+        format: Some(format.into()),
+        strict: true,
+        exact: !not_exact,
+        cache: Default::default(),
+    };
+
+    let ambiguous = match call.get_flag::<Value>("ambiguous")? {
+        Some(Value::String { val, internal_span }) => match val.as_str() {
+            "raise" | "earliest" | "latest" | "null" => Ok(val),
+            _ => Err(ShellError::GenericError {
+                error: "Invalid argument value".into(),
+                msg: "`ambiguous` must be one of raise, earliest, latest, or null".into(),
+                span: Some(internal_span),
+                help: None,
+                inner: vec![],
+            }),
+        },
+        Some(Value::Nothing { .. }) => Ok("null".into()),
+        Some(_) => unreachable!("Argument only accepts string or null."),
+        None => Ok("raise".into()),
+    }
+    .map_err(LabeledError::from)?;
+
+    match PolarsPluginObject::try_from_value(plugin, &value)? {
+        PolarsPluginObject::NuLazyFrame(lazy) => {
+            command_lazy(plugin, engine, call, lazy, options, ambiguous)
+        }
+        PolarsPluginObject::NuDataFrame(df) => {
+            command_eager(plugin, engine, call, df, options, tz_aware)
+        }
+        PolarsPluginObject::NuExpression(expr) => {
+            let res: NuExpression = expr
+                .into_polars()
+                .str()
+                .to_datetime(
+                    None,
+                    None,
+                    options,
+                    Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))),
+                )
+                .into();
+            res.to_pipeline_data(plugin, engine, call.head)
+        }
+        _ => Err(cant_convert_err(
+            &value,
+            &[
+                PolarsPluginType::NuDataFrame,
+                PolarsPluginType::NuLazyFrame,
+                PolarsPluginType::NuExpression,
+            ],
+        )),
+    }
+}
+
+fn command_lazy(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    lazy: NuLazyFrame,
+    options: StrptimeOptions,
+    ambiguous: String,
+) -> Result<PipelineData, ShellError> {
+    NuLazyFrame::new(
+        false,
+        lazy.to_polars().select([col("*").str().to_datetime(
+            None,
+            None,
+            options,
+            Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))),
+        )]),
+    )
+    .to_pipeline_data(plugin, engine, call.head)
+}
+
+fn command_eager(
+    plugin: &PolarsPlugin,
+    engine: &EngineInterface,
+    call: &EvaluatedCall,
+    df: NuDataFrame,
+    options: StrptimeOptions,
+    tz_aware: bool,
+) -> Result<PipelineData, ShellError> {
+    let format = if let Some(format) = options.format {
+        format.to_string()
+    } else {
+        unreachable!("`format` will never be None")
+    };
+    let not_exact = !options.exact;
+
     let series = df.as_series(call.head)?;
     let casted = series.str().map_err(|e| ShellError::GenericError {
         error: "Error casting to string".into(),