diff --git a/Cargo.lock b/Cargo.lock index 50b3eeaae..7354d5ce7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -199,11 +199,11 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "arrow" -version = "4.4.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f3334cea4f209440350d00ae1dab237ced49d80b664cc4b0e984893d583890" +checksum = "ddf189dff0c7e0f40588fc25adbe5bb6837b82fc61bb7cadf5d76de030f710bb" dependencies = [ - "cfg_aliases", + "bitflags", "chrono", "csv", "flatbuffers", @@ -213,7 +213,7 @@ dependencies = [ "lexical-core", "multiversion", "num 0.4.0", - "rand 0.7.3", + "rand 0.8.4", "regex", "serde 1.0.126", "serde_derive", @@ -726,12 +726,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "cfg_aliases" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" - [[package]] name = "chrono" version = "0.4.19" @@ -1657,9 +1651,9 @@ checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" [[package]] name = "flatbuffers" -version = "0.8.4" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c502342b7d6d73beb1b8bab39dc01deba0c8ef66f4e6f1eba7c69ee6b38069" +checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" dependencies = [ "bitflags", "smallvec", @@ -4243,9 +4237,9 @@ dependencies = [ [[package]] name = "parquet" -version = "4.4.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265044e41d674fad4c7860a3e245e53138e926fe83cad8d45193a7a354c56a54" +checksum = "db54d10313f64ea22e1fe0332864abefb19aa1c322bfbc67999ff743aebf868d" dependencies = [ "arrow", "base64", @@ -4256,6 +4250,7 @@ dependencies = [ "lz4", "num-bigint 0.4.0", "parquet-format", + "rand 0.8.4", "snap", "thrift", "zstd", @@ -4477,9 +4472,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.14.8" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed1645f3daba885ecedef8a76839536bfc7263f1e4fc0b67b27282f19325756" +checksum = "008543a5972e0cc94e08cfbf1ec7c5904dc09256f31ffc2b04ab2f24032be69c" dependencies = [ "polars-core", "polars-io", @@ -4488,9 +4483,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.14.8" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5943252213fbd21e26b62240f5bb73f5d874e1072ed93234c6860781a4c366f2" +checksum = "5b0b1c85210b633bc690e01a2b88ca8528af9a50d4a75d0e313ddec89ad2ed4a" dependencies = [ "arrow", "num 0.4.0", @@ -4499,9 +4494,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.14.8" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df43d51c6314623deafdd1266ca9a8431f087d9d345fffc8eeb16508aec69fee" +checksum = "e1547583d662e51c3cf1871296874acacb53edb7e074e6f00fd537cd81237a6f" dependencies = [ "ahash", "anyhow", @@ -4527,14 +4522,13 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.14.8" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd998aab494826fb08d9c83763fc2925c9aa4eab2e4b349416bb174b9b300a3a" +checksum = "1048c30f9c85e12a2f2f65a2e4527c6cc0fc9ec1e7ff4ad98b2759a7e12ba699" dependencies = [ "ahash", "anyhow", "arrow", - "csv", "csv-core", "dirs 3.0.2", "fast-float", @@ -4553,9 +4547,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.14.8" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6596b6d1cf1e378976ad9f5d2abe88ba08f112684ca1b67fbe89b97832c9f11d" +checksum = "ad98453f7bc530b5531c802aad6d4ea480d99a90a3a0318135aab9b501f562d0" dependencies = [ "ahash", "itertools", @@ -6933,18 +6927,18 @@ dependencies = [ [[package]] name = "zstd" -version = "0.8.3+zstd.1.5.0" +version = "0.9.0+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea7094c7b4a58fbd738eb0d4a2fc7684a0e6949a31597e074ffe20a07cbc2bf" +checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "4.1.0+zstd.1.5.0" +version = "4.1.1+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30375f78e185ca4c91930f42ea2c0162f9aa29737032501f93b79266d985ae7" +checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" dependencies = [ "libc", "zstd-sys", @@ -6952,9 +6946,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.0+zstd.1.5.0" +version = "1.6.1+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2141bed8922b427761470e6bbfeff255da94fa20b0bbeab0d9297fcaf71e3aa7" +checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" dependencies = [ "cc", "libc", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 55b6e5913..932be5dc0 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -99,9 +99,9 @@ zip = { version="0.5.9", optional=true } digest = "0.9.0" [dependencies.polars] -version = "0.14.8" +version = "0.15" optional = true -features = ["parquet", "json", "random", "pivot", "strings", "is_in"] +features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal"] [target.'cfg(unix)'.dependencies] umask = "1.0.0" diff --git a/crates/nu-command/src/commands/dataframe/mod.rs b/crates/nu-command/src/commands/dataframe/mod.rs index e6df70e41..43c34b05d 100644 --- a/crates/nu-command/src/commands/dataframe/mod.rs +++ b/crates/nu-command/src/commands/dataframe/mod.rs @@ -73,6 +73,16 @@ pub use series::DataFrameArgTrue; pub use series::DataFrameArgUnique; pub use series::DataFrameConcatenate; pub use series::DataFrameContains; +pub use series::DataFrameGetDay; +pub use series::DataFrameGetHour; +pub use series::DataFrameGetMinute; +pub use series::DataFrameGetMonth; +pub use series::DataFrameGetNanoSecond; +pub use series::DataFrameGetOrdinal; +pub use series::DataFrameGetSecond; +pub use series::DataFrameGetWeek; +pub use series::DataFrameGetWeekDay; +pub use series::DataFrameGetYear; pub use series::DataFrameIsDuplicated; pub use series::DataFrameIsIn; pub use series::DataFrameIsNotNull; @@ -87,6 +97,7 @@ pub use series::DataFrameSeriesRename; pub use series::DataFrameSet; pub use series::DataFrameSetWithIdx; pub use series::DataFrameShift; +pub use series::DataFrameStrFTime; pub use series::DataFrameStringLengths; pub use series::DataFrameStringSlice; pub use series::DataFrameToLowercase; diff --git a/crates/nu-command/src/commands/dataframe/sample.rs b/crates/nu-command/src/commands/dataframe/sample.rs index 50c02df62..d32a09283 100644 --- a/crates/nu-command/src/commands/dataframe/sample.rs +++ b/crates/nu-command/src/commands/dataframe/sample.rs @@ -41,7 +41,7 @@ impl WholeStreamCommand for DataFrame { vec![ Example { description: "Sample rows from dataframe", - example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe sample -r 1", + example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe sample -n 1", result: None, // No expected value because sampling is random }, Example { diff --git a/crates/nu-command/src/commands/dataframe/series/get_day.rs b/crates/nu-command/src/commands/dataframe/series/get_day.rs new file mode 100644 index 000000000..fa02503d4 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_day.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-day" + } + + fn usage(&self) -> &str { + "[Series] Gets day from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-day") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns day from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-day"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(4).into(), UntaggedValue::int(4).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.day().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_hour.rs b/crates/nu-command/src/commands/dataframe/series/get_hour.rs new file mode 100644 index 000000000..5666899a8 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_hour.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-hour" + } + + fn usage(&self) -> &str { + "[Series] Gets hour from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-hour") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns hour from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-hour"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(16).into(), UntaggedValue::int(16).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.hour().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_minute.rs b/crates/nu-command/src/commands/dataframe/series/get_minute.rs new file mode 100644 index 000000000..a33653333 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_minute.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-minute" + } + + fn usage(&self) -> &str { + "[Series] Gets minute from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-minute") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns minute from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-minute"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(39).into(), UntaggedValue::int(39).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.minute().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_month.rs b/crates/nu-command/src/commands/dataframe/series/get_month.rs new file mode 100644 index 000000000..999817377 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_month.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-month" + } + + fn usage(&self) -> &str { + "[Series] Gets month from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-month") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns month from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-month"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(8).into(), UntaggedValue::int(8).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.month().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs b/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs new file mode 100644 index 000000000..5d0250cc0 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-nanosecond" + } + + fn usage(&self) -> &str { + "[Series] Gets nanosecond from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-nanosecond") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns nanosecond from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-nanosecond"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(0).into(), UntaggedValue::int(0).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.nanosecond().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs b/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs new file mode 100644 index 000000000..a18424e62 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs @@ -0,0 +1,78 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-ordinal" + } + + fn usage(&self) -> &str { + "[Series] Gets ordinal date from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-ordinal") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns ordinal from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-ordinal"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(217).into(), + UntaggedValue::int(217).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.ordinal().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_second.rs b/crates/nu-command/src/commands/dataframe/series/get_second.rs new file mode 100644 index 000000000..26cebaa5a --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_second.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-second" + } + + fn usage(&self) -> &str { + "[Series] Gets second from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-second") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns second from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-second"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(18).into(), UntaggedValue::int(18).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.second().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_week.rs b/crates/nu-command/src/commands/dataframe/series/get_week.rs new file mode 100644 index 000000000..06f4bba39 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_week.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-week" + } + + fn usage(&self) -> &str { + "[Series] Gets week from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-week") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns week from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-week"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(32).into(), UntaggedValue::int(32).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.week().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_weekday.rs b/crates/nu-command/src/commands/dataframe/series/get_weekday.rs new file mode 100644 index 000000000..8c28feaf1 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_weekday.rs @@ -0,0 +1,75 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-weekday" + } + + fn usage(&self) -> &str { + "[Series] Gets weekday from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-weekday") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns weekday from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-weekday"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.weekday().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/get_year.rs b/crates/nu-command/src/commands/dataframe/series/get_year.rs new file mode 100644 index 000000000..7c8331a67 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/get_year.rs @@ -0,0 +1,78 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; + +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe get-year" + } + + fn usage(&self) -> &str { + "[Series] Gets year from date" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe get-year") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns year from a date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe get-year"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(2020).into(), + UntaggedValue::int(2020).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.year().into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/mod.rs b/crates/nu-command/src/commands/dataframe/series/mod.rs index cbd7cf60a..519fa1d5b 100644 --- a/crates/nu-command/src/commands/dataframe/series/mod.rs +++ b/crates/nu-command/src/commands/dataframe/series/mod.rs @@ -7,6 +7,16 @@ pub mod arg_true; pub mod arg_unique; pub mod concatenate; pub mod contains; +pub mod get_day; +pub mod get_hour; +pub mod get_minute; +pub mod get_month; +pub mod get_nanosecond; +pub mod get_ordinal; +pub mod get_second; +pub mod get_week; +pub mod get_weekday; +pub mod get_year; pub mod is_duplicated; pub mod is_in; pub mod is_not_null; @@ -23,6 +33,7 @@ pub mod set_with_idx; pub mod shift; pub mod str_lengths; pub mod str_slice; +pub mod strftime; pub mod to_lowercase; pub mod to_uppercase; pub mod unique; @@ -37,6 +48,16 @@ pub use arg_true::DataFrame as DataFrameArgTrue; pub use arg_unique::DataFrame as DataFrameArgUnique; pub use concatenate::DataFrame as DataFrameConcatenate; pub use contains::DataFrame as DataFrameContains; +pub use get_day::DataFrame as DataFrameGetDay; +pub use get_hour::DataFrame as DataFrameGetHour; +pub use get_minute::DataFrame as DataFrameGetMinute; +pub use get_month::DataFrame as DataFrameGetMonth; +pub use get_nanosecond::DataFrame as DataFrameGetNanoSecond; +pub use get_ordinal::DataFrame as DataFrameGetOrdinal; +pub use get_second::DataFrame as DataFrameGetSecond; +pub use get_week::DataFrame as DataFrameGetWeek; +pub use get_weekday::DataFrame as DataFrameGetWeekDay; +pub use get_year::DataFrame as DataFrameGetYear; pub use is_duplicated::DataFrame as DataFrameIsDuplicated; pub use is_in::DataFrame as DataFrameIsIn; pub use is_not_null::DataFrame as DataFrameIsNotNull; @@ -53,6 +74,7 @@ pub use set_with_idx::DataFrame as DataFrameSetWithIdx; pub use shift::DataFrame as DataFrameShift; pub use str_lengths::DataFrame as DataFrameStringLengths; pub use str_slice::DataFrame as DataFrameStringSlice; +pub use strftime::DataFrame as DataFrameStrFTime; pub use to_lowercase::DataFrame as DataFrameToLowercase; pub use to_uppercase::DataFrame as DataFrameToUppercase; pub use unique::DataFrame as DataFrameUnique; diff --git a/crates/nu-command/src/commands/dataframe/series/strftime.rs b/crates/nu-command/src/commands/dataframe/series/strftime.rs new file mode 100644 index 000000000..524a699b7 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/strftime.rs @@ -0,0 +1,80 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; + +use nu_source::Tagged; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe strftime" + } + + fn usage(&self) -> &str { + "[Series] Formats date based on string rule" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe strftime").required("fmt", SyntaxShape::String, "Format rule") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Formats date", + example: r#"let dt = ('2020-08-04T16:39:18+00:00' | str to-datetime -z 'UTC'); + let df = ([$dt $dt] | dataframe to-df); + $df | dataframe strftime "%Y/%m/%d""#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("2020/08/04").into(), + UntaggedValue::string("2020/08/04").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let fmt: Tagged = args.req(0)?; + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; + + let casted = series + .date64() + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; + + let res = casted.strftime(fmt.item.as_str()).into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/take.rs b/crates/nu-command/src/commands/dataframe/take.rs index 1dcf2a2d5..293fab526 100644 --- a/crates/nu-command/src/commands/dataframe/take.rs +++ b/crates/nu-command/src/commands/dataframe/take.rs @@ -111,9 +111,12 @@ fn command(mut args: CommandArgs) -> Result { ShellError::labeled_error("Empty stream", "No value found in the stream", &tag) })?; - match value.value { + match &value.value { UntaggedValue::DataFrame(df) => { - let res = df.as_ref().take(indices); + let res = df + .as_ref() + .take(indices) + .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } diff --git a/crates/nu-command/src/commands/dataframe/to_csv.rs b/crates/nu-command/src/commands/dataframe/to_csv.rs index 65aa2e881..c9409eed1 100644 --- a/crates/nu-command/src/commands/dataframe/to_csv.rs +++ b/crates/nu-command/src/commands/dataframe/to_csv.rs @@ -45,7 +45,7 @@ impl WholeStreamCommand for DataFrame { vec![ Example { description: "Saves dataframe to csv file", - example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to_csv test.csv", + example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-csv test.csv", result: None, }, Example { diff --git a/crates/nu-command/src/commands/mod.rs b/crates/nu-command/src/commands/mod.rs index 210c73361..b1a5244e8 100644 --- a/crates/nu-command/src/commands/mod.rs +++ b/crates/nu-command/src/commands/mod.rs @@ -29,14 +29,17 @@ pub use dataframe::{ DataFrameArgMax, DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn, DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst, - DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull, - DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt, - DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen, DataFramePivot, - DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect, DataFrameSeriesRename, - DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, DataFrameShow, - DataFrameSlice, DataFrameSort, DataFrameStringLengths, DataFrameStringSlice, DataFrameTake, - DataFrameToCsv, DataFrameToDF, DataFrameToLowercase, DataFrameToParquet, DataFrameToUppercase, - DataFrameUnique, DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn, + DataFrameGet, DataFrameGetDay, DataFrameGetHour, DataFrameGetMinute, DataFrameGetMonth, + DataFrameGetNanoSecond, DataFrameGetOrdinal, DataFrameGetSecond, DataFrameGetWeek, + DataFrameGetWeekDay, DataFrameGetYear, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, + DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, + DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen, + DataFramePivot, DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect, + DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, + DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameStrFTime, DataFrameStringLengths, + DataFrameStringSlice, DataFrameTake, DataFrameToCsv, DataFrameToDF, DataFrameToLowercase, + DataFrameToParquet, DataFrameToUppercase, DataFrameUnique, DataFrameValueCounts, + DataFrameWhere, DataFrameWithColumn, }; pub use env::*; pub use filesystem::*; diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index bb1780cc8..7026da438 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -329,6 +329,17 @@ pub fn create_default_context(interactive: bool) -> Result Result<(), Shel whole_stream_command(Select), whole_stream_command(StrCollect), whole_stream_command(Wrap), + whole_stream_command(StrToDatetime), ]); for sample_pipeline in examples { diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index a03042659..056212f07 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -32,9 +32,9 @@ serde_yaml = "0.8.16" toml = "0.5.8" [dependencies.polars] -version = "0.14.8" +version = "0.15" optional = true -features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object"] +features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-duration-ns"] [features] dataframe = ["polars"] diff --git a/crates/nu-protocol/src/dataframe/conversion.rs b/crates/nu-protocol/src/dataframe/conversion.rs index 9f0d655e7..106219315 100644 --- a/crates/nu-protocol/src/dataframe/conversion.rs +++ b/crates/nu-protocol/src/dataframe/conversion.rs @@ -2,13 +2,14 @@ use indexmap::map::{Entry, IndexMap}; use polars::chunked_array::object::builder::ObjectChunkedBuilder; use polars::chunked_array::ChunkedArray; -use bigdecimal::FromPrimitive; +use bigdecimal::{FromPrimitive, ToPrimitive}; use chrono::{DateTime, FixedOffset, NaiveDateTime}; use nu_errors::ShellError; use nu_source::{Span, Tag}; use num_bigint::BigInt; use polars::prelude::{ - DataFrame, DataType, IntoSeries, NamedFrom, ObjectType, PolarsNumericType, Series, TimeUnit, + DataFrame, DataType, Date64Type, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, ObjectType, + PolarsNumericType, Series, TimeUnit, }; use std::ops::{Deref, DerefMut}; @@ -74,6 +75,8 @@ pub enum InputType { String, Boolean, Object, + Date, + Duration, } #[derive(Debug)] @@ -528,6 +531,12 @@ pub fn insert_value( UntaggedValue::Primitive(Primitive::Boolean(_)) => { col_val.column_type = Some(InputType::Boolean); } + UntaggedValue::Primitive(Primitive::Date(_)) => { + col_val.column_type = Some(InputType::Date); + } + UntaggedValue::Primitive(Primitive::Duration(_)) => { + col_val.column_type = Some(InputType::Duration); + } _ => col_val.column_type = Some(InputType::Object), } col_val.values.push(value); @@ -550,6 +559,14 @@ pub fn insert_value( | ( UntaggedValue::Primitive(Primitive::Boolean(_)), UntaggedValue::Primitive(Primitive::Boolean(_)), + ) + | ( + UntaggedValue::Primitive(Primitive::Date(_)), + UntaggedValue::Primitive(Primitive::Date(_)), + ) + | ( + UntaggedValue::Primitive(Primitive::Duration(_)), + UntaggedValue::Primitive(Primitive::Duration(_)), ) => col_val.values.push(value), _ => { col_val.column_type = Some(InputType::Object); @@ -607,6 +624,32 @@ pub fn from_parsed_columns( let res = builder.finish(); df_series.push(res.into_series()) } + InputType::Date => { + let it = column.values.iter().map(|v| { + if let UntaggedValue::Primitive(Primitive::Date(date)) = &v.value { + Some(date.timestamp_millis()) + } else { + None + } + }); + + let res = ChunkedArray::::new_from_opt_iter(&name, it); + + df_series.push(res.into_series()) + } + InputType::Duration => { + let it = column.values.iter().map(|v| { + if let UntaggedValue::Primitive(Primitive::Duration(duration)) = &v.value { + Some(duration.to_i64().expect("Not expecting NAN in duration")) + } else { + None + } + }); + + let res = ChunkedArray::::new_from_opt_iter(&name, it); + + df_series.push(res.into_series()) + } } } } diff --git a/crates/nu-protocol/src/dataframe/nu_dataframe.rs b/crates/nu-protocol/src/dataframe/nu_dataframe.rs index fe2022f06..736b9774d 100644 --- a/crates/nu-protocol/src/dataframe/nu_dataframe.rs +++ b/crates/nu-protocol/src/dataframe/nu_dataframe.rs @@ -174,6 +174,7 @@ impl NuDataFrame { | UntaggedValue::Primitive(Primitive::Decimal(_)) | UntaggedValue::Primitive(Primitive::String(_)) | UntaggedValue::Primitive(Primitive::Boolean(_)) + | UntaggedValue::Primitive(Primitive::Date(_)) | UntaggedValue::DataFrame(_) => { let key = format!("{}", 0); insert_value(value, key, &mut column_values)?