From 44dc890124e298573cdb91d92b0917b559fc4a09 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Fri, 15 Dec 2023 02:21:30 -0800 Subject: [PATCH] Polars Struct support without unsafe blocks (#11229) Second attempt at polars Struct support. This version avoid using unsafe checks by cloning the StructArray and utilizing the into_static to convert to a StructOwned. --------- Co-authored-by: Jack Wright --- Cargo.lock | 164 +++--- .../values/nu_dataframe/conversion.rs | 505 ++++++++++++++---- 2 files changed, 474 insertions(+), 195 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 348e7435d3..c3f1c88684 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,9 +114,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" dependencies = [ "anstyle", "anstyle-parse", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3a318f1f38d2418400f8209655bfd825785afd25aa30bb7ba6cc792e4596748" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ "windows-sys 0.52.0", ] @@ -241,7 +241,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -252,7 +252,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -352,7 +352,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -471,7 +471,7 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -974,7 +974,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -1196,7 +1196,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -1217,9 +1217,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "erased-serde" -version = "0.3.31" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c" +checksum = "a3286168faae03a0e583f6fde17c02c8b8bba2dcc2061d0f7817066e5b0af706" dependencies = [ "serde", ] @@ -1291,7 +1291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ "cfg-if", - "rustix 0.38.26", + "rustix 0.38.28", "windows-sys 0.48.0", ] @@ -1461,7 +1461,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -1730,9 +1730,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", @@ -1921,7 +1921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", - "rustix 0.38.26", + "rustix 0.38.28", "windows-sys 0.48.0", ] @@ -1976,9 +1976,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" @@ -2102,9 +2102,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libflate" @@ -2424,7 +2424,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -2469,9 +2469,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "log", @@ -3359,9 +3359,9 @@ checksum = "80adb31078122c880307e9cdfd4e3361e6545c319f9b9dcafcb03acd3b51a575" [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" @@ -3403,7 +3403,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -3414,9 +3414,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.1.6+3.1.4" +version = "300.2.1+3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439fac53e092cd7442a3660c85dde4643ab3b5bd39040912388dcdabf6b88085" +checksum = "3fe476c29791a5ca0d1273c697e96085bbabbbea2ef7afd5617e78a4b40332d3" dependencies = [ "cc", ] @@ -3585,7 +3585,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -3668,7 +3668,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -4105,9 +4105,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" [[package]] name = "powerfmt" @@ -4195,7 +4195,7 @@ dependencies = [ "hex", "lazy_static", "procfs-core", - "rustix 0.38.26", + "rustix 0.38.28", ] [[package]] @@ -4429,7 +4429,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -4553,7 +4553,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.39", + "syn 2.0.40", "unicode-ident", ] @@ -4573,9 +4573,9 @@ dependencies = [ [[package]] name = "rust-embed" -version = "8.0.0" +version = "8.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e7d90385b59f0a6bf3d3b757f3ca4ece2048265d70db20a2016043d4509a40" +checksum = "810294a8a4a0853d4118e3b94bb079905f2107c7fe979d8f0faae98765eb6378" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -4584,22 +4584,22 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.0.0" +version = "8.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3d8c6fd84090ae348e63a84336b112b5c3918b3bf0493a581f7bd8ee623c29" +checksum = "bfc144a1273124a67b8c1d7cd19f5695d1878b31569c0512f6086f0f4676604e" dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.39", + "syn 2.0.40", "walkdir", ] [[package]] name = "rust-embed-utils" -version = "8.0.0" +version = "8.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "873feff8cb7bf86fdf0a71bb21c95159f4e4a37dd7a4bd1855a940909b583ada" +checksum = "816ccd4875431253d6bb54b804bcff4369cbde9bae33defde25fdf6c2ef91d40" dependencies = [ "sha2", "walkdir", @@ -4662,9 +4662,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.26" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ "bitflags 2.4.1", "errno", @@ -4681,9 +4681,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "same-file" @@ -4795,7 +4795,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -4817,7 +4817,7 @@ checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -4876,7 +4876,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -5188,7 +5188,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -5253,9 +5253,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e" dependencies = [ "proc-macro2", "quote", @@ -5313,7 +5313,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall", - "rustix 0.38.26", + "rustix 0.38.28", "windows-sys 0.48.0", ] @@ -5363,7 +5363,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" dependencies = [ - "rustix 0.38.26", + "rustix 0.38.28", "windows-sys 0.48.0", ] @@ -5401,7 +5401,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -5492,9 +5492,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.34.0" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c" dependencies = [ "backtrace", "bytes", @@ -5517,7 +5517,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -5636,9 +5636,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typed-arena" @@ -5654,9 +5654,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typetag" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80960fd143d4c96275c0e60b08f14b81fbb468e79bc0ef8fbda69fb0afafae43" +checksum = "196976efd4a62737b3a2b662cda76efb448d099b1049613d7a5d72743c611ce0" dependencies = [ "erased-serde", "inventory", @@ -5667,13 +5667,13 @@ dependencies = [ [[package]] name = "typetag-impl" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfc13d450dc4a695200da3074dacf43d449b968baee95e341920e47f61a3b40f" +checksum = "2eea6765137e2414c44c7b1e07c73965a118a72c46148e1e168b3fc9d3ccf3aa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] @@ -5702,9 +5702,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" [[package]] name = "unicode-ident" @@ -6004,7 +6004,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", "wasm-bindgen-shared", ] @@ -6026,7 +6026,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6071,7 +6071,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix 0.38.26", + "rustix 0.38.28", ] [[package]] @@ -6083,7 +6083,7 @@ dependencies = [ "either", "home", "once_cell", - "rustix 0.38.26", + "rustix 0.38.28", "windows-sys 0.48.0", ] @@ -6354,9 +6354,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "winnow" -version = "0.5.24" +version = "0.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0383266b19108dfc6314a56047aa545a1b4d1be60e799b4dbdd407b56402704b" +checksum = "6c830786f7720c2fd27a1a0e27a709dbd3c4d009b56d098fc742d4f4eab91fe2" dependencies = [ "memchr", ] @@ -6383,11 +6383,13 @@ dependencies = [ [[package]] name = "xattr" -version = "1.0.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" +checksum = "a7dae5072fe1f8db8f8d29059189ac175196e410e40ba42d5d4684ae2f750995" dependencies = [ "libc", + "linux-raw-sys 0.4.12", + "rustix 0.38.28", ] [[package]] @@ -6410,22 +6412,22 @@ checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" [[package]] name = "zerocopy" -version = "0.7.29" +version = "0.7.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d075cf85bbb114e933343e087b92f2146bac0d55b534cbb8188becf0039948e" +checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.29" +version = "0.7.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86cd5ca076997b97ef09d3ad65efe811fa68c9e874cb636ccb211223a813b0c2" +checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.40", ] [[package]] diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs index 099a6cbada..d7b3ba7d8a 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs @@ -1,11 +1,12 @@ -use super::{DataFrameValue, NuDataFrame}; +use std::ops::{Deref, DerefMut}; -use chrono::{DateTime, FixedOffset, NaiveDateTime}; +use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc}; +use chrono_tz::Tz; use indexmap::map::{Entry, IndexMap}; -use nu_protocol::{Record, ShellError, Span, Value}; use polars::chunked_array::builder::AnonymousOwnedListBuilder; use polars::chunked_array::object::builder::ObjectChunkedBuilder; use polars::chunked_array::ChunkedArray; +use polars::datatypes::AnyValue; use polars::export::arrow::Either; use polars::prelude::{ DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries, @@ -13,11 +14,14 @@ use polars::prelude::{ ListUtf8ChunkedBuilder, NamedFrom, NewChunkedArray, ObjectType, Series, TemporalMethods, TimeUnit, }; -use std::ops::{Deref, DerefMut}; -const SECS_PER_DAY: i64 = 86_400; +use nu_protocol::{Record, ShellError, Span, Value}; -// The values capacity is for the size of an internal vec. +use super::{DataFrameValue, NuDataFrame}; + +const NANOS_PER_DAY: i64 = 86_400_000_000_000; + +// The values capacity is for the size of an vec. // Since this is impossible to determine without traversing every value // I just picked one. Since this is for converting back and forth // between nushell tables the values shouldn't be too extremely large for @@ -199,7 +203,7 @@ fn value_to_input_type(value: &Value) -> InputType { Value::Filesize { .. } => InputType::Filesize, Value::List { vals, .. } => { // We need to determined the type inside of the list. - // Since Value::List does not have any kind of internal + // Since Value::List does not have any kind of // type information, we need to look inside the list. // This will cause errors if lists have inconsistent types. // Basically, if a list column needs to be converted to dataframe, @@ -775,28 +779,21 @@ fn series_to_values( }), Some(ca) => { let it = ca.into_iter(); - let values: Vec = - if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|ca| { + let sublist: Vec = if let Some(ref s) = ca { + series_to_values(s, None, None, Span::unknown())? } else { - Either::Right(it) - } - .map(|ca| { - let sublist = ca - .map(|ref s| { - match series_to_values(s, None, None, Span::unknown()) { - Ok(v) => v, - Err(e) => { - eprintln!("Error list values: {e}"); - vec![] - } - } - }) - .unwrap_or(vec![]); - Value::list(sublist, span) - }) - .collect::>(); - Ok(values) + // empty item + vec![] + }; + Ok(Value::list(sublist, span)) + }) + .collect::, ShellError>>() } } } @@ -817,51 +814,16 @@ fn series_to_values( } .map(|v| match v { Some(a) => { - // elapsed time in day since 1970-01-01 - let seconds = a as i64 * SECS_PER_DAY; - let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - // Zero length offset - let offset = match FixedOffset::east_opt(0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - let datetime = - DateTime::::from_naive_utc_and_offset(naive_datetime, offset); - - Value::date(datetime, span) + let nanos = nanos_per_day(a); + let datetime = datetime_from_epoch_nanos(nanos, &None, span)?; + Ok(Value::date(datetime, span)) } - None => Value::nothing(span), + None => Ok(Value::nothing(span)), }) - .collect::>(); - + .collect::, ShellError>>()?; Ok(values) } - DataType::Datetime(time_unit, _) => { + DataType::Datetime(time_unit, tz) => { let casted = series.datetime().map_err(|e| ShellError::GenericError { error: "Error casting column to datetime".into(), msg: "".into(), @@ -878,55 +840,46 @@ fn series_to_values( } .map(|v| match v { Some(a) => { - let unit_divisor = match time_unit { - TimeUnit::Nanoseconds => 1_000_000_000, - TimeUnit::Microseconds => 1_000_000, - TimeUnit::Milliseconds => 1_000, - }; // elapsed time in nano/micro/milliseconds since 1970-01-01 - let seconds = a / unit_divisor; - let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - // Zero length offset - let offset = match FixedOffset::east_opt(0) { - Some(val) => val, - None => { - return Value::error( - ShellError::UnsupportedInput { - msg: "The given local datetime representation is invalid." - .to_string(), - input: format!("timestamp is {a:?}"), - msg_span: span, - input_span: Span::unknown(), - }, - span, - ) - } - }; - let datetime = - DateTime::::from_naive_utc_and_offset(naive_datetime, offset); - - Value::date(datetime, span) + let nanos = nanos_from_timeunit(a, *time_unit); + let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; + Ok(Value::date(datetime, span)) } - None => Value::nothing(span), + None => Ok(Value::nothing(span)), }) - .collect::>(); - + .collect::, ShellError>>()?; Ok(values) } + DataType::Struct(polar_fields) => { + let casted = series.struct_().map_err(|e| ShellError::GenericError { + error: "Error casting column to struct".into(), + msg: "".to_string(), + span: None, + help: Some(e.to_string()), + inner: Vec::new(), + })?; + let it = casted.into_iter(); + let values: Result, ShellError> = + if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { + Either::Left(it.skip(from_row).take(size)) + } else { + Either::Right(it) + } + .map(|any_values| { + let vals: Result, ShellError> = any_values + .iter() + .map(|v| any_value_to_value(v, span)) + .collect(); + let cols: Vec = polar_fields + .iter() + .map(|field| field.name.to_string()) + .collect(); + let record = Record { cols, vals: vals? }; + Ok(Value::record(record, span)) + }) + .collect(); + values + } DataType::Time => { let casted = series @@ -963,10 +916,154 @@ fn series_to_values( } } +fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { + match any_value { + AnyValue::Null => Ok(Value::nothing(span)), + AnyValue::Boolean(b) => Ok(Value::bool(*b, span)), + AnyValue::Utf8(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::UInt8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::UInt64(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int8(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int16(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int32(i) => Ok(Value::int(*i as i64, span)), + AnyValue::Int64(i) => Ok(Value::int(*i, span)), + AnyValue::Float32(f) => Ok(Value::float(*f as f64, span)), + AnyValue::Float64(f) => Ok(Value::float(*f, span)), + AnyValue::Date(d) => { + let nanos = nanos_per_day(*d); + datetime_from_epoch_nanos(nanos, &None, span) + .map(|datetime| Value::date(datetime, span)) + } + AnyValue::Datetime(a, time_unit, tz) => { + let nanos = nanos_from_timeunit(*a, *time_unit); + datetime_from_epoch_nanos(nanos, tz, span).map(|datetime| Value::date(datetime, span)) + } + AnyValue::Duration(a, time_unit) => { + let nanos = match time_unit { + TimeUnit::Nanoseconds => *a, + TimeUnit::Microseconds => *a * 1_000, + TimeUnit::Milliseconds => *a * 1_000_000, + }; + Ok(Value::duration(nanos, span)) + } + // AnyValue::Time represents the current time since midnight. + // Unfortunately, there is no timezone related information. + // Given this, calculate the current date from UTC and add the time. + AnyValue::Time(nanos) => time_from_midnight(*nanos, span), + AnyValue::List(series) => { + series_to_values(series, None, None, span).map(|values| Value::list(values, span)) + } + AnyValue::Struct(_idx, _struct_array, _s_fields) => { + // This should convert to a StructOwned object. + let static_value = + any_value + .clone() + .into_static() + .map_err(|e| ShellError::GenericError { + error: "Cannot convert polars struct to static value".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: Vec::new(), + })?; + any_value_to_value(&static_value, span) + } + AnyValue::StructOwned(struct_tuple) => { + let values: Result, ShellError> = struct_tuple + .0 + .iter() + .map(|s| any_value_to_value(s, span)) + .collect(); + let fields = struct_tuple + .1 + .iter() + .map(|f| f.name().to_string()) + .collect(); + Ok(Value::Record { + val: Record { + cols: fields, + vals: values?, + }, + internal_span: span, + }) + } + AnyValue::Utf8Owned(s) => Ok(Value::string(s.to_string(), span)), + AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)), + AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)), + e => Err(ShellError::GenericError { + error: "Error creating Value".into(), + msg: "".to_string(), + span: None, + help: Some(format!("Value not supported in nushell: {e}")), + inner: Vec::new(), + }), + } +} + +fn nanos_per_day(days: i32) -> i64 { + days as i64 * NANOS_PER_DAY +} + +fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { + a * match time_unit { + TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds + TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds + TimeUnit::Nanoseconds => 1, // Already in nanoseconds + } +} + +fn datetime_from_epoch_nanos( + nanos: i64, + timezone: &Option, + span: Span, +) -> Result, ShellError> { + let tz: Tz = if let Some(polars_tz) = timezone { + polars_tz + .parse::() + .map_err(|_| ShellError::GenericError { + error: format!("Could not parse polars timezone: {polars_tz}"), + msg: "".to_string(), + span: Some(span), + help: None, + inner: vec![], + })? + } else { + Tz::UTC + }; + + Ok(tz.timestamp_nanos(nanos).fixed_offset()) +} + +fn time_from_midnight(nanos: i64, span: Span) -> Result { + let today = Utc::now().date_naive(); + NaiveTime::from_hms_opt(0, 0, 0) // midnight + .map(|time| time + Duration::nanoseconds(nanos)) // current time + .map(|time| today.and_time(time)) // current date and time + .and_then(|datetime| { + FixedOffset::east_opt(0) // utc + .map(|offset| { + DateTime::::from_naive_utc_and_offset(datetime, offset) + }) + }) + .map(|datetime| Value::date(datetime, span)) // current date and time + .ok_or(ShellError::CantConvert { + to_type: "datetime".to_string(), + from_type: "polars time".to_string(), + span, + help: Some("Could not convert polars time of {nanos} to datetime".to_string()), + }) +} + #[cfg(test)] mod tests { - use super::*; use indexmap::indexmap; + use polars::export::arrow::array::{BooleanArray, PrimitiveArray}; + use polars::prelude::Field; + use polars_io::prelude::StructArray; + + use super::*; #[test] fn test_parsed_column_string_list() -> Result<(), Box> { @@ -1001,4 +1098,184 @@ mod tests { Ok(()) } + + #[test] + fn test_any_value_to_value() -> Result<(), Box> { + let span = Span::test_data(); + assert_eq!( + any_value_to_value(&AnyValue::Null, span)?, + Value::nothing(span) + ); + + let test_bool = true; + assert_eq!( + any_value_to_value(&AnyValue::Boolean(test_bool), span)?, + Value::bool(test_bool, span) + ); + + let test_str = "foo"; + assert_eq!( + any_value_to_value(&AnyValue::Utf8(test_str), span)?, + Value::string(test_str.to_string(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::Utf8Owned(test_str.into()), span)?, + Value::string(test_str.to_owned(), span) + ); + + let tests_uint8 = 4; + assert_eq!( + any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?, + Value::int(tests_uint8 as i64, span) + ); + + let tests_uint16 = 233; + assert_eq!( + any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?, + Value::int(tests_uint16 as i64, span) + ); + + let tests_uint32 = 897688233; + assert_eq!( + any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?, + Value::int(tests_uint32 as i64, span) + ); + + let tests_uint64 = 903225135897388233; + assert_eq!( + any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?, + Value::int(tests_uint64 as i64, span) + ); + + let tests_float32 = 903225135897388233.3223353; + assert_eq!( + any_value_to_value(&AnyValue::Float32(tests_float32), span)?, + Value::float(tests_float32 as f64, span) + ); + + let tests_float64 = 9064251358973882322333.64233533232; + assert_eq!( + any_value_to_value(&AnyValue::Float64(tests_float64), span)?, + Value::float(tests_float64, span) + ); + + let test_days = 10_957; + let comparison_date = Utc + .with_ymd_and_hms(2000, 1, 1, 0, 0, 0) + .unwrap() + .fixed_offset(); + assert_eq!( + any_value_to_value(&AnyValue::Date(test_days), span)?, + Value::date(comparison_date, span) + ); + + let test_millis = 946_684_800_000; + assert_eq!( + any_value_to_value( + &AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None), + span + )?, + Value::date(comparison_date, span) + ); + + let test_duration_millis = 99_999; + let test_duration_micros = 99_999_000; + let test_duration_nanos = 99_999_000_000; + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + assert_eq!( + any_value_to_value( + &AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds), + span + )?, + Value::duration(test_duration_nanos, span) + ); + + let test_binary = b"sdf2332f32q3f3afwaf3232f32"; + assert_eq!( + any_value_to_value(&AnyValue::Binary(test_binary), span)?, + Value::binary(test_binary.to_vec(), span) + ); + assert_eq!( + any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?, + Value::binary(test_binary.to_vec(), span) + ); + + let test_time_nanos = 54_000_000_000_000; + let test_time = DateTime::::from_naive_utc_and_offset( + Utc::now() + .date_naive() + .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()), + FixedOffset::east_opt(0).unwrap(), + ); + assert_eq!( + any_value_to_value(&AnyValue::Time(test_time_nanos), span)?, + Value::date(test_time, span) + ); + + let test_list_series = Series::new("int series", &[1, 2, 3]); + let comparison_list_series = Value::list( + vec![ + Value::int(1, span), + Value::int(2, span), + Value::int(3, span), + ], + span, + ); + assert_eq!( + any_value_to_value(&AnyValue::List(test_list_series), span)?, + comparison_list_series + ); + + let field_value_0 = AnyValue::Int32(1); + let field_value_1 = AnyValue::Boolean(true); + let values = vec![field_value_0, field_value_1]; + let field_name_0 = "num_field"; + let field_name_1 = "bool_field"; + let fields = vec![ + Field::new(field_name_0, DataType::Int32), + Field::new(field_name_1, DataType::Boolean), + ]; + let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone()))); + let comparison_owned_record = Value::record( + Record { + cols: vec![field_name_0.to_owned(), field_name_1.to_owned()], + vals: vec![Value::int(1, span), Value::bool(true, span)], + }, + span, + ); + assert_eq!( + any_value_to_value(&test_owned_struct, span)?, + comparison_owned_record.clone() + ); + + let test_int_arr = PrimitiveArray::from([Some(1_i32)]); + let test_bool_arr = BooleanArray::from([Some(true)]); + let test_struct_arr = StructArray::new( + DataType::Struct(fields.clone()).to_arrow(), + vec![Box::new(test_int_arr), Box::new(test_bool_arr)], + None, + ); + assert_eq!( + any_value_to_value( + &AnyValue::Struct(0, &test_struct_arr, fields.as_slice()), + span + )?, + comparison_owned_record + ); + + Ok(()) + } }