Polars Struct support without unsafe blocks (#11229)

Second attempt at polars Struct support. This version avoids using unsafe
blocks by cloning the StructArray and using into_static to
convert it to a StructOwned.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright 2023-12-15 02:21:30 -08:00 committed by GitHub
parent 6ead98effb
commit 44dc890124
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 474 additions and 195 deletions

164
Cargo.lock generated
View File

@ -114,9 +114,9 @@ dependencies = [
[[package]]
name = "anstream"
version = "0.6.4"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
@ -143,9 +143,9 @@ dependencies = [
[[package]]
name = "anstyle-query"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3a318f1f38d2418400f8209655bfd825785afd25aa30bb7ba6cc792e4596748"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys 0.52.0",
]
@ -241,7 +241,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -252,7 +252,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -352,7 +352,7 @@ dependencies = [
"regex",
"rustc-hash",
"shlex",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -471,7 +471,7 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -974,7 +974,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1196,7 +1196,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1217,9 +1217,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "erased-serde"
version = "0.3.31"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c"
checksum = "a3286168faae03a0e583f6fde17c02c8b8bba2dcc2061d0f7817066e5b0af706"
dependencies = [
"serde",
]
@ -1291,7 +1291,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
dependencies = [
"cfg-if",
"rustix 0.38.26",
"rustix 0.38.28",
"windows-sys 0.48.0",
]
@ -1461,7 +1461,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1730,9 +1730,9 @@ dependencies = [
[[package]]
name = "http-body"
version = "0.4.5"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
dependencies = [
"bytes",
"http",
@ -1921,7 +1921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"hermit-abi",
"rustix 0.38.26",
"rustix 0.38.28",
"windows-sys 0.48.0",
]
@ -1976,9 +1976,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.9"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "jobserver"
@ -2102,9 +2102,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libflate"
@ -2424,7 +2424,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -2469,9 +2469,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.8.9"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
dependencies = [
"libc",
"log",
@ -3359,9 +3359,9 @@ checksum = "80adb31078122c880307e9cdfd4e3361e6545c319f9b9dcafcb03acd3b51a575"
[[package]]
name = "once_cell"
version = "1.18.0"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "oorandom"
@ -3403,7 +3403,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3414,9 +3414,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-src"
version = "300.1.6+3.1.4"
version = "300.2.1+3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439fac53e092cd7442a3660c85dde4643ab3b5bd39040912388dcdabf6b88085"
checksum = "3fe476c29791a5ca0d1273c697e96085bbabbbea2ef7afd5617e78a4b40332d3"
dependencies = [
"cc",
]
@ -3585,7 +3585,7 @@ dependencies = [
"pest_meta",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3668,7 +3668,7 @@ dependencies = [
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -4105,9 +4105,9 @@ dependencies = [
[[package]]
name = "portable-atomic"
version = "1.5.1"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "powerfmt"
@ -4195,7 +4195,7 @@ dependencies = [
"hex",
"lazy_static",
"procfs-core",
"rustix 0.38.26",
"rustix 0.38.28",
]
[[package]]
@ -4429,7 +4429,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -4553,7 +4553,7 @@ dependencies = [
"regex",
"relative-path",
"rustc_version",
"syn 2.0.39",
"syn 2.0.40",
"unicode-ident",
]
@ -4573,9 +4573,9 @@ dependencies = [
[[package]]
name = "rust-embed"
version = "8.0.0"
version = "8.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1e7d90385b59f0a6bf3d3b757f3ca4ece2048265d70db20a2016043d4509a40"
checksum = "810294a8a4a0853d4118e3b94bb079905f2107c7fe979d8f0faae98765eb6378"
dependencies = [
"rust-embed-impl",
"rust-embed-utils",
@ -4584,22 +4584,22 @@ dependencies = [
[[package]]
name = "rust-embed-impl"
version = "8.0.0"
version = "8.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c3d8c6fd84090ae348e63a84336b112b5c3918b3bf0493a581f7bd8ee623c29"
checksum = "bfc144a1273124a67b8c1d7cd19f5695d1878b31569c0512f6086f0f4676604e"
dependencies = [
"proc-macro2",
"quote",
"rust-embed-utils",
"syn 2.0.39",
"syn 2.0.40",
"walkdir",
]
[[package]]
name = "rust-embed-utils"
version = "8.0.0"
version = "8.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "873feff8cb7bf86fdf0a71bb21c95159f4e4a37dd7a4bd1855a940909b583ada"
checksum = "816ccd4875431253d6bb54b804bcff4369cbde9bae33defde25fdf6c2ef91d40"
dependencies = [
"sha2",
"walkdir",
@ -4662,9 +4662,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.26"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags 2.4.1",
"errno",
@ -4681,9 +4681,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "ryu"
version = "1.0.15"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "same-file"
@ -4795,7 +4795,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -4817,7 +4817,7 @@ checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -4876,7 +4876,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -5188,7 +5188,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -5253,9 +5253,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.39"
version = "2.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e"
dependencies = [
"proc-macro2",
"quote",
@ -5313,7 +5313,7 @@ dependencies = [
"cfg-if",
"fastrand",
"redox_syscall",
"rustix 0.38.26",
"rustix 0.38.28",
"windows-sys 0.48.0",
]
@ -5363,7 +5363,7 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
dependencies = [
"rustix 0.38.26",
"rustix 0.38.28",
"windows-sys 0.48.0",
]
@ -5401,7 +5401,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -5492,9 +5492,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.34.0"
version = "1.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9"
checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c"
dependencies = [
"backtrace",
"bytes",
@ -5517,7 +5517,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -5636,9 +5636,9 @@ dependencies = [
[[package]]
name = "try-lock"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typed-arena"
@ -5654,9 +5654,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "typetag"
version = "0.2.13"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80960fd143d4c96275c0e60b08f14b81fbb468e79bc0ef8fbda69fb0afafae43"
checksum = "196976efd4a62737b3a2b662cda76efb448d099b1049613d7a5d72743c611ce0"
dependencies = [
"erased-serde",
"inventory",
@ -5667,13 +5667,13 @@ dependencies = [
[[package]]
name = "typetag-impl"
version = "0.2.13"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfc13d450dc4a695200da3074dacf43d449b968baee95e341920e47f61a3b40f"
checksum = "2eea6765137e2414c44c7b1e07c73965a118a72c46148e1e168b3fc9d3ccf3aa"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -5702,9 +5702,9 @@ dependencies = [
[[package]]
name = "unicode-bidi"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416"
[[package]]
name = "unicode-ident"
@ -6004,7 +6004,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
"wasm-bindgen-shared",
]
@ -6026,7 +6026,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -6071,7 +6071,7 @@ dependencies = [
"either",
"home",
"once_cell",
"rustix 0.38.26",
"rustix 0.38.28",
]
[[package]]
@ -6083,7 +6083,7 @@ dependencies = [
"either",
"home",
"once_cell",
"rustix 0.38.26",
"rustix 0.38.28",
"windows-sys 0.48.0",
]
@ -6354,9 +6354,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "winnow"
version = "0.5.24"
version = "0.5.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0383266b19108dfc6314a56047aa545a1b4d1be60e799b4dbdd407b56402704b"
checksum = "6c830786f7720c2fd27a1a0e27a709dbd3c4d009b56d098fc742d4f4eab91fe2"
dependencies = [
"memchr",
]
@ -6383,11 +6383,13 @@ dependencies = [
[[package]]
name = "xattr"
version = "1.0.1"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985"
checksum = "a7dae5072fe1f8db8f8d29059189ac175196e410e40ba42d5d4684ae2f750995"
dependencies = [
"libc",
"linux-raw-sys 0.4.12",
"rustix 0.38.28",
]
[[package]]
@ -6410,22 +6412,22 @@ checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "zerocopy"
version = "0.7.29"
version = "0.7.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d075cf85bbb114e933343e087b92f2146bac0d55b534cbb8188becf0039948e"
checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.29"
version = "0.7.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86cd5ca076997b97ef09d3ad65efe811fa68c9e874cb636ccb211223a813b0c2"
checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]

View File

@ -1,11 +1,12 @@
use super::{DataFrameValue, NuDataFrame};
use std::ops::{Deref, DerefMut};
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
use chrono_tz::Tz;
use indexmap::map::{Entry, IndexMap};
use nu_protocol::{Record, ShellError, Span, Value};
use polars::chunked_array::builder::AnonymousOwnedListBuilder;
use polars::chunked_array::object::builder::ObjectChunkedBuilder;
use polars::chunked_array::ChunkedArray;
use polars::datatypes::AnyValue;
use polars::export::arrow::Either;
use polars::prelude::{
DataFrame, DataType, DatetimeChunked, Float64Type, Int64Type, IntoSeries,
@ -13,11 +14,14 @@ use polars::prelude::{
ListUtf8ChunkedBuilder, NamedFrom, NewChunkedArray, ObjectType, Series, TemporalMethods,
TimeUnit,
};
use std::ops::{Deref, DerefMut};
const SECS_PER_DAY: i64 = 86_400;
use nu_protocol::{Record, ShellError, Span, Value};
// The values capacity is for the size of an internal vec.
use super::{DataFrameValue, NuDataFrame};
const NANOS_PER_DAY: i64 = 86_400_000_000_000;
// The values capacity is for the size of a vec.
// Since this is impossible to determine without traversing every value
// I just picked one. Since this is for converting back and forth
// between nushell tables the values shouldn't be too extremely large for
@ -199,7 +203,7 @@ fn value_to_input_type(value: &Value) -> InputType {
Value::Filesize { .. } => InputType::Filesize,
Value::List { vals, .. } => {
// We need to determine the type inside of the list.
// Since Value::List does not have any kind of internal
// Since Value::List does not have any kind of
// type information, we need to look inside the list.
// This will cause errors if lists have inconsistent types.
// Basically, if a list column needs to be converted to dataframe,
@ -775,28 +779,21 @@ fn series_to_values(
}),
Some(ca) => {
let it = ca.into_iter();
let values: Vec<Value> =
if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) {
Either::Left(it.skip(from_row).take(size))
} else {
Either::Right(it)
}
.map(|ca| {
let sublist = ca
.map(|ref s| {
match series_to_values(s, None, None, Span::unknown()) {
Ok(v) => v,
Err(e) => {
eprintln!("Error list values: {e}");
let sublist: Vec<Value> = if let Some(ref s) = ca {
series_to_values(s, None, None, Span::unknown())?
} else {
// empty item
vec![]
}
}
};
Ok(Value::list(sublist, span))
})
.unwrap_or(vec![]);
Value::list(sublist, span)
})
.collect::<Vec<Value>>();
Ok(values)
.collect::<Result<Vec<Value>, ShellError>>()
}
}
}
@ -817,51 +814,16 @@ fn series_to_values(
}
.map(|v| match v {
Some(a) => {
// elapsed time in day since 1970-01-01
let seconds = a as i64 * SECS_PER_DAY;
let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) {
Some(val) => val,
None => {
return Value::error(
ShellError::UnsupportedInput {
msg: "The given local datetime representation is invalid."
.to_string(),
input: format!("timestamp is {a:?}"),
msg_span: span,
input_span: Span::unknown(),
},
span,
)
let nanos = nanos_per_day(a);
let datetime = datetime_from_epoch_nanos(nanos, &None, span)?;
Ok(Value::date(datetime, span))
}
};
// Zero length offset
let offset = match FixedOffset::east_opt(0) {
Some(val) => val,
None => {
return Value::error(
ShellError::UnsupportedInput {
msg: "The given local datetime representation is invalid."
.to_string(),
input: format!("timestamp is {a:?}"),
msg_span: span,
input_span: Span::unknown(),
},
span,
)
}
};
let datetime =
DateTime::<FixedOffset>::from_naive_utc_and_offset(naive_datetime, offset);
Value::date(datetime, span)
}
None => Value::nothing(span),
None => Ok(Value::nothing(span)),
})
.collect::<Vec<Value>>();
.collect::<Result<Vec<Value>, ShellError>>()?;
Ok(values)
}
DataType::Datetime(time_unit, _) => {
DataType::Datetime(time_unit, tz) => {
let casted = series.datetime().map_err(|e| ShellError::GenericError {
error: "Error casting column to datetime".into(),
msg: "".into(),
@ -878,55 +840,46 @@ fn series_to_values(
}
.map(|v| match v {
Some(a) => {
let unit_divisor = match time_unit {
TimeUnit::Nanoseconds => 1_000_000_000,
TimeUnit::Microseconds => 1_000_000,
TimeUnit::Milliseconds => 1_000,
};
// elapsed time in nano/micro/milliseconds since 1970-01-01
let seconds = a / unit_divisor;
let naive_datetime = match NaiveDateTime::from_timestamp_opt(seconds, 0) {
Some(val) => val,
None => {
return Value::error(
ShellError::UnsupportedInput {
msg: "The given local datetime representation is invalid."
.to_string(),
input: format!("timestamp is {a:?}"),
msg_span: span,
input_span: Span::unknown(),
},
span,
)
let nanos = nanos_from_timeunit(a, *time_unit);
let datetime = datetime_from_epoch_nanos(nanos, tz, span)?;
Ok(Value::date(datetime, span))
}
};
// Zero length offset
let offset = match FixedOffset::east_opt(0) {
Some(val) => val,
None => {
return Value::error(
ShellError::UnsupportedInput {
msg: "The given local datetime representation is invalid."
.to_string(),
input: format!("timestamp is {a:?}"),
msg_span: span,
input_span: Span::unknown(),
},
span,
)
}
};
let datetime =
DateTime::<FixedOffset>::from_naive_utc_and_offset(naive_datetime, offset);
Value::date(datetime, span)
}
None => Value::nothing(span),
None => Ok(Value::nothing(span)),
})
.collect::<Vec<Value>>();
.collect::<Result<Vec<Value>, ShellError>>()?;
Ok(values)
}
DataType::Struct(polar_fields) => {
let casted = series.struct_().map_err(|e| ShellError::GenericError {
error: "Error casting column to struct".into(),
msg: "".to_string(),
span: None,
help: Some(e.to_string()),
inner: Vec::new(),
})?;
let it = casted.into_iter();
let values: Result<Vec<Value>, ShellError> =
if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) {
Either::Left(it.skip(from_row).take(size))
} else {
Either::Right(it)
}
.map(|any_values| {
let vals: Result<Vec<Value>, ShellError> = any_values
.iter()
.map(|v| any_value_to_value(v, span))
.collect();
let cols: Vec<String> = polar_fields
.iter()
.map(|field| field.name.to_string())
.collect();
let record = Record { cols, vals: vals? };
Ok(Value::record(record, span))
})
.collect();
values
}
DataType::Time => {
let casted =
series
@ -963,10 +916,154 @@ fn series_to_values(
}
}
/// Converts a single polars `AnyValue` into the matching nushell `Value`.
///
/// Scalars map onto the corresponding nushell primitive, temporal values are
/// normalised to nanoseconds before conversion, lists are converted through
/// `series_to_values`, and structs become records (recursively).
///
/// # Errors
///
/// Returns a `ShellError` when a nested conversion fails, when a borrowed
/// struct cannot be turned into a static value, or when the variant has no
/// conversion here.
fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result<Value, ShellError> {
    let converted = match any_value {
        AnyValue::Null => Value::nothing(span),
        AnyValue::Boolean(flag) => Value::bool(*flag, span),
        AnyValue::Utf8(text) => Value::string(text.to_string(), span),
        AnyValue::Utf8Owned(text) => Value::string(text.to_string(), span),
        AnyValue::UInt8(n) => Value::int(i64::from(*n), span),
        AnyValue::UInt16(n) => Value::int(i64::from(*n), span),
        AnyValue::UInt32(n) => Value::int(i64::from(*n), span),
        // Plain `as` cast on purpose: there is no lossless `From<u64> for i64`.
        AnyValue::UInt64(n) => Value::int(*n as i64, span),
        AnyValue::Int8(n) => Value::int(i64::from(*n), span),
        AnyValue::Int16(n) => Value::int(i64::from(*n), span),
        AnyValue::Int32(n) => Value::int(i64::from(*n), span),
        AnyValue::Int64(n) => Value::int(*n, span),
        AnyValue::Float32(x) => Value::float(f64::from(*x), span),
        AnyValue::Float64(x) => Value::float(*x, span),
        AnyValue::Date(days) => {
            // Dates carry a day count; no timezone is supplied for them.
            let datetime = datetime_from_epoch_nanos(nanos_per_day(*days), &None, span)?;
            Value::date(datetime, span)
        }
        AnyValue::Datetime(ticks, time_unit, tz) => {
            let epoch_nanos = nanos_from_timeunit(*ticks, *time_unit);
            let datetime = datetime_from_epoch_nanos(epoch_nanos, tz, span)?;
            Value::date(datetime, span)
        }
        AnyValue::Duration(ticks, time_unit) => {
            Value::duration(nanos_from_timeunit(*ticks, *time_unit), span)
        }
        // AnyValue::Time holds the time elapsed since midnight and carries no
        // timezone information, so the value is attached to the current UTC
        // date by `time_from_midnight`.
        AnyValue::Time(nanos) => time_from_midnight(*nanos, span)?,
        AnyValue::List(series) => {
            let items = series_to_values(series, None, None, span)?;
            Value::list(items, span)
        }
        AnyValue::Struct(..) => {
            // Clone into a static value (expected to be a StructOwned) and
            // convert that instead of reading the borrowed struct array.
            let static_value =
                any_value
                    .clone()
                    .into_static()
                    .map_err(|e| ShellError::GenericError {
                        error: "Cannot convert polars struct to static value".into(),
                        msg: e.to_string(),
                        span: Some(span),
                        help: None,
                        inner: Vec::new(),
                    })?;
            any_value_to_value(&static_value, span)?
        }
        AnyValue::StructOwned(struct_tuple) => {
            // Element 0 holds the field values, element 1 the field definitions.
            let vals = struct_tuple
                .0
                .iter()
                .map(|field_value| any_value_to_value(field_value, span))
                .collect::<Result<Vec<Value>, ShellError>>()?;
            let cols = struct_tuple
                .1
                .iter()
                .map(|field| field.name().to_string())
                .collect();
            Value::record(Record { cols, vals }, span)
        }
        AnyValue::Binary(bytes) => Value::binary(*bytes, span),
        AnyValue::BinaryOwned(bytes) => Value::binary(bytes.to_owned(), span),
        e => {
            return Err(ShellError::GenericError {
                error: "Error creating Value".into(),
                msg: "".to_string(),
                span: None,
                help: Some(format!("Value not supported in nushell: {e}")),
                inner: Vec::new(),
            })
        }
    };
    Ok(converted)
}
/// Converts a number of days into the equivalent number of nanoseconds,
/// using the file-level `NANOS_PER_DAY` constant as the scale factor.
fn nanos_per_day(days: i32) -> i64 {
    let day_count = i64::from(days);
    day_count * NANOS_PER_DAY
}
/// Scales `a`, expressed in `time_unit`, to nanoseconds.
fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 {
    match time_unit {
        // Already in nanoseconds
        TimeUnit::Nanoseconds => a * 1,
        // 1 microsecond = 1_000 nanoseconds
        TimeUnit::Microseconds => a * 1_000,
        // 1 millisecond = 1_000_000 nanoseconds
        TimeUnit::Milliseconds => a * 1_000_000,
    }
}
/// Builds a fixed-offset datetime from a nanosecond timestamp.
///
/// When `timezone` is `Some`, its contents are parsed as a `Tz`; when it is
/// `None`, UTC is used. The timestamp is interpreted in that zone and then
/// converted to a `DateTime<FixedOffset>`.
///
/// # Errors
///
/// Returns `ShellError::GenericError` if the timezone string cannot be parsed.
fn datetime_from_epoch_nanos(
    nanos: i64,
    timezone: &Option<String>,
    span: Span,
) -> Result<DateTime<FixedOffset>, ShellError> {
    let zone = match timezone {
        None => Tz::UTC,
        Some(polars_tz) => match polars_tz.parse::<Tz>() {
            Ok(parsed) => parsed,
            Err(_) => {
                return Err(ShellError::GenericError {
                    error: format!("Could not parse polars timezone: {polars_tz}"),
                    msg: "".to_string(),
                    span: Some(span),
                    help: None,
                    inner: vec![],
                })
            }
        },
    };
    let zoned = zone.timestamp_nanos(nanos);
    Ok(zoned.fixed_offset())
}
/// Converts a polars time (nanoseconds since midnight) into a nushell date
/// value on today's UTC date.
///
/// The polars time carries no date or timezone, so the current UTC date is
/// used and the result is given a zero (UTC) offset.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` if midnight or the zero offset cannot be
/// constructed.
fn time_from_midnight(nanos: i64, span: Span) -> Result<Value, ShellError> {
    let today = Utc::now().date_naive();
    NaiveTime::from_hms_opt(0, 0, 0) // midnight
        .map(|time| time + Duration::nanoseconds(nanos)) // time of day
        .map(|time| today.and_time(time)) // today's date at that time of day
        .and_then(|datetime| {
            FixedOffset::east_opt(0) // utc
                .map(|offset| DateTime::<FixedOffset>::from_naive_utc_and_offset(datetime, offset))
        })
        .map(|datetime| Value::date(datetime, span))
        // Build the error lazily, and use `format!` so `{nanos}` is actually
        // interpolated instead of appearing literally in the help text.
        .ok_or_else(|| ShellError::CantConvert {
            to_type: "datetime".to_string(),
            from_type: "polars time".to_string(),
            span,
            help: Some(format!(
                "Could not convert polars time of {nanos} to datetime"
            )),
        })
}
#[cfg(test)]
mod tests {
use super::*;
use indexmap::indexmap;
use polars::export::arrow::array::{BooleanArray, PrimitiveArray};
use polars::prelude::Field;
use polars_io::prelude::StructArray;
use super::*;
#[test]
fn test_parsed_column_string_list() -> Result<(), Box<dyn std::error::Error>> {
@ -1001,4 +1098,184 @@ mod tests {
Ok(())
}
// Exercises `any_value_to_value` once per supported `AnyValue` variant and
// compares the result with the nushell `Value` built directly.
#[test]
fn test_any_value_to_value() -> Result<(), Box<dyn std::error::Error>> {
let span = Span::test_data();
// Null and boolean.
assert_eq!(
any_value_to_value(&AnyValue::Null, span)?,
Value::nothing(span)
);
let test_bool = true;
assert_eq!(
any_value_to_value(&AnyValue::Boolean(test_bool), span)?,
Value::bool(test_bool, span)
);
// Borrowed and owned strings both become nushell strings.
let test_str = "foo";
assert_eq!(
any_value_to_value(&AnyValue::Utf8(test_str), span)?,
Value::string(test_str.to_string(), span)
);
assert_eq!(
any_value_to_value(&AnyValue::Utf8Owned(test_str.into()), span)?,
Value::string(test_str.to_owned(), span)
);
// Unsigned integers of each width become nushell ints.
let tests_uint8 = 4;
assert_eq!(
any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?,
Value::int(tests_uint8 as i64, span)
);
let tests_uint16 = 233;
assert_eq!(
any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?,
Value::int(tests_uint16 as i64, span)
);
let tests_uint32 = 897688233;
assert_eq!(
any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?,
Value::int(tests_uint32 as i64, span)
);
let tests_uint64 = 903225135897388233;
assert_eq!(
any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?,
Value::int(tests_uint64 as i64, span)
);
// Floats: f32 is widened to f64, f64 is passed through.
let tests_float32 = 903225135897388233.3223353;
assert_eq!(
any_value_to_value(&AnyValue::Float32(tests_float32), span)?,
Value::float(tests_float32 as f64, span)
);
let tests_float64 = 9064251358973882322333.64233533232;
assert_eq!(
any_value_to_value(&AnyValue::Float64(tests_float64), span)?,
Value::float(tests_float64, span)
);
// Date: a day count of 10_957 is expected to land on 2000-01-01 00:00:00 UTC.
let test_days = 10_957;
let comparison_date = Utc
.with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
.unwrap()
.fixed_offset();
assert_eq!(
any_value_to_value(&AnyValue::Date(test_days), span)?,
Value::date(comparison_date, span)
);
// Datetime: the same instant expressed in milliseconds, with no timezone.
let test_millis = 946_684_800_000;
assert_eq!(
any_value_to_value(
&AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None),
span
)?,
Value::date(comparison_date, span)
);
// Duration: the same span in three units must produce the same nanosecond value.
let test_duration_millis = 99_999;
let test_duration_micros = 99_999_000;
let test_duration_nanos = 99_999_000_000;
assert_eq!(
any_value_to_value(
&AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds),
span
)?,
Value::duration(test_duration_nanos, span)
);
assert_eq!(
any_value_to_value(
&AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds),
span
)?,
Value::duration(test_duration_nanos, span)
);
assert_eq!(
any_value_to_value(
&AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds),
span
)?,
Value::duration(test_duration_nanos, span)
);
// Binary: borrowed and owned byte buffers.
let test_binary = b"sdf2332f32q3f3afwaf3232f32";
assert_eq!(
any_value_to_value(&AnyValue::Binary(test_binary), span)?,
Value::binary(test_binary.to_vec(), span)
);
assert_eq!(
any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?,
Value::binary(test_binary.to_vec(), span)
);
// Time: 54_000_000_000_000 ns after midnight is expected to be 15:00:00 on today's UTC date.
// NOTE(review): the expected value and the converter each read the current UTC date
// separately, so this could presumably disagree if the date rolls over between the calls.
let test_time_nanos = 54_000_000_000_000;
let test_time = DateTime::<FixedOffset>::from_naive_utc_and_offset(
Utc::now()
.date_naive()
.and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()),
FixedOffset::east_opt(0).unwrap(),
);
assert_eq!(
any_value_to_value(&AnyValue::Time(test_time_nanos), span)?,
Value::date(test_time, span)
);
// List: a series of ints becomes a nushell list of ints.
let test_list_series = Series::new("int series", &[1, 2, 3]);
let comparison_list_series = Value::list(
vec![
Value::int(1, span),
Value::int(2, span),
Value::int(3, span),
],
span,
);
assert_eq!(
any_value_to_value(&AnyValue::List(test_list_series), span)?,
comparison_list_series
);
// StructOwned: field values paired with field definitions become a record.
let field_value_0 = AnyValue::Int32(1);
let field_value_1 = AnyValue::Boolean(true);
let values = vec![field_value_0, field_value_1];
let field_name_0 = "num_field";
let field_name_1 = "bool_field";
let fields = vec![
Field::new(field_name_0, DataType::Int32),
Field::new(field_name_1, DataType::Boolean),
];
let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone())));
let comparison_owned_record = Value::record(
Record {
cols: vec![field_name_0.to_owned(), field_name_1.to_owned()],
vals: vec![Value::int(1, span), Value::bool(true, span)],
},
span,
);
assert_eq!(
any_value_to_value(&test_owned_struct, span)?,
comparison_owned_record.clone()
);
// Struct: a borrowed, array-backed struct at index 0 must produce the same record.
let test_int_arr = PrimitiveArray::from([Some(1_i32)]);
let test_bool_arr = BooleanArray::from([Some(true)]);
let test_struct_arr = StructArray::new(
DataType::Struct(fields.clone()).to_arrow(),
vec![Box::new(test_int_arr), Box::new(test_bool_arr)],
None,
);
assert_eq!(
any_value_to_value(
&AnyValue::Struct(0, &test_struct_arr, fields.as_slice()),
span
)?,
comparison_owned_record
);
Ok(())
}
}