Polars upgrade (#15852)

# Description
Polars 0.48 upgrade

# User-Facing Changes
- (breaking change) Due to a change in behavior in polars, `polars
is-in` now only works as an expression.

---------

Co-authored-by: Jack Wright <jack.wright@nike.com>
This commit is contained in:
Jack Wright 2025-05-30 10:20:57 -07:00 committed by GitHub
parent 18ce5de500
commit d9ecb7da93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 382 additions and 271 deletions

253
Cargo.lock generated
View File

@ -197,9 +197,9 @@ dependencies = [
[[package]] [[package]]
name = "argminmax" name = "argminmax"
version = "0.6.2" version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" checksum = "70f13d10a41ac8d2ec79ee34178d61e6f47a29c2edfe7ef1721c7383b0359e65"
dependencies = [ dependencies = [
"num-traits", "num-traits",
] ]
@ -254,6 +254,18 @@ dependencies = [
"wait-timeout", "wait-timeout",
] ]
[[package]]
name = "async-channel"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a"
dependencies = [
"concurrent-queue",
"event-listener-strategy",
"futures-core",
"pin-project-lite",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.6" version = "0.3.6"
@ -1120,6 +1132,15 @@ dependencies = [
"static_assertions", "static_assertions",
] ]
[[package]]
name = "concurrent-queue"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
dependencies = [
"crossbeam-utils",
]
[[package]] [[package]]
name = "console" name = "console"
version = "0.15.8" version = "0.15.8"
@ -1775,6 +1796,27 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c"
[[package]]
name = "event-listener"
version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae"
dependencies = [
"concurrent-queue",
"parking",
"pin-project-lite",
]
[[package]]
name = "event-listener-strategy"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [
"event-listener",
"pin-project-lite",
]
[[package]] [[package]]
name = "fallible-iterator" name = "fallible-iterator"
version = "0.3.0" version = "0.3.0"
@ -1924,12 +1966,12 @@ dependencies = [
[[package]] [[package]]
name = "fs4" name = "fs4"
version = "0.12.0" version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c29c30684418547d476f0b48e84f4821639119c483b1eccd566c8cd0cd05f521" checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4"
dependencies = [ dependencies = [
"rustix 0.38.42", "rustix 1.0.7",
"windows-sys 0.52.0", "windows-sys 0.59.0",
] ]
[[package]] [[package]]
@ -2849,6 +2891,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.14" version = "1.0.14"
@ -4438,32 +4489,38 @@ dependencies = [
[[package]] [[package]]
name = "object_store" name = "object_store"
version = "0.11.2" version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" checksum = "d94ac16b433c0ccf75326388c893d2835ab7457ea35ab8ba5d745c053ef5fa16"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
"chrono", "chrono",
"form_urlencoded",
"futures", "futures",
"http 1.2.0",
"http-body-util",
"humantime", "humantime",
"hyper 1.5.1", "hyper 1.5.1",
"itertools 0.13.0", "itertools 0.14.0",
"md-5", "md-5",
"parking_lot", "parking_lot",
"percent-encoding", "percent-encoding",
"quick-xml 0.37.1", "quick-xml 0.37.1",
"rand 0.8.5", "rand 0.9.0",
"reqwest", "reqwest",
"ring", "ring",
"serde", "serde",
"serde_json", "serde_json",
"snafu", "serde_urlencoded",
"thiserror 2.0.12",
"tokio", "tokio",
"tracing", "tracing",
"url", "url",
"walkdir", "walkdir",
"wasm-bindgen-futures",
"web-time",
] ]
[[package]] [[package]]
@ -4615,6 +4672,12 @@ dependencies = [
"unicode-width 0.2.0", "unicode-width 0.2.0",
] ]
[[package]]
name = "parking"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.12.3" version = "0.12.3"
@ -4888,9 +4951,9 @@ dependencies = [
[[package]] [[package]]
name = "polars" name = "polars"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72571dde488ecccbe799798bf99ab7308ebdb7cf5d95bcc498dbd5a132f0da4d" checksum = "c0c10a02dc15223de108e0625bf152efb8dc3b181c5916b5ee335e40dcda735a"
dependencies = [ dependencies = [
"getrandom 0.2.15", "getrandom 0.2.15",
"polars-arrow", "polars-arrow",
@ -4909,13 +4972,13 @@ dependencies = [
[[package]] [[package]]
name = "polars-arrow" name = "polars-arrow"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6611c758d52e799761cc25900666b71552e6c929d88052811bc9daad4b3321a8" checksum = "bef8c08875db45de6f71660ef15a686e459ab0dddae302b2870d66625fd3ba65"
dependencies = [ dependencies = [
"ahash",
"atoi_simd", "atoi_simd",
"avro-schema", "avro-schema",
"bitflags 2.6.0",
"bytemuck", "bytemuck",
"chrono", "chrono",
"chrono-tz", "chrono-tz",
@ -4927,7 +4990,6 @@ dependencies = [
"itoa", "itoa",
"lz4", "lz4",
"num-traits", "num-traits",
"parking_lot",
"polars-arrow-format", "polars-arrow-format",
"polars-error", "polars-error",
"polars-schema", "polars-schema",
@ -4935,7 +4997,6 @@ dependencies = [
"serde", "serde",
"simdutf8", "simdutf8",
"streaming-iterator", "streaming-iterator",
"strength_reduce",
"strum_macros", "strum_macros",
"version_check", "version_check",
"zstd", "zstd",
@ -4953,32 +5014,36 @@ dependencies = [
[[package]] [[package]]
name = "polars-compute" name = "polars-compute"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "332f2547dbb27599a8ffe68e56159f5996ba03d1dad0382ccb62c109ceacdeb6" checksum = "b54171a366ec0bd3431ce184568e731de14060c20e19f306706cb073e7a98dc1"
dependencies = [ dependencies = [
"atoi_simd", "atoi_simd",
"bytemuck", "bytemuck",
"chrono", "chrono",
"either", "either",
"fast-float2", "fast-float2",
"hashbrown 0.15.2",
"itoa", "itoa",
"num-traits", "num-traits",
"polars-arrow", "polars-arrow",
"polars-error", "polars-error",
"polars-utils", "polars-utils",
"rand 0.8.5",
"ryu", "ryu",
"serde",
"skiplist",
"strength_reduce", "strength_reduce",
"strum_macros",
"version_check", "version_check",
] ]
[[package]] [[package]]
name = "polars-core" name = "polars-core"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "796d06eae7e6e74ed28ea54a8fccc584ebac84e6cf0e1e9ba41ffc807b169a01" checksum = "7d13c50b27ed6df2f6a9d156e9a4960e9373bf65fc763e0be2738efa5457d915"
dependencies = [ dependencies = [
"ahash",
"bitflags 2.6.0", "bitflags 2.6.0",
"bytemuck", "bytemuck",
"chrono", "chrono",
@ -4990,7 +5055,6 @@ dependencies = [
"indexmap", "indexmap",
"itoa", "itoa",
"num-traits", "num-traits",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-error", "polars-error",
@ -5004,36 +5068,34 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"strum_macros", "strum_macros",
"thiserror 2.0.12",
"version_check", "version_check",
"xxhash-rust", "xxhash-rust",
] ]
[[package]] [[package]]
name = "polars-error" name = "polars-error"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d6529cae0d1db5ed690e47de41fac9b35ae0c26d476830c2079f130887b847" checksum = "aa800b7240c7326e54d6b95df8376e3ee760e4cb4170fe38de1f42d14d719ffc"
dependencies = [ dependencies = [
"avro-schema", "avro-schema",
"object_store", "object_store",
"parking_lot",
"polars-arrow-format", "polars-arrow-format",
"regex", "regex",
"signal-hook",
"simdutf8", "simdutf8",
"thiserror 2.0.12",
] ]
[[package]] [[package]]
name = "polars-expr" name = "polars-expr"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e639991a8ad4fb12880ab44bcc3cf44a5703df003142334d9caf86d77d77e7" checksum = "fc0f1673599bee079c94d7d7e7e0a31818c7c88060dcf6ec82691a99e1eb5cf9"
dependencies = [ dependencies = [
"ahash",
"bitflags 2.6.0", "bitflags 2.6.0",
"hashbrown 0.15.2", "hashbrown 0.15.2",
"num-traits", "num-traits",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-core", "polars-core",
@ -5045,15 +5107,15 @@ dependencies = [
"polars-utils", "polars-utils",
"rand 0.8.5", "rand 0.8.5",
"rayon", "rayon",
"recursive",
] ]
[[package]] [[package]]
name = "polars-io" name = "polars-io"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719a77e94480f6be090512da196e378cbcbeb3584c6fe1134c600aee906e38ab" checksum = "1db23d90c17ab03ccded685e5a88aa8ee43fd851376463a9533ed56ba66d1538"
dependencies = [ dependencies = [
"ahash",
"async-trait", "async-trait",
"atoi_simd", "atoi_simd",
"blake3", "blake3",
@ -5072,7 +5134,6 @@ dependencies = [
"memmap2", "memmap2",
"num-traits", "num-traits",
"object_store", "object_store",
"once_cell",
"percent-encoding", "percent-encoding",
"polars-arrow", "polars-arrow",
"polars-core", "polars-core",
@ -5098,11 +5159,10 @@ dependencies = [
[[package]] [[package]]
name = "polars-json" name = "polars-json"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e30603ca81e317b66b4caac683a8325a6a82ea0489685dc37e22ae03720def98" checksum = "f8b0fdc69f8a20441b4fbc4843d643d0fa12fcef1b6b27608087ef544c5407f0"
dependencies = [ dependencies = [
"ahash",
"chrono", "chrono",
"chrono-tz", "chrono-tz",
"fallible-streaming-iterator", "fallible-streaming-iterator",
@ -5121,17 +5181,17 @@ dependencies = [
[[package]] [[package]]
name = "polars-lazy" name = "polars-lazy"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0a731a672dfc8ac38c1f73c9a4b2ae38d2fc8ac363bfb64c5f3a3e072ffc5ad" checksum = "03708e73a20fd7ca9fb0003e620daff5c488c9e2b287640289de78c5b135ead7"
dependencies = [ dependencies = [
"ahash",
"bitflags 2.6.0", "bitflags 2.6.0",
"chrono", "chrono",
"either",
"futures", "futures",
"memchr", "memchr",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute",
"polars-core", "polars-core",
"polars-expr", "polars-expr",
"polars-io", "polars-io",
@ -5150,9 +5210,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-mem-engine" name = "polars-mem-engine"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33442189bcbf2e2559aa7914db3835429030a13f4f18e43af5fba9d1b018cf12" checksum = "52126424d9612132b0b8b4b995ea25a42d79559d111063ef705a370dfa742d46"
dependencies = [ dependencies = [
"futures", "futures",
"memmap2", "memmap2",
@ -5167,16 +5227,16 @@ dependencies = [
"polars-time", "polars-time",
"polars-utils", "polars-utils",
"rayon", "rayon",
"recursive",
"tokio", "tokio",
] ]
[[package]] [[package]]
name = "polars-ops" name = "polars-ops"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbb83218b0c216104f0076cd1a005128be078f958125f3d59b094ee73d78c18e" checksum = "5e308800c11d5c6d0ddef16186ca026691aedeeb2210d458cdea997520907917"
dependencies = [ dependencies = [
"ahash",
"argminmax", "argminmax",
"base64 0.22.1", "base64 0.22.1",
"bytemuck", "bytemuck",
@ -5187,9 +5247,9 @@ dependencies = [
"hex", "hex",
"indexmap", "indexmap",
"jsonpath_lib_polars_vendor", "jsonpath_lib_polars_vendor",
"libm",
"memchr", "memchr",
"num-traits", "num-traits",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-core", "polars-core",
@ -5212,11 +5272,10 @@ dependencies = [
[[package]] [[package]]
name = "polars-parquet" name = "polars-parquet"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c60ee85535590a38db6c703a21be4cb25342e40f573f070d1e16f9d84a53ac7" checksum = "8e186177e24e217ce5b9bf441028417faeeef01b7e793fa815e1d26a53c38400"
dependencies = [ dependencies = [
"ahash",
"async-stream", "async-stream",
"base64 0.22.1", "base64 0.22.1",
"brotli", "brotli",
@ -5251,9 +5310,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-pipe" name = "polars-pipe"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42d238fb76698f56e51ddfa89b135e4eda56a4767c6e8859eed0ab78386fcd52" checksum = "59780346591510f17e26a2cb7a5f812167e9487163baf6570a53a4fab3e004f3"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"crossbeam-queue", "crossbeam-queue",
@ -5261,7 +5320,6 @@ dependencies = [
"futures", "futures",
"hashbrown 0.15.2", "hashbrown 0.15.2",
"num-traits", "num-traits",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-core", "polars-core",
@ -5279,11 +5337,10 @@ dependencies = [
[[package]] [[package]]
name = "polars-plan" name = "polars-plan"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f03533a93aa66127fcb909a87153a3c7cfee6f0ae59f497e73d7736208da54c" checksum = "3dfb0432eed610f25f436b519c007d4c6e2607c645ff27324aaaafda34ef51bf"
dependencies = [ dependencies = [
"ahash",
"bitflags 2.6.0", "bitflags 2.6.0",
"bytemuck", "bytemuck",
"bytes", "bytes",
@ -5294,7 +5351,6 @@ dependencies = [
"hashbrown 0.15.2", "hashbrown 0.15.2",
"memmap2", "memmap2",
"num-traits", "num-traits",
"once_cell",
"percent-encoding", "percent-encoding",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
@ -5315,9 +5371,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-row" name = "polars-row"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bf47f7409f8e75328d7d034be390842924eb276716d0458607be0bddb8cc839" checksum = "0be6a32d19ccae4dad60ef0394972419c771b393d31e1b141a89c69de5b33990"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.6.0",
"bytemuck", "bytemuck",
@ -5329,9 +5385,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-schema" name = "polars-schema"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "416621ae82b84466cf4ff36838a9b0aeb4a67e76bd3065edc8c9cb7da19b1bc7" checksum = "12b56ea2026a869b9bae7d8bd861b420d72e7bca4cf4b757368874577ac666b6"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"polars-error", "polars-error",
@ -5342,10 +5398,11 @@ dependencies = [
[[package]] [[package]]
name = "polars-sql" name = "polars-sql"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edaab553b90aa4d6743bb538978e1982368acb58a94408d7dd3299cad49c7083" checksum = "7ada7336bd78c67d896f9a13082b02e4a159c6c6fba777dcc152c0b1f126453b"
dependencies = [ dependencies = [
"bitflags 2.6.0",
"hex", "hex",
"polars-core", "polars-core",
"polars-error", "polars-error",
@ -5362,17 +5419,24 @@ dependencies = [
[[package]] [[package]]
name = "polars-stream" name = "polars-stream"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "498997b656c779610c1496b3d96a59fe569ef22a5b81ccfe5325cb3df8dff2fd" checksum = "d81638ef3da43d632da0250386bf4f042fe514005f2a6d59d9e6118194888eb7"
dependencies = [ dependencies = [
"async-channel",
"async-trait",
"atomic-waker", "atomic-waker",
"bitflags 2.6.0",
"crossbeam-channel",
"crossbeam-deque", "crossbeam-deque",
"crossbeam-queue",
"crossbeam-utils", "crossbeam-utils",
"futures", "futures",
"memmap2", "memmap2",
"parking_lot", "parking_lot",
"percent-encoding",
"pin-project-lite", "pin-project-lite",
"polars-arrow",
"polars-core", "polars-core",
"polars-error", "polars-error",
"polars-expr", "polars-expr",
@ -5392,9 +5456,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-time" name = "polars-time"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d192efbdab516d28b3fab1709a969e3385bd5cda050b7c9aa9e2502a01fda879" checksum = "706ea1e67d5bfcfd9e5ca59070fb5ad591d42b9729d4a60af1b320e645ea158e"
dependencies = [ dependencies = [
"atoi_simd", "atoi_simd",
"bytemuck", "bytemuck",
@ -5402,7 +5466,6 @@ dependencies = [
"chrono-tz", "chrono-tz",
"now", "now",
"num-traits", "num-traits",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-core", "polars-core",
@ -5417,28 +5480,31 @@ dependencies = [
[[package]] [[package]]
name = "polars-utils" name = "polars-utils"
version = "0.46.0" version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f6c8166a4a7fbc15b87c81645ed9e1f0651ff2e8c96cafc40ac5bf43441a10" checksum = "2c50cd0dac46936771793eb22cb7aeeef97d6aaa4f832f4209637e73178d39aa"
dependencies = [ dependencies = [
"ahash",
"bincode", "bincode",
"bytemuck", "bytemuck",
"bytes", "bytes",
"compact_str", "compact_str",
"flate2", "flate2",
"foldhash",
"hashbrown 0.15.2", "hashbrown 0.15.2",
"indexmap", "indexmap",
"libc", "libc",
"memmap2", "memmap2",
"num-traits", "num-traits",
"once_cell",
"polars-error", "polars-error",
"rand 0.8.5", "rand 0.8.5",
"raw-cpuid", "raw-cpuid",
"rayon", "rayon",
"regex",
"rmp-serde",
"serde", "serde",
"serde_ignored",
"serde_json", "serde_json",
"slotmap",
"stacker", "stacker",
"sysinfo", "sysinfo",
"version_check", "version_check",
@ -6541,6 +6607,15 @@ dependencies = [
"syn 2.0.90", "syn 2.0.90",
] ]
[[package]]
name = "serde_ignored"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b516445dac1e3535b6d658a7b528d771153dfb272ed4180ca4617a20550365ff"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.133" version = "1.0.133"
@ -6756,6 +6831,15 @@ version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "skiplist"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eec25f46463fcdc5e02f388c2780b1b58e01be81a8378e62ec60931beccc3f6"
dependencies = [
"rand 0.8.5",
]
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.9" version = "0.4.9"
@ -6780,27 +6864,6 @@ version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "snafu"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019"
dependencies = [
"snafu-derive",
]
[[package]]
name = "snafu-derive"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.90",
]
[[package]] [[package]]
name = "snap" name = "snap"
version = "1.1.1" version = "1.1.1"

View File

@ -33,11 +33,11 @@ indexmap = { version = "2.9" }
num = { version = "0.4" } num = { version = "0.4" }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.53" } sqlparser = { version = "0.53" }
polars-io = { version = "0.46", features = ["avro", "cloud", "aws"] } polars-io = { version = "0.48", features = ["avro", "cloud", "aws"] }
polars-arrow = { version = "0.46" } polars-arrow = { version = "0.48" }
polars-ops = { version = "0.46", features = ["pivot", "cutqcut"] } polars-ops = { version = "0.48", features = ["pivot", "cutqcut"] }
polars-plan = { version = "0.46", features = ["regex"] } polars-plan = { version = "0.48", features = ["regex"] }
polars-utils = { version = "0.46" } polars-utils = { version = "0.48" }
typetag = "0.2" typetag = "0.2"
env_logger = "0.11.3" env_logger = "0.11.3"
log.workspace = true log.workspace = true
@ -50,7 +50,7 @@ hashbrown = { version = "0.15", features = ["rayon", "serde"] }
aws-config = { version = "1.5", features = ["sso"] } aws-config = { version = "1.5", features = ["sso"] }
aws-credential-types = "1.2" aws-credential-types = "1.2"
tokio = { version = "1.45", features = ["full"] } tokio = { version = "1.45", features = ["full"] }
object_store = { version = "0.11", default-features = false } object_store = { version = "0.12", features = ["aws"], default-features = false }
url.workspace = true url.workspace = true
[dependencies.polars] [dependencies.polars]
@ -99,7 +99,7 @@ features = [
"trigonometry", "trigonometry",
] ]
optional = false optional = false
version = "0.46" version = "0.48"
[dev-dependencies] [dev-dependencies]
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.104.2" } nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.104.2" }

View File

@ -186,7 +186,7 @@ fn get_col_name(expr: &Expr) -> Option<String> {
| Expr::Exclude(expr, _) | Expr::Exclude(expr, _)
| Expr::Alias(expr, _) | Expr::Alias(expr, _)
| Expr::KeepName(expr) | Expr::KeepName(expr)
| Expr::Explode(expr) => get_col_name(expr.as_ref()), | Expr::Explode { input: expr, .. } => get_col_name(expr.as_ref()),
Expr::Ternary { .. } Expr::Ternary { .. }
| Expr::AnonymousFunction { .. } | Expr::AnonymousFunction { .. }
| Expr::Function { .. } | Expr::Function { .. }

View File

@ -5,7 +5,8 @@ use crate::{
}; };
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
}; };
use polars::df; use polars::df;
@ -94,7 +95,14 @@ impl PluginCommand for Over {
PolarsPluginObject::NuExpression(expr) => { PolarsPluginObject::NuExpression(expr) => {
let expr: NuExpression = expr let expr: NuExpression = expr
.into_polars() .into_polars()
.over_with_options(expressions, None, Default::default()) .over_with_options(Some(expressions), None, Default::default())
.map_err(|e| ShellError::GenericError {
error: format!("Error applying over expression: {e}"),
msg: "".into(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into(); .into();
expr.to_pipeline_data(plugin, engine, call.head) expr.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -8,7 +8,7 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value, Value,
}; };
use polars::prelude::{DataType, IntoSeries, is_in, lit}; use polars::prelude::{DataType, lit};
#[derive(Clone)] #[derive(Clone)]
pub struct ExprIsIn; pub struct ExprIsIn;
@ -27,80 +27,48 @@ impl PluginCommand for ExprIsIn {
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name()) Signature::build(self.name())
.required("list", SyntaxShape::Any, "List to check if values are in") .required("list", SyntaxShape::Any, "List to check if values are in")
.input_output_types(vec![ .input_output_types(vec![(
( Type::Custom("expression".into()),
Type::Custom("expression".into()), Type::Custom("expression".into()),
Type::Custom("expression".into()), )])
),
(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
),
])
.category(Category::Custom("expression".into())) .category(Category::Custom("expression".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![ vec![Example {
Example { description: "Creates a is-in expression",
description: "Creates a is-in expression", example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | polars into-df);
$df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#, $df | polars with-column (polars col a | polars is-in [one two] | polars as a_in)"#,
result: Some( result: Some(
NuDataFrame::try_from_columns( NuDataFrame::try_from_columns(
vec![ vec![
Column::new( Column::new(
"a".to_string(), "a".to_string(),
vec![ vec![
Value::test_string("one"), Value::test_string("one"),
Value::test_string("two"), Value::test_string("two"),
Value::test_string("three"), Value::test_string("three"),
], ],
), ),
Column::new( Column::new(
"b".to_string(), "b".to_string(),
vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)],
), ),
Column::new( Column::new(
"a_in".to_string(), "a_in".to_string(),
vec![
Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
],
),
],
None,
)
.expect("simple df for test should not fail")
.into_value(Span::test_data()),
),
},
Example {
description: "Checks if elements from a series are contained in right series",
example: r#"let other = ([1 3 6] | polars into-df);
[5 6 6 6 8 8 8] | polars into-df | polars is-in $other"#,
result: Some(
NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(),
vec![ vec![
Value::test_bool(false),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true), Value::test_bool(true),
Value::test_bool(true),
Value::test_bool(false),
Value::test_bool(false),
Value::test_bool(false), Value::test_bool(false),
], ],
)], ),
None, ],
) None,
.expect("simple df for test should not fail") )
.into_value(Span::test_data()), .expect("simple df for test should not fail")
), .into_value(Span::test_data()),
}, ),
] }]
} }
fn search_terms(&self) -> Vec<&str> { fn search_terms(&self) -> Vec<&str> {
@ -117,10 +85,6 @@ impl PluginCommand for ExprIsIn {
let metadata = input.metadata(); let metadata = input.metadata();
let value = input.into_value(call.head)?; let value = input.into_value(call.head)?;
match PolarsPluginObject::try_from_value(plugin, &value)? { match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df),
PolarsPluginObject::NuLazyFrame(lazy) => {
command_df(plugin, engine, call, lazy.collect(call.head)?)
}
PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr), PolarsPluginObject::NuExpression(expr) => command_expr(plugin, engine, call, expr),
_ => Err(cant_convert_err( _ => Err(cant_convert_err(
&value, &value,
@ -154,39 +118,11 @@ fn command_expr(
}); });
} }
let expr: NuExpression = expr.into_polars().is_in(lit(list)).into(); // todo - at some point we should probably make this consistent with python api
let expr: NuExpression = expr.into_polars().is_in(lit(list).implode(), true).into();
expr.to_pipeline_data(plugin, engine, call.head) expr.to_pipeline_data(plugin, engine, call.head)
} }
fn command_df(
plugin: &PolarsPlugin,
engine: &EngineInterface,
call: &EvaluatedCall,
df: NuDataFrame,
) -> Result<PipelineData, ShellError> {
let other_value: Value = call.req(0)?;
let other_span = other_value.span();
let other_df = NuDataFrame::try_from_value_coerce(plugin, &other_value, call.head)?;
let other = other_df.as_series(other_span)?;
let series = df.as_series(call.head)?;
let mut res = is_in(&series, &other)
.map_err(|e| ShellError::GenericError {
error: "Error finding in other".into(),
msg: e.to_string(),
span: Some(call.head),
help: None,
inner: vec![],
})?
.into_series();
res.rename("is_in".into());
let mut new_df = NuDataFrame::try_from_series_vec(vec![res.into_series()], call.head)?;
new_df.from_lazy = df.from_lazy;
new_df.to_pipeline_data(plugin, engine, call.head)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;

View File

@ -1,8 +1,9 @@
use std::fs::File; use std::fs::File;
use log::debug;
use nu_plugin::EvaluatedCall; use nu_plugin::EvaluatedCall;
use nu_protocol::ShellError; use nu_protocol::ShellError;
use polars::prelude::{IpcWriter, SerWriter}; use polars::prelude::{IpcWriter, SerWriter, SinkOptions};
use polars_io::ipc::IpcWriterOptions; use polars_io::ipc::IpcWriterOptions;
use crate::{ use crate::{
@ -10,21 +11,27 @@ use crate::{
values::{NuDataFrame, NuLazyFrame}, values::{NuDataFrame, NuLazyFrame},
}; };
use super::polars_file_save_error; use super::{polars_file_save_error, sink_target_from_string};
pub(crate) fn command_lazy( pub(crate) fn command_lazy(
_call: &EvaluatedCall, _call: &EvaluatedCall,
lazy: &NuLazyFrame, lazy: &NuLazyFrame,
resource: Resource, resource: Resource,
) -> Result<(), ShellError> { ) -> Result<(), ShellError> {
let file_path = resource.path; let file_path = sink_target_from_string(resource.path.clone());
let file_span = resource.span; let file_span = resource.span;
debug!("Writing ipc file {}", resource.path);
lazy.to_polars() lazy.to_polars()
.sink_ipc( .sink_ipc(
file_path, file_path,
IpcWriterOptions::default(), IpcWriterOptions::default(),
resource.cloud_options, resource.cloud_options,
SinkOptions::default(),
) )
.and_then(|l| l.collect())
.map(|_| {
debug!("Wrote ipc file {}", resource.path);
})
.map_err(|e| polars_file_save_error(e, file_span)) .map_err(|e| polars_file_save_error(e, file_span))
} }

View File

@ -1,12 +1,13 @@
use std::fs::File; use std::fs::File;
use log::debug;
use nu_plugin::EvaluatedCall; use nu_plugin::EvaluatedCall;
use nu_protocol::{ShellError, Spanned}; use nu_protocol::{ShellError, Spanned};
use polars::prelude::{CsvWriter, SerWriter}; use polars::prelude::{CsvWriter, SerWriter, SinkOptions};
use polars_io::csv::write::{CsvWriterOptions, SerializeOptions}; use polars_io::csv::write::{CsvWriterOptions, SerializeOptions};
use crate::{ use crate::{
command::core::resource::Resource, command::core::{resource::Resource, save::sink_target_from_string},
values::{NuDataFrame, NuLazyFrame}, values::{NuDataFrame, NuLazyFrame},
}; };
@ -17,8 +18,9 @@ pub(crate) fn command_lazy(
lazy: &NuLazyFrame, lazy: &NuLazyFrame,
resource: Resource, resource: Resource,
) -> Result<(), ShellError> { ) -> Result<(), ShellError> {
let file_path = resource.path; let file_path = sink_target_from_string(resource.path.clone());
let file_span = resource.span; let file_span = resource.span;
debug!("Writing csv file {}", resource.path);
let delimiter: Option<Spanned<String>> = call.get_flag("csv-delimiter")?; let delimiter: Option<Spanned<String>> = call.get_flag("csv-delimiter")?;
let separator = delimiter let separator = delimiter
.and_then(|d| d.item.chars().next().map(|c| c as u8)) .and_then(|d| d.item.chars().next().map(|c| c as u8))
@ -36,8 +38,17 @@ pub(crate) fn command_lazy(
}; };
lazy.to_polars() lazy.to_polars()
.sink_csv(file_path, options, resource.cloud_options) .sink_csv(
file_path,
options,
resource.cloud_options,
SinkOptions::default(),
)
.and_then(|l| l.collect())
.map_err(|e| polars_file_save_error(e, file_span)) .map_err(|e| polars_file_save_error(e, file_span))
.map(|_| {
debug!("Wrote parquet file {}", resource.path);
})
} }
pub(crate) fn command_eager( pub(crate) fn command_eager(

View File

@ -4,7 +4,7 @@ mod csv;
mod ndjson; mod ndjson;
mod parquet; mod parquet;
use std::path::PathBuf; use std::{path::PathBuf, sync::Arc};
use crate::{ use crate::{
PolarsPlugin, PolarsPlugin,
@ -19,7 +19,7 @@ use nu_protocol::{
Signature, Span, Spanned, SyntaxShape, Type, Signature, Span, Spanned, SyntaxShape, Type,
shell_error::{self, io::IoError}, shell_error::{self, io::IoError},
}; };
use polars::error::PolarsError; use polars::{error::PolarsError, prelude::SinkTarget};
#[derive(Clone)] #[derive(Clone)]
pub struct SaveDF; pub struct SaveDF;
@ -272,6 +272,13 @@ pub fn unknown_file_save_error(span: Span) -> ShellError {
} }
} }
pub(crate) fn sink_target_from_string(path: String) -> SinkTarget {
let path = PathBuf::from(path);
let target = SinkTarget::Path(Arc::new(path));
debug!("Sink target: {target:?}");
target
}
#[cfg(test)] #[cfg(test)]
pub(crate) mod test { pub(crate) mod test {
use nu_plugin_test_support::PluginTest; use nu_plugin_test_support::PluginTest;

View File

@ -1,12 +1,13 @@
use std::{fs::File, io::BufWriter}; use std::{fs::File, io::BufWriter};
use log::debug;
use nu_plugin::EvaluatedCall; use nu_plugin::EvaluatedCall;
use nu_protocol::ShellError; use nu_protocol::ShellError;
use polars::prelude::{JsonWriter, SerWriter}; use polars::prelude::{JsonWriter, SerWriter, SinkOptions};
use polars_io::json::JsonWriterOptions; use polars_io::json::JsonWriterOptions;
use crate::{ use crate::{
command::core::resource::Resource, command::core::{resource::Resource, save::sink_target_from_string},
values::{NuDataFrame, NuLazyFrame}, values::{NuDataFrame, NuLazyFrame},
}; };
@ -17,15 +18,21 @@ pub(crate) fn command_lazy(
lazy: &NuLazyFrame, lazy: &NuLazyFrame,
resource: Resource, resource: Resource,
) -> Result<(), ShellError> { ) -> Result<(), ShellError> {
let file_path = resource.path; let file_path = sink_target_from_string(resource.path.clone());
let file_span = resource.span; let file_span = resource.span;
debug!("Writing ndjson file {}", resource.path);
lazy.to_polars() lazy.to_polars()
.sink_json( .sink_json(
file_path, file_path,
JsonWriterOptions::default(), JsonWriterOptions::default(),
resource.cloud_options, resource.cloud_options,
SinkOptions::default(),
) )
.and_then(|l| l.collect())
.map_err(|e| polars_file_save_error(e, file_span)) .map_err(|e| polars_file_save_error(e, file_span))
.map(|_| {
debug!("Wrote ndjson file {}", resource.path);
})
} }
pub(crate) fn command_eager(df: &NuDataFrame, resource: Resource) -> Result<(), ShellError> { pub(crate) fn command_eager(df: &NuDataFrame, resource: Resource) -> Result<(), ShellError> {
@ -58,12 +65,12 @@ pub mod test {
use crate::command::core::save::test::{test_eager_save, test_lazy_save}; use crate::command::core::save::test::{test_eager_save, test_lazy_save};
#[test] #[test]
pub fn test_arrow_eager_save() -> Result<(), Box<dyn std::error::Error>> { pub fn test_ndjson_eager_save() -> Result<(), Box<dyn std::error::Error>> {
test_eager_save("ndjson") test_eager_save("ndjson")
} }
#[test] #[test]
pub fn test_arrow_lazy_save() -> Result<(), Box<dyn std::error::Error>> { pub fn test_ndjson_lazy_save() -> Result<(), Box<dyn std::error::Error>> {
test_lazy_save("ndjson") test_lazy_save("ndjson")
} }
} }

View File

@ -3,11 +3,11 @@ use std::fs::File;
use log::debug; use log::debug;
use nu_plugin::EvaluatedCall; use nu_plugin::EvaluatedCall;
use nu_protocol::ShellError; use nu_protocol::ShellError;
use polars::prelude::ParquetWriter; use polars::prelude::{ParquetWriter, SinkOptions};
use polars_io::parquet::write::ParquetWriteOptions; use polars_io::parquet::write::ParquetWriteOptions;
use crate::{ use crate::{
command::core::resource::Resource, command::core::{resource::Resource, save::sink_target_from_string},
values::{NuDataFrame, NuLazyFrame}, values::{NuDataFrame, NuLazyFrame},
}; };
@ -18,16 +18,22 @@ pub(crate) fn command_lazy(
lazy: &NuLazyFrame, lazy: &NuLazyFrame,
resource: Resource, resource: Resource,
) -> Result<(), ShellError> { ) -> Result<(), ShellError> {
let file_path = resource.path; let file_path = sink_target_from_string(resource.path.clone());
let file_span = resource.span; let file_span = resource.span;
debug!("Writing parquet file {file_path}"); debug!("Writing parquet file {}", resource.path);
lazy.to_polars() lazy.to_polars()
.sink_parquet( .sink_parquet(
&file_path, file_path,
ParquetWriteOptions::default(), ParquetWriteOptions::default(),
resource.cloud_options, resource.cloud_options,
SinkOptions::default(),
) )
.and_then(|l| l.collect())
.map_err(|e| polars_file_save_error(e, file_span)) .map_err(|e| polars_file_save_error(e, file_span))
.map(|_| {
debug!("Wrote parquet file {}", resource.path);
})
} }
pub(crate) fn command_eager(df: &NuDataFrame, resource: Resource) -> Result<(), ShellError> { pub(crate) fn command_eager(df: &NuDataFrame, resource: Resource) -> Result<(), ShellError> {

View File

@ -188,7 +188,7 @@ fn command(
let tail = df let tail = df
.as_ref() .as_ref()
.iter() .iter()
.filter(|col| !matches!(col.dtype(), &DataType::Object("object", _))) .filter(|col| !matches!(col.dtype(), &DataType::Object("object")))
.map(|col| { .map(|col| {
let count = col.len() as f64; let count = col.len() as f64;

View File

@ -102,7 +102,7 @@ fn literal_expr(value: &SqlValue) -> Result<Expr> {
SqlValue::HexStringLiteral(s) => lit(s.clone()), SqlValue::HexStringLiteral(s) => lit(s.clone()),
SqlValue::DoubleQuotedString(s) => lit(s.clone()), SqlValue::DoubleQuotedString(s) => lit(s.clone()),
SqlValue::Boolean(b) => lit(*b), SqlValue::Boolean(b) => lit(*b),
SqlValue::Null => Expr::Literal(LiteralValue::Null), SqlValue::Null => Expr::Literal(LiteralValue::untyped_null()),
_ => { _ => {
return Err(PolarsError::ComputeError( return Err(PolarsError::ComputeError(
format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(), format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(),

View File

@ -6,6 +6,7 @@ use crate::{
}, },
}; };
use chrono::DateTime; use chrono::DateTime;
use polars_plan::plans::DynLiteralValue;
use std::sync::Arc; use std::sync::Arc;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
@ -294,7 +295,9 @@ fn command(
None, None,
None, None,
options, options,
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))), Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Str(
PlSmallStr::from_string(ambiguous),
))),
) )
.into(); .into();
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
@ -324,7 +327,9 @@ fn command_lazy(
None, None,
None, None,
options, options,
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))), Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Str(
PlSmallStr::from_string(ambiguous),
))),
)]), )]),
) )
.to_pipeline_data(plugin, engine, call.head) .to_pipeline_data(plugin, engine, call.head)

View File

@ -7,12 +7,15 @@ use crate::{
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
Category, Example, LabeledError, PipelineData, Signature, Span, SyntaxShape, Type, Value, Category, Example, LabeledError, PipelineData, Signature, Span, Spanned, SyntaxShape, Type,
Value,
}; };
use chrono::DateTime; use chrono::DateTime;
use polars::prelude::*; use polars::prelude::*;
use super::timezone_from_str;
#[derive(Clone)] #[derive(Clone)]
pub struct ConvertTimeZone; pub struct ConvertTimeZone;
@ -76,7 +79,8 @@ impl PluginCommand for ConvertTimeZone {
"datetime".into(), "datetime".into(),
DataType::Datetime( DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("Europe/Lisbon")), TimeZone::opt_try_new(Some("Europe/Lisbon"))
.expect("timezone should be valid"),
), ),
), ),
])))), ])))),
@ -118,7 +122,8 @@ impl PluginCommand for ConvertTimeZone {
"datetime".into(), "datetime".into(),
DataType::Datetime( DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("America/New_York")), TimeZone::opt_try_new(Some("America/New_York"))
.expect("timezone should be valid"),
), ),
), ),
])))), ])))),
@ -142,12 +147,11 @@ impl PluginCommand for ConvertTimeZone {
match PolarsPluginObject::try_from_value(plugin, &value)? { match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuExpression(expr) => { PolarsPluginObject::NuExpression(expr) => {
let time_zone: String = call.req(0)?; let time_zone_spanned: Spanned<String> = call.req(0)?;
let expr: NuExpression = expr let time_zone =
.into_polars() timezone_from_str(&time_zone_spanned.item, Some(time_zone_spanned.span))?;
.dt() let expr: NuExpression =
.convert_time_zone(PlSmallStr::from_str(&time_zone)) expr.into_polars().dt().convert_time_zone(time_zone).into();
.into();
expr.to_pipeline_data(plugin, engine, call.head) expr.to_pipeline_data(plugin, engine, call.head)
} }
_ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])), _ => Err(cant_convert_err(&value, &[PolarsPluginType::NuExpression])),

View File

@ -33,6 +33,8 @@ pub use get_second::GetSecond;
pub use get_week::GetWeek; pub use get_week::GetWeek;
pub use get_weekday::GetWeekDay; pub use get_weekday::GetWeekDay;
pub use get_year::GetYear; pub use get_year::GetYear;
use nu_protocol::{ShellError, Span};
use polars::prelude::{PlSmallStr, TimeZone};
pub use replace_time_zone::ReplaceTimeZone; pub use replace_time_zone::ReplaceTimeZone;
pub use strftime::StrFTime; pub use strftime::StrFTime;
pub use truncate::Truncate; pub use truncate::Truncate;
@ -58,3 +60,45 @@ pub(crate) fn datetime_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPl
Box::new(Truncate), Box::new(Truncate),
] ]
} }
pub fn timezone_from_str(zone_str: &str, span: Option<Span>) -> Result<TimeZone, ShellError> {
TimeZone::opt_try_new(Some(PlSmallStr::from_str(zone_str)))
.map_err(|e| ShellError::GenericError {
error: format!("Invalid timezone: {} : {}", zone_str, e),
msg: "".into(),
span,
help: None,
inner: vec![],
})?
.ok_or(ShellError::GenericError {
error: format!("Invalid timezone {}", zone_str),
msg: "".into(),
span,
help: None,
inner: vec![],
})
}
pub fn timezone_utc() -> TimeZone {
TimeZone::opt_try_new(Some(PlSmallStr::from_str("UTC")))
.expect("UTC timezone should always be valid")
.expect("UTC timezone should always be present")
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_timezone_from_str() -> Result<(), ShellError> {
let tz = timezone_from_str("America/New_York", None)?;
assert_eq!(tz.to_string(), "America/New_York");
Ok(())
}
#[test]
fn test_timezone_utc() {
let tz = timezone_utc();
assert_eq!(tz.to_string(), "UTC");
}
}

View File

@ -7,12 +7,15 @@ use crate::{
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
Value, SyntaxShape, Type, Value,
}; };
use chrono::DateTime; use chrono::DateTime;
use polars::prelude::*; use polars::prelude::*;
use polars_plan::plans::DynLiteralValue;
use super::timezone_from_str;
#[derive(Clone)] #[derive(Clone)]
pub struct ReplaceTimeZone; pub struct ReplaceTimeZone;
@ -93,7 +96,8 @@ impl PluginCommand for ReplaceTimeZone {
"datetime".into(), "datetime".into(),
DataType::Datetime( DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("America/New_York")), TimeZone::opt_try_new(Some("America/New_York"))
.expect("timezone should be valid"),
), ),
), ),
])))), ])))),
@ -145,7 +149,8 @@ impl PluginCommand for ReplaceTimeZone {
"datetime".into(), "datetime".into(),
DataType::Datetime( DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("America/New_York")), TimeZone::opt_try_new(Some("America/New_York"))
.expect("timezone should be valid"),
), ),
), ),
])))), ])))),
@ -197,7 +202,8 @@ impl PluginCommand for ReplaceTimeZone {
"datetime".into(), "datetime".into(),
DataType::Datetime( DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("America/New_York")), TimeZone::opt_try_new(Some("America/New_York"))
.expect("timezone should be valid"),
), ),
), ),
])))), ])))),
@ -255,13 +261,17 @@ impl PluginCommand for ReplaceTimeZone {
match PolarsPluginObject::try_from_value(plugin, &value)? { match PolarsPluginObject::try_from_value(plugin, &value)? {
PolarsPluginObject::NuExpression(expr) => { PolarsPluginObject::NuExpression(expr) => {
let time_zone: String = call.req(0)?; let time_zone_spanned: Spanned<String> = call.req(0)?;
let time_zone =
timezone_from_str(&time_zone_spanned.item, Some(time_zone_spanned.span))?;
let expr: NuExpression = expr let expr: NuExpression = expr
.into_polars() .into_polars()
.dt() .dt()
.replace_time_zone( .replace_time_zone(
Some(PlSmallStr::from_str(&time_zone)), Some(time_zone),
Expr::Literal(LiteralValue::String(PlSmallStr::from_string(ambiguous))), Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Str(
PlSmallStr::from_string(ambiguous),
))),
nonexistent, nonexistent,
) )
.into(); .into();

View File

@ -15,6 +15,7 @@ use nu_protocol::{
use chrono::DateTime; use chrono::DateTime;
use polars::prelude::{DataType, Expr, Field, LiteralValue, PlSmallStr, Schema, TimeUnit}; use polars::prelude::{DataType, Expr, Field, LiteralValue, PlSmallStr, Schema, TimeUnit};
use polars_plan::plans::DynLiteralValue;
#[derive(Clone)] #[derive(Clone)]
pub struct Truncate; pub struct Truncate;
@ -191,9 +192,9 @@ fn command(
let res: NuExpression = expr let res: NuExpression = expr
.into_polars() .into_polars()
.dt() .dt()
.truncate(Expr::Literal(LiteralValue::String( .truncate(Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Str(
PlSmallStr::from_string(every), PlSmallStr::from_string(every),
))) ))))
.into(); .into();
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -147,7 +147,11 @@ fn command_expr(
}); });
} }
}; };
let res: NuExpression = expr.into_polars().list().contains(single_expression).into(); let res: NuExpression = expr
.into_polars()
.list()
.contains(single_expression, true)
.into();
res.to_pipeline_data(plugin, engine, call.head) res.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -417,6 +417,8 @@ pub trait CustomValueSupport: Cacheable {
mod test { mod test {
use polars::prelude::{DataType, TimeUnit, UnknownKind}; use polars::prelude::{DataType, TimeUnit, UnknownKind};
use crate::command::datetime::timezone_utc;
use super::*; use super::*;
#[test] #[test]
@ -498,7 +500,7 @@ mod test {
let dtype = "object"; let dtype = "object";
let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
let expected = DataType::Object("unknown", None); let expected = DataType::Object("unknown");
assert_eq!(schema, expected); assert_eq!(schema, expected);
} }
@ -526,7 +528,7 @@ mod test {
let dtype = "datetime<ms, UTC>"; let dtype = "datetime<ms, UTC>";
let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
let expected = DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())); let expected = DataType::Datetime(TimeUnit::Milliseconds, Some(timezone_utc()));
assert_eq!(schema, expected); assert_eq!(schema, expected);
let dtype = "invalid"; let dtype = "invalid";

View File

@ -14,13 +14,15 @@ use polars::prelude::{
Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntoSeries, ListBooleanChunkedBuilder, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntoSeries, ListBooleanChunkedBuilder,
ListBuilderTrait, ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, LogicalType, ListBuilderTrait, ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, LogicalType,
NamedFrom, NewChunkedArray, ObjectType, PolarsError, Schema, SchemaExt, Series, StructChunked, NamedFrom, NewChunkedArray, ObjectType, PolarsError, Schema, SchemaExt, Series, StructChunked,
TemporalMethods, TimeUnit, UInt8Type, UInt16Type, UInt32Type, UInt64Type, TemporalMethods, TimeUnit, TimeZone as PolarsTimeZone, UInt8Type, UInt16Type, UInt32Type,
UInt64Type,
}; };
use nu_protocol::{Record, ShellError, Span, Value}; use nu_protocol::{Record, ShellError, Span, Value};
use polars_arrow::Either; use polars_arrow::Either;
use polars_arrow::array::Utf8ViewArray; use polars_arrow::array::Utf8ViewArray;
use crate::command::datetime::timezone_utc;
use crate::dataframe::values::NuSchema; use crate::dataframe::values::NuSchema;
use super::{DataFrameValue, NuDataFrame}; use super::{DataFrameValue, NuDataFrame};
@ -232,7 +234,7 @@ pub fn insert_value(
col_val.column_type = value_to_data_type(&value); col_val.column_type = value_to_data_type(&value);
} else if let Some(current_data_type) = current_data_type { } else if let Some(current_data_type) = current_data_type {
if col_val.column_type.as_ref() != Some(&current_data_type) { if col_val.column_type.as_ref() != Some(&current_data_type) {
col_val.column_type = Some(DataType::Object("Value", None)); col_val.column_type = Some(DataType::Object("Value"));
} }
} }
col_val.values.push(value); col_val.values.push(value);
@ -248,7 +250,7 @@ fn value_to_data_type(value: &Value) -> Option<DataType> {
Value::Bool { .. } => Some(DataType::Boolean), Value::Bool { .. } => Some(DataType::Boolean),
Value::Date { .. } => Some(DataType::Datetime( Value::Date { .. } => Some(DataType::Datetime(
TimeUnit::Nanoseconds, TimeUnit::Nanoseconds,
Some(PlSmallStr::from_static("UTC")), Some(timezone_utc()),
)), )),
Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)), Value::Duration { .. } => Some(DataType::Duration(TimeUnit::Nanoseconds)),
Value::Filesize { .. } => Some(DataType::Int64), Value::Filesize { .. } => Some(DataType::Int64),
@ -266,7 +268,7 @@ fn value_to_data_type(value: &Value) -> Option<DataType> {
.map(value_to_data_type) .map(value_to_data_type)
.nth(1) .nth(1)
.flatten() .flatten()
.unwrap_or(DataType::Object("Value", None)); .unwrap_or(DataType::Object("Value"));
Some(DataType::List(Box::new(list_type))) Some(DataType::List(Box::new(list_type)))
} }
@ -278,7 +280,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
let column_type = &column let column_type = &column
.column_type .column_type
.clone() .clone()
.unwrap_or(DataType::Object("Value", None)); .unwrap_or(DataType::Object("Value"));
match column_type { match column_type {
DataType::Float32 => { DataType::Float32 => {
let series_values: Result<Vec<_>, _> = column let series_values: Result<Vec<_>, _> = column
@ -433,7 +435,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
column.values.iter().map(|v| v.coerce_binary()).collect(); column.values.iter().map(|v| v.coerce_binary()).collect();
Ok(Series::new(name, series_values?)) Ok(Series::new(name, series_values?))
} }
DataType::Object(_, _) => value_to_series(name, &column.values), DataType::Object(_) => value_to_series(name, &column.values),
DataType::Duration(time_unit) => { DataType::Duration(time_unit) => {
let series_values: Result<Vec<_>, _> = column let series_values: Result<Vec<_>, _> = column
.values .values
@ -452,11 +454,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
Err(_) => { Err(_) => {
// An error case will occur when there are lists of mixed types. // An error case will occur when there are lists of mixed types.
// If this happens, fallback to object list // If this happens, fallback to object list
input_type_list_to_series( input_type_list_to_series(&name, &DataType::Object("unknown"), &column.values)
&name,
&DataType::Object("unknown", None),
&column.values,
)
} }
} }
} }
@ -1108,7 +1106,7 @@ fn series_to_values(
Ok(values) Ok(values)
} }
DataType::Object(x, _) => { DataType::Object(x) => {
let casted = series let casted = series
.as_any() .as_any()
.downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>(); .downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>();
@ -1451,7 +1449,7 @@ fn nanos_to_timeunit(a: i64, time_unit: TimeUnit) -> Result<i64, ShellError> {
fn datetime_from_epoch_nanos( fn datetime_from_epoch_nanos(
nanos: i64, nanos: i64,
timezone: &Option<PlSmallStr>, timezone: &Option<PolarsTimeZone>,
span: Span, span: Span,
) -> Result<DateTime<FixedOffset>, ShellError> { ) -> Result<DateTime<FixedOffset>, ShellError> {
let tz: Tz = if let Some(polars_tz) = timezone { let tz: Tz = if let Some(polars_tz) = timezone {

View File

@ -2,10 +2,10 @@ pub mod custom_value;
use custom_value::NuDataTypeCustomValue; use custom_value::NuDataTypeCustomValue;
use nu_protocol::{ShellError, Span, Value, record}; use nu_protocol::{ShellError, Span, Value, record};
use polars::prelude::{DataType, Field, PlSmallStr, TimeUnit, UnknownKind}; use polars::prelude::{DataType, Field, TimeUnit, UnknownKind};
use uuid::Uuid; use uuid::Uuid;
use crate::{Cacheable, PolarsPlugin}; use crate::{Cacheable, PolarsPlugin, command::datetime::timezone_from_str};
use super::{CustomValueSupport, PolarsPluginObject, PolarsPluginType}; use super::{CustomValueSupport, PolarsPluginObject, PolarsPluginType};
@ -166,7 +166,7 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result<DataType, ShellError> {
"time" => Ok(DataType::Time), "time" => Ok(DataType::Time),
"null" => Ok(DataType::Null), "null" => Ok(DataType::Null),
"unknown" => Ok(DataType::Unknown(UnknownKind::Any)), "unknown" => Ok(DataType::Unknown(UnknownKind::Any)),
"object" => Ok(DataType::Object("unknown", None)), "object" => Ok(DataType::Object("unknown")),
_ if dtype.starts_with("list") => { _ if dtype.starts_with("list") => {
let dtype = dtype let dtype = dtype
.trim_start_matches("list") .trim_start_matches("list")
@ -206,12 +206,10 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result<DataType, ShellError> {
let timezone = if "*" == next { let timezone = if "*" == next {
None None
} else { } else {
Some(next.to_string()) let zone_str = next.to_string();
Some(timezone_from_str(&zone_str, None)?)
}; };
Ok(DataType::Datetime( Ok(DataType::Datetime(time_unit, timezone))
time_unit,
timezone.map(PlSmallStr::from),
))
} }
_ if dtype.starts_with("duration") => { _ if dtype.starts_with("duration") => {
let inner = dtype.trim_start_matches("duration<").trim_end_matches('>'); let inner = dtype.trim_start_matches("duration<").trim_end_matches('>');

View File

@ -252,7 +252,7 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
record! { "expr" => Value::string("wildcard", span) }, record! { "expr" => Value::string("wildcard", span) },
span, span,
)), )),
Expr::Explode(expr) => Ok(Value::record( Expr::Explode { input: expr, .. } => Ok(Value::record(
record! { "expr" => expr_to_value(expr.as_ref(), span)? }, record! { "expr" => expr_to_value(expr.as_ref(), span)? },
span, span,
)), )),