Upgrade to polars 0.40 (#13069)

Upgrading to polars 0.40
This commit is contained in:
Jack Wright 2024-06-05 16:26:47 -07:00 committed by GitHub
parent 96493b26d9
commit a6b1d1f6d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 180 additions and 269 deletions

157
Cargo.lock generated
View File

@ -478,17 +478,6 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ada7f35ca622a86a4d6c27be2633fc6c243ecc834859628fcce0681d8e76e1c8" checksum = "ada7f35ca622a86a4d6c27be2633fc6c243ecc834859628fcce0681d8e76e1c8"
[[package]]
name = "brotli"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor 2.5.1",
]
[[package]] [[package]]
name = "brotli" name = "brotli"
version = "5.0.0" version = "5.0.0"
@ -497,17 +486,7 @@ checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67"
dependencies = [ dependencies = [
"alloc-no-stdlib", "alloc-no-stdlib",
"alloc-stdlib", "alloc-stdlib",
"brotli-decompressor 4.0.0", "brotli-decompressor",
]
[[package]]
name = "brotli-decompressor"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
] ]
[[package]] [[package]]
@ -871,7 +850,7 @@ checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
dependencies = [ dependencies = [
"crossterm", "crossterm",
"strum", "strum",
"strum_macros 0.26.2", "strum_macros",
"unicode-width", "unicode-width",
] ]
@ -1295,6 +1274,9 @@ name = "either"
version = "1.11.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "eml-parser" name = "eml-parser"
@ -1794,6 +1776,7 @@ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"allocator-api2", "allocator-api2",
"rayon", "rayon",
"serde",
] ]
[[package]] [[package]]
@ -2935,7 +2918,7 @@ dependencies = [
"alphanumeric-sort", "alphanumeric-sort",
"base64 0.22.1", "base64 0.22.1",
"bracoxide", "bracoxide",
"brotli 5.0.0", "brotli",
"byteorder", "byteorder",
"bytesize", "bytesize",
"calamine", "calamine",
@ -3222,7 +3205,7 @@ dependencies = [
name = "nu-protocol" name = "nu-protocol"
version = "0.94.3" version = "0.94.3"
dependencies = [ dependencies = [
"brotli 5.0.0", "brotli",
"byte-unit", "byte-unit",
"chrono", "chrono",
"chrono-humanize", "chrono-humanize",
@ -3243,7 +3226,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"strum", "strum",
"strum_macros 0.26.2", "strum_macros",
"tempfile", "tempfile",
"thiserror", "thiserror",
"typetag", "typetag",
@ -3404,7 +3387,7 @@ dependencies = [
"polars-plan", "polars-plan",
"polars-utils", "polars-utils",
"serde", "serde",
"sqlparser 0.45.0", "sqlparser 0.47.0",
"tempfile", "tempfile",
"typetag", "typetag",
"uuid", "uuid",
@ -4014,9 +3997,9 @@ dependencies = [
[[package]] [[package]]
name = "polars" name = "polars"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442"
dependencies = [ dependencies = [
"getrandom", "getrandom",
"polars-arrow", "polars-arrow",
@ -4034,9 +4017,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-arrow" name = "polars-arrow"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"atoi", "atoi",
@ -4082,9 +4065,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-compute" name = "polars-compute"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"either", "either",
@ -4098,9 +4081,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-core" name = "polars-core"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bitflags 2.5.0", "bitflags 2.5.0",
@ -4132,9 +4115,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-error" name = "polars-error"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a"
dependencies = [ dependencies = [
"avro-schema", "avro-schema",
"polars-arrow-format", "polars-arrow-format",
@ -4144,10 +4127,30 @@ dependencies = [
] ]
[[package]] [[package]]
name = "polars-io" name = "polars-expr"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484"
dependencies = [
"ahash 0.8.11",
"bitflags 2.5.0",
"once_cell",
"polars-arrow",
"polars-core",
"polars-io",
"polars-ops",
"polars-plan",
"polars-time",
"polars-utils",
"rayon",
"smartstring",
]
[[package]]
name = "polars-io"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"async-trait", "async-trait",
@ -4186,9 +4189,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-json" name = "polars-json"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701" checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"chrono", "chrono",
@ -4207,9 +4210,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-lazy" name = "polars-lazy"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bitflags 2.5.0", "bitflags 2.5.0",
@ -4217,6 +4220,7 @@ dependencies = [
"once_cell", "once_cell",
"polars-arrow", "polars-arrow",
"polars-core", "polars-core",
"polars-expr",
"polars-io", "polars-io",
"polars-json", "polars-json",
"polars-ops", "polars-ops",
@ -4231,13 +4235,13 @@ dependencies = [
[[package]] [[package]]
name = "polars-ops" name = "polars-ops"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"argminmax", "argminmax",
"base64 0.21.7", "base64 0.22.1",
"bytemuck", "bytemuck",
"chrono", "chrono",
"chrono-tz 0.8.6", "chrono-tz 0.8.6",
@ -4267,14 +4271,14 @@ dependencies = [
[[package]] [[package]]
name = "polars-parquet" name = "polars-parquet"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"async-stream", "async-stream",
"base64 0.21.7", "base64 0.22.1",
"brotli 3.5.0", "brotli",
"ethnum", "ethnum",
"flate2", "flate2",
"futures", "futures",
@ -4293,9 +4297,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-pipe" name = "polars-pipe"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"crossbeam-queue", "crossbeam-queue",
@ -4305,6 +4309,7 @@ dependencies = [
"polars-arrow", "polars-arrow",
"polars-compute", "polars-compute",
"polars-core", "polars-core",
"polars-expr",
"polars-io", "polars-io",
"polars-ops", "polars-ops",
"polars-plan", "polars-plan",
@ -4318,13 +4323,14 @@ dependencies = [
[[package]] [[package]]
name = "polars-plan" name = "polars-plan"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bytemuck", "bytemuck",
"chrono-tz 0.8.6", "chrono-tz 0.8.6",
"either",
"hashbrown 0.14.5", "hashbrown 0.14.5",
"once_cell", "once_cell",
"percent-encoding", "percent-encoding",
@ -4341,15 +4347,15 @@ dependencies = [
"regex", "regex",
"serde", "serde",
"smartstring", "smartstring",
"strum_macros 0.25.3", "strum_macros",
"version_check", "version_check",
] ]
[[package]] [[package]]
name = "polars-row" name = "polars-row"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"polars-arrow", "polars-arrow",
@ -4359,11 +4365,12 @@ dependencies = [
[[package]] [[package]]
name = "polars-sql" name = "polars-sql"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9"
dependencies = [ dependencies = [
"hex", "hex",
"once_cell",
"polars-arrow", "polars-arrow",
"polars-core", "polars-core",
"polars-error", "polars-error",
@ -4377,11 +4384,12 @@ dependencies = [
[[package]] [[package]]
name = "polars-time" name = "polars-time"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97"
dependencies = [ dependencies = [
"atoi", "atoi",
"bytemuck",
"chrono", "chrono",
"chrono-tz 0.8.6", "chrono-tz 0.8.6",
"now", "now",
@ -4398,9 +4406,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-utils" name = "polars-utils"
version = "0.39.2" version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"bytemuck", "bytemuck",
@ -4834,7 +4842,7 @@ dependencies = [
"serde_json", "serde_json",
"strip-ansi-escapes", "strip-ansi-escapes",
"strum", "strum",
"strum_macros 0.26.2", "strum_macros",
"thiserror", "thiserror",
"unicode-segmentation", "unicode-segmentation",
"unicode-width", "unicode-width",
@ -5562,9 +5570,9 @@ dependencies = [
[[package]] [[package]]
name = "sqlparser" name = "sqlparser"
version = "0.45.0" version = "0.47.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25"
dependencies = [ dependencies = [
"log", "log",
] ]
@ -5678,20 +5686,7 @@ version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29"
dependencies = [ dependencies = [
"strum_macros 0.26.2", "strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.60",
] ]
[[package]] [[package]]

View File

@ -29,12 +29,12 @@ indexmap = { version = "2.2" }
mimalloc = { version = "0.1.42" } mimalloc = { version = "0.1.42" }
num = {version = "0.4"} num = {version = "0.4"}
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.45"} sqlparser = { version = "0.47"}
polars-io = { version = "0.39", features = ["avro"]} polars-io = { version = "0.40", features = ["avro"]}
polars-arrow = { version = "0.39"} polars-arrow = { version = "0.40"}
polars-ops = { version = "0.39"} polars-ops = { version = "0.40"}
polars-plan = { version = "0.39", features = ["regex"]} polars-plan = { version = "0.40", features = ["regex"]}
polars-utils = { version = "0.39"} polars-utils = { version = "0.40"}
typetag = "0.2" typetag = "0.2"
uuid = { version = "1.7", features = ["v4", "serde"] } uuid = { version = "1.7", features = ["v4", "serde"] }
@ -70,7 +70,7 @@ features = [
"to_dummies", "to_dummies",
] ]
optional = false optional = false
version = "0.39" version = "0.40"
[dev-dependencies] [dev-dependencies]
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.94.3" } nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.94.3" }

View File

@ -16,14 +16,17 @@ use std::{
fs::File, fs::File,
io::BufReader, io::BufReader,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::Arc,
}; };
use polars::prelude::{ use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, CsvEncoding, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
}; };
use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions}; use polars_io::{
avro::AvroReader, csv::read::CsvReadOptions, prelude::ParallelStrategy, HiveOptions,
};
#[derive(Clone)] #[derive(Clone)]
pub struct OpenDataFrame; pub struct OpenDataFrame;
@ -175,6 +178,7 @@ fn from_parquet(
cloud_options: None, cloud_options: None,
use_statistics: false, use_statistics: false,
hive_options: HiveOptions::default(), hive_options: HiveOptions::default(),
glob: true,
}; };
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
@ -445,7 +449,7 @@ fn from_csv(
} }
}; };
let csv_reader = csv_reader.has_header(!no_header); let csv_reader = csv_reader.with_has_header(!no_header);
let csv_reader = match maybe_schema { let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())), Some(schema) => csv_reader.with_schema(Some(schema.into())),
@ -475,7 +479,23 @@ fn from_csv(
df.cache_and_to_value(plugin, engine, call.head) df.cache_and_to_value(plugin, engine, call.head)
} else { } else {
let csv_reader = CsvReader::from_path(file_path) let df = CsvReadOptions::default()
.with_has_header(!no_header)
.with_infer_schema_length(infer_schema)
.with_skip_rows(skip_rows.unwrap_or_default())
.with_schema(maybe_schema.map(|s| s.into()))
.with_columns(columns.map(Arc::new))
.map_parse_options(|options| {
options
.with_separator(
delimiter
.as_ref()
.and_then(|d| d.item.chars().next().map(|c| c as u8))
.unwrap_or(b','),
)
.with_encoding(CsvEncoding::LossyUtf8)
})
.try_into_reader_with_file_path(Some(file_path.to_path_buf()))
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(), error: "Error creating CSV reader".into(),
msg: e.to_string(), msg: e.to_string(),
@ -483,52 +503,6 @@ fn from_csv(
help: None, help: None,
inner: vec![], inner: vec![],
})? })?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let csv_reader = match columns {
None => csv_reader,
Some(columns) => csv_reader.with_columns(Some(columns)),
};
let df: NuDataFrame = csv_reader
.finish() .finish()
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {
error: "CSV reader error".into(), error: "CSV reader error".into(),
@ -536,9 +510,8 @@ fn from_csv(
span: Some(call.head), span: Some(call.head),
help: None, help: None,
inner: vec![], inner: vec![],
})? })?;
.into(); let df = NuDataFrame::new(false, df);
df.cache_and_to_value(plugin, engine, call.head) df.cache_and_to_value(plugin, engine, call.head)
} }
} }

View File

@ -3,7 +3,8 @@ use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Re
use sqlparser::ast::{ use sqlparser::ast::{
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType, DuplicateTreatment, Expr as SqlExpr, Function as SQLFunction, FunctionArguments,
Value as SqlValue, WindowType,
}; };
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> { fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
@ -33,7 +34,7 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds), SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
SQLDataType::Array(array_type_def) => match array_type_def { SQLDataType::Array(array_type_def) => match array_type_def {
ArrayElemTypeDef::AngleBracket(inner_type) ArrayElemTypeDef::AngleBracket(inner_type)
| ArrayElemTypeDef::SquareBracket(inner_type) => { | ArrayElemTypeDef::SquareBracket(inner_type, _) => {
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)) DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
} }
_ => { _ => {
@ -120,9 +121,7 @@ pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
} }
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?, SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
SqlExpr::Cast { SqlExpr::Cast {
expr, expr, data_type, ..
data_type,
format: _,
} => cast_(parse_sql_expr(expr)?, data_type)?, } => cast_(parse_sql_expr(expr)?, data_type)?,
SqlExpr::Nested(expr) => parse_sql_expr(expr)?, SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
SqlExpr::Value(value) => literal_expr(value)?, SqlExpr::Value(value) => literal_expr(value)?,
@ -162,8 +161,17 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
use sqlparser::ast::{FunctionArg, FunctionArgExpr}; use sqlparser::ast::{FunctionArg, FunctionArgExpr};
// Function name mostly do not have name space, so it mostly take the first args // Function name mostly do not have name space, so it mostly take the first args
let function_name = sql_function.name.0[0].value.to_ascii_lowercase(); let function_name = sql_function.name.0[0].value.to_ascii_lowercase();
let args = sql_function
.args // One day this should support the additional argument types supported with 0.40
let (args, distinct) = match &sql_function.args {
FunctionArguments::List(list) => (
list.args.clone(),
list.duplicate_treatment == Some(DuplicateTreatment::Distinct),
),
_ => (vec![], false),
};
let args = args
.iter() .iter()
.map(|arg| match arg { .map(|arg| match arg {
FunctionArg::Named { arg, .. } => arg, FunctionArg::Named { arg, .. } => arg,
@ -174,15 +182,15 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
match ( match (
function_name.as_str(), function_name.as_str(),
args.as_slice(), args.as_slice(),
sql_function.distinct, distinct,
) { ) {
("sum", [FunctionArgExpr::Expr(expr)], false) => { ("sum", [FunctionArgExpr::Expr(ref expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum() apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum()
} }
("count", [FunctionArgExpr::Expr(expr)], false) => { ("count", [FunctionArgExpr::Expr(ref expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count() apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count()
} }
("count", [FunctionArgExpr::Expr(expr)], true) => { ("count", [FunctionArgExpr::Expr(ref expr)], true) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique() apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique()
} }
// Special case for wildcard args to count function. // Special case for wildcard args to count function.

View File

@ -189,53 +189,19 @@ fn command(
.map(|col| { .map(|col| {
let count = col.len() as f64; let count = col.len() as f64;
let sum = col.sum_as_series().ok().and_then(|series| { let sum = col.sum::<f64>().ok();
series let mean = col.mean();
.cast(&DataType::Float64) let median = col.median();
.ok() let std = col.std(0);
.and_then(|ca| match ca.get(0) { let min = col.min::<f64>().ok().flatten();
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mean = match col.mean_as_series().get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
};
let median = match col.median_as_series() {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let std = match col.std_as_series(0) {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let min = col.min_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut quantiles = quantiles let mut quantiles = quantiles
.clone() .clone()
.into_iter() .into_iter()
.map(|q| { .map(|q| {
col.quantile_as_series(q, QuantileInterpolOptions::default()) col.quantile_reduce(q, QuantileInterpolOptions::default())
.ok() .ok()
.map(|s| s.into_series("quantile"))
.and_then(|ca| ca.cast(&DataType::Float64).ok()) .and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) { .and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v), Ok(AnyValue::Float64(v)) => Some(v),
@ -244,15 +210,7 @@ fn command(
}) })
.collect::<Vec<Option<f64>>>(); .collect::<Vec<Option<f64>>>();
let max = col.max_as_series().ok().and_then(|series| { let max = col.max::<f64>().ok().flatten();
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mut descriptors = vec![Some(count), sum, mean, median, std, min]; let mut descriptors = vec![Some(count), sum, mean, median, std, min];
descriptors.append(&mut quantiles); descriptors.append(&mut quantiles);

View File

@ -5,9 +5,7 @@ use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::CustomValueSupport; use crate::values::CustomValueSupport;
use crate::PolarsPlugin; use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{ use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
// The structs defined in this file are structs that form part of other commands // The structs defined in this file are structs that form part of other commands
// since they share a similar name // since they share a similar name
@ -60,6 +58,7 @@ macro_rules! expr_command {
mod $test { mod $test {
use super::*; use super::*;
use crate::test::test_polars_plugin_command; use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test] #[test]
fn test_examples() -> Result<(), ShellError> { fn test_examples() -> Result<(), ShellError> {
@ -163,19 +162,7 @@ macro_rules! lazy_expr_command {
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?; .map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} else { } else {
@ -192,6 +179,7 @@ macro_rules! lazy_expr_command {
mod $test { mod $test {
use super::*; use super::*;
use crate::test::test_polars_plugin_command; use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test] #[test]
fn test_examples() -> Result<(), ShellError> { fn test_examples() -> Result<(), ShellError> {
@ -244,19 +232,7 @@ macro_rules! lazy_expr_command {
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?; .map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func($ddof));
lazy.from_eager,
lazy.to_polars()
.$func($ddof)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from) .map_err(LabeledError::from)
} else { } else {
@ -272,6 +248,7 @@ macro_rules! lazy_expr_command {
mod $test { mod $test {
use super::*; use super::*;
use crate::test::test_polars_plugin_command; use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test] #[test]
fn test_examples() -> Result<(), ShellError> { fn test_examples() -> Result<(), ShellError> {

View File

@ -35,7 +35,7 @@ impl PluginCommand for ExprLit {
example: "polars lit 2 | polars into-nu", example: "polars lit 2 | polars into-nu",
result: Some(Value::test_record(record! { result: Some(Value::test_record(record! {
"expr" => Value::test_string("literal"), "expr" => Value::test_string("literal"),
"value" => Value::test_string("2"), "value" => Value::test_string("dyn int: 2"),
})), })),
}] }]
} }

View File

@ -195,6 +195,7 @@ fn get_col_name(expr: &Expr) -> Option<String> {
| Expr::Len | Expr::Len
| Expr::Nth(_) | Expr::Nth(_)
| Expr::SubPlan(_, _) | Expr::SubPlan(_, _)
| Expr::IndexColumn(_)
| Expr::Selector(_) => None, | Expr::Selector(_) => None,
} }
} }

View File

@ -189,7 +189,7 @@ impl PluginCommand for LazyJoin {
let how = if left { let how = if left {
JoinType::Left JoinType::Left
} else if outer { } else if outer {
JoinType::Outer { coalesce: true } JoinType::Outer
} else if cross { } else if cross {
JoinType::Cross JoinType::Cross
} else { } else {

View File

@ -116,16 +116,7 @@ fn command(
call: &EvaluatedCall, call: &EvaluatedCall,
lazy: NuLazyFrame, lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let polars_lazy = lazy let polars_lazy = lazy.to_polars().median();
.to_polars()
.median()
.map_err(|e| ShellError::GenericError {
error: format!("Error in median operation: {e}"),
msg: "".into(),
help: None,
span: None,
inner: vec![],
})?;
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy); let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)
} }

View File

@ -134,14 +134,7 @@ fn command(
let lazy = NuLazyFrame::new( let lazy = NuLazyFrame::new(
lazy.from_eager, lazy.from_eager,
lazy.to_polars() lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default()) .quantile(lit(quantile), QuantileInterpolOptions::default()),
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})?,
); );
lazy.to_pipeline_data(plugin, engine, call.head) lazy.to_pipeline_data(plugin, engine, call.head)

View File

@ -7,7 +7,7 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value, SyntaxShape, Type, Value,
}; };
use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime}; use polars::prelude::{DataType, IntoSeries, RollingOptionsFixedWindow, SeriesOpsTime};
enum RollType { enum RollType {
Min, Min,
@ -131,7 +131,7 @@ fn command(
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let roll_type: Spanned<String> = call.req(0)?; let roll_type: Spanned<String> = call.req(0)?;
let window_size: i64 = call.req(1)?; let window_size: usize = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?; let series = df.as_series(call.head)?;
@ -148,17 +148,12 @@ fn command(
let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?; let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?;
let rolling_opts = RollingOptionsImpl { let rolling_opts = RollingOptionsFixedWindow {
window_size: Duration::new(window_size), window_size,
min_periods: window_size as usize, min_periods: window_size,
weights: None, ..RollingOptionsFixedWindow::default()
center: false,
by: None,
closed_window: None,
tu: None,
tz: None,
fn_params: None,
}; };
let res = match roll_type { let res = match roll_type {
RollType::Max => series.rolling_max(rolling_opts), RollType::Max => series.rolling_max(rolling_opts),
RollType::Min => series.rolling_min(rolling_opts), RollType::Min => series.rolling_min(rolling_opts),

View File

@ -155,7 +155,10 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
span, span,
)), )),
Expr::Columns(columns) => { Expr::Columns(columns) => {
let value = columns.iter().map(|col| Value::string(col, span)).collect(); let value = columns
.iter()
.map(|col| Value::string(col.to_string(), span))
.collect();
Ok(Value::record( Ok(Value::record(
record! { record! {
"expr" => Value::string("columns", span), "expr" => Value::string("columns", span),
@ -415,6 +418,12 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
msg_span: span, msg_span: span,
input_span: Span::unknown(), input_span: Span::unknown(),
}), }),
Expr::IndexColumn(_) => Err(ShellError::UnsupportedInput {
msg: "Expressions of type IndexColumn to Nu Values is not yet supported".to_string(),
input: format!("Expression is {expr:?}"),
msg_span: span,
input_span: Span::unknown(),
}),
} }
} }

View File

@ -160,7 +160,15 @@ impl CustomValueSupport for NuLazyFrame {
.unwrap_or_else(|_| "<NOT AVAILABLE>".to_string()); .unwrap_or_else(|_| "<NOT AVAILABLE>".to_string());
Ok(Value::record( Ok(Value::record(
record! { record! {
"plan" => Value::string(self.lazy.describe_plan(), span), "plan" => Value::string(
self.lazy.describe_plan().map_err(|e| ShellError::GenericError {
error: "Error getting plan".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?,
span),
"optimized_plan" => Value::string(optimized_plan, span), "optimized_plan" => Value::string(optimized_plan, span),
}, },
span, span,

View File

@ -1,7 +1,10 @@
use std::sync::Arc; use std::sync::Arc;
use nu_protocol::{ShellError, Span, Value}; use nu_protocol::{ShellError, Span, Value};
use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}; use polars::{
datatypes::UnknownKind,
prelude::{DataType, Field, Schema, SchemaRef, TimeUnit},
};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct NuSchema { pub struct NuSchema {
@ -104,7 +107,7 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result<DataType, ShellError> {
"date" => Ok(DataType::Date), "date" => Ok(DataType::Date),
"time" => Ok(DataType::Time), "time" => Ok(DataType::Time),
"null" => Ok(DataType::Null), "null" => Ok(DataType::Null),
"unknown" => Ok(DataType::Unknown), "unknown" => Ok(DataType::Unknown(UnknownKind::Any)),
"object" => Ok(DataType::Object("unknown", None)), "object" => Ok(DataType::Object("unknown", None)),
_ if dtype.starts_with("list") => { _ if dtype.starts_with("list") => {
let dtype = dtype let dtype = dtype
@ -299,7 +302,7 @@ mod test {
let dtype = "unknown"; let dtype = "unknown";
let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
let expected = DataType::Unknown; let expected = DataType::Unknown(UnknownKind::Any);
assert_eq!(schema, expected); assert_eq!(schema, expected);
let dtype = "object"; let dtype = "object";