Upgrade to polars 0.40 (#13069)

Upgrading to polars 0.40
This commit is contained in:
Jack Wright 2024-06-05 16:26:47 -07:00 committed by GitHub
parent 96493b26d9
commit a6b1d1f6d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 180 additions and 269 deletions

157
Cargo.lock generated
View File

@ -478,17 +478,6 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ada7f35ca622a86a4d6c27be2633fc6c243ecc834859628fcce0681d8e76e1c8"
[[package]]
name = "brotli"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor 2.5.1",
]
[[package]]
name = "brotli"
version = "5.0.0"
@ -497,17 +486,7 @@ checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor 4.0.0",
]
[[package]]
name = "brotli-decompressor"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor",
]
[[package]]
@ -871,7 +850,7 @@ checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
dependencies = [
"crossterm",
"strum",
"strum_macros 0.26.2",
"strum_macros",
"unicode-width",
]
@ -1295,6 +1274,9 @@ name = "either"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
dependencies = [
"serde",
]
[[package]]
name = "eml-parser"
@ -1794,6 +1776,7 @@ dependencies = [
"ahash 0.8.11",
"allocator-api2",
"rayon",
"serde",
]
[[package]]
@ -2935,7 +2918,7 @@ dependencies = [
"alphanumeric-sort",
"base64 0.22.1",
"bracoxide",
"brotli 5.0.0",
"brotli",
"byteorder",
"bytesize",
"calamine",
@ -3222,7 +3205,7 @@ dependencies = [
name = "nu-protocol"
version = "0.94.3"
dependencies = [
"brotli 5.0.0",
"brotli",
"byte-unit",
"chrono",
"chrono-humanize",
@ -3243,7 +3226,7 @@ dependencies = [
"serde",
"serde_json",
"strum",
"strum_macros 0.26.2",
"strum_macros",
"tempfile",
"thiserror",
"typetag",
@ -3404,7 +3387,7 @@ dependencies = [
"polars-plan",
"polars-utils",
"serde",
"sqlparser 0.45.0",
"sqlparser 0.47.0",
"tempfile",
"typetag",
"uuid",
@ -4014,9 +3997,9 @@ dependencies = [
[[package]]
name = "polars"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5"
checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442"
dependencies = [
"getrandom",
"polars-arrow",
@ -4034,9 +4017,9 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75"
checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e"
dependencies = [
"ahash 0.8.11",
"atoi",
@ -4082,9 +4065,9 @@ dependencies = [
[[package]]
name = "polars-compute"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe"
checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0"
dependencies = [
"bytemuck",
"either",
@ -4098,9 +4081,9 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c"
checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a"
dependencies = [
"ahash 0.8.11",
"bitflags 2.5.0",
@ -4132,9 +4115,9 @@ dependencies = [
[[package]]
name = "polars-error"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757"
checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a"
dependencies = [
"avro-schema",
"polars-arrow-format",
@ -4144,10 +4127,30 @@ dependencies = [
]
[[package]]
name = "polars-io"
version = "0.39.2"
name = "polars-expr"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b"
checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484"
dependencies = [
"ahash 0.8.11",
"bitflags 2.5.0",
"once_cell",
"polars-arrow",
"polars-core",
"polars-io",
"polars-ops",
"polars-plan",
"polars-time",
"polars-utils",
"rayon",
"smartstring",
]
[[package]]
name = "polars-io"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e"
dependencies = [
"ahash 0.8.11",
"async-trait",
@ -4186,9 +4189,9 @@ dependencies = [
[[package]]
name = "polars-json"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701"
checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901"
dependencies = [
"ahash 0.8.11",
"chrono",
@ -4207,9 +4210,9 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f"
checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5"
dependencies = [
"ahash 0.8.11",
"bitflags 2.5.0",
@ -4217,6 +4220,7 @@ dependencies = [
"once_cell",
"polars-arrow",
"polars-core",
"polars-expr",
"polars-io",
"polars-json",
"polars-ops",
@ -4231,13 +4235,13 @@ dependencies = [
[[package]]
name = "polars-ops"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f"
checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58"
dependencies = [
"ahash 0.8.11",
"argminmax",
"base64 0.21.7",
"base64 0.22.1",
"bytemuck",
"chrono",
"chrono-tz 0.8.6",
@ -4267,14 +4271,14 @@ dependencies = [
[[package]]
name = "polars-parquet"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a"
checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8"
dependencies = [
"ahash 0.8.11",
"async-stream",
"base64 0.21.7",
"brotli 3.5.0",
"base64 0.22.1",
"brotli",
"ethnum",
"flate2",
"futures",
@ -4293,9 +4297,9 @@ dependencies = [
[[package]]
name = "polars-pipe"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c"
checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436"
dependencies = [
"crossbeam-channel",
"crossbeam-queue",
@ -4305,6 +4309,7 @@ dependencies = [
"polars-arrow",
"polars-compute",
"polars-core",
"polars-expr",
"polars-io",
"polars-ops",
"polars-plan",
@ -4318,13 +4323,14 @@ dependencies = [
[[package]]
name = "polars-plan"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d"
checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90"
dependencies = [
"ahash 0.8.11",
"bytemuck",
"chrono-tz 0.8.6",
"either",
"hashbrown 0.14.5",
"once_cell",
"percent-encoding",
@ -4341,15 +4347,15 @@ dependencies = [
"regex",
"serde",
"smartstring",
"strum_macros 0.25.3",
"strum_macros",
"version_check",
]
[[package]]
name = "polars-row"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e"
checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40"
dependencies = [
"bytemuck",
"polars-arrow",
@ -4359,11 +4365,12 @@ dependencies = [
[[package]]
name = "polars-sql"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7"
checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9"
dependencies = [
"hex",
"once_cell",
"polars-arrow",
"polars-core",
"polars-error",
@ -4377,11 +4384,12 @@ dependencies = [
[[package]]
name = "polars-time"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1"
checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97"
dependencies = [
"atoi",
"bytemuck",
"chrono",
"chrono-tz 0.8.6",
"now",
@ -4398,9 +4406,9 @@ dependencies = [
[[package]]
name = "polars-utils"
version = "0.39.2"
version = "0.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8"
checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c"
dependencies = [
"ahash 0.8.11",
"bytemuck",
@ -4834,7 +4842,7 @@ dependencies = [
"serde_json",
"strip-ansi-escapes",
"strum",
"strum_macros 0.26.2",
"strum_macros",
"thiserror",
"unicode-segmentation",
"unicode-width",
@ -5562,9 +5570,9 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.45.0"
version = "0.47.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0"
checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25"
dependencies = [
"log",
]
@ -5678,20 +5686,7 @@ version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29"
dependencies = [
"strum_macros 0.26.2",
]
[[package]]
name = "strum_macros"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.60",
"strum_macros",
]
[[package]]

View File

@ -29,12 +29,12 @@ indexmap = { version = "2.2" }
mimalloc = { version = "0.1.42" }
num = {version = "0.4"}
serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.45"}
polars-io = { version = "0.39", features = ["avro"]}
polars-arrow = { version = "0.39"}
polars-ops = { version = "0.39"}
polars-plan = { version = "0.39", features = ["regex"]}
polars-utils = { version = "0.39"}
sqlparser = { version = "0.47"}
polars-io = { version = "0.40", features = ["avro"]}
polars-arrow = { version = "0.40"}
polars-ops = { version = "0.40"}
polars-plan = { version = "0.40", features = ["regex"]}
polars-utils = { version = "0.40"}
typetag = "0.2"
uuid = { version = "1.7", features = ["v4", "serde"] }
@ -70,7 +70,7 @@ features = [
"to_dummies",
]
optional = false
version = "0.39"
version = "0.40"
[dev-dependencies]
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.94.3" }

View File

@ -16,14 +16,17 @@ use std::{
fs::File,
io::BufReader,
path::{Path, PathBuf},
sync::Arc,
};
use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
CsvEncoding, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame,
ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
};
use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions};
use polars_io::{
avro::AvroReader, csv::read::CsvReadOptions, prelude::ParallelStrategy, HiveOptions,
};
#[derive(Clone)]
pub struct OpenDataFrame;
@ -175,6 +178,7 @@ fn from_parquet(
cloud_options: None,
use_statistics: false,
hive_options: HiveOptions::default(),
glob: true,
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
@ -445,7 +449,7 @@ fn from_csv(
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = csv_reader.with_has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
@ -475,7 +479,23 @@ fn from_csv(
df.cache_and_to_value(plugin, engine, call.head)
} else {
let csv_reader = CsvReader::from_path(file_path)
let df = CsvReadOptions::default()
.with_has_header(!no_header)
.with_infer_schema_length(infer_schema)
.with_skip_rows(skip_rows.unwrap_or_default())
.with_schema(maybe_schema.map(|s| s.into()))
.with_columns(columns.map(Arc::new))
.map_parse_options(|options| {
options
.with_separator(
delimiter
.as_ref()
.and_then(|d| d.item.chars().next().map(|c| c as u8))
.unwrap_or(b','),
)
.with_encoding(CsvEncoding::LossyUtf8)
})
.try_into_reader_with_file_path(Some(file_path.to_path_buf()))
.map_err(|e| ShellError::GenericError {
error: "Error creating CSV reader".into(),
msg: e.to_string(),
@ -483,52 +503,6 @@ fn from_csv(
help: None,
inner: vec![],
})?
.with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter {
None => csv_reader,
Some(d) => {
if d.item.len() != 1 {
return Err(ShellError::GenericError {
error: "Incorrect delimiter".into(),
msg: "Delimiter has to be one character".into(),
span: Some(d.span),
help: None,
inner: vec![],
});
} else {
let delimiter = match d.item.chars().next() {
Some(d) => d as u8,
None => unreachable!(),
};
csv_reader.with_separator(delimiter)
}
}
};
let csv_reader = csv_reader.has_header(!no_header);
let csv_reader = match maybe_schema {
Some(schema) => csv_reader.with_schema(Some(schema.into())),
None => csv_reader,
};
let csv_reader = match infer_schema {
None => csv_reader,
Some(r) => csv_reader.infer_schema(Some(r)),
};
let csv_reader = match skip_rows {
None => csv_reader,
Some(r) => csv_reader.with_skip_rows(r),
};
let csv_reader = match columns {
None => csv_reader,
Some(columns) => csv_reader.with_columns(Some(columns)),
};
let df: NuDataFrame = csv_reader
.finish()
.map_err(|e| ShellError::GenericError {
error: "CSV reader error".into(),
@ -536,9 +510,8 @@ fn from_csv(
span: Some(call.head),
help: None,
inner: vec![],
})?
.into();
})?;
let df = NuDataFrame::new(false, df);
df.cache_and_to_value(plugin, engine, call.head)
}
}

View File

@ -3,7 +3,8 @@ use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Re
use sqlparser::ast::{
ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType,
Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType,
DuplicateTreatment, Expr as SqlExpr, Function as SQLFunction, FunctionArguments,
Value as SqlValue, WindowType,
};
fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
@ -33,7 +34,7 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result<DataType> {
SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds),
SQLDataType::Array(array_type_def) => match array_type_def {
ArrayElemTypeDef::AngleBracket(inner_type)
| ArrayElemTypeDef::SquareBracket(inner_type) => {
| ArrayElemTypeDef::SquareBracket(inner_type, _) => {
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
}
_ => {
@ -120,9 +121,7 @@ pub fn parse_sql_expr(expr: &SqlExpr) -> Result<Expr> {
}
SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?,
SqlExpr::Cast {
expr,
data_type,
format: _,
expr, data_type, ..
} => cast_(parse_sql_expr(expr)?, data_type)?,
SqlExpr::Nested(expr) => parse_sql_expr(expr)?,
SqlExpr::Value(value) => literal_expr(value)?,
@ -162,8 +161,17 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
use sqlparser::ast::{FunctionArg, FunctionArgExpr};
// Function names usually have no namespace, so take the first part of the name
let function_name = sql_function.name.0[0].value.to_ascii_lowercase();
let args = sql_function
.args
// One day this should support the additional argument types supported with 0.40
let (args, distinct) = match &sql_function.args {
FunctionArguments::List(list) => (
list.args.clone(),
list.duplicate_treatment == Some(DuplicateTreatment::Distinct),
),
_ => (vec![], false),
};
let args = args
.iter()
.map(|arg| match arg {
FunctionArg::Named { arg, .. } => arg,
@ -174,15 +182,15 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
match (
function_name.as_str(),
args.as_slice(),
sql_function.distinct,
distinct,
) {
("sum", [FunctionArgExpr::Expr(expr)], false) => {
("sum", [FunctionArgExpr::Expr(ref expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum()
}
("count", [FunctionArgExpr::Expr(expr)], false) => {
("count", [FunctionArgExpr::Expr(ref expr)], false) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count()
}
("count", [FunctionArgExpr::Expr(expr)], true) => {
("count", [FunctionArgExpr::Expr(ref expr)], true) => {
apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique()
}
// Special case for wildcard args to count function.

View File

@ -189,53 +189,19 @@ fn command(
.map(|col| {
let count = col.len() as f64;
let sum = col.sum_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let mean = match col.mean_as_series().get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
};
let median = match col.median_as_series() {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let std = match col.std_as_series(0) {
Ok(v) => match v.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
},
_ => None,
};
let min = col.min_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let sum = col.sum::<f64>().ok();
let mean = col.mean();
let median = col.median();
let std = col.std(0);
let min = col.min::<f64>().ok().flatten();
let mut quantiles = quantiles
.clone()
.into_iter()
.map(|q| {
col.quantile_as_series(q, QuantileInterpolOptions::default())
col.quantile_reduce(q, QuantileInterpolOptions::default())
.ok()
.map(|s| s.into_series("quantile"))
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
@ -244,15 +210,7 @@ fn command(
})
.collect::<Vec<Option<f64>>>();
let max = col.max_as_series().ok().and_then(|series| {
series
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
Ok(AnyValue::Float64(v)) => Some(v),
_ => None,
})
});
let max = col.max::<f64>().ok().flatten();
let mut descriptors = vec![Some(count), sum, mean, median, std, min];
descriptors.append(&mut quantiles);

View File

@ -5,9 +5,7 @@ use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use crate::values::CustomValueSupport;
use crate::PolarsPlugin;
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value,
};
use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value};
// The structs defined in this file form part of other commands
// since they share a similar name
@ -60,6 +58,7 @@ macro_rules! expr_command {
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
@ -163,19 +162,7 @@ macro_rules! lazy_expr_command {
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func()
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func());
lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
} else {
@ -192,6 +179,7 @@ macro_rules! lazy_expr_command {
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {
@ -244,19 +232,7 @@ macro_rules! lazy_expr_command {
if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) {
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)
.map_err(LabeledError::from)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.$func($ddof)
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})
.map_err(LabeledError::from)?,
);
let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func($ddof));
lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)
} else {
@ -272,6 +248,7 @@ macro_rules! lazy_expr_command {
mod $test {
use super::*;
use crate::test::test_polars_plugin_command;
use nu_protocol::ShellError;
#[test]
fn test_examples() -> Result<(), ShellError> {

View File

@ -35,7 +35,7 @@ impl PluginCommand for ExprLit {
example: "polars lit 2 | polars into-nu",
result: Some(Value::test_record(record! {
"expr" => Value::test_string("literal"),
"value" => Value::test_string("2"),
"value" => Value::test_string("dyn int: 2"),
})),
}]
}

View File

@ -195,6 +195,7 @@ fn get_col_name(expr: &Expr) -> Option<String> {
| Expr::Len
| Expr::Nth(_)
| Expr::SubPlan(_, _)
| Expr::IndexColumn(_)
| Expr::Selector(_) => None,
}
}

View File

@ -189,7 +189,7 @@ impl PluginCommand for LazyJoin {
let how = if left {
JoinType::Left
} else if outer {
JoinType::Outer { coalesce: true }
JoinType::Outer
} else if cross {
JoinType::Cross
} else {

View File

@ -116,16 +116,7 @@ fn command(
call: &EvaluatedCall,
lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
let polars_lazy = lazy
.to_polars()
.median()
.map_err(|e| ShellError::GenericError {
error: format!("Error in median operation: {e}"),
msg: "".into(),
help: None,
span: None,
inner: vec![],
})?;
let polars_lazy = lazy.to_polars().median();
let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy);
lazy.to_pipeline_data(plugin, engine, call.head)
}

View File

@ -134,14 +134,7 @@ fn command(
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.quantile(lit(quantile), QuantileInterpolOptions::default())
.map_err(|e| ShellError::GenericError {
error: "Dataframe Error".into(),
msg: e.to_string(),
help: None,
span: None,
inner: vec![],
})?,
.quantile(lit(quantile), QuantileInterpolOptions::default()),
);
lazy.to_pipeline_data(plugin, engine, call.head)

View File

@ -7,7 +7,7 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned,
SyntaxShape, Type, Value,
};
use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime};
use polars::prelude::{DataType, IntoSeries, RollingOptionsFixedWindow, SeriesOpsTime};
enum RollType {
Min,
@ -131,7 +131,7 @@ fn command(
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let roll_type: Spanned<String> = call.req(0)?;
let window_size: i64 = call.req(1)?;
let window_size: usize = call.req(1)?;
let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?;
let series = df.as_series(call.head)?;
@ -148,17 +148,12 @@ fn command(
let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?;
let rolling_opts = RollingOptionsImpl {
window_size: Duration::new(window_size),
min_periods: window_size as usize,
weights: None,
center: false,
by: None,
closed_window: None,
tu: None,
tz: None,
fn_params: None,
let rolling_opts = RollingOptionsFixedWindow {
window_size,
min_periods: window_size,
..RollingOptionsFixedWindow::default()
};
let res = match roll_type {
RollType::Max => series.rolling_max(rolling_opts),
RollType::Min => series.rolling_min(rolling_opts),

View File

@ -155,7 +155,10 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
span,
)),
Expr::Columns(columns) => {
let value = columns.iter().map(|col| Value::string(col, span)).collect();
let value = columns
.iter()
.map(|col| Value::string(col.to_string(), span))
.collect();
Ok(Value::record(
record! {
"expr" => Value::string("columns", span),
@ -415,6 +418,12 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
msg_span: span,
input_span: Span::unknown(),
}),
Expr::IndexColumn(_) => Err(ShellError::UnsupportedInput {
msg: "Expressions of type IndexColumn to Nu Values is not yet supported".to_string(),
input: format!("Expression is {expr:?}"),
msg_span: span,
input_span: Span::unknown(),
}),
}
}

View File

@ -160,7 +160,15 @@ impl CustomValueSupport for NuLazyFrame {
.unwrap_or_else(|_| "<NOT AVAILABLE>".to_string());
Ok(Value::record(
record! {
"plan" => Value::string(self.lazy.describe_plan(), span),
"plan" => Value::string(
self.lazy.describe_plan().map_err(|e| ShellError::GenericError {
error: "Error getting plan".into(),
msg: e.to_string(),
span: Some(span),
help: None,
inner: vec![],
})?,
span),
"optimized_plan" => Value::string(optimized_plan, span),
},
span,

View File

@ -1,7 +1,10 @@
use std::sync::Arc;
use nu_protocol::{ShellError, Span, Value};
use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit};
use polars::{
datatypes::UnknownKind,
prelude::{DataType, Field, Schema, SchemaRef, TimeUnit},
};
#[derive(Debug, Clone)]
pub struct NuSchema {
@ -104,7 +107,7 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result<DataType, ShellError> {
"date" => Ok(DataType::Date),
"time" => Ok(DataType::Time),
"null" => Ok(DataType::Null),
"unknown" => Ok(DataType::Unknown),
"unknown" => Ok(DataType::Unknown(UnknownKind::Any)),
"object" => Ok(DataType::Object("unknown", None)),
_ if dtype.starts_with("list") => {
let dtype = dtype
@ -299,7 +302,7 @@ mod test {
let dtype = "unknown";
let schema = str_to_dtype(dtype, Span::unknown()).unwrap();
let expected = DataType::Unknown;
let expected = DataType::Unknown(UnknownKind::Any);
assert_eq!(schema, expected);
let dtype = "object";