Polars upgrade to 0.46 (#14933)

Upgraded to Polars 0.46
Jack Wright
2025-01-27 11:01:39 -08:00
committed by GitHub
parent b53271b86a
commit c0b4d19761
14 changed files with 223 additions and 353 deletions

Cargo.lock (generated): 484 lines changed; diff suppressed because it is too large.

@@ -30,25 +30,25 @@ indexmap = { version = "2.7" }
 mimalloc = { version = "0.1.42" }
 num = {version = "0.4"}
 serde = { version = "1.0", features = ["derive"] }
-sqlparser = { version = "0.49"}
-polars-io = { version = "0.44", features = ["avro", "cloud", "aws"]}
-polars-arrow = { version = "0.44"}
-polars-ops = { version = "0.44", features = ["pivot"]}
-polars-plan = { version = "0.44", features = ["regex"]}
-polars-utils = { version = "0.44"}
+sqlparser = { version = "0.53"}
+polars-io = { version = "0.46", features = ["avro", "cloud", "aws"]}
+polars-arrow = { version = "0.46"}
+polars-ops = { version = "0.46", features = ["pivot"]}
+polars-plan = { version = "0.46", features = ["regex"]}
+polars-utils = { version = "0.46"}
 typetag = "0.2"
 env_logger = "0.11.3"
 log.workspace = true
 uuid = { version = "1.12", features = ["v4", "serde"] }
 # Due to a compile error with polars, this is included to force the raw dependency
-hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
+hashbrown = { version = "0.15", features = ["rayon", "serde"] }
 # Cloud support
 aws-config = { version = "1.5", features = ["sso"] }
 aws-credential-types = "1.2"
 tokio = { version = "1.43", features = ["full"] }
-object_store = { version = "0.10", default-features = false }
+object_store = { version = "0.11", default-features = false }
 url.workspace = true

 [dependencies.polars]
@@ -90,7 +90,7 @@ features = [
     "to_dummies",
 ]
 optional = false
-version = "0.44"
+version = "0.46"

 [dev-dependencies]
 nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.101.1" }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_ipc(file_path, IpcWriterOptions::default())
+        // todo - add cloud options
+        .sink_ipc(file_path, IpcWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -32,7 +32,8 @@ pub(crate) fn command_lazy(
     };
     lazy.to_polars()
-        .sink_csv(file_path, options)
+        // todo - add cloud options
+        .sink_csv(file_path, options, None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_json(file_path, JsonWriterOptions::default())
+        // todo - add cloud options
+        .sink_json(file_path, JsonWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_parquet(file_path, ParquetWriteOptions::default())
+        // todo - add cloud options
+        .sink_parquet(&file_path, ParquetWriteOptions::default(), None)
        .map_err(|e| polars_file_save_error(e, file_span))
 }
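All four lazy sinks (IPC, CSV, JSON, Parquet) gain a trailing cloud-options argument in Polars 0.46; this PR passes `None` everywhere and leaves `// todo` markers for wiring real cloud options through later. A minimal sketch of the new call shape, with an assumed helper name and a local path:

```rust
use std::path::PathBuf;

use polars::prelude::*;

// Minimal sketch of the 0.46 sink signature: the new trailing argument is
// Option<CloudOptions>; None keeps the old local-filesystem behavior.
// The helper name and path handling are illustrative, not from the PR.
fn sink_locally(lazy: LazyFrame, file_path: PathBuf) -> PolarsResult<()> {
    lazy.sink_ipc(file_path, IpcWriterOptions::default(), None)
}
```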


@@ -163,23 +163,23 @@ impl PluginCommand for ToDataFrame {
             example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
             result: Some(
                 NuDataFrame::try_from_series_vec(vec![
                     Series::new("a".into(), &[1u8]),
                     {
-                        let dtype = DataType::Struct(vec![Field::new("a".into(), DataType::List(Box::new(DataType::UInt64)))]);
+                        let dtype = DataType::Struct(vec![Field::new("d".into(), DataType::List(Box::new(DataType::UInt64)))]);
                         let vals = vec![AnyValue::StructOwned(
-                            Box::new((vec![AnyValue::List(Series::new("a".into(), &[1u64, 2, 3]))], vec![Field::new("a".into(), DataType::String)]))); 1];
+                            Box::new((vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))], vec![Field::new("d".into(), DataType::String)]))); 1];
                         Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
                             .expect("Struct series should not fail")
                     },
                     {
                         let dtype = DataType::List(Box::new(DataType::String));
                         let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
                         Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
                             .expect("List series should not fail")
                     }
                 ], Span::test_data())
                 .expect("simple df for test should not fail")
                 .into_value(Span::test_data()),
             ),
         },
         Example {
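The fix above renames the inner struct field from `a` to `d` so the `AnyValue`s line up with the declared dtype. A minimal, self-contained sketch of the same `Series::from_any_values_and_dtype` pattern (the name "c" and the values are illustrative; the point is that the declared dtype and the inner values must agree):

```rust
use polars::prelude::*;

// Sketch of building a typed list column from AnyValues, mirroring the
// corrected example above, with the inner dtype kept consistent.
fn list_column() -> PolarsResult<Series> {
    let dtype = DataType::List(Box::new(DataType::Int32));
    let vals = vec![AnyValue::List(Series::new("c".into(), &[10i32, 11, 12]))];
    Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
}
```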


@@ -133,6 +133,8 @@ fn command_lazy(
         to_supertypes,
         diagonal,
         from_partitioned_ds,
+        // todo - expose maintain order
+        ..Default::default()
     };
     let res: NuLazyFrame = polars::prelude::concat(&dataframes, args)
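`UnionArgs` grows new fields in 0.46 (including `maintain_order`), which is why the literal above now ends in `..Default::default()`. A sketch of the pattern, assuming the defaults are acceptable:

```rust
use polars::prelude::*;

// Sketch of the UnionArgs struct-update pattern used above: set only the
// fields you care about and let ..Default::default() absorb additions such
// as maintain_order. The input frames are assumed to come from the caller.
fn concat_all(frames: &[LazyFrame]) -> PolarsResult<LazyFrame> {
    let args = UnionArgs {
        rechunk: true,
        ..Default::default()
    };
    concat(frames, args)
}
```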


@@ -143,6 +143,8 @@ impl PluginCommand for LazySortBy {
             nulls_last: vec![nulls_last],
             multithreaded: true,
             maintain_order,
+            // todo - expose limit
+            limit: None,
         };
         let pipeline_value = input.into_value(call.head)?;
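`SortMultipleOptions` gains a `limit` field in 0.46; passing `None` keeps the old full-sort behavior, both here and in the eager sort command further down. A sketch with an assumed column name:

```rust
use polars::prelude::*;

// Sketch of the 0.46 SortMultipleOptions: limit is the new field. Some(n)
// asks for only the first n sorted rows; None preserves the full sort.
// The column name "a" is an assumption for illustration.
fn sort_frame(lf: LazyFrame) -> LazyFrame {
    let options = SortMultipleOptions {
        descending: vec![false],
        nulls_last: vec![true],
        multithreaded: true,
        maintain_order: false,
        limit: None,
    };
    lf.sort_by_exprs([col("a")], options)
}
```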


@@ -176,6 +176,7 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
         .map(|arg| match arg {
             FunctionArg::Named { arg, .. } => arg,
             FunctionArg::Unnamed(arg) => arg,
+            FunctionArg::ExprNamed { arg, .. } => arg,
         })
         .collect::<Vec<_>>();
     Ok(


@@ -113,6 +113,8 @@ fn command(
         nulls_last: call.has_flag("nulls-last")?,
         multithreaded: true,
         maintain_order: call.has_flag("maintain-order")?,
+        // todo - expose limit
+        limit: None,
     };
     let mut res = df


@@ -1,5 +1,6 @@
 use std::collections::HashMap;
 use std::ops::{Deref, DerefMut};
+use std::sync::Arc;

 use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
 use chrono_tz::Tz;
@@ -8,7 +9,6 @@ use polars::chunked_array::builder::AnonymousOwnedListBuilder;
 use polars::chunked_array::object::builder::ObjectChunkedBuilder;
 use polars::chunked_array::ChunkedArray;
 use polars::datatypes::{AnyValue, PlSmallStr};
-use polars::export::arrow::Either;
 use polars::prelude::{
     ChunkAnyValue, Column as PolarsColumn, DataFrame, DataType, DatetimeChunked, Float32Type,
     Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder,
@@ -18,6 +18,7 @@ use polars::prelude::{
 };

 use nu_protocol::{Record, ShellError, Span, Value};
+use polars_arrow::Either;

 use crate::dataframe::values::NuSchema;
@@ -474,7 +475,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
             Ok(res.into_series())
         }
         DataType::Struct(fields) => {
-            let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
+            let schema = Some(NuSchema::new(Arc::new(Schema::from_iter(fields.clone()))));
             // let mut structs: Vec<Series> = Vec::new();
             let mut structs: HashMap<PlSmallStr, Series> = HashMap::new();
@@ -599,7 +600,7 @@ fn input_type_list_to_series(
                     .map(|v| value_to_primitive!(v, $vec_type))
                     .collect::<Result<Vec<$vec_type>, _>>()
                     .map_err(inconsistent_error)?;
-                builder.append_iter_values(value_list.iter().copied());
+                builder.append_values_iter(value_list.iter().copied());
             }
             let res = builder.finish();
             Ok(res.into_series())
@@ -1359,8 +1360,8 @@ mod tests {
     use indexmap::indexmap;
     use nu_protocol::record;
     use polars::datatypes::CompatLevel;
-    use polars::export::arrow::array::{BooleanArray, PrimitiveArray};
     use polars::prelude::Field;
+    use polars_arrow::array::{BooleanArray, PrimitiveArray};
     use polars_io::prelude::StructArray;

     use super::*;
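Two mechanical renames run through this file: the arrow re-export moved from `polars::export::arrow` to the `polars-arrow` crate, and the list builder's `append_iter_values` became `append_values_iter`. A sketch of the renamed builder call, with an assumed name, capacities, and values:

```rust
use polars::prelude::*;

// Sketch of the renamed list-builder method: append_values_iter (formerly
// append_iter_values) appends one inner list of primitive values.
fn one_row_list_series() -> Series {
    let mut builder =
        ListPrimitiveChunkedBuilder::<Int64Type>::new("xs".into(), 1, 3, DataType::Int64);
    builder.append_values_iter([1i64, 2, 3].into_iter());
    builder.finish().into_series()
}
```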


@@ -450,7 +450,7 @@ impl NuDataFrame {
     }

     pub fn schema(&self) -> NuSchema {
-        NuSchema::new(self.df.schema())
+        NuSchema::new(Arc::clone(self.df.schema()))
     }

     /// This differs from try_from_value as it will attempt to coerce the type into a NuDataFrame.


@@ -12,10 +12,8 @@ pub struct NuSchema {
 }

 impl NuSchema {
-    pub fn new(schema: Schema) -> Self {
-        Self {
-            schema: Arc::new(schema),
-        }
+    pub fn new(schema: SchemaRef) -> Self {
+        Self { schema }
     }
 }
@@ -23,7 +21,7 @@ impl TryFrom<&Value> for NuSchema {
     type Error = ShellError;

     fn try_from(value: &Value) -> Result<Self, Self::Error> {
         let schema = value_to_schema(value, Span::unknown())?;
-        Ok(Self::new(schema))
+        Ok(Self::new(Arc::new(schema)))
     }
 }
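`NuSchema::new` now takes a `SchemaRef` (an `Arc<Schema>`), so callers either clone the `Arc` that `DataFrame::schema()` returns in 0.46 or wrap a freshly built `Schema`, as the hunks above do. A sketch of both construction paths (field name and dtype are assumptions):

```rust
use std::sync::Arc;

use polars::prelude::{DataFrame, DataType, Field, Schema};

// Sketch of the two schema paths feeding NuSchema::new after this change.
fn schema_refs(df: &DataFrame) {
    // DataFrame::schema() returns &SchemaRef (&Arc<Schema>) in 0.46,
    // so sharing it is a cheap Arc clone:
    let shared = Arc::clone(df.schema());

    // A schema built on the fly is wrapped in an Arc before handing it over:
    let built = Arc::new(Schema::from_iter([Field::new("a".into(), DataType::Int64)]));

    let _ = (shared, built);
}
```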