Polars upgrade to 0.46 (#14933)

Upgraded to Polars 0.46
Author: Jack Wright
Date: 2025-01-27 11:01:39 -08:00
Committed by: GitHub
Parent: b53271b86a
Commit: c0b4d19761
14 changed files with 223 additions and 353 deletions

Cargo.lock (generated): 484 lines changed; diff suppressed because it is too large.


@@ -30,25 +30,25 @@ indexmap = { version = "2.7" }
 mimalloc = { version = "0.1.42" }
 num = {version = "0.4"}
 serde = { version = "1.0", features = ["derive"] }
-sqlparser = { version = "0.49"}
-polars-io = { version = "0.44", features = ["avro", "cloud", "aws"]}
-polars-arrow = { version = "0.44"}
-polars-ops = { version = "0.44", features = ["pivot"]}
-polars-plan = { version = "0.44", features = ["regex"]}
-polars-utils = { version = "0.44"}
+sqlparser = { version = "0.53"}
+polars-io = { version = "0.46", features = ["avro", "cloud", "aws"]}
+polars-arrow = { version = "0.46"}
+polars-ops = { version = "0.46", features = ["pivot"]}
+polars-plan = { version = "0.46", features = ["regex"]}
+polars-utils = { version = "0.46"}
 typetag = "0.2"
 env_logger = "0.11.3"
 log.workspace = true
 uuid = { version = "1.12", features = ["v4", "serde"] }
 # Due to a compile error with polars, this is included to force the raw dependency
-hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
+hashbrown = { version = "0.15", features = ["rayon", "serde"] }
 # Cloud support
 aws-config = { version = "1.5", features = ["sso"] }
 aws-credential-types = "1.2"
 tokio = { version = "1.43", features = ["full"] }
-object_store = { version = "0.10", default-features = false }
+object_store = { version = "0.11", default-features = false }
 url.workspace = true

 [dependencies.polars]
@@ -90,7 +90,7 @@ features = [
     "to_dummies",
 ]
 optional = false
-version = "0.44"
+version = "0.46"

 [dev-dependencies]
 nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.101.1" }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_ipc(file_path, IpcWriterOptions::default())
+        // todo - add cloud options
+        .sink_ipc(file_path, IpcWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -32,7 +32,8 @@ pub(crate) fn command_lazy(
     };
     lazy.to_polars()
-        .sink_csv(file_path, options)
+        // todo - add cloud options
+        .sink_csv(file_path, options, None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_json(file_path, JsonWriterOptions::default())
+        // todo - add cloud options
+        .sink_json(file_path, JsonWriterOptions::default(), None)
        .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_parquet(file_path, ParquetWriteOptions::default())
+        // todo - add cloud options
+        .sink_parquet(&file_path, ParquetWriteOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }
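All four sink commands pick up the same upstream change: in Polars 0.46, `sink_ipc`, `sink_csv`, `sink_json`, and `sink_parquet` take a trailing `Option<CloudOptions>` argument, which the plugin passes as `None` for now (hence the todo comments). A minimal sketch of the new call shape, with a hypothetical lazy frame and local path:

```rust
use polars::prelude::*;
use std::path::PathBuf;

// Sketch only, not plugin code: the trailing `None` is the new
// Option<CloudOptions> parameter. None keeps the pre-0.46 local-file
// behavior, while Some(...) would enable writing to cloud storage.
fn sink_local(lazy: LazyFrame, file_path: PathBuf) -> PolarsResult<()> {
    lazy.sink_parquet(&file_path, ParquetWriteOptions::default(), None)
}
```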


@@ -163,23 +163,23 @@ impl PluginCommand for ToDataFrame {
             example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
             result: Some(
                 NuDataFrame::try_from_series_vec(vec![
-                    Series::new("a".into(), &[1u8]),
-                    {
-                        let dtype = DataType::Struct(vec![Field::new("a".into(), DataType::List(Box::new(DataType::UInt64)))]);
-                        let vals = vec![AnyValue::StructOwned(
-                            Box::new((vec![AnyValue::List(Series::new("a".into(), &[1u64, 2, 3]))], vec![Field::new("a".into(), DataType::String)]))); 1];
-                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
-                            .expect("Struct series should not fail")
-                    },
-                    {
-                        let dtype = DataType::List(Box::new(DataType::String));
-                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
-                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
-                            .expect("List series should not fail")
-                    }
-                ], Span::test_data())
-                .expect("simple df for test should not fail")
-                .into_value(Span::test_data()),
+                    Series::new("a".into(), &[1u8]),
+                    {
+                        let dtype = DataType::Struct(vec![Field::new("d".into(), DataType::List(Box::new(DataType::UInt64)))]);
+                        let vals = vec![AnyValue::StructOwned(
+                            Box::new((vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))], vec![Field::new("d".into(), DataType::String)]))); 1];
+                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
+                            .expect("Struct series should not fail")
+                    },
+                    {
+                        let dtype = DataType::List(Box::new(DataType::String));
+                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
+                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
+                            .expect("List series should not fail")
+                    }
+                ], Span::test_data())
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
             ),
         },
         Example {
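The substantive fix in this hunk is the struct field name: the declared schema `{b: {d: list<u64>}}` calls the inner field `d`, so the dtype and the `AnyValue` payload must use `d` as well (the old fixture said `a`). Pulled out of the test for readability, the corrected construction is roughly:

```rust
use polars::prelude::*;

// One-row struct column "b" whose single field "d" holds a list of u64.
// The field name must agree between the dtype and the AnyValue payload.
fn struct_column() -> PolarsResult<Series> {
    let dtype = DataType::Struct(vec![Field::new(
        "d".into(),
        DataType::List(Box::new(DataType::UInt64)),
    )]);
    let vals = vec![
        AnyValue::StructOwned(Box::new((
            vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))],
            vec![Field::new("d".into(), DataType::String)],
        )));
        1
    ];
    Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
}
```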


@@ -133,6 +133,8 @@ fn command_lazy(
         to_supertypes,
         diagonal,
         from_partitioned_ds,
+        // todo - expose maintain order
+        ..Default::default()
     };

     let res: NuLazyFrame = polars::prelude::concat(&dataframes, args)
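The `..Default::default()` spread is what actually fixes the build here: `UnionArgs` gained at least one field in 0.46 (the todo suggests `maintain_order`), so an exhaustive struct literal written against 0.44 no longer compiles. A hedged sketch of the pattern:

```rust
use polars::prelude::*;

// Set only the fields the command controls and let Default supply the rest,
// so future UnionArgs fields won't break the literal again.
fn concat_all(frames: Vec<LazyFrame>) -> PolarsResult<LazyFrame> {
    let args = UnionArgs {
        rechunk: true,
        parallel: true,
        // new-in-0.46 fields (e.g. maintain_order) fall through to defaults
        ..Default::default()
    };
    concat(&frames, args)
}
```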


@@ -143,6 +143,8 @@ impl PluginCommand for LazySortBy {
             nulls_last: vec![nulls_last],
             multithreaded: true,
             maintain_order,
+            // todo - expose limit
+            limit: None,
         };

         let pipeline_value = input.into_value(call.head)?;
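`SortMultipleOptions` gained a `limit` field in 0.46; both this command and the eager `polars sort-by` below leave it at `None`. If the todo were implemented, the field would act as a fused top-k. A sketch under that assumption, with a hypothetical column `a`:

```rust
use polars::prelude::*;

// Sort ascending by column "a" and keep only the first k rows via the new
// `limit` field, rather than a separate .limit() after the sort.
fn top_k(lazy: LazyFrame, k: IdxSize) -> LazyFrame {
    let options = SortMultipleOptions {
        descending: vec![false],
        nulls_last: vec![false],
        multithreaded: true,
        maintain_order: false,
        limit: Some(k),
    };
    lazy.sort_by_exprs([col("a")], options)
}
```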


@@ -176,6 +176,7 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
         .map(|arg| match arg {
             FunctionArg::Named { arg, .. } => arg,
             FunctionArg::Unnamed(arg) => arg,
+            FunctionArg::ExprNamed { arg, .. } => arg,
         })
         .collect::<Vec<_>>();
     Ok(
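This arm comes from the sqlparser bump (0.49 to 0.53), which added an `ExprNamed` variant to `FunctionArg`, so the match must stay exhaustive. Since every variant carries a `FunctionArgExpr` payload and the SQL translation only needs the value, each arm just unwraps it:

```rust
use sqlparser::ast::{FunctionArg, FunctionArgExpr};

// Mirror of the match above as a standalone helper: named, expression-named,
// and unnamed arguments all reduce to their FunctionArgExpr.
fn arg_expr(arg: &FunctionArg) -> &FunctionArgExpr {
    match arg {
        FunctionArg::Named { arg, .. } => arg,
        FunctionArg::Unnamed(arg) => arg,
        FunctionArg::ExprNamed { arg, .. } => arg,
    }
}
```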


@@ -113,6 +113,8 @@ fn command(
         nulls_last: call.has_flag("nulls-last")?,
         multithreaded: true,
         maintain_order: call.has_flag("maintain-order")?,
+        // todo - expose limit
+        limit: None,
     };

     let mut res = df


@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
use chrono_tz::Tz;
@ -8,7 +9,6 @@ use polars::chunked_array::builder::AnonymousOwnedListBuilder;
use polars::chunked_array::object::builder::ObjectChunkedBuilder;
use polars::chunked_array::ChunkedArray;
use polars::datatypes::{AnyValue, PlSmallStr};
use polars::export::arrow::Either;
use polars::prelude::{
ChunkAnyValue, Column as PolarsColumn, DataFrame, DataType, DatetimeChunked, Float32Type,
Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder,
@ -18,6 +18,7 @@ use polars::prelude::{
};
use nu_protocol::{Record, ShellError, Span, Value};
use polars_arrow::Either;
use crate::dataframe::values::NuSchema;
@ -474,7 +475,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
Ok(res.into_series())
}
DataType::Struct(fields) => {
let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
let schema = Some(NuSchema::new(Arc::new(Schema::from_iter(fields.clone()))));
// let mut structs: Vec<Series> = Vec::new();
let mut structs: HashMap<PlSmallStr, Series> = HashMap::new();
@ -599,7 +600,7 @@ fn input_type_list_to_series(
.map(|v| value_to_primitive!(v, $vec_type))
.collect::<Result<Vec<$vec_type>, _>>()
.map_err(inconsistent_error)?;
builder.append_iter_values(value_list.iter().copied());
builder.append_values_iter(value_list.iter().copied());
}
let res = builder.finish();
Ok(res.into_series())
@ -1359,8 +1360,8 @@ mod tests {
use indexmap::indexmap;
use nu_protocol::record;
use polars::datatypes::CompatLevel;
use polars::export::arrow::array::{BooleanArray, PrimitiveArray};
use polars::prelude::Field;
use polars_arrow::array::{BooleanArray, PrimitiveArray};
use polars_io::prelude::StructArray;
use super::*;
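The import churn above is the other recurring 0.46 theme: the `polars::export` re-export module is gone, so arrow items (`Either`, the array types) now come from the `polars-arrow` crate that Cargo.toml already lists. A trivial sketch using the new path:

```rust
use polars_arrow::Either;

// Same left/right sum type as before; only the import path changed,
// from polars::export::arrow::Either to polars_arrow::Either.
fn non_negative(i: i64) -> Either<u64, i64> {
    if i >= 0 {
        Either::Left(i as u64)
    } else {
        Either::Right(i)
    }
}
```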


@@ -450,7 +450,7 @@ impl NuDataFrame {
     }

     pub fn schema(&self) -> NuSchema {
-        NuSchema::new(self.df.schema())
+        NuSchema::new(Arc::clone(self.df.schema()))
     }

     /// This differs from try_from_value as it will attempt to coerce the type into a NuDataFrame.
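The `Arc::clone` reflects the 0.46 `DataFrame::schema()` change: it now returns a shared `&SchemaRef` (an `&Arc<Schema>`) instead of building an owned `Schema`, so cloning is a refcount bump rather than a schema copy. A sketch, assuming a plain `DataFrame`:

```rust
use polars::prelude::*;
use std::sync::Arc;

// Cheap: clones the Arc pointer, not the Schema behind it.
fn shared_schema(df: &DataFrame) -> SchemaRef {
    Arc::clone(df.schema())
}
```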


@@ -12,10 +12,8 @@ pub struct NuSchema {
 }

 impl NuSchema {
-    pub fn new(schema: Schema) -> Self {
-        Self {
-            schema: Arc::new(schema),
-        }
+    pub fn new(schema: SchemaRef) -> Self {
+        Self { schema }
     }
 }

@@ -23,7 +21,7 @@ impl TryFrom<&Value> for NuSchema {
     type Error = ShellError;

     fn try_from(value: &Value) -> Result<Self, Self::Error> {
         let schema = value_to_schema(value, Span::unknown())?;
-        Ok(Self::new(schema))
+        Ok(Self::new(Arc::new(schema)))
     }
 }
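With the constructor taking `SchemaRef`, callers either wrap an owned schema once or bump an existing Arc. A self-contained sketch with `NuSchema` stubbed in (not the plugin's full type):

```rust
use polars::prelude::{Schema, SchemaRef};
use std::sync::Arc;

// Minimal stand-in for the wrapper, just to show the two call-site patterns.
struct NuSchema {
    schema: SchemaRef,
}

impl NuSchema {
    fn new(schema: SchemaRef) -> Self {
        Self { schema }
    }
}

// Owned Schema (e.g. freshly parsed from a Value): wrap once at the boundary.
fn from_owned(schema: Schema) -> NuSchema {
    NuSchema::new(Arc::new(schema))
}

// Already-shared schema (e.g. from DataFrame::schema): clone the Arc.
fn from_shared(schema: &SchemaRef) -> NuSchema {
    NuSchema::new(Arc::clone(schema))
}
```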