
Polars upgrade to 0.46 ()

Upgraded to Polars 0.46
Jack Wright 2025-01-27 11:01:39 -08:00 committed by GitHub
parent b53271b86a
commit c0b4d19761
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 223 additions and 353 deletions

Cargo.lock (generated): 484 lines changed; diff suppressed because it is too large.


@@ -30,25 +30,25 @@ indexmap = { version = "2.7" }
 mimalloc = { version = "0.1.42" }
 num = {version = "0.4"}
 serde = { version = "1.0", features = ["derive"] }
-sqlparser = { version = "0.49"}
-polars-io = { version = "0.44", features = ["avro", "cloud", "aws"]}
-polars-arrow = { version = "0.44"}
-polars-ops = { version = "0.44", features = ["pivot"]}
-polars-plan = { version = "0.44", features = ["regex"]}
-polars-utils = { version = "0.44"}
+sqlparser = { version = "0.53"}
+polars-io = { version = "0.46", features = ["avro", "cloud", "aws"]}
+polars-arrow = { version = "0.46"}
+polars-ops = { version = "0.46", features = ["pivot"]}
+polars-plan = { version = "0.46", features = ["regex"]}
+polars-utils = { version = "0.46"}
 typetag = "0.2"
 env_logger = "0.11.3"
 log.workspace = true
 uuid = { version = "1.12", features = ["v4", "serde"] }
-# Do to a compile error with polars, this included to force the raw dependency
-hashbrown = { version = "0.14", features = ["rayon", "ahash", "serde", "raw"] }
+hashbrown = { version = "0.15", features = ["rayon", "serde"] }
 # Cloud support
 aws-config = { version = "1.5", features = ["sso"] }
 aws-credential-types = "1.2"
 tokio = { version = "1.43", features = ["full"] }
-object_store = { version = "0.10", default-features = false }
+object_store = { version = "0.11", default-features = false }
 url.workspace = true

 [dependencies.polars]

@@ -90,7 +90,7 @@ features = [
     "to_dummies",
 ]
 optional = false
-version = "0.44"
+version = "0.46"

 [dev-dependencies]
 nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.101.1" }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_ipc(file_path, IpcWriterOptions::default())
+        // todo - add cloud options
+        .sink_ipc(file_path, IpcWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }
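In 0.46 the sink methods take a third argument for cloud storage options, which this change fills with None for now; the same pattern repeats in the CSV, JSON, and parquet sinks below. A minimal sketch of the new call shape, assuming polars 0.46 with the lazy, ipc, and streaming features enabled (the function and file names here are illustrative):

use std::path::PathBuf;

use polars::prelude::*;

fn sink_to_ipc() -> PolarsResult<()> {
    let lazy = df!("a" => [1i64, 2, 3])?.lazy();
    // `None` keeps the sink on the local filesystem; Some(CloudOptions)
    // would route the write to object storage instead (the todo above).
    lazy.sink_ipc(PathBuf::from("out.arrow"), IpcWriterOptions::default(), None)?;
    Ok(())
}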

View File

@@ -32,7 +32,8 @@ pub(crate) fn command_lazy(
     };
     lazy.to_polars()
-        .sink_csv(file_path, options)
+        // todo - add cloud options
+        .sink_csv(file_path, options, None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_json(file_path, JsonWriterOptions::default())
+        // todo - add cloud options
+        .sink_json(file_path, JsonWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_parquet(file_path, ParquetWriteOptions::default())
+        // todo - add cloud options
+        .sink_parquet(&file_path, ParquetWriteOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -163,23 +163,23 @@ impl PluginCommand for ToDataFrame {
             example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
             result: Some(
                 NuDataFrame::try_from_series_vec(vec![
-                    Series::new("a".into(), &[1u8]),
-                    {
-                        let dtype = DataType::Struct(vec![Field::new("a".into(), DataType::List(Box::new(DataType::UInt64)))]);
-                        let vals = vec![AnyValue::StructOwned(
-                            Box::new((vec![AnyValue::List(Series::new("a".into(), &[1u64, 2, 3]))], vec![Field::new("a".into(), DataType::String)]))); 1];
-                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
-                            .expect("Struct series should not fail")
-                    },
-                    {
-                        let dtype = DataType::List(Box::new(DataType::String));
-                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
-                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
-                            .expect("List series should not fail")
-                    }
-                ], Span::test_data())
-                .expect("simple df for test should not fail")
-                .into_value(Span::test_data()),
+                    Series::new("a".into(), &[1u8]),
+                    {
+                        let dtype = DataType::Struct(vec![Field::new("d".into(), DataType::List(Box::new(DataType::UInt64)))]);
+                        let vals = vec![AnyValue::StructOwned(
+                            Box::new((vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))], vec![Field::new("d".into(), DataType::String)]))); 1];
+                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
+                            .expect("Struct series should not fail")
+                    },
+                    {
+                        let dtype = DataType::List(Box::new(DataType::String));
+                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
+                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
+                            .expect("List series should not fail")
+                    }
+                ], Span::test_data())
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
             ),
         },
         Example {
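The fix above renames the struct's inner field from "a" to "d" so it matches the schema the example declares (b: {d: list<u64>}). A minimal standalone sketch of building such a struct column, assuming polars 0.46 (the function name is illustrative, and the field dtype here follows the declared schema rather than the String placeholder used in the test):

use polars::prelude::*;

fn struct_series() -> PolarsResult<Series> {
    // a one-row struct column named "b" holding a single list-typed field "d"
    let list_field = Field::new("d".into(), DataType::List(Box::new(DataType::UInt64)));
    let dtype = DataType::Struct(vec![list_field.clone()]);
    let vals = vec![AnyValue::StructOwned(Box::new((
        vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))],
        vec![list_field],
    )))];
    // `false` = non-strict: values may be cast to fit the target dtype
    Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
}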


@@ -133,6 +133,8 @@ fn command_lazy(
         to_supertypes,
         diagonal,
         from_partitioned_ds,
+        // todo - expose maintain order
+        ..Default::default()
     };
     let res: NuLazyFrame = polars::prelude::concat(&dataframes, args)
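UnionArgs gained new fields in 0.46 (notably maintain_order), so the struct-update spread keeps this literal compiling as fields are added upstream. A hedged sketch of the same pattern, assuming polars 0.46's UnionArgs and concat (the function name is illustrative):

use polars::prelude::*;

fn concat_frames() -> PolarsResult<()> {
    let a = df!("x" => [1i64, 2])?.lazy();
    let b = df!("x" => [3i64, 4])?.lazy();
    let args = UnionArgs {
        rechunk: true,
        // every field not named here, including the new maintain_order,
        // falls back to its Default value
        ..Default::default()
    };
    let out = concat([a, b], args)?.collect()?;
    assert_eq!(out.height(), 4);
    Ok(())
}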


@@ -143,6 +143,8 @@ impl PluginCommand for LazySortBy {
             nulls_last: vec![nulls_last],
             multithreaded: true,
             maintain_order,
+            // todo - expose limit
+            limit: None,
         };
         let pipeline_value = input.into_value(call.head)?;
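SortMultipleOptions now carries a limit field, pinned to None here (and in the eager sort command below) until the plugin exposes a flag for it. A hedged sketch of the new field in context, assuming polars 0.46 (the function name is illustrative):

use polars::prelude::*;

fn sort_with_limit() -> PolarsResult<()> {
    let lf = df!("a" => [3i64, 1, 2])?.lazy();
    let options = SortMultipleOptions {
        descending: vec![false],
        nulls_last: vec![false],
        // Some(n) would keep only the first n rows of the sorted output
        limit: None,
        ..Default::default()
    };
    let sorted = lf.sort_by_exprs([col("a")], options).collect()?;
    assert_eq!(sorted.height(), 3);
    Ok(())
}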


@@ -176,6 +176,7 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
         .map(|arg| match arg {
             FunctionArg::Named { arg, .. } => arg,
             FunctionArg::Unnamed(arg) => arg,
+            FunctionArg::ExprNamed { arg, .. } => arg,
         })
         .collect::<Vec<_>>();
     Ok(
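sqlparser 0.53 adds a FunctionArg::ExprNamed variant, where the argument name is itself an expression rather than an identifier, so the exhaustive match needs a third arm. A sketch of the full match, assuming sqlparser 0.53's AST (the function name is illustrative):

use sqlparser::ast::{FunctionArg, FunctionArgExpr};

fn unwrap_function_arg(arg: FunctionArg) -> FunctionArgExpr {
    match arg {
        FunctionArg::Named { arg, .. } => arg,
        FunctionArg::Unnamed(arg) => arg,
        // new in 0.53: the name is an Expr rather than an Ident
        FunctionArg::ExprNamed { arg, .. } => arg,
    }
}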


@@ -113,6 +113,8 @@ fn command(
         nulls_last: call.has_flag("nulls-last")?,
         multithreaded: true,
         maintain_order: call.has_flag("maintain-order")?,
+        // todo - expose limit
+        limit: None,
     };
     let mut res = df


@@ -1,5 +1,6 @@
 use std::collections::HashMap;
 use std::ops::{Deref, DerefMut};
+use std::sync::Arc;

 use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
 use chrono_tz::Tz;
@@ -8,7 +9,6 @@ use polars::chunked_array::builder::AnonymousOwnedListBuilder;
 use polars::chunked_array::object::builder::ObjectChunkedBuilder;
 use polars::chunked_array::ChunkedArray;
 use polars::datatypes::{AnyValue, PlSmallStr};
-use polars::export::arrow::Either;
 use polars::prelude::{
     ChunkAnyValue, Column as PolarsColumn, DataFrame, DataType, DatetimeChunked, Float32Type,
     Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder,
@@ -18,6 +18,7 @@
 };

 use nu_protocol::{Record, ShellError, Span, Value};
+use polars_arrow::Either;

 use crate::dataframe::values::NuSchema;
@@ -474,7 +475,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
             Ok(res.into_series())
         }
         DataType::Struct(fields) => {
-            let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
+            let schema = Some(NuSchema::new(Arc::new(Schema::from_iter(fields.clone()))));
             // let mut structs: Vec<Series> = Vec::new();
             let mut structs: HashMap<PlSmallStr, Series> = HashMap::new();
@@ -599,7 +600,7 @@ fn input_type_list_to_series(
                 .map(|v| value_to_primitive!(v, $vec_type))
                 .collect::<Result<Vec<$vec_type>, _>>()
                 .map_err(inconsistent_error)?;
-            builder.append_iter_values(value_list.iter().copied());
+            builder.append_values_iter(value_list.iter().copied());
         }
         let res = builder.finish();
         Ok(res.into_series())
@@ -1359,8 +1360,8 @@ mod tests {
     use indexmap::indexmap;
     use nu_protocol::record;
     use polars::datatypes::CompatLevel;
-    use polars::export::arrow::array::{BooleanArray, PrimitiveArray};
     use polars::prelude::Field;
+    use polars_arrow::array::{BooleanArray, PrimitiveArray};
     use polars_io::prelude::StructArray;

     use super::*;
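The import churn above comes from polars 0.46 dropping the polars::export re-exports: arrow types now come straight from the polars-arrow crate, and the list builder method is renamed from append_iter_values to append_values_iter. A one-function sketch of the relocated Either import, assuming polars-arrow 0.46 re-exports it at the crate root as the diff implies (the function name is illustrative):

use polars_arrow::Either;

// Either<L, R> is the same two-variant utility type as before;
// only the import path changed with the upgrade.
fn left_or_right(flag: bool) -> Either<i32, &'static str> {
    if flag {
        Either::Left(1)
    } else {
        Either::Right("fallback")
    }
}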


@@ -450,7 +450,7 @@ impl NuDataFrame {
     }

     pub fn schema(&self) -> NuSchema {
-        NuSchema::new(self.df.schema())
+        NuSchema::new(Arc::clone(self.df.schema()))
     }

     /// This differs from try_from_value as it will attempt to coerce the type into a NuDataFrame.
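DataFrame::schema in 0.46 returns a cached &SchemaRef (that is, &Arc<Schema>) instead of building a Schema by value, so the wrapper clones the Arc rather than the schema. A minimal sketch of why that clone is cheap, assuming polars 0.46 (the function name is illustrative):

use std::sync::Arc;

use polars::prelude::*;

fn clone_schema() -> PolarsResult<()> {
    let frame = df!("a" => [1i64])?;
    // Arc::clone bumps a reference count; the Schema itself is not copied
    let schema: SchemaRef = Arc::clone(frame.schema());
    assert_eq!(schema.len(), 1);
    Ok(())
}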


@@ -12,10 +12,8 @@ pub struct NuSchema {
 }

 impl NuSchema {
-    pub fn new(schema: Schema) -> Self {
-        Self {
-            schema: Arc::new(schema),
-        }
+    pub fn new(schema: SchemaRef) -> Self {
+        Self { schema }
     }
 }

@@ -23,7 +21,7 @@ impl TryFrom<&Value> for NuSchema {
     type Error = ShellError;

     fn try_from(value: &Value) -> Result<Self, Self::Error> {
         let schema = value_to_schema(value, Span::unknown())?;
-        Ok(Self::new(schema))
+        Ok(Self::new(Arc::new(schema)))
     }
 }
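With the constructor now taking SchemaRef, call sites that build a fresh Schema wrap it in an Arc themselves, while call sites that already hold an Arc (such as NuDataFrame::schema above) hand it over without a deep copy. A sketch of the before/after call shape, assuming polars 0.46 types; NuSchema is the plugin's own wrapper and the function name is illustrative:

use std::sync::Arc;

use polars::prelude::*;

fn build_schema_ref() -> SchemaRef {
    let schema = Schema::from_iter([Field::new("a".into(), DataType::UInt8)]);
    // before: NuSchema::new(schema)
    // after:  NuSchema::new(Arc::new(schema))
    Arc::new(schema)
}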