Polars upgrade to 0.46 (#14933)

Upgraded to Polars 0.46
Jack Wright
2025-01-27 11:01:39 -08:00
committed by GitHub
parent b53271b86a
commit c0b4d19761
14 changed files with 223 additions and 353 deletions


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_ipc(file_path, IpcWriterOptions::default())
+        // todo - add cloud options
+        .sink_ipc(file_path, IpcWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -32,7 +32,8 @@ pub(crate) fn command_lazy(
     };
     lazy.to_polars()
-        .sink_csv(file_path, options)
+        // todo - add cloud options
+        .sink_csv(file_path, options, None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_json(file_path, JsonWriterOptions::default())
+        // todo - add cloud options
+        .sink_json(file_path, JsonWriterOptions::default(), None)
         .map_err(|e| polars_file_save_error(e, file_span))
 }


@@ -16,7 +16,8 @@ pub(crate) fn command_lazy(
     file_span: Span,
 ) -> Result<(), ShellError> {
     lazy.to_polars()
-        .sink_parquet(file_path, ParquetWriteOptions::default())
+        // todo - add cloud options
+        .sink_parquet(&file_path, ParquetWriteOptions::default(), None)
        .map_err(|e| polars_file_save_error(e, file_span))
 }
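All four save hunks above make the same change: Polars 0.46 adds a trailing cloud-options argument to the sink_* family, and passing None keeps the old local-file behavior (the todo comments mark where real cloud options could be threaded through later). A minimal sketch of the new call shape, assuming a LazyFrame `lf` and the parquet writer feature enabled:

use polars::prelude::*;
use std::path::PathBuf;

fn save_parquet_locally(lf: LazyFrame, path: PathBuf) -> PolarsResult<()> {
    // The third argument is the cloud-options slot added in 0.46;
    // `None` selects a plain local write, as in the hunks above.
    lf.sink_parquet(&path, ParquetWriteOptions::default(), None)
}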


@@ -163,23 +163,23 @@ impl PluginCommand for ToDataFrame {
             example: "[[a b c]; [1 {d: [1 2 3]} [10 11 12] ]]| polars into-df -s {a: u8, b: {d: list<u64>}, c: list<u8>}",
             result: Some(
                 NuDataFrame::try_from_series_vec(vec![
-                    Series::new("a".into(), &[1u8]),
-                    {
-                        let dtype = DataType::Struct(vec![Field::new("a".into(), DataType::List(Box::new(DataType::UInt64)))]);
-                        let vals = vec![AnyValue::StructOwned(
-                            Box::new((vec![AnyValue::List(Series::new("a".into(), &[1u64, 2, 3]))], vec![Field::new("a".into(), DataType::String)]))); 1];
-                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
-                            .expect("Struct series should not fail")
-                    },
-                    {
-                        let dtype = DataType::List(Box::new(DataType::String));
-                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
-                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
-                            .expect("List series should not fail")
-                    }
-                ], Span::test_data())
-                .expect("simple df for test should not fail")
-                .into_value(Span::test_data()),
+                    Series::new("a".into(), &[1u8]),
+                    {
+                        let dtype = DataType::Struct(vec![Field::new("d".into(), DataType::List(Box::new(DataType::UInt64)))]);
+                        let vals = vec![AnyValue::StructOwned(
+                            Box::new((vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))], vec![Field::new("d".into(), DataType::String)]))); 1];
+                        Series::from_any_values_and_dtype("b".into(), &vals, &dtype, false)
+                            .expect("Struct series should not fail")
+                    },
+                    {
+                        let dtype = DataType::List(Box::new(DataType::String));
+                        let vals = vec![AnyValue::List(Series::new("c".into(), &[10, 11, 12]))];
+                        Series::from_any_values_and_dtype("c".into(), &vals, &dtype, false)
+                            .expect("List series should not fail")
+                    }
+                ], Span::test_data())
+                .expect("simple df for test should not fail")
+                .into_value(Span::test_data()),
             ),
         },
         Example {
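The only substantive change in this hunk is the struct field name: the expected result now uses "d", matching the {d: [1 2 3]} record in the example string. For reference, a minimal sketch of building a one-row struct column the same way, with the dtypes kept consistent (a hypothetical standalone helper, not plugin code):

use polars::prelude::*;

fn struct_column() -> PolarsResult<Series> {
    let inner = DataType::List(Box::new(DataType::UInt64));
    let dtype = DataType::Struct(vec![Field::new("d".into(), inner.clone())]);
    // One row whose value is the struct { d: [1, 2, 3] }.
    let row = AnyValue::StructOwned(Box::new((
        vec![AnyValue::List(Series::new("d".into(), &[1u64, 2, 3]))],
        vec![Field::new("d".into(), inner)],
    )));
    Series::from_any_values_and_dtype("b".into(), &[row], &dtype, false)
}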


@@ -133,6 +133,8 @@ fn command_lazy(
         to_supertypes,
         diagonal,
         from_partitioned_ds,
+        // todo - expose maintain order
+        ..Default::default()
     };
     let res: NuLazyFrame = polars::prelude::concat(&dataframes, args)
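UnionArgs gained fields in this release (the todo singles out maintain order as one to expose later), so the struct is now built with functional-update syntax and picks up defaults for everything not set explicitly. A sketch of the same pattern, assuming two LazyFrames:

use polars::prelude::*;

fn concat_lazy(a: LazyFrame, b: LazyFrame) -> PolarsResult<LazyFrame> {
    let args = UnionArgs {
        rechunk: true,
        // Fields added in newer Polars versions (e.g. maintain_order)
        // fall back to their defaults.
        ..Default::default()
    };
    concat(&[a, b], args)
}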


@@ -143,6 +143,8 @@ impl PluginCommand for LazySortBy {
             nulls_last: vec![nulls_last],
             multithreaded: true,
             maintain_order,
+            // todo - expose limit
+            limit: None,
         };
         let pipeline_value = input.into_value(call.head)?;
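SortMultipleOptions gains a limit field in 0.46 (a fused sort-then-take); both this lazy command and the eager sort hunk further below pass None for now, as the todos note. A minimal sketch of the options as used here, assuming a LazyFrame and a column named "a":

use polars::prelude::*;

fn sort_lazy(lf: LazyFrame) -> LazyFrame {
    let options = SortMultipleOptions {
        descending: vec![false],
        nulls_last: vec![false],
        multithreaded: true,
        maintain_order: false,
        // New in 0.46: optionally keep only the first N sorted rows.
        limit: None,
    };
    lf.sort_by_exprs([col("a")], options)
}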


@@ -176,6 +176,7 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result<Expr> {
         .map(|arg| match arg {
             FunctionArg::Named { arg, .. } => arg,
             FunctionArg::Unnamed(arg) => arg,
+            FunctionArg::ExprNamed { arg, .. } => arg,
         })
         .collect::<Vec<_>>();
     Ok(
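The sqlparser bump that rides along with this upgrade adds a FunctionArg::ExprNamed variant (a named argument whose name is itself an expression), and the match unwraps it like the existing variants. The same match in isolation, assuming sqlparser's ast module:

use sqlparser::ast::{FunctionArg, FunctionArgExpr};

// Pull out the argument expression regardless of how it was passed.
fn unwrap_arg(arg: FunctionArg) -> FunctionArgExpr {
    match arg {
        FunctionArg::Named { arg, .. } => arg,
        FunctionArg::Unnamed(arg) => arg,
        FunctionArg::ExprNamed { arg, .. } => arg,
    }
}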


@@ -113,6 +113,8 @@ fn command(
         nulls_last: call.has_flag("nulls-last")?,
         multithreaded: true,
         maintain_order: call.has_flag("maintain-order")?,
+        // todo - expose limit
+        limit: None,
     };
     let mut res = df


@@ -1,5 +1,6 @@
 use std::collections::HashMap;
 use std::ops::{Deref, DerefMut};
+use std::sync::Arc;
 
 use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc};
 use chrono_tz::Tz;
@@ -8,7 +9,6 @@ use polars::chunked_array::builder::AnonymousOwnedListBuilder;
 use polars::chunked_array::object::builder::ObjectChunkedBuilder;
 use polars::chunked_array::ChunkedArray;
 use polars::datatypes::{AnyValue, PlSmallStr};
-use polars::export::arrow::Either;
 use polars::prelude::{
     ChunkAnyValue, Column as PolarsColumn, DataFrame, DataType, DatetimeChunked, Float32Type,
     Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder,
@@ -18,6 +18,7 @@ use polars::prelude::{
 };
 
 use nu_protocol::{Record, ShellError, Span, Value};
+use polars_arrow::Either;
 
 use crate::dataframe::values::NuSchema;
@@ -474,7 +475,7 @@ fn typed_column_to_series(name: PlSmallStr, column: TypedColumn) -> Result<Serie
             Ok(res.into_series())
         }
         DataType::Struct(fields) => {
-            let schema = Some(NuSchema::new(Schema::from_iter(fields.clone())));
+            let schema = Some(NuSchema::new(Arc::new(Schema::from_iter(fields.clone()))));
             // let mut structs: Vec<Series> = Vec::new();
             let mut structs: HashMap<PlSmallStr, Series> = HashMap::new();
@@ -599,7 +600,7 @@ fn input_type_list_to_series(
                     .map(|v| value_to_primitive!(v, $vec_type))
                     .collect::<Result<Vec<$vec_type>, _>>()
                     .map_err(inconsistent_error)?;
-                builder.append_iter_values(value_list.iter().copied());
+                builder.append_values_iter(value_list.iter().copied());
             }
             let res = builder.finish();
             Ok(res.into_series())
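Here the list-builder method is renamed: append_iter_values becomes append_values_iter, with unchanged behavior. A minimal sketch with one concrete builder (the macro above instantiates several; the exact builder type used here is an assumption):

use polars::prelude::*;

fn list_of_ints() -> Series {
    // Hypothetical standalone example of the renamed method.
    let mut builder =
        ListPrimitiveChunkedBuilder::<Int64Type>::new("n".into(), 1, 3, DataType::Int64);
    // Renamed from `append_iter_values` in Polars 0.46.
    builder.append_values_iter([1i64, 2, 3].into_iter());
    builder.finish().into_series()
}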
@@ -1359,8 +1360,8 @@ mod tests {
     use indexmap::indexmap;
     use nu_protocol::record;
     use polars::datatypes::CompatLevel;
-    use polars::export::arrow::array::{BooleanArray, PrimitiveArray};
     use polars::prelude::Field;
+    use polars_arrow::array::{BooleanArray, PrimitiveArray};
+    use polars_io::prelude::StructArray;
 
     use super::*;
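The import churn above has one cause: Polars 0.46 drops the polars::export re-exports, so arrow types now come straight from the polars-arrow crate (and StructArray via polars-io). A sketch of the new path, assuming polars-arrow is declared as a dependency at the same version as polars:

// Formerly `use polars::export::arrow::Either;`
use polars_arrow::Either;

fn first_or_label(flag: bool) -> Either<u32, &'static str> {
    if flag {
        Either::Left(1)
    } else {
        Either::Right("fallback")
    }
}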


@@ -450,7 +450,7 @@ impl NuDataFrame {
     }
 
     pub fn schema(&self) -> NuSchema {
-        NuSchema::new(self.df.schema())
+        NuSchema::new(Arc::clone(self.df.schema()))
     }
 
     /// This differs from try_from_value as it will attempt to coerce the type into a NuDataFrame.
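DataFrame::schema now hands back the shared SchemaRef (an Arc<Schema>) rather than building a Schema by value, so the Arc::clone above replaces what used to be a deep copy. The new shape in a sketch:

use polars::prelude::*;
use std::sync::Arc;

fn shared_schema(df: &DataFrame) -> SchemaRef {
    // `schema()` returns `&SchemaRef` in 0.46; cloning the Arc is O(1).
    Arc::clone(df.schema())
}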


@@ -12,10 +12,8 @@ pub struct NuSchema {
 }
 
 impl NuSchema {
-    pub fn new(schema: Schema) -> Self {
-        Self {
-            schema: Arc::new(schema),
-        }
+    pub fn new(schema: SchemaRef) -> Self {
+        Self { schema }
     }
 }
@@ -23,7 +21,7 @@ impl TryFrom<&Value> for NuSchema {
     type Error = ShellError;
 
     fn try_from(value: &Value) -> Result<Self, Self::Error> {
         let schema = value_to_schema(value, Span::unknown())?;
-        Ok(Self::new(schema))
+        Ok(Self::new(Arc::new(schema)))
     }
 }
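With the constructor taking SchemaRef, a caller that already holds the shared Arc (like NuDataFrame::schema above) passes it straight through, while a caller with an owned Schema wraps it once at the boundary, as the TryFrom impl now does. Both call shapes in a sketch, assuming the NuSchema type from this file is in scope:

use polars::prelude::{Schema, SchemaRef};
use std::sync::Arc;

fn demo() {
    // From an owned Schema: wrap in an Arc once.
    let owned = Schema::default();
    let _from_owned = NuSchema::new(Arc::new(owned));

    // From an already-shared SchemaRef: just a refcount bump, no copy.
    let shared: SchemaRef = Arc::new(Schema::default());
    let _from_shared = NuSchema::new(shared);
}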