Upgrading nu_plugin_polars to polars 0.39.1 (#12551)

# Description
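Upgrades nu_plugin_polars from polars 0.38 to polars 0.39.1 and adapts the plugin to the API changes in that release: sorting now takes a `SortMultipleOptions` value instead of separate boolean arguments, parquet scanning uses `hive_options: HiveOptions` in place of the `hive_partitioning` flag, the IPC scan argument `memmap` is renamed to `memory_map` (with a new `cloud_options` field), and `Expr::SortBy` exposes its descending flags through `sort_options`.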

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright
2024-04-17 04:35:09 -07:00
committed by GitHub
parent b296d6ee3c
commit 410f3c5c8a
6 changed files with 541 additions and 106 deletions

View File

@ -29,11 +29,11 @@ indexmap = { version = "2.2" }
num = {version = "0.4"}
serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.45"}
polars-io = { version = "0.38", features = ["avro"]}
polars-arrow = { version = "0.38"}
polars-ops = { version = "0.38"}
polars-plan = { version = "0.38", features = ["regex"]}
polars-utils = { version = "0.38"}
polars-io = { version = "0.39", features = ["avro"]}
polars-arrow = { version = "0.39"}
polars-ops = { version = "0.39"}
polars-plan = { version = "0.39", features = ["regex"]}
polars-utils = { version = "0.39"}
typetag = "0.2"
uuid = { version = "1.7", features = ["v4", "serde"] }
@ -69,7 +69,7 @@ features = [
"to_dummies",
]
optional = false
version = "0.38"
version = "0.39"
[dev-dependencies]
nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.92.3" }

View File

@ -23,7 +23,7 @@ use polars::prelude::{
LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
};
use polars_io::{avro::AvroReader, prelude::ParallelStrategy};
use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions};
#[derive(Clone)]
pub struct OpenDataFrame;
@ -174,7 +174,7 @@ fn from_parquet(
low_memory: false,
cloud_options: None,
use_statistics: false,
hive_partitioning: false,
hive_options: HiveOptions::default(),
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
@ -271,7 +271,8 @@ fn from_ipc(
cache: true,
rechunk: false,
row_index: None,
memmap: true,
memory_map: true,
cloud_options: None,
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)

View File

@ -9,6 +9,7 @@ use nu_protocol::{
Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Value,
};
use polars::chunked_array::ops::SortMultipleOptions;
#[derive(Clone)]
pub struct LazySortBy;
@ -137,12 +138,18 @@ impl PluginCommand for LazySortBy {
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let sort_options = SortMultipleOptions {
descending: reverse,
nulls_last,
multithreaded: true,
maintain_order,
};
let pipeline_value = input.into_value(call.head);
let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.to_polars()
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
lazy.to_polars().sort_by_exprs(&expressions, sort_options),
);
lazy.to_pipeline_data(plugin, engine, call.head)
.map_err(LabeledError::from)

View File

@ -8,7 +8,10 @@ pub use operations::Axis;
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, IntoLazy, PolarsObject, Series};
use polars::{
chunked_array::ops::SortMultipleOptions,
prelude::{DataFrame, DataType, IntoLazy, PolarsObject, Series},
};
use polars_plan::prelude::{lit, Expr, Null};
use polars_utils::total_ord::{TotalEq, TotalHash};
use std::{
@ -436,12 +439,18 @@ impl NuDataFrame {
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(vec![*first_col], false, false) {
let lhs = match self
.as_ref()
.sort(vec![*first_col], SortMultipleOptions::default())
{
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(vec![*first_col], false, false) {
let rhs = match other
.as_ref()
.sort(vec![*first_col], SortMultipleOptions::default())
{
Ok(df) => df,
Err(_) => return None,
};

View File

@ -289,11 +289,15 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
Expr::SortBy {
expr,
by,
descending,
sort_options,
} => {
let by: Result<Vec<Value>, ShellError> =
by.iter().map(|b| expr_to_value(b, span)).collect();
let descending: Vec<Value> = descending.iter().map(|r| Value::bool(*r, span)).collect();
let descending: Vec<Value> = sort_options
.descending
.iter()
.map(|r| Value::bool(*r, span))
.collect();
Ok(Value::record(
record! {