Upgrading nu-cmd-dataframe to polars 0.39 (#12554)

# Description
Upgrading nu-cmd-dataframe to polars 0.39

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
Jack Wright
2024-04-17 10:50:17 -07:00
committed by GitHub
parent 13160b3ec3
commit 57b0c722c6
6 changed files with 127 additions and 497 deletions

View File

@ -26,11 +26,11 @@ num = { version = "0.4", optional = true }
serde = { workspace = true, features = ["derive"] }
# keep sqlparser at 0.45 until we can update polars
sqlparser = { version = "0.45", optional = true }
polars-io = { version = "0.38", features = ["avro"], optional = true }
polars-arrow = { version = "0.38", optional = true }
polars-ops = { version = "0.38", optional = true }
polars-plan = { version = "0.38", features = ["regex"], optional = true }
polars-utils = { version = "0.38", optional = true }
polars-io = { version = "0.39", features = ["avro"], optional = true }
polars-arrow = { version = "0.39", optional = true }
polars-ops = { version = "0.39", optional = true }
polars-plan = { version = "0.39", features = ["regex"], optional = true }
polars-utils = { version = "0.39", optional = true }
[dependencies.polars]
features = [
@ -65,7 +65,7 @@ features = [
]
default-features = false
optional = true
version = "0.38"
version = "0.39"
[features]
dataframe = ["num", "polars", "polars-io", "polars-arrow", "polars-ops", "polars-plan", "polars-utils", "sqlparser"]

View File

@ -5,7 +5,7 @@ use polars::prelude::{
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
LazyFrame, ParallelStrategy, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
};
use polars_io::avro::AvroReader;
use polars_io::{avro::AvroReader, HiveOptions};
use std::{fs::File, io::BufReader, path::PathBuf};
#[derive(Clone)]
@ -151,7 +151,7 @@ fn from_parquet(
low_memory: false,
cloud_options: None,
use_statistics: false,
hive_partitioning: false,
hive_options: HiveOptions::default(),
};
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
@ -246,7 +246,8 @@ fn from_ipc(
cache: true,
rechunk: false,
row_index: None,
memmap: true,
memory_map: true,
cloud_options: None,
};
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)

View File

@ -1,5 +1,6 @@
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
use nu_engine::command_prelude::*;
use polars::chunked_array::ops::SortMultipleOptions;
#[derive(Clone)]
pub struct LazySortBy;
@ -126,11 +127,17 @@ impl Command for LazySortBy {
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
};
let sort_options = SortMultipleOptions {
descending: reverse,
nulls_last,
multithreaded: true,
maintain_order,
};
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars()
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
lazy.into_polars().sort_by_exprs(&expressions, sort_options),
);
Ok(PipelineData::Value(

View File

@ -9,7 +9,10 @@ pub use operations::Axis;
use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
use indexmap::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series};
use polars::{
chunked_array::ops::SortMultipleOptions,
prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series},
};
use polars_plan::prelude::{lit, Expr, Null};
use polars_utils::total_ord::{TotalEq, TotalHash};
use serde::{Deserialize, Serialize};
@ -488,12 +491,18 @@ impl NuDataFrame {
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(vec![*first_col], false, false) {
let lhs = match self
.as_ref()
.sort(vec![*first_col], SortMultipleOptions::default())
{
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(vec![*first_col], false, false) {
let rhs = match other
.as_ref()
.sort(vec![*first_col], SortMultipleOptions::default())
{
Ok(df) => df,
Err(_) => return None,
};

View File

@ -313,11 +313,15 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
Expr::SortBy {
expr,
by,
descending,
sort_options,
} => {
let by: Result<Vec<Value>, ShellError> =
by.iter().map(|b| expr_to_value(b, span)).collect();
let descending: Vec<Value> = descending.iter().map(|r| Value::bool(*r, span)).collect();
let descending: Vec<Value> = sort_options
.descending
.iter()
.map(|r| Value::bool(*r, span))
.collect();
Ok(Value::record(
record! {