Upgrading nu-cmd-dataframe to polars 0.39 (#12554)

# Description

Upgrading nu-cmd-dataframe to polars 0.39

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
2025-06-30 22:50:14 +02:00 · 2024-04-17 10:50:17 -07:00
parent 13160b3ec3
commit 57b0c722c6
6 changed files with 127 additions and 497 deletions
--- a/crates/nu-cmd-dataframe/Cargo.toml
+++ b/crates/nu-cmd-dataframe/Cargo.toml
@@ -26,11 +26,11 @@ num = { version = "0.4", optional = true }
 serde = { workspace = true, features = ["derive"] }
 # keep sqlparser at 0.39.0 until we can update polars
 sqlparser = { version = "0.45", optional = true }
-polars-io = { version = "0.38", features = ["avro"], optional = true }
-polars-arrow = { version = "0.38", optional = true }
-polars-ops = { version = "0.38", optional = true }
-polars-plan = { version = "0.38", features = ["regex"], optional = true }
-polars-utils = { version = "0.38", optional = true }
+polars-io = { version = "0.39", features = ["avro"], optional = true }
+polars-arrow = { version = "0.39", optional = true }
+polars-ops = { version = "0.39", optional = true }
+polars-plan = { version = "0.39", features = ["regex"], optional = true }
+polars-utils = { version = "0.39", optional = true }

 [dependencies.polars]
 features = [
@@ -65,7 +65,7 @@ features = [
 ]
 default-features = false
 optional = true
-version = "0.38"
+version = "0.39"

 [features]
 dataframe = ["num", "polars", "polars-io", "polars-arrow", "polars-ops", "polars-plan", "polars-utils", "sqlparser"]
--- a/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs
@@ -5,7 +5,7 @@ use polars::prelude::{
    CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
    LazyFrame, ParallelStrategy, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
 };
-use polars_io::avro::AvroReader;
+use polars_io::{avro::AvroReader, HiveOptions};
 use std::{fs::File, io::BufReader, path::PathBuf};

 #[derive(Clone)]
@@ -151,7 +151,7 @@ fn from_parquet(
            low_memory: false,
            cloud_options: None,
            use_statistics: false,
-            hive_partitioning: false,
+            hive_options: HiveOptions::default(),
        };

        let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
@@ -246,7 +246,8 @@ fn from_ipc(
            cache: true,
            rechunk: false,
            row_index: None,
-            memmap: true,
+            memory_map: true,
+            cloud_options: None,
        };

        let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
--- a/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs
@@ -1,5 +1,6 @@
 use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
 use nu_engine::command_prelude::*;
+use polars::chunked_array::ops::SortMultipleOptions;

 #[derive(Clone)]
 pub struct LazySortBy;
@@ -126,11 +127,17 @@ impl Command for LazySortBy {
            None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
        };

+        let sort_options = SortMultipleOptions {
+            descending: reverse,
+            nulls_last,
+            multithreaded: true,
+            maintain_order,
+        };
+
        let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
        let lazy = NuLazyFrame::new(
            lazy.from_eager,
-            lazy.into_polars()
-                .sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
+            lazy.into_polars().sort_by_exprs(&expressions, sort_options),
        );

        Ok(PipelineData::Value(
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs
@@ -9,7 +9,10 @@ pub use operations::Axis;
 use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
 use indexmap::IndexMap;
 use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
-use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series};
+use polars::{
+    chunked_array::ops::SortMultipleOptions,
+    prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series},
+};
 use polars_plan::prelude::{lit, Expr, Null};
 use polars_utils::total_ord::{TotalEq, TotalHash};
 use serde::{Deserialize, Serialize};
@@ -488,12 +491,18 @@ impl NuDataFrame {
            .expect("already checked that dataframe is different than 0");

        // if unable to sort, then unable to compare
-        let lhs = match self.as_ref().sort(vec![*first_col], false, false) {
+        let lhs = match self
+            .as_ref()
+            .sort(vec![*first_col], SortMultipleOptions::default())
+        {
            Ok(df) => df,
            Err(_) => return None,
        };

-        let rhs = match other.as_ref().sort(vec![*first_col], false, false) {
+        let rhs = match other
+            .as_ref()
+            .sort(vec![*first_col], SortMultipleOptions::default())
+        {
            Ok(df) => df,
            Err(_) => return None,
        };
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs
+++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs
@@ -313,11 +313,15 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
        Expr::SortBy {
            expr,
            by,
-            descending,
+            sort_options,
        } => {
            let by: Result<Vec<Value>, ShellError> =
                by.iter().map(|b| expr_to_value(b, span)).collect();
-            let descending: Vec<Value> = descending.iter().map(|r| Value::bool(*r, span)).collect();
+            let descending: Vec<Value> = sort_options
+                .descending
+                .iter()
+                .map(|r| Value::bool(*r, span))
+                .collect();

            Ok(Value::record(
                record! {