mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 14:40:06 +02:00
Upgrading nu-cmd-dataframe to polars 0.39 (#12554)
# Description

Upgrading nu-cmd-dataframe to polars 0.39.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
This commit is contained in:
@ -26,11 +26,11 @@ num = { version = "0.4", optional = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
# keep sqlparser pinned (currently 0.45) to a version compatible with polars; bump it only together with polars
|
||||
sqlparser = { version = "0.45", optional = true }
|
||||
polars-io = { version = "0.38", features = ["avro"], optional = true }
|
||||
polars-arrow = { version = "0.38", optional = true }
|
||||
polars-ops = { version = "0.38", optional = true }
|
||||
polars-plan = { version = "0.38", features = ["regex"], optional = true }
|
||||
polars-utils = { version = "0.38", optional = true }
|
||||
polars-io = { version = "0.39", features = ["avro"], optional = true }
|
||||
polars-arrow = { version = "0.39", optional = true }
|
||||
polars-ops = { version = "0.39", optional = true }
|
||||
polars-plan = { version = "0.39", features = ["regex"], optional = true }
|
||||
polars-utils = { version = "0.39", optional = true }
|
||||
|
||||
[dependencies.polars]
|
||||
features = [
|
||||
@ -65,7 +65,7 @@ features = [
|
||||
]
|
||||
default-features = false
|
||||
optional = true
|
||||
version = "0.38"
|
||||
version = "0.39"
|
||||
|
||||
[features]
|
||||
dataframe = ["num", "polars", "polars-io", "polars-arrow", "polars-ops", "polars-plan", "polars-utils", "sqlparser"]
|
||||
|
@ -5,7 +5,7 @@ use polars::prelude::{
|
||||
CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader,
|
||||
LazyFrame, ParallelStrategy, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader,
|
||||
};
|
||||
use polars_io::avro::AvroReader;
|
||||
use polars_io::{avro::AvroReader, HiveOptions};
|
||||
use std::{fs::File, io::BufReader, path::PathBuf};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -151,7 +151,7 @@ fn from_parquet(
|
||||
low_memory: false,
|
||||
cloud_options: None,
|
||||
use_statistics: false,
|
||||
hive_partitioning: false,
|
||||
hive_options: HiveOptions::default(),
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_parquet(file, args)
|
||||
@ -246,7 +246,8 @@ fn from_ipc(
|
||||
cache: true,
|
||||
rechunk: false,
|
||||
row_index: None,
|
||||
memmap: true,
|
||||
memory_map: true,
|
||||
cloud_options: None,
|
||||
};
|
||||
|
||||
let df: NuLazyFrame = LazyFrame::scan_ipc(file, args)
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame};
|
||||
use nu_engine::command_prelude::*;
|
||||
use polars::chunked_array::ops::SortMultipleOptions;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LazySortBy;
|
||||
@ -126,11 +127,17 @@ impl Command for LazySortBy {
|
||||
None => expressions.iter().map(|_| false).collect::<Vec<bool>>(),
|
||||
};
|
||||
|
||||
let sort_options = SortMultipleOptions {
|
||||
descending: reverse,
|
||||
nulls_last,
|
||||
multithreaded: true,
|
||||
maintain_order,
|
||||
};
|
||||
|
||||
let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?;
|
||||
let lazy = NuLazyFrame::new(
|
||||
lazy.from_eager,
|
||||
lazy.into_polars()
|
||||
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
|
||||
lazy.into_polars().sort_by_exprs(&expressions, sort_options),
|
||||
);
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
|
@ -9,7 +9,10 @@ pub use operations::Axis;
|
||||
use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
|
||||
use indexmap::IndexMap;
|
||||
use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
|
||||
use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series};
|
||||
use polars::{
|
||||
chunked_array::ops::SortMultipleOptions,
|
||||
prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series},
|
||||
};
|
||||
use polars_plan::prelude::{lit, Expr, Null};
|
||||
use polars_utils::total_ord::{TotalEq, TotalHash};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -488,12 +491,18 @@ impl NuDataFrame {
|
||||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(vec![*first_col], false, false) {
|
||||
let lhs = match self
|
||||
.as_ref()
|
||||
.sort(vec![*first_col], SortMultipleOptions::default())
|
||||
{
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(vec![*first_col], false, false) {
|
||||
let rhs = match other
|
||||
.as_ref()
|
||||
.sort(vec![*first_col], SortMultipleOptions::default())
|
||||
{
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
@ -313,11 +313,15 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
|
||||
Expr::SortBy {
|
||||
expr,
|
||||
by,
|
||||
descending,
|
||||
sort_options,
|
||||
} => {
|
||||
let by: Result<Vec<Value>, ShellError> =
|
||||
by.iter().map(|b| expr_to_value(b, span)).collect();
|
||||
let descending: Vec<Value> = descending.iter().map(|r| Value::bool(*r, span)).collect();
|
||||
let descending: Vec<Value> = sort_options
|
||||
.descending
|
||||
.iter()
|
||||
.map(|r| Value::bool(*r, span))
|
||||
.collect();
|
||||
|
||||
Ok(Value::record(
|
||||
record! {
|
||||
|
Reference in New Issue
Block a user