Updating polars and sqlparser versions (#10114)

Upgrades polars (and polars-io) from 0.30.0 to 0.32 and sqlparser from 0.34 to 0.36.1.

Where appropriate, I have exposed features newly added to polars as command
flags: `--drop-first` on the dummies command and `--maintain-order` on the
sort-by and arg-sort commands.

---------

Co-authored-by: Jack Wright <jack.wright@disqo.com>
Co-authored-by: Darren Schroeder <343840+fdncred@users.noreply.github.com>
Co-authored-by: sholderbach <sholderbach@users.noreply.github.com>
This commit is contained in:
Jack Wright
2023-08-29 15:13:34 -07:00
committed by GitHub
parent 3f2c76df28
commit 3fd1a26ec0
19 changed files with 236 additions and 236 deletions

View File

@ -23,8 +23,8 @@ fancy-regex = "0.11"
indexmap = { version = "2.0" }
num = { version = "0.4", optional = true }
serde = { version = "1.0", features = ["derive"] }
sqlparser = { version = "0.34", features = ["serde"], optional = true }
polars-io = { version = "0.30.0", features = ["avro"] }
sqlparser = { version = "0.36.1", optional = true }
polars-io = { version = "0.32", features = ["avro"] }
[dependencies.polars]
features = [
@ -54,7 +54,7 @@ features = [
"to_dummies",
]
optional = true
version = "0.30.0"
version = "0.32"
[features]
dataframe = ["num", "polars", "sqlparser"]

View File

@ -20,6 +20,7 @@ impl Command for Dummies {
fn signature(&self) -> Signature {
Signature::build(self.name())
.switch("drop-first", "Drop first row", Some('d'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
@ -115,10 +116,11 @@ fn command(
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let drop_first: bool = call.has_flag("drop-first");
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
df.as_ref()
.to_dummies(None)
.to_dummies(None, drop_first)
.map_err(|e| {
ShellError::GenericError(
"Error calculating dummies".into(),

View File

@ -128,7 +128,7 @@ fn dataframe_command(
}
fn expression_command(call: &Call, input: Value) -> Result<PipelineData, ShellError> {
let expr = NuExpression::try_from_value(input)?;
let value = expr.to_value(call.head);
let value = expr.to_value(call.head)?;
Ok(PipelineData::Value(value, None))
}

View File

@ -95,10 +95,8 @@ impl Command for ExprOtherwise {
let value = input.into_value(call.head);
let complete: NuExpression = match NuWhen::try_from_value(value)? {
NuWhen::WhenThen(when_then) => when_then
.otherwise(otherwise_predicate.into_polars())
.into(),
NuWhen::WhenThenThen(when_then_then) => when_then_then
NuWhen::Then(then) => then.otherwise(otherwise_predicate.into_polars()).into(),
NuWhen::ChainedThen(chained_when) => chained_when
.otherwise(otherwise_predicate.into_polars())
.into(),
};

View File

@ -110,11 +110,11 @@ impl Command for ExprWhen {
.then(then_predicate.into_polars())
.into(),
v => match NuWhen::try_from_value(v)? {
NuWhen::WhenThen(when_then) => when_then
NuWhen::Then(when_then) => when_then
.when(when_predicate.into_polars())
.then(then_predicate.into_polars())
.into(),
NuWhen::WhenThenThen(when_then_then) => when_then_then
NuWhen::ChainedThen(when_then_then) => when_then_then
.when(when_predicate.into_polars())
.then(then_predicate.into_polars())
.into(),

View File

@ -172,7 +172,6 @@ fn get_col_name(expr: &Expr) -> Option<String> {
},
Expr::Filter { input: expr, .. }
| Expr::Slice { input: expr, .. }
| Expr::Cache { input: expr, .. }
| Expr::Cast { expr, .. }
| Expr::Sort { expr, .. }
| Expr::Take { expr, .. }
@ -192,7 +191,8 @@ fn get_col_name(expr: &Expr) -> Option<String> {
| Expr::Wildcard
| Expr::RenameAlias { .. }
| Expr::Count
| Expr::Nth(_) => None,
| Expr::Nth(_)
| Expr::Selector(_) => None,
}
}

View File

@ -37,6 +37,7 @@ impl Command for LazySortBy {
"nulls are shown last in the dataframe",
Some('n'),
)
.switch("maintain-order", "Maintains order during sort", Some('m'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
@ -110,6 +111,7 @@ impl Command for LazySortBy {
};
let expressions = NuExpression::extract_exprs(value)?;
let nulls_last = call.has_flag("nulls-last");
let maintain_order = call.has_flag("maintain-order");
let reverse: Option<Vec<bool>> = call.get_flag(engine_state, stack, "reverse")?;
let reverse = match reverse {
@ -137,7 +139,7 @@ impl Command for LazySortBy {
let lazy = NuLazyFrame::new(
lazy.from_eager,
lazy.into_polars()
.sort_by_exprs(&expressions, reverse, nulls_last),
.sort_by_exprs(&expressions, reverse, nulls_last, maintain_order),
);
Ok(PipelineData::Value(

View File

@ -143,7 +143,13 @@ fn command(
})?;
let res = if not_exact {
casted.as_datetime_not_exact(Some(format.as_str()), TimeUnit::Nanoseconds, None)
casted.as_datetime_not_exact(
Some(format.as_str()),
TimeUnit::Nanoseconds,
false,
None,
None,
)
} else {
casted.as_datetime(
Some(format.as_str()),
@ -151,6 +157,7 @@ fn command(
false,
false,
None,
None,
)
};

View File

@ -27,6 +27,11 @@ impl Command for ArgSort {
Signature::build(self.name())
.switch("reverse", "reverse order", Some('r'))
.switch("nulls-last", "nulls ordered last", Some('n'))
.switch(
"maintain-order",
"maintain order on sorted items",
Some('m'),
)
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
@ -98,6 +103,7 @@ fn command(
descending: call.has_flag("reverse"),
nulls_last: call.has_flag("nulls-last"),
multithreaded: true,
maintain_order: call.has_flag("maintain-order"),
};
let mut res = df

View File

@ -98,7 +98,12 @@ fn command(
let value = NuDataFrame::dataframe_into_value(res, call.head);
Ok(PipelineData::Value(value, None))
}
_ => todo!(),
_ => Err(ShellError::UnsupportedInput(
"Expected the dataframe to have a column".to_string(),
"".to_string(),
call.head,
call.head,
)),
}
}

View File

@ -149,6 +149,7 @@ fn command(
closed_window: None,
tu: None,
tz: None,
fn_params: None,
};
let res = match roll_type {
RollType::Max => series.rolling_max(rolling_opts),

View File

@ -421,7 +421,17 @@ fn input_type_list_to_series(
let dt_chunked = ChunkedArray::<Int64Type>::from_iter_options(&list_name, it)
.into_datetime(TimeUnit::Nanoseconds, None);
builder.append_series(&dt_chunked.into_series());
builder
.append_series(&dt_chunked.into_series())
.map_err(|e| {
ShellError::GenericError(
"Error appending to series".into(),
"".to_string(),
None,
Some(e.to_string()),
Vec::new(),
)
})?
}
let res = builder.finish();
Ok(res.into_series())

View File

@ -466,12 +466,12 @@ impl NuDataFrame {
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(vec![*first_col], false) {
let lhs = match self.as_ref().sort(vec![*first_col], false, false) {
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(vec![*first_col], false) {
let rhs = match other.as_ref().sort(vec![*first_col], false, false) {
Ok(df) => df,
Err(_) => return None,
};

View File

@ -31,7 +31,7 @@ impl CustomValue for NuExpression {
}
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
Ok(self.to_value(span))
self.to_value(span)
}
fn as_any(&self) -> &dyn std::any::Any {

View File

@ -115,7 +115,7 @@ impl NuExpression {
f(expr, other).into()
}
pub fn to_value(&self, span: Span) -> Value {
pub fn to_value(&self, span: Span) -> Result<Value, ShellError> {
expr_to_value(self.as_ref(), span)
}
@ -164,60 +164,59 @@ impl ExtractedExpr {
}
}
pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
pub fn expr_to_value(expr: &Expr, span: Span) -> Result<Value, ShellError> {
match expr {
Expr::Alias(expr, alias) => Value::record(
Expr::Alias(expr, alias) => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"alias" => Value::string(alias.as_ref(), span),
},
span,
),
Expr::Column(name) => Value::record(
)),
Expr::Column(name) => Ok(Value::record(
record! {
"expr" => Value::string("column", span),
"value" => Value::string(name.to_string(), span),
},
span,
),
)),
Expr::Columns(columns) => {
let value = columns.iter().map(|col| Value::string(col, span)).collect();
Value::record(
Ok(Value::record(
record! {
"expr" => Value::string("columns", span),
"value" => Value::list(value, span),
},
span,
)
))
}
Expr::Literal(literal) => Value::record(
Expr::Literal(literal) => Ok(Value::record(
record! {
"expr" => Value::string("literal", span),
"value" => Value::string(format!("{literal:?}"), span),
},
span,
),
Expr::BinaryExpr { left, op, right } => Value::record(
)),
Expr::BinaryExpr { left, op, right } => Ok(Value::record(
record! {
"left" => expr_to_value(left, span),
"left" => expr_to_value(left, span)?,
"op" => Value::string(format!("{op:?}"), span),
"right" => expr_to_value(right, span),
"right" => expr_to_value(right, span)?,
},
span,
),
)),
Expr::Ternary {
predicate,
truthy,
falsy,
} => Value::record(
} => Ok(Value::record(
record! {
"predicate" => expr_to_value(predicate.as_ref(), span),
"truthy" => expr_to_value(truthy.as_ref(), span),
"falsy" => expr_to_value(falsy.as_ref(), span),
"predicate" => expr_to_value(predicate.as_ref(), span)?,
"truthy" => expr_to_value(truthy.as_ref(), span)?,
"falsy" => expr_to_value(falsy.as_ref(), span)?,
},
span,
),
)),
Expr::Agg(agg_expr) => {
let value = match agg_expr {
AggExpr::Min { input: expr, .. }
@ -237,37 +236,44 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
expr,
quantile,
interpol,
} => Value::record(
} => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"quantile" => expr_to_value(quantile.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"quantile" => expr_to_value(quantile.as_ref(), span)?,
"interpol" => Value::string(format!("{interpol:?}"), span),
},
span,
),
)),
};
Value::record(
Ok(Value::record(
record! {
"expr" => Value::string("agg", span),
"value" => value,
"value" => value?,
},
span,
)
))
}
Expr::Count => Value::record(record! { "expr" => Value::string("count", span) }, span),
Expr::Wildcard => {
Value::record(record! { "expr" => Value::string("wildcard", span) }, span)
}
Expr::Explode(expr) => Value::record(
record! { "expr" => expr_to_value(expr.as_ref(), span) },
Expr::Count => Ok(Value::record(
record! { "expr" => Value::string("count", span) },
span,
),
Expr::KeepName(expr) => Value::record(
record! { "expr" => expr_to_value(expr.as_ref(), span) },
)),
Expr::Wildcard => Ok(Value::record(
record! { "expr" => Value::string("wildcard", span) },
span,
),
Expr::Nth(i) => Value::record(record! { "expr" => Value::int(*i, span) }, span),
)),
Expr::Explode(expr) => Ok(Value::record(
record! { "expr" => expr_to_value(expr.as_ref(), span)? },
span,
)),
Expr::KeepName(expr) => Ok(Value::record(
record! { "expr" => expr_to_value(expr.as_ref(), span)? },
span,
)),
Expr::Nth(i) => Ok(Value::record(
record! { "expr" => Value::int(*i, span) },
span,
)),
Expr::DtypeColumn(dtypes) => {
let vals = dtypes
.iter()
@ -277,73 +283,74 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
})
.collect();
Value::List { vals, span }
Ok(Value::List { vals, span })
}
Expr::Sort { expr, options } => Value::record(
Expr::Sort { expr, options } => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"options" => Value::string(format!("{options:?}"), span),
},
span,
),
)),
Expr::Cast {
expr,
data_type,
strict,
} => Value::record(
} => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"dtype" => Value::string(format!("{data_type:?}"), span),
"strict" => Value::bool(*strict, span),
},
span,
),
Expr::Take { expr, idx } => Value::record(
)),
Expr::Take { expr, idx } => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"idx" => expr_to_value(idx.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"idx" => expr_to_value(idx.as_ref(), span)?,
},
span,
),
)),
Expr::SortBy {
expr,
by,
descending,
} => {
let by: Vec<Value> = by.iter().map(|b| expr_to_value(b, span)).collect();
let by: Result<Vec<Value>, ShellError> =
by.iter().map(|b| expr_to_value(b, span)).collect();
let descending: Vec<Value> = descending
.iter()
.map(|r| Value::Bool { val: *r, span })
.collect();
Value::record(
Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"by" => Value::list(by, span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"by" => Value::list(by?, span),
"descending" => Value::list(descending, span),
},
span,
)
))
}
Expr::Filter { input, by } => Value::record(
Expr::Filter { input, by } => Ok(Value::record(
record! {
"input" => expr_to_value(input.as_ref(), span),
"by" => expr_to_value(by.as_ref(), span),
"input" => expr_to_value(input.as_ref(), span)?,
"by" => expr_to_value(by.as_ref(), span)?,
},
span,
),
)),
Expr::Slice {
input,
offset,
length,
} => Value::record(
} => Ok(Value::record(
record! {
"input" => expr_to_value(input.as_ref(), span),
"offset" => expr_to_value(offset.as_ref(), span),
"length" => expr_to_value(length.as_ref(), span),
"input" => expr_to_value(input.as_ref(), span)?,
"offset" => expr_to_value(offset.as_ref(), span)?,
"length" => expr_to_value(length.as_ref(), span)?,
},
span,
),
)),
Expr::Exclude(expr, excluded) => {
let excluded = excluded
.iter()
@ -353,67 +360,62 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
})
.collect();
Value::record(
Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"excluded" => Value::list(excluded, span),
},
span,
)
))
}
Expr::RenameAlias { expr, function } => Value::record(
Expr::RenameAlias { expr, function } => Ok(Value::record(
record! {
"expr" => expr_to_value(expr.as_ref(), span),
"expr" => expr_to_value(expr.as_ref(), span)?,
"function" => Value::string(format!("{function:?}"), span),
},
span,
),
)),
Expr::AnonymousFunction {
input,
function,
output_type,
options,
} => {
let input: Vec<Value> = input.iter().map(|e| expr_to_value(e, span)).collect();
Value::record(
let input: Result<Vec<Value>, ShellError> =
input.iter().map(|e| expr_to_value(e, span)).collect();
Ok(Value::record(
record! {
"input" => Value::list(input, span),
"input" => Value::list(input?, span),
"function" => Value::string(format!("{function:?}"), span),
"output_type" => Value::string(format!("{output_type:?}"), span),
"options" => Value::string(format!("{options:?}"), span),
},
span,
)
))
}
Expr::Function {
input,
function,
options,
} => {
let input: Vec<Value> = input.iter().map(|e| expr_to_value(e, span)).collect();
Value::record(
let input: Result<Vec<Value>, ShellError> =
input.iter().map(|e| expr_to_value(e, span)).collect();
Ok(Value::record(
record! {
"input" => Value::list(input, span),
"input" => Value::list(input?, span),
"function" => Value::string(format!("{function:?}"), span),
"options" => Value::string(format!("{options:?}"), span),
},
span,
)
))
}
Expr::Cache { input, id } => Value::record(
record! {
"input" => expr_to_value(input.as_ref(), span),
"id" => Value::string(format!("{id:?}"), span),
},
span,
),
Expr::Window {
function,
partition_by,
order_by,
options,
} => {
let partition_by: Vec<Value> = partition_by
let partition_by: Result<Vec<Value>, ShellError> = partition_by
.iter()
.map(|e| expr_to_value(e, span))
.collect();
@ -421,17 +423,26 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Value {
let order_by = order_by
.as_ref()
.map(|e| expr_to_value(e.as_ref(), span))
.transpose()?
.unwrap_or_else(|| Value::nothing(span));
Value::record(
Ok(Value::record(
record! {
"function" => expr_to_value(function, span),
"partition_by" => Value::list(partition_by, span),
"function" => expr_to_value(function, span)?,
"partition_by" => Value::list(partition_by?, span),
"order_by" => order_by,
"options" => Value::string(format!("{options:?}"), span),
},
span,
)
))
}
// the parameter polars_plan::dsl::selector::Selector is not publicly exposed.
// I am not sure what we can meaningfully do with this at this time.
Expr::Selector(_) => Err(ShellError::UnsupportedInput(
"Expressions of type Selector to Nu Values is not yet supported".to_string(),
format!("Expression is {expr:?}"),
span,
Span::unknown(),
)),
}
}

View File

@ -26,8 +26,8 @@ impl CustomValue for NuWhen {
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let val = match self {
NuWhen::WhenThen(_) => "whenthen".into(),
NuWhen::WhenThenThen(_) => "whenthenthen".into(),
NuWhen::Then(_) => "whenthen".into(),
NuWhen::ChainedThen(_) => "whenthenthen".into(),
};
let value = Value::String { val, span };

View File

@ -2,13 +2,13 @@ mod custom_value;
use core::fmt;
use nu_protocol::{ShellError, Span, Value};
use polars::prelude::{col, when, WhenThen, WhenThenThen};
use polars::prelude::{col, when, ChainedThen, Then};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Clone)]
pub enum NuWhen {
WhenThen(Box<WhenThen>),
WhenThenThen(WhenThenThen),
Then(Box<Then>),
ChainedThen(ChainedThen),
}
// Mocked serialization of the LazyFrame object
@ -27,7 +27,7 @@ impl<'de> Deserialize<'de> for NuWhen {
where
D: Deserializer<'de>,
{
Ok(NuWhen::WhenThen(Box::new(when(col("a")).then(col("b")))))
Ok(NuWhen::Then(Box::new(when(col("a")).then(col("b")))))
}
}
@ -37,15 +37,15 @@ impl fmt::Debug for NuWhen {
}
}
impl From<WhenThen> for NuWhen {
fn from(when_then: WhenThen) -> Self {
NuWhen::WhenThen(Box::new(when_then))
impl From<Then> for NuWhen {
fn from(then: Then) -> Self {
NuWhen::Then(Box::new(then))
}
}
impl From<WhenThenThen> for NuWhen {
fn from(when_then_then: WhenThenThen) -> Self {
NuWhen::WhenThenThen(when_then_then)
impl From<ChainedThen> for NuWhen {
fn from(chained_when: ChainedThen) -> Self {
NuWhen::ChainedThen(chained_when)
}
}

View File

@ -80,7 +80,6 @@ serde_json = "1.0"
serde_urlencoded = "0.7"
serde_yaml = "0.9"
sha2 = "0.10"
sqlparser = { version = "0.34", features = ["serde"], optional = true }
sysinfo = "0.29"
tabled = { version = "0.14.0", features = ["color"], default-features = false }
terminal_size = "0.2"