upgrade polars to 0.17 (#4122)

This commit is contained in:
Nico Mandery 2021-11-16 00:01:02 +01:00 committed by GitHub
parent df87d90b8c
commit 16db368232
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 67 additions and 74 deletions

View File

@ -89,7 +89,7 @@ zip = { version="0.5.9", optional=true }
digest = "0.9.0" digest = "0.9.0"
[dependencies.polars] [dependencies.polars]
version = "0.16.0" version = "0.17.0"
optional = true optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"] features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"]

View File

@ -121,7 +121,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tail = df.as_ref().get_columns().iter().map(|col| { let tail = df.as_ref().get_columns().iter().map(|col| {
let count = col.len() as f64; let count = col.len() as f64;
let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) { let sum = match col.sum_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
@ -144,7 +144,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
_ => None, _ => None,
}; };
let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) { let min = match col.min_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
@ -153,7 +153,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}; };
let q_25 = match col.quantile_as_series(0.25) { let q_25 = match col.quantile_as_series(0.25) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
@ -164,7 +164,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}; };
let q_50 = match col.quantile_as_series(0.50) { let q_50 = match col.quantile_as_series(0.50) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
@ -175,7 +175,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}; };
let q_75 = match col.quantile_as_series(0.75) { let q_75 = match col.quantile_as_series(0.75) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
@ -185,7 +185,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Err(_) => None, Err(_) => None,
}; };
let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) { let max = match col.max_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) { Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,

View File

@ -44,6 +44,12 @@ impl WholeStreamCommand for DataFrame {
"type of join. Inner by default", "type of join. Inner by default",
Some('t'), Some('t'),
) )
.named(
"suffix",
SyntaxShape::String,
"suffix for the columns of the right dataframe",
Some('s'),
)
} }
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> { fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -104,6 +110,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let r_df: Value = args.req(0)?; let r_df: Value = args.req(0)?;
let l_col: Vec<Value> = args.req_named("left")?; let l_col: Vec<Value> = args.req_named("left")?;
let r_col: Vec<Value> = args.req_named("right")?; let r_col: Vec<Value> = args.req_named("right")?;
let r_suffix: Option<Tagged<String>> = args.get_flag("suffix")?;
let join_type_op: Option<Tagged<String>> = args.get_flag("type")?; let join_type_op: Option<Tagged<String>> = args.get_flag("type")?;
let join_type = match join_type_op { let join_type = match join_type_op {
@ -124,6 +131,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}, },
}; };
let suffix = r_suffix.map(|s| s.item);
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?; let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?; let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
@ -142,7 +151,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)?; )?;
df.as_ref() df.as_ref()
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type) .join(
r_df.as_ref(),
&l_col_string,
&r_col_string,
join_type,
suffix,
)
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None)) .map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
} }
_ => Err(ShellError::labeled_error( _ => Err(ShellError::labeled_error(

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.day().into_series(); let res = casted.day().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.hour().into_series(); let res = casted.hour().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.minute().into_series(); let res = casted.minute().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.month().into_series(); let res = casted.month().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.nanosecond().into_series(); let res = casted.nanosecond().into_series();

View File

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.ordinal().into_series(); let res = casted.ordinal().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.second().into_series(); let res = casted.second().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.week().into_series(); let res = casted.week().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.weekday().into_series(); let res = casted.weekday().into_series();

View File

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.year().into_series(); let res = casted.year().into_series();

View File

@ -6,7 +6,7 @@ use nu_protocol::{
Signature, SyntaxShape, UntaggedValue, Signature, SyntaxShape, UntaggedValue,
}; };
use nu_source::Tagged; use nu_source::Tagged;
use polars::prelude::DataType; use polars::prelude::{DataType, RollingOptions};
enum RollType { enum RollType {
Min, Min,
@ -57,7 +57,6 @@ impl WholeStreamCommand for DataFrame {
Signature::build("dataframe rolling") Signature::build("dataframe rolling")
.required("type", SyntaxShape::String, "rolling operation") .required("type", SyntaxShape::String, "rolling operation")
.required("window", SyntaxShape::Int, "Window size for rolling") .required("window", SyntaxShape::Int, "Window size for rolling")
.switch("ignore_nulls", "Ignore nulls in column", Some('i'))
} }
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> { fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -112,7 +111,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone(); let tag = args.call_info.name_tag.clone();
let roll_type: Tagged<String> = args.req(0)?; let roll_type: Tagged<String> = args.req(0)?;
let window_size: Tagged<i64> = args.req(1)?; let window_size: Tagged<i64> = args.req(1)?;
let ignore_nulls = args.has_flag("ignore_nulls");
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
@ -126,31 +124,17 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
} }
let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?; let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?;
let rolling_opts = RollingOptions {
window_size: window_size.item as usize,
min_periods: window_size.item as usize,
weights: None,
center: false,
};
let res = match roll_type { let res = match roll_type {
RollType::Max => series.rolling_max( RollType::Max => series.rolling_max(rolling_opts),
window_size.item as u32, RollType::Min => series.rolling_min(rolling_opts),
None, RollType::Sum => series.rolling_sum(rolling_opts),
ignore_nulls, RollType::Mean => series.rolling_mean(rolling_opts),
window_size.item as u32,
),
RollType::Min => series.rolling_min(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Sum => series.rolling_sum(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Mean => series.rolling_mean(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
}; };
let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;

View File

@ -78,7 +78,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match indices.dtype() { let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref() .as_ref()
.cast_with_dtype(&DataType::UInt32) .cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)), .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary( _ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type", "Incorrect type",

View File

@ -58,7 +58,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?; let series = df.as_series(&df_tag.span)?;
let casted = series let casted = series
.date64() .datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.strftime(&fmt.item).into_series(); let res = casted.strftime(&fmt.item).into_series();

View File

@ -92,7 +92,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match series.dtype() { let casted = match series.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
.as_ref() .as_ref()
.cast_with_dtype(&DataType::UInt32) .cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)), .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary( _ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type", "Incorrect type",

View File

@ -73,9 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let writer = CsvWriter::new(&mut file); let writer = CsvWriter::new(&mut file);
let writer = if no_header { let writer = if no_header {
writer.has_headers(false) writer.has_header(false)
} else { } else {
writer.has_headers(true) writer.has_header(true)
}; };
let writer = match delimiter { let writer = match delimiter {

View File

@ -27,9 +27,9 @@ serde = { version="1.0", features=["derive"] }
serde_bytes = "0.11.5" serde_bytes = "0.11.5"
[dependencies.polars] [dependencies.polars]
version = "0.16.0" version = "0.17.0"
optional = true optional = true
features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-duration-ns"] features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-date", "dtype-datetime", "dtype-time"]
[features] [features]
dataframe = ["polars"] dataframe = ["polars"]

View File

@ -603,7 +603,7 @@ where
{ {
match series.dtype() { match series.dtype() {
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
let to_i64 = series.cast_with_dtype(&DataType::Int64); let to_i64 = series.cast(&DataType::Int64);
match to_i64 { match to_i64 {
Ok(series) => { Ok(series) => {
@ -661,7 +661,7 @@ where
{ {
match series.dtype() { match series.dtype() {
DataType::Float32 => { DataType::Float32 => {
let to_f64 = series.cast_with_dtype(&DataType::Float64); let to_f64 = series.cast(&DataType::Float64);
match to_f64 { match to_f64 {
Ok(series) => { Ok(series) => {
@ -731,7 +731,7 @@ where
{ {
match series.dtype() { match series.dtype() {
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
let to_i64 = series.cast_with_dtype(&DataType::Int64); let to_i64 = series.cast(&DataType::Int64);
match to_i64 { match to_i64 {
Ok(series) => { Ok(series) => {
@ -789,7 +789,7 @@ where
{ {
match series.dtype() { match series.dtype() {
DataType::Float32 => { DataType::Float32 => {
let to_f64 = series.cast_with_dtype(&DataType::Float64); let to_f64 = series.cast(&DataType::Float64);
match to_f64 { match to_f64 {
Ok(series) => { Ok(series) => {

View File

@ -8,8 +8,8 @@ use nu_errors::ShellError;
use nu_source::{Span, Tag}; use nu_source::{Span, Tag};
use num_bigint::BigInt; use num_bigint::BigInt;
use polars::prelude::{ use polars::prelude::{
DataFrame, DataType, Date64Type, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, ObjectType, DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
PolarsNumericType, Series, TimeUnit, ObjectType, PolarsNumericType, Series,
}; };
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
@ -310,8 +310,8 @@ pub fn create_column(
} }
} }
} }
DataType::Date32 => { DataType::Date => {
let casted = series.date32().map_err(|e| { let casted = series.date().map_err(|e| {
ShellError::labeled_error( ShellError::labeled_error(
"Casting error", "Casting error",
format!("casting error: {}", e), format!("casting error: {}", e),
@ -347,8 +347,8 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
DataType::Date64 => { DataType::Datetime => {
let casted = series.date64().map_err(|e| { let casted = series.datetime().map_err(|e| {
ShellError::labeled_error( ShellError::labeled_error(
"Casting error", "Casting error",
format!("casting error: {}", e), format!("casting error: {}", e),
@ -384,8 +384,8 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
DataType::Time64(timeunit) | DataType::Duration(timeunit) => { DataType::Time => {
let casted = series.time64_nanosecond().map_err(|e| { let casted = series.time().map_err(|e| {
ShellError::labeled_error( ShellError::labeled_error(
"Casting error", "Casting error",
format!("casting error: {}", e), format!("casting error: {}", e),
@ -398,14 +398,7 @@ pub fn create_column(
.skip(from_row) .skip(from_row)
.take(size) .take(size)
.map(|v| match v { .map(|v| match v {
Some(a) => { Some(nanoseconds) => {
let nanoseconds = match timeunit {
TimeUnit::Second => a / 1_000_000_000,
TimeUnit::Millisecond => a / 1_000_000,
TimeUnit::Microsecond => a / 1_000,
TimeUnit::Nanosecond => a,
};
let untagged = if let Some(bigint) = BigInt::from_i64(nanoseconds) { let untagged = if let Some(bigint) = BigInt::from_i64(nanoseconds) {
UntaggedValue::Primitive(Primitive::Duration(bigint)) UntaggedValue::Primitive(Primitive::Duration(bigint))
} else { } else {
@ -633,7 +626,8 @@ pub fn from_parsed_columns(
} }
}); });
let res = ChunkedArray::<Date64Type>::new_from_opt_iter(&name, it); let res: DatetimeChunked =
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it).into();
df_series.push(res.into_series()) df_series.push(res.into_series())
} }

View File

@ -87,7 +87,7 @@ impl PartialEq for NuDataFrame {
// Casting needed to compare other numeric types with nushell numeric type. // Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created // In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64 // with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) { DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series, Ok(series) => series,
Err(_) => return false, Err(_) => return false,
}, },