upgrade polars to 0.17 (#4122)

This commit is contained in:
Nico Mandery
2021-11-16 00:01:02 +01:00
committed by GitHub
parent df87d90b8c
commit 16db368232
22 changed files with 67 additions and 74 deletions

View File

@ -89,7 +89,7 @@ zip = { version="0.5.9", optional=true }
digest = "0.9.0"
[dependencies.polars]
version = "0.16.0"
version = "0.17.0"
optional = true
features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"]

View File

@ -121,7 +121,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tail = df.as_ref().get_columns().iter().map(|col| {
let count = col.len() as f64;
let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) {
let sum = match col.sum_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -144,7 +144,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
_ => None,
};
let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) {
let min = match col.min_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -153,7 +153,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_25 = match col.quantile_as_series(0.25) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -164,7 +164,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_50 = match col.quantile_as_series(0.50) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -175,7 +175,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let q_75 = match col.quantile_as_series(0.75) {
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
@ -185,7 +185,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Err(_) => None,
};
let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) {
let max = match col.max_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,

View File

@ -44,6 +44,12 @@ impl WholeStreamCommand for DataFrame {
"type of join. Inner by default",
Some('t'),
)
.named(
"suffix",
SyntaxShape::String,
"suffix for the columns of the right dataframe",
Some('s'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -104,6 +110,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let r_df: Value = args.req(0)?;
let l_col: Vec<Value> = args.req_named("left")?;
let r_col: Vec<Value> = args.req_named("right")?;
let r_suffix: Option<Tagged<String>> = args.get_flag("suffix")?;
let join_type_op: Option<Tagged<String>> = args.get_flag("type")?;
let join_type = match join_type_op {
@ -124,6 +131,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
},
};
let suffix = r_suffix.map(|s| s.item);
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
@ -142,7 +151,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)?;
df.as_ref()
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
.join(
r_df.as_ref(),
&l_col_string,
&r_col_string,
join_type,
suffix,
)
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
}
_ => Err(ShellError::labeled_error(

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.day().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.hour().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.minute().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.month().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.nanosecond().into_series();

View File

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.ordinal().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.second().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.week().into_series();

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.weekday().into_series();

View File

@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.year().into_series();

View File

@ -6,7 +6,7 @@ use nu_protocol::{
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::prelude::DataType;
use polars::prelude::{DataType, RollingOptions};
enum RollType {
Min,
@ -57,7 +57,6 @@ impl WholeStreamCommand for DataFrame {
Signature::build("dataframe rolling")
.required("type", SyntaxShape::String, "rolling operation")
.required("window", SyntaxShape::Int, "Window size for rolling")
.switch("ignore_nulls", "Ignore nulls in column", Some('i'))
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -112,7 +111,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let roll_type: Tagged<String> = args.req(0)?;
let window_size: Tagged<i64> = args.req(1)?;
let ignore_nulls = args.has_flag("ignore_nulls");
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
@ -126,31 +124,17 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
}
let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?;
let rolling_opts = RollingOptions {
window_size: window_size.item as usize,
min_periods: window_size.item as usize,
weights: None,
center: false,
};
let res = match roll_type {
RollType::Max => series.rolling_max(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Min => series.rolling_min(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Sum => series.rolling_sum(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Mean => series.rolling_mean(
window_size.item as u32,
None,
ignore_nulls,
window_size.item as u32,
),
RollType::Max => series.rolling_max(rolling_opts),
RollType::Min => series.rolling_min(rolling_opts),
RollType::Sum => series.rolling_sum(rolling_opts),
RollType::Mean => series.rolling_mean(rolling_opts),
};
let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;

View File

@ -78,7 +78,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref()
.cast_with_dtype(&DataType::UInt32)
.cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type",

View File

@ -58,7 +58,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let series = df.as_series(&df_tag.span)?;
let casted = series
.date64()
.datetime()
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
let res = casted.strftime(&fmt.item).into_series();

View File

@ -92,7 +92,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let casted = match series.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
.as_ref()
.cast_with_dtype(&DataType::UInt32)
.cast(&DataType::UInt32)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect type",

View File

@ -73,9 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let writer = CsvWriter::new(&mut file);
let writer = if no_header {
writer.has_headers(false)
writer.has_header(false)
} else {
writer.has_headers(true)
writer.has_header(true)
};
let writer = match delimiter {