mirror of
https://github.com/nushell/nushell.git
synced 2024-11-22 16:33:37 +01:00
upgrade polars to 0.17 (#4122)
This commit is contained in:
parent
df87d90b8c
commit
16db368232
@ -89,7 +89,7 @@ zip = { version="0.5.9", optional=true }
|
||||
digest = "0.9.0"
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
optional = true
|
||||
features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"]
|
||||
|
||||
|
@ -121,7 +121,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tail = df.as_ref().get_columns().iter().map(|col| {
|
||||
let count = col.len() as f64;
|
||||
|
||||
let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
let sum = match col.sum_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
@ -144,7 +144,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
let min = match col.min_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
@ -153,7 +153,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
};
|
||||
|
||||
let q_25 = match col.quantile_as_series(0.25) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
@ -164,7 +164,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
};
|
||||
|
||||
let q_50 = match col.quantile_as_series(0.50) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
@ -175,7 +175,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
};
|
||||
|
||||
let q_75 = match col.quantile_as_series(0.75) {
|
||||
Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
@ -185,7 +185,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
Err(_) => None,
|
||||
};
|
||||
|
||||
let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) {
|
||||
let max = match col.max_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
|
@ -44,6 +44,12 @@ impl WholeStreamCommand for DataFrame {
|
||||
"type of join. Inner by default",
|
||||
Some('t'),
|
||||
)
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"suffix for the columns of the right dataframe",
|
||||
Some('s'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
@ -104,6 +110,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let r_df: Value = args.req(0)?;
|
||||
let l_col: Vec<Value> = args.req_named("left")?;
|
||||
let r_col: Vec<Value> = args.req_named("right")?;
|
||||
let r_suffix: Option<Tagged<String>> = args.get_flag("suffix")?;
|
||||
let join_type_op: Option<Tagged<String>> = args.get_flag("type")?;
|
||||
|
||||
let join_type = match join_type_op {
|
||||
@ -124,6 +131,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
},
|
||||
};
|
||||
|
||||
let suffix = r_suffix.map(|s| s.item);
|
||||
|
||||
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
|
||||
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
|
||||
|
||||
@ -142,7 +151,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)?;
|
||||
|
||||
df.as_ref()
|
||||
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
|
||||
.join(
|
||||
r_df.as_ref(),
|
||||
&l_col_string,
|
||||
&r_col_string,
|
||||
join_type,
|
||||
suffix,
|
||||
)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.day().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.hour().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.minute().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.month().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.nanosecond().into_series();
|
||||
|
@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.ordinal().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.second().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.week().into_series();
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.weekday().into_series();
|
||||
|
@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.year().into_series();
|
||||
|
@ -6,7 +6,7 @@ use nu_protocol::{
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::DataType;
|
||||
use polars::prelude::{DataType, RollingOptions};
|
||||
|
||||
enum RollType {
|
||||
Min,
|
||||
@ -57,7 +57,6 @@ impl WholeStreamCommand for DataFrame {
|
||||
Signature::build("dataframe rolling")
|
||||
.required("type", SyntaxShape::String, "rolling operation")
|
||||
.required("window", SyntaxShape::Int, "Window size for rolling")
|
||||
.switch("ignore_nulls", "Ignore nulls in column", Some('i'))
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
@ -112,7 +111,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let roll_type: Tagged<String> = args.req(0)?;
|
||||
let window_size: Tagged<i64> = args.req(1)?;
|
||||
let ignore_nulls = args.has_flag("ignore_nulls");
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
@ -126,31 +124,17 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
}
|
||||
|
||||
let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?;
|
||||
let rolling_opts = RollingOptions {
|
||||
window_size: window_size.item as usize,
|
||||
min_periods: window_size.item as usize,
|
||||
weights: None,
|
||||
center: false,
|
||||
};
|
||||
let res = match roll_type {
|
||||
RollType::Max => series.rolling_max(
|
||||
window_size.item as u32,
|
||||
None,
|
||||
ignore_nulls,
|
||||
window_size.item as u32,
|
||||
),
|
||||
RollType::Min => series.rolling_min(
|
||||
window_size.item as u32,
|
||||
None,
|
||||
ignore_nulls,
|
||||
window_size.item as u32,
|
||||
),
|
||||
RollType::Sum => series.rolling_sum(
|
||||
window_size.item as u32,
|
||||
None,
|
||||
ignore_nulls,
|
||||
window_size.item as u32,
|
||||
),
|
||||
RollType::Mean => series.rolling_mean(
|
||||
window_size.item as u32,
|
||||
None,
|
||||
ignore_nulls,
|
||||
window_size.item as u32,
|
||||
),
|
||||
RollType::Max => series.rolling_max(rolling_opts),
|
||||
RollType::Min => series.rolling_min(rolling_opts),
|
||||
RollType::Sum => series.rolling_sum(rolling_opts),
|
||||
RollType::Mean => series.rolling_mean(rolling_opts),
|
||||
};
|
||||
|
||||
let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
@ -78,7 +78,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let casted = match indices.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
|
||||
.as_ref()
|
||||
.cast_with_dtype(&DataType::UInt32)
|
||||
.cast(&DataType::UInt32)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Incorrect type",
|
||||
|
@ -58,7 +58,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let casted = series
|
||||
.date64()
|
||||
.datetime()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
let res = casted.strftime(&fmt.item).into_series();
|
||||
|
@ -92,7 +92,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let casted = match series.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
|
||||
.as_ref()
|
||||
.cast_with_dtype(&DataType::UInt32)
|
||||
.cast(&DataType::UInt32)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)),
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Incorrect type",
|
||||
|
@ -73,9 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let writer = CsvWriter::new(&mut file);
|
||||
|
||||
let writer = if no_header {
|
||||
writer.has_headers(false)
|
||||
writer.has_header(false)
|
||||
} else {
|
||||
writer.has_headers(true)
|
||||
writer.has_header(true)
|
||||
};
|
||||
|
||||
let writer = match delimiter {
|
||||
|
@ -27,9 +27,9 @@ serde = { version="1.0", features=["derive"] }
|
||||
serde_bytes = "0.11.5"
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.16.0"
|
||||
version = "0.17.0"
|
||||
optional = true
|
||||
features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-duration-ns"]
|
||||
features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-date", "dtype-datetime", "dtype-time"]
|
||||
|
||||
[features]
|
||||
dataframe = ["polars"]
|
||||
|
@ -603,7 +603,7 @@ where
|
||||
{
|
||||
match series.dtype() {
|
||||
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
|
||||
let to_i64 = series.cast_with_dtype(&DataType::Int64);
|
||||
let to_i64 = series.cast(&DataType::Int64);
|
||||
|
||||
match to_i64 {
|
||||
Ok(series) => {
|
||||
@ -661,7 +661,7 @@ where
|
||||
{
|
||||
match series.dtype() {
|
||||
DataType::Float32 => {
|
||||
let to_f64 = series.cast_with_dtype(&DataType::Float64);
|
||||
let to_f64 = series.cast(&DataType::Float64);
|
||||
|
||||
match to_f64 {
|
||||
Ok(series) => {
|
||||
@ -731,7 +731,7 @@ where
|
||||
{
|
||||
match series.dtype() {
|
||||
DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
|
||||
let to_i64 = series.cast_with_dtype(&DataType::Int64);
|
||||
let to_i64 = series.cast(&DataType::Int64);
|
||||
|
||||
match to_i64 {
|
||||
Ok(series) => {
|
||||
@ -789,7 +789,7 @@ where
|
||||
{
|
||||
match series.dtype() {
|
||||
DataType::Float32 => {
|
||||
let to_f64 = series.cast_with_dtype(&DataType::Float64);
|
||||
let to_f64 = series.cast(&DataType::Float64);
|
||||
|
||||
match to_f64 {
|
||||
Ok(series) => {
|
||||
|
@ -8,8 +8,8 @@ use nu_errors::ShellError;
|
||||
use nu_source::{Span, Tag};
|
||||
use num_bigint::BigInt;
|
||||
use polars::prelude::{
|
||||
DataFrame, DataType, Date64Type, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, ObjectType,
|
||||
PolarsNumericType, Series, TimeUnit,
|
||||
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
|
||||
ObjectType, PolarsNumericType, Series,
|
||||
};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
@ -310,8 +310,8 @@ pub fn create_column(
|
||||
}
|
||||
}
|
||||
}
|
||||
DataType::Date32 => {
|
||||
let casted = series.date32().map_err(|e| {
|
||||
DataType::Date => {
|
||||
let casted = series.date().map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
"Casting error",
|
||||
format!("casting error: {}", e),
|
||||
@ -347,8 +347,8 @@ pub fn create_column(
|
||||
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Date64 => {
|
||||
let casted = series.date64().map_err(|e| {
|
||||
DataType::Datetime => {
|
||||
let casted = series.datetime().map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
"Casting error",
|
||||
format!("casting error: {}", e),
|
||||
@ -384,8 +384,8 @@ pub fn create_column(
|
||||
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Time64(timeunit) | DataType::Duration(timeunit) => {
|
||||
let casted = series.time64_nanosecond().map_err(|e| {
|
||||
DataType::Time => {
|
||||
let casted = series.time().map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
"Casting error",
|
||||
format!("casting error: {}", e),
|
||||
@ -398,14 +398,7 @@ pub fn create_column(
|
||||
.skip(from_row)
|
||||
.take(size)
|
||||
.map(|v| match v {
|
||||
Some(a) => {
|
||||
let nanoseconds = match timeunit {
|
||||
TimeUnit::Second => a / 1_000_000_000,
|
||||
TimeUnit::Millisecond => a / 1_000_000,
|
||||
TimeUnit::Microsecond => a / 1_000,
|
||||
TimeUnit::Nanosecond => a,
|
||||
};
|
||||
|
||||
Some(nanoseconds) => {
|
||||
let untagged = if let Some(bigint) = BigInt::from_i64(nanoseconds) {
|
||||
UntaggedValue::Primitive(Primitive::Duration(bigint))
|
||||
} else {
|
||||
@ -633,7 +626,8 @@ pub fn from_parsed_columns(
|
||||
}
|
||||
});
|
||||
|
||||
let res = ChunkedArray::<Date64Type>::new_from_opt_iter(&name, it);
|
||||
let res: DatetimeChunked =
|
||||
ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it).into();
|
||||
|
||||
df_series.push(res.into_series())
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ impl PartialEq for NuDataFrame {
|
||||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
|
||||
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return false,
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user