From f1ee9113ac2897dc7e2612dc03022f939342cdca Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Sun, 25 Jul 2021 11:01:54 +0100 Subject: [PATCH] All is a DataFrame (#3812) * nuframe in its own type in UntaggedValue * Removed eager dataframe from enum * Dataframe created from list of values * Corrected order in dataframe columns * Returned tag from stream collection * Removed series from dataframe commands * Arithmetic operators * forced push * forced push * Replace all command * String commands * appending operations with dfs * Testing suite for dataframes * Unit test for dataframe commands * improved equality for dataframes --- .../src/commands/dataframe/aggregate.rs | 212 ++---- .../src/commands/dataframe/column.rs | 35 +- .../nu-command/src/commands/dataframe/drop.rs | 30 +- .../src/commands/dataframe/drop_duplicates.rs | 36 +- .../src/commands/dataframe/drop_nulls.rs | 61 +- .../src/commands/dataframe/dtypes.rs | 82 ++- .../src/commands/dataframe/dummies.rs | 92 ++- .../src/commands/dataframe/filter.rs | 41 +- .../src/commands/dataframe/first.rs | 32 +- .../nu-command/src/commands/dataframe/get.rs | 30 +- .../src/commands/dataframe/groupby.rs | 6 +- .../nu-command/src/commands/dataframe/join.rs | 75 ++- .../nu-command/src/commands/dataframe/last.rs | 32 +- .../nu-command/src/commands/dataframe/list.rs | 95 ++- .../nu-command/src/commands/dataframe/melt.rs | 93 ++- .../nu-command/src/commands/dataframe/mod.rs | 9 +- .../src/commands/dataframe/pivot.rs | 2 +- .../src/commands/dataframe/sample.rs | 6 +- .../src/commands/dataframe/select.rs | 30 +- .../commands/dataframe/series/all_false.rs | 55 +- .../src/commands/dataframe/series/all_true.rs | 55 +- .../src/commands/dataframe/series/arg_max.rs | 38 +- .../src/commands/dataframe/series/arg_min.rs | 37 +- .../src/commands/dataframe/series/arg_sort.rs | 47 +- .../src/commands/dataframe/series/arg_true.rs | 40 +- .../commands/dataframe/series/arg_unique.rs | 51 +- .../commands/dataframe/series/concatenate.rs | 
107 +++ .../src/commands/dataframe/series/contains.rs | 90 +++ .../dataframe/series/is_duplicated.rs | 55 +- .../src/commands/dataframe/series/is_in.rs | 62 +- .../commands/dataframe/series/is_not_null.rs | 45 +- .../src/commands/dataframe/series/is_null.rs | 45 +- .../commands/dataframe/series/is_unique.rs | 55 +- .../src/commands/dataframe/series/mod.rs | 14 + .../src/commands/dataframe/series/n_null.rs | 42 +- .../src/commands/dataframe/series/n_unique.rs | 42 +- .../src/commands/dataframe/series/not.rs | 45 +- .../src/commands/dataframe/series/rename.rs | 44 +- .../src/commands/dataframe/series/replace.rs | 56 +- .../commands/dataframe/series/replace_all.rs | 100 +++ .../src/commands/dataframe/series/set.rs | 90 ++- .../commands/dataframe/series/set_with_idx.rs | 68 +- .../src/commands/dataframe/series/shift.rs | 15 +- .../commands/dataframe/series/str_lengths.rs | 81 +++ .../commands/dataframe/series/str_slice.rs | 92 +++ .../commands/dataframe/series/to_lowercase.rs | 82 +++ .../commands/dataframe/series/to_uppercase.rs | 82 +++ .../src/commands/dataframe/series/unique.rs | 43 +- .../commands/dataframe/series/value_counts.rs | 45 +- .../src/commands/dataframe/shape.rs | 49 +- .../nu-command/src/commands/dataframe/show.rs | 2 +- .../src/commands/dataframe/slice.rs | 30 +- .../nu-command/src/commands/dataframe/sort.rs | 89 ++- .../nu-command/src/commands/dataframe/take.rs | 62 +- .../src/commands/dataframe/to_csv.rs | 2 +- .../src/commands/dataframe/to_df.rs | 115 +++- .../src/commands/dataframe/to_parquet.rs | 2 +- .../src/commands/dataframe/to_series.rs | 44 -- .../src/commands/dataframe/where_.rs | 27 +- .../src/commands/dataframe/with_column.rs | 56 +- .../nu-command/src/commands/filters/first.rs | 12 +- .../nu-command/src/commands/filters/uniq.rs | 2 +- .../src/commands/formats/to/json.rs | 2 +- .../src/commands/formats/to/toml.rs | 4 +- .../src/commands/formats/to/yaml.rs | 2 +- crates/nu-command/src/commands/mod.rs | 19 +- 
.../src/commands/viewers/autoview/command.rs | 20 +- crates/nu-command/src/default_context.rs | 8 +- crates/nu-command/src/examples.rs | 84 +++ crates/nu-data/src/base/shape.rs | 2 +- crates/nu-data/src/config.rs | 4 +- crates/nu-data/src/dataframe.rs | 624 ++++++++++-------- crates/nu-engine/src/evaluate/operator.rs | 13 +- crates/nu-protocol/src/dataframe/mod.rs | 8 +- .../nu-protocol/src/dataframe/nu_dataframe.rs | 315 +++++++-- .../nu-protocol/src/dataframe/nu_groupby.rs | 4 +- crates/nu-protocol/src/dataframe/nu_series.rs | 345 ---------- crates/nu-protocol/src/type_shape.rs | 8 +- crates/nu-protocol/src/value.rs | 19 +- crates/nu-protocol/src/value/debug.rs | 4 +- crates/nu-value-ext/src/lib.rs | 12 +- crates/nu_plugin_post/src/post.rs | 2 +- crates/nu_plugin_to_bson/src/to_bson.rs | 2 +- 83 files changed, 3293 insertions(+), 1422 deletions(-) create mode 100644 crates/nu-command/src/commands/dataframe/series/concatenate.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/contains.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/replace_all.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/str_lengths.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/str_slice.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/to_lowercase.rs create mode 100644 crates/nu-command/src/commands/dataframe/series/to_uppercase.rs delete mode 100644 crates/nu-command/src/commands/dataframe/to_series.rs delete mode 100644 crates/nu-protocol/src/dataframe/nu_series.rs diff --git a/crates/nu-command/src/commands/dataframe/aggregate.rs b/crates/nu-command/src/commands/dataframe/aggregate.rs index 20790e57e..d5f1a922a 100644 --- a/crates/nu-command/src/commands/dataframe/aggregate.rs +++ b/crates/nu-command/src/commands/dataframe/aggregate.rs @@ -2,14 +2,11 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use 
nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, PolarsData}, - Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, + dataframe::{Column, FrameStruct, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, }; use nu_source::Tagged; -use polars::{ - frame::groupby::GroupBy, - prelude::{DataType, PolarsError, Series}, -}; +use polars::{frame::groupby::GroupBy, prelude::PolarsError}; enum Operation { Mean, @@ -111,17 +108,40 @@ impl WholeStreamCommand for DataFrame { description: "Aggregate sum by grouping by column a and summing on col b", example: "[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe group-by a | dataframe aggregate sum", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::string("one").into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Aggregate sum in dataframe columns", example: "[[a b]; [4 1] [5 2]] | dataframe to-df | dataframe aggregate sum", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(9).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Aggregate sum in series", - example: "[4 1 5 6] | dataframe to-series | dataframe aggregate sum", - result: None, + example: "[4 1 5 6] | dataframe to-df | dataframe aggregate sum", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("0".to_string(), vec![UntaggedValue::int(16).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -139,7 +159,7 @@ fn command(mut args: CommandArgs) -> Result { })?; 
match value.value { - UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => { + UntaggedValue::FrameStruct(FrameStruct::GroupBy(nu_groupby)) => { let groupby = nu_groupby.to_groupby()?; let res = perform_groupby_aggregation( @@ -152,18 +172,13 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + UntaggedValue::DataFrame(df) => { let df = df.as_ref(); let res = perform_dataframe_aggregation(&df, op, &operation.tag)?; Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - UntaggedValue::DataFrame(PolarsData::Series(series)) => { - let value = perform_series_aggregation(series.as_ref(), op, &operation.tag)?; - - Ok(OutputStream::one(value)) - } _ => Err(ShellError::labeled_error( "No groupby, dataframe or series in stream", "no groupby, dataframe or series found in input stream", @@ -264,162 +279,15 @@ fn perform_dataframe_aggregation( } } -fn perform_series_aggregation( - series: &Series, - operation: Operation, - operation_tag: &Tag, -) -> Result { - match operation { - Operation::Mean => { - let res = match series.mean() { - Some(val) => UntaggedValue::Primitive(val.into()), - None => UntaggedValue::Primitive(0.into()), - }; +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; - let value = Value { - value: res, - tag: operation_tag.clone(), - }; + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; - let mut data = TaggedDictBuilder::new(operation_tag.clone()); - data.insert_value(series.name(), value); - - Ok(data.into_value()) - } - Operation::Median => { - let res = match series.median() { - Some(val) => UntaggedValue::Primitive(val.into()), - None => UntaggedValue::Primitive(0.into()), - }; - - let value = Value { - value: res, - tag: operation_tag.clone(), - }; - - let mut data = 
TaggedDictBuilder::new(operation_tag.clone()); - data.insert_value(series.name(), value); - - Ok(data.into_value()) - } - Operation::Sum => { - let untagged = match series.dtype() { - DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 => { - let res: i64 = series.sum().unwrap_or(0); - Ok(UntaggedValue::Primitive(res.into())) - } - DataType::Float32 | DataType::Float64 => { - let res: f64 = series.sum().unwrap_or(0.0); - Ok(UntaggedValue::Primitive(res.into())) - } - _ => Err(ShellError::labeled_error( - "Not valid type", - format!( - "this operation can not be performed with series of type {}", - series.dtype() - ), - &operation_tag.span, - )), - }?; - - let value = Value { - value: untagged, - tag: operation_tag.clone(), - }; - - let mut data = TaggedDictBuilder::new(operation_tag.clone()); - data.insert_value(series.name(), value); - - Ok(data.into_value()) - } - Operation::Max => { - let untagged = match series.dtype() { - DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 => { - let res: i64 = series.max().unwrap_or(0); - Ok(UntaggedValue::Primitive(res.into())) - } - DataType::Float32 | DataType::Float64 => { - let res: f64 = series.max().unwrap_or(0.0); - Ok(UntaggedValue::Primitive(res.into())) - } - _ => Err(ShellError::labeled_error( - "Not valid type", - format!( - "this operation can not be performed with series of type {}", - series.dtype() - ), - &operation_tag.span, - )), - }?; - - let value = Value { - value: untagged, - tag: operation_tag.clone(), - }; - - let mut data = TaggedDictBuilder::new(operation_tag.clone()); - data.insert_value(series.name(), value); - - Ok(data.into_value()) - } - Operation::Min => { - let untagged = match series.dtype() { - DataType::Int8 - | DataType::Int16 - | DataType::Int32 - | DataType::Int64 - | 
DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 => { - let res: i64 = series.min().unwrap_or(0); - Ok(UntaggedValue::Primitive(res.into())) - } - DataType::Float32 | DataType::Float64 => { - let res: f64 = series.min().unwrap_or(0.0); - Ok(UntaggedValue::Primitive(res.into())) - } - _ => Err(ShellError::labeled_error( - "Not valid type", - format!( - "this operation can not be performed with series of type {}", - series.dtype() - ), - &operation_tag.span, - )), - }?; - - let value = Value { - value: untagged, - tag: operation_tag.clone(), - }; - - let mut data = TaggedDictBuilder::new(operation_tag.clone()); - data.insert_value(series.name(), value); - - Ok(data.into_value()) - } - - _ => Err(ShellError::labeled_error_with_secondary( - "Not valid operation", - "operation not valid for series", - &operation_tag.span, - "Perhaps you want: mean, median, sum, max, min", - &operation_tag.span, - )), + test_examples(DataFrame {}) } } diff --git a/crates/nu-command/src/commands/dataframe/column.rs b/crates/nu-command/src/commands/dataframe/column.rs index 282acad27..8f76fae00 100644 --- a/crates/nu-command/src/commands/dataframe/column.rs +++ b/crates/nu-command/src/commands/dataframe/column.rs @@ -2,8 +2,8 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuSeries}, - Signature, SyntaxShape, + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, }; use nu_source::Tagged; @@ -32,7 +32,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Returns the selected column as series", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe column a", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + 
.into_value(Tag::default())]), }] } } @@ -41,15 +49,26 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let column: Tagged = args.req(0)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df .as_ref() .column(column.item.as_ref()) .map_err(|e| parse_polars_error::<&str>(&e, &column.tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.clone(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.clone()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/drop.rs b/crates/nu-command/src/commands/dataframe/drop.rs index 387cf7cee..798768d73 100644 --- a/crates/nu-command/src/commands/dataframe/drop.rs +++ b/crates/nu-command/src/commands/dataframe/drop.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; @@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "drop column a", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe drop a", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -39,7 +50,7 @@ fn command(mut 
args: CommandArgs) -> Result { let columns: Vec = args.rest(0)?; let (col_string, col_span) = convert_columns(&columns, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let new_df = match col_string.get(0) { Some(col) => df @@ -63,3 +74,16 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/drop_duplicates.rs b/crates/nu-command/src/commands/dataframe/drop_duplicates.rs index a7ed6ef7f..31420c3bc 100644 --- a/crates/nu-command/src/commands/dataframe/drop_duplicates.rs +++ b/crates/nu-command/src/commands/dataframe/drop_duplicates.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; @@ -34,7 +37,21 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "drop duplicates", example: "[[a b]; [1 2] [3 4] [1 2]] | dataframe to-df | dataframe drop-duplicates", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -52,7 +69,7 @@ fn command(mut args: CommandArgs) 
-> Result { None => (None, Span::unknown()), }; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let subset_slice = subset.as_ref().map(|cols| &cols[..]); @@ -63,3 +80,16 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/drop_nulls.rs b/crates/nu-command/src/commands/dataframe/drop_nulls.rs index 72659962e..f974d914a 100644 --- a/crates/nu-command/src/commands/dataframe/drop_nulls.rs +++ b/crates/nu-command/src/commands/dataframe/drop_nulls.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuSeries, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; @@ -38,15 +38,45 @@ impl WholeStreamCommand for DataFrame { example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df); let res = ($df.b / $df.b); let df = ($df | dataframe with-column $res --name res); - $df | dataframe drop-nulls -"#, - result: None, + $df | dataframe drop-nulls"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(2).into()], + ), + Column::new( + "res".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "drop null values in 
dataframe", - example: r#"let s = ([1 2 0 0 3 4] | dataframe to-series); + example: r#"let s = ([1 2 0 0 3 4] | dataframe to-df); ($s / $s) | dataframe drop-nulls"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "div_0_0".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(1).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -60,7 +90,7 @@ fn command(mut args: CommandArgs) -> Result { })?; match value.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + UntaggedValue::DataFrame(df) => { // Extracting the selection columns of the columns to perform the aggregation let columns: Option> = args.opt(0)?; let (subset, col_span) = match columns { @@ -80,10 +110,6 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - UntaggedValue::DataFrame(PolarsData::Series(series)) => { - let res = series.as_ref().drop_nulls(); - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) - } _ => Err(ShellError::labeled_error( "Incorrect type", "drop nulls cannot be done with this value", @@ -91,3 +117,16 @@ fn command(mut args: CommandArgs) -> Result { )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/dtypes.rs b/crates/nu-command/src/commands/dataframe/dtypes.rs index 3ab0b1924..4ec6c5aef 100644 --- a/crates/nu-command/src/commands/dataframe/dtypes.rs +++ b/crates/nu-command/src/commands/dataframe/dtypes.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use 
nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, Value, +}; pub struct DataFrame; @@ -26,7 +29,27 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "drop column a", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe dtypes", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "column".to_string(), + vec![ + UntaggedValue::string("a").into(), + UntaggedValue::string("b").into(), + ], + ), + Column::new( + "dtype".to_string(), + vec![ + UntaggedValue::string("i64").into(), + UntaggedValue::string("i64").into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,26 +58,49 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let col_names = df + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let mut dtypes: Vec = Vec::new(); + let names: Vec = df .as_ref() .get_column_names() .iter() - .map(|v| v.to_string()) - .collect::>(); + .map(|v| { + let dtype = df + .as_ref() + .column(v) + .expect("using name from list of names from dataframe") + .dtype(); - let values = df - .as_ref() - .dtypes() - .into_iter() - .zip(col_names.into_iter()) - .map(move |(dtype, name)| { - let mut data = TaggedDictBuilder::new(tag.clone()); - data.insert_value("column", name.as_ref()); - data.insert_value("dtype", format!("{}", dtype)); + let dtype_str = format!("{}", dtype); + dtypes.push(Value { + value: dtype_str.into(), + tag: Tag::default(), + }); - data.into_value() - }); + Value { + value: v.to_string().into(), + tag: Tag::default(), + } + }) + .collect(); - Ok(OutputStream::from_stream(values)) + let names_col = Column::new("column".to_string(), names); + 
let dtypes_col = Column::new("dtype".to_string(), dtypes); + + let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], &tag.span)?; + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/dummies.rs b/crates/nu-command/src/commands/dataframe/dummies.rs index 9541db302..cd977712e 100644 --- a/crates/nu-command/src/commands/dataframe/dummies.rs +++ b/crates/nu-command/src/commands/dataframe/dummies.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, UntaggedValue, }; @@ -32,12 +32,70 @@ impl WholeStreamCommand for DataFrame { Example { description: "Create new dataframe with dummy variables from a dataframe", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a_1".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()], + ), + Column::new( + "a_3".to_string(), + vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()], + ), + Column::new( + "b_2".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()], + ), + Column::new( + "b_4".to_string(), + vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Create new dataframe with dummy variables from a series", - example: "[1 2 2 3 3] | dataframe to-series | dataframe to-dummies", - result: None, + example: "[1 2 2 3 3] | dataframe to-df | dataframe 
to-dummies", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "0_1".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + ], + ), + Column::new( + "0_2".to_string(), + vec![ + UntaggedValue::int(0).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + ], + ), + Column::new( + "0_3".to_string(), + vec![ + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(1).into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -51,7 +109,7 @@ fn command(mut args: CommandArgs) -> Result { })?; match value.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + UntaggedValue::DataFrame(df) => { let res = df.as_ref().to_dummies().map_err(|e| { parse_polars_error( &e, @@ -62,17 +120,6 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - UntaggedValue::DataFrame(PolarsData::Series(series)) => { - let res = series.as_ref().to_dummies().map_err(|e| { - parse_polars_error( - &e, - &tag.span, - Some("The only allowed column types for dummies are String or Int"), - ) - })?; - - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) - } _ => Err(ShellError::labeled_error( "Incorrect type", "dummies cannot be done with this value", @@ -80,3 +127,16 @@ fn command(mut args: CommandArgs) -> Result { )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git 
a/crates/nu-command/src/commands/dataframe/filter.rs b/crates/nu-command/src/commands/dataframe/filter.rs index 07e484800..eb9ed1a4d 100644 --- a/crates/nu-command/src/commands/dataframe/filter.rs +++ b/crates/nu-command/src/commands/dataframe/filter.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; @@ -34,13 +34,21 @@ impl WholeStreamCommand for DataFrame { vec![ Example { description: "Filter dataframe using a bool mask", - example: r#"let mask = ([$true $false] | dataframe to-series); + example: r#"let mask = ([$true $false] | dataframe to-df); [[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Filter dataframe by creating a mask from operation", - example: r#"let mask = (([5 6] | dataframe to-series) > 5); + example: r#"let mask = (([5 6] | dataframe to-df) > 5); [[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#, result: None, }, @@ -53,16 +61,16 @@ fn command(mut args: CommandArgs) -> Result { let value: Value = args.req(0)?; let series_span = value.tag.span; - let series = match value.value { - UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series), + let df = match value.value { + UntaggedValue::DataFrame(df) => Ok(df), _ => Err(ShellError::labeled_error( "Incorrect type", "can only add a series to a dataframe", value.tag.span, )), }?; - - let casted = series.as_ref().bool().map_err(|e| { + let series = df.as_series(&series_span)?; + let casted = 
series.bool().map_err(|e| { parse_polars_error( &e, &&series_span, @@ -70,12 +78,25 @@ fn command(mut args: CommandArgs) -> Result { ) })?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df .as_ref() .filter(&casted) - .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/first.rs b/crates/nu-command/src/commands/dataframe/first.rs index aae838266..0c23f4d59 100644 --- a/crates/nu-command/src/commands/dataframe/first.rs +++ b/crates/nu-command/src/commands/dataframe/first.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; use nu_source::Tagged; @@ -31,8 +34,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create new dataframe with head rows", - example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first", - result: None, + example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first 1", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -46,8 +57,21 @@ fn command(mut args: 
CommandArgs) -> Result { None => 5, }; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df.as_ref().head(Some(rows)); Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/get.rs b/crates/nu-command/src/commands/dataframe/get.rs index 6d015615f..6890c0830 100644 --- a/crates/nu-command/src/commands/dataframe/get.rs +++ b/crates/nu-command/src/commands/dataframe/get.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; pub struct DataFrame; @@ -27,7 +30,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Creates dataframe with selected columns", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe get a", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -38,7 +49,7 @@ fn command(mut args: CommandArgs) -> Result { let (col_string, col_span) = convert_columns(&columns, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df .as_ref() @@ -47,3 +58,16 @@ fn command(mut args: CommandArgs) -> 
Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/groupby.rs b/crates/nu-command/src/commands/dataframe/groupby.rs index 8844d3a4d..34363f689 100644 --- a/crates/nu-command/src/commands/dataframe/groupby.rs +++ b/crates/nu-command/src/commands/dataframe/groupby.rs @@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuGroupBy, PolarsData}, + dataframe::{FrameStruct, NuDataFrame, NuGroupBy}, Signature, SyntaxShape, UntaggedValue, Value, }; @@ -43,7 +43,7 @@ fn command(mut args: CommandArgs) -> Result { let by_columns: Vec = args.rest(0)?; let (columns_string, col_span) = convert_columns(&by_columns, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; // This is the expensive part of the groupby; to create the // groups that will be used for grouping the data in the @@ -57,7 +57,7 @@ fn command(mut args: CommandArgs) -> Result { let groups = groupby.get_groups().to_vec(); let groupby = Value { tag, - value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new( + value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(NuGroupBy::new( NuDataFrame::new(df.as_ref().clone()), columns_string, groups, diff --git a/crates/nu-command/src/commands/dataframe/join.rs b/crates/nu-command/src/commands/dataframe/join.rs index 9908faee6..82c21e7ec 100644 --- a/crates/nu-command/src/commands/dataframe/join.rs +++ b/crates/nu-command/src/commands/dataframe/join.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use 
nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; @@ -51,20 +51,50 @@ impl WholeStreamCommand for DataFrame { } fn examples(&self) -> Vec { - vec![ - Example { - description: "inner join dataframe", - example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dataframe to-df); + vec![Example { + description: "inner join dataframe", + example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dataframe to-df); $right | dataframe join $right -l [a b] -r [a b]"#, - result: None, - }, - Example { - description: "right join dataframe", - example: r#"let right = ([[a b c]; [1 2 3] [3 4 5] [5 6 7]] | dataframe to-df); - $right | dataframe join $right -l [a c] -r [a c] -t inner"#, - result: None, - }, - ] + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(5).into(), + ], + ), + Column::new( + "b".to_string(), + vec![ + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + UntaggedValue::int(6).into(), + ], + ), + Column::new( + "c".to_string(), + vec![ + UntaggedValue::int(5).into(), + UntaggedValue::int(5).into(), + UntaggedValue::int(6).into(), + ], + ), + Column::new( + "c_right".to_string(), + vec![ + UntaggedValue::int(5).into(), + UntaggedValue::int(5).into(), + UntaggedValue::int(6).into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] } } @@ -97,10 +127,10 @@ fn command(mut args: CommandArgs) -> Result { let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?; let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = match 
r_df.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => { + UntaggedValue::DataFrame(r_df) => { // Checking the column types before performing the join check_column_datatypes( df.as_ref(), @@ -173,3 +203,16 @@ fn check_column_datatypes>( Ok(()) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/last.rs b/crates/nu-command/src/commands/dataframe/last.rs index b34dae063..1838570ca 100644 --- a/crates/nu-command/src/commands/dataframe/last.rs +++ b/crates/nu-command/src/commands/dataframe/last.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; use nu_source::Tagged; pub struct DataFrame; @@ -30,8 +33,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create new dataframe with last rows", - example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last", - result: None, + example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last 1", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(3).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(4).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -45,9 +56,22 @@ fn command(mut args: CommandArgs) -> Result { None => 5, }; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df.as_ref().tail(Some(rows)); 
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/list.rs b/crates/nu-command/src/commands/dataframe/list.rs index 33c9ecd6e..56e78ce8b 100644 --- a/crates/nu-command/src/commands/dataframe/list.rs +++ b/crates/nu-command/src/commands/dataframe/list.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::PolarsData, Signature, TaggedDictBuilder, UntaggedValue}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, Value, +}; pub struct DataFrame; @@ -19,46 +22,94 @@ impl WholeStreamCommand for DataFrame { } fn run(&self, args: CommandArgs) -> Result { - let values = args + let data = args .context .scope .get_vars() .into_iter() .filter_map(|(name, value)| { - if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = &value.value { - let mut data = TaggedDictBuilder::new(value.tag.clone()); + if let UntaggedValue::DataFrame(df) = &value.value { + let rows = Value { + value: (df.as_ref().height() as i64).into(), + tag: Tag::default(), + }; - let rows = df.as_ref().height(); - let cols = df.as_ref().width(); + let cols = Value { + value: (df.as_ref().width() as i64).into(), + tag: Tag::default(), + }; - data.insert_value("name", name.as_ref()); - data.insert_value("rows", format!("{}", rows)); - data.insert_value("columns", format!("{}", cols)); + let location = match value.tag.anchor { + Some(AnchorLocation::File(name)) => name, + Some(AnchorLocation::Url(name)) => name, + Some(AnchorLocation::Source(text)) => text.slice(0..text.end).text, + None => "stream".to_string(), + }; - match value.tag.anchor { - Some(AnchorLocation::File(name)) => 
data.insert_value("location", name), - Some(AnchorLocation::Url(name)) => data.insert_value("location", name), - Some(AnchorLocation::Source(text)) => { - let loc_name = text.slice(0..text.end); - data.insert_value("location", loc_name.text) - } - None => data.insert_value("location", "stream"), - } + let location = Value { + value: location.into(), + tag: Tag::default(), + }; - Some(data.into_value()) + let name = Value { + value: name.into(), + tag: Tag::default(), + }; + + Some((name, rows, cols, location)) } else { None } }); - Ok(OutputStream::from_stream(values)) + let mut name = Column::new_empty("name".to_string()); + let mut rows = Column::new_empty("rows".to_string()); + let mut cols = Column::new_empty("columns".to_string()); + let mut location = Column::new_empty("location".to_string()); + + for tuple in data { + name.push(tuple.0); + rows.push(tuple.1); + cols.push(tuple.2); + location.push(tuple.3); + } + + let tag = args.call_info.name_tag; + let df = NuDataFrame::try_from_columns(vec![name, rows, cols, location], &tag.span)?; + Ok(OutputStream::one(df.into_value(tag))) } fn examples(&self) -> Vec { vec![Example { description: "Lists loaded dataframes in current scope", - example: "dataframe list", - result: None, + example: "let a = ([[a b];[1 2] [3 4]] | dataframe to-df); dataframe list", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("name".to_string(), vec![UntaggedValue::string("$a").into()]), + Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]), + Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]), + Column::new( + "location".to_string(), + vec![UntaggedValue::string("stream").into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use 
crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/melt.rs b/crates/nu-command/src/commands/dataframe/melt.rs index 678af7ba2..16df644f4 100644 --- a/crates/nu-command/src/commands/dataframe/melt.rs +++ b/crates/nu-command/src/commands/dataframe/melt.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::convert_columns; @@ -18,8 +21,18 @@ impl WholeStreamCommand for DataFrame { fn signature(&self) -> Signature { Signature::build("dataframe melt") - .required("id_columns", SyntaxShape::Table, "Id columns for melting") - .rest(SyntaxShape::Any, "columns used as value columns") + .required_named( + "columns", + SyntaxShape::Table, + "column names for melting", + Some('c'), + ) + .required_named( + "values", + SyntaxShape::Table, + "column names used as value columns", + Some('v'), + ) } fn run(&self, args: CommandArgs) -> Result { @@ -29,8 +42,59 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "melt dataframe", - example: "[[a b]; [a 2] [b 4] [a 6]] | dataframe to-df | dataframe melt a b", - result: None, + example: + "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dataframe to-df | dataframe melt -c [b c] -v [a d]", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "b".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(3).into(), + ], + ), + Column::new( + "c".to_string(), + vec![ + UntaggedValue::int(4).into(), + UntaggedValue::int(5).into(), + 
UntaggedValue::int(6).into(), + UntaggedValue::int(4).into(), + UntaggedValue::int(5).into(), + UntaggedValue::int(6).into(), + ], + ), + Column::new( + "variable".to_string(), + vec![ + UntaggedValue::string("a").into(), + UntaggedValue::string("a").into(), + UntaggedValue::string("a").into(), + UntaggedValue::string("d").into(), + UntaggedValue::string("d").into(), + UntaggedValue::string("d").into(), + ], + ), + Column::new( + "value".to_string(), + vec![ + UntaggedValue::string("x").into(), + UntaggedValue::string("y").into(), + UntaggedValue::string("z").into(), + UntaggedValue::string("a").into(), + UntaggedValue::string("b").into(), + UntaggedValue::string("c").into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -38,13 +102,13 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let id_col: Vec = args.req(0)?; - let val_col: Vec = args.rest(1)?; + let id_col: Vec = args.req_named("columns")?; + let val_col: Vec = args.req_named("values")?; let (id_col_string, id_col_span) = convert_columns(&id_col, &tag)?; let (val_col_string, val_col_span) = convert_columns(&val_col, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?; check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?; @@ -99,3 +163,16 @@ fn check_column_datatypes>( Ok(()) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/mod.rs b/crates/nu-command/src/commands/dataframe/mod.rs index b0fc5eadb..dcb9fcf3b 100644 
--- a/crates/nu-command/src/commands/dataframe/mod.rs +++ b/crates/nu-command/src/commands/dataframe/mod.rs @@ -26,7 +26,6 @@ pub mod take; pub mod to_csv; pub mod to_df; pub mod to_parquet; -pub mod to_series; pub(crate) mod utils; pub mod where_; pub mod with_column; @@ -59,7 +58,6 @@ pub use take::DataFrame as DataFrameTake; pub use to_csv::DataFrame as DataFrameToCsv; pub use to_df::DataFrame as DataFrameToDF; pub use to_parquet::DataFrame as DataFrameToParquet; -pub use to_series::DataFrame as DataFrameToSeries; pub use where_::DataFrame as DataFrameWhere; pub use with_column::DataFrame as DataFrameWithColumn; @@ -71,6 +69,8 @@ pub use series::DataFrameArgMin; pub use series::DataFrameArgSort; pub use series::DataFrameArgTrue; pub use series::DataFrameArgUnique; +pub use series::DataFrameConcatenate; +pub use series::DataFrameContains; pub use series::DataFrameIsDuplicated; pub use series::DataFrameIsIn; pub use series::DataFrameIsNotNull; @@ -80,9 +80,14 @@ pub use series::DataFrameNNull; pub use series::DataFrameNUnique; pub use series::DataFrameNot; pub use series::DataFrameReplace; +pub use series::DataFrameReplaceAll; pub use series::DataFrameSeriesRename; pub use series::DataFrameSet; pub use series::DataFrameSetWithIdx; pub use series::DataFrameShift; +pub use series::DataFrameStringLengths; +pub use series::DataFrameStringSlice; +pub use series::DataFrameToLowercase; +pub use series::DataFrameToUppercase; pub use series::DataFrameUnique; pub use series::DataFrameValueCounts; diff --git a/crates/nu-command/src/commands/dataframe/pivot.rs b/crates/nu-command/src/commands/dataframe/pivot.rs index 088bcc9a2..643040036 100644 --- a/crates/nu-command/src/commands/dataframe/pivot.rs +++ b/crates/nu-command/src/commands/dataframe/pivot.rs @@ -73,7 +73,7 @@ impl WholeStreamCommand for DataFrame { description: "Pivot a dataframe on b and aggregation on col c", example: "[[a b c]; [one x 1] [two y 2]] | dataframe to-df | dataframe group-by a | dataframe pivot b c 
sum", - result: None, + result: None, // No sample because there are nulls in the result dataframe }] } } diff --git a/crates/nu-command/src/commands/dataframe/sample.rs b/crates/nu-command/src/commands/dataframe/sample.rs index b59d8afec..50c02df62 100644 --- a/crates/nu-command/src/commands/dataframe/sample.rs +++ b/crates/nu-command/src/commands/dataframe/sample.rs @@ -42,13 +42,13 @@ impl WholeStreamCommand for DataFrame { Example { description: "Sample rows from dataframe", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe sample -r 1", - result: None, + result: None, // No expected value because sampling is random }, Example { description: "Shows sample row using fraction and replace", example: "[[a b]; [1 2] [3 4] [5 6]] | dataframe to-df | dataframe sample -f 0.5 -e", - result: None, + result: None, // No expected value because sampling is random }, ] } @@ -61,7 +61,7 @@ fn command(mut args: CommandArgs) -> Result { let fraction: Option> = args.get_flag("fraction")?; let replace: bool = args.has_flag("replace"); - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = match (rows, fraction) { (Some(rows), None) => df diff --git a/crates/nu-command/src/commands/dataframe/select.rs b/crates/nu-command/src/commands/dataframe/select.rs index ea86ab822..e57f22912 100644 --- a/crates/nu-command/src/commands/dataframe/select.rs +++ b/crates/nu-command/src/commands/dataframe/select.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; use super::utils::{convert_columns, parse_polars_error}; @@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Create new dataframe with column a", example: "[[a 
b]; [1 2] [3 4]] | dataframe to-df | dataframe select a", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -40,7 +51,7 @@ fn command(mut args: CommandArgs) -> Result { let (col_string, col_span) = convert_columns(&columns, &tag)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df .as_ref() @@ -49,3 +60,16 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/all_false.rs b/crates/nu-command/src/commands/dataframe/series/all_false.rs index 8d5441667..88c5b3fdb 100644 --- a/crates/nu-command/src/commands/dataframe/series/all_false.rs +++ b/crates/nu-command/src/commands/dataframe/series/all_false.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, Value, +}; pub struct DataFrame; @@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame { vec![ Example { description: "Returns true if all values are false", - example: "[$false $false $false] | dataframe to-series | dataframe all-false", - result: None, + example: "[$false $false $false] | dataframe to-df | dataframe all-false", + 
result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![UntaggedValue::boolean(true).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Checks the result from a comparison", - example: r#"let s = ([5 6 2 8] | dataframe to-series); + example: r#"let s = ([5 6 2 10] | dataframe to-df); let res = ($s > 9); $res | dataframe all-false"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "all_false".to_string(), + vec![UntaggedValue::boolean(false).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let bool = series.as_ref().bool().map_err(|e| { + let series = df.as_series(&df_tag.span)?; + let bool = series.bool().map_err(|e| { parse_polars_error::<&str>( &e, &tag.span, @@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result { tag: tag.clone(), }; - let mut data = TaggedDictBuilder::new(tag); - data.insert_value("all_false", value); + let df = NuDataFrame::try_from_columns( + vec![Column::new("all_false".to_string(), vec![value])], + &tag.span, + )?; - Ok(OutputStream::one(data.into_value())) + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/all_true.rs b/crates/nu-command/src/commands/dataframe/series/all_true.rs 
index 85e6d5ad0..46a8e3e3c 100644 --- a/crates/nu-command/src/commands/dataframe/series/all_true.rs +++ b/crates/nu-command/src/commands/dataframe/series/all_true.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, Value, +}; pub struct DataFrame; @@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame { vec![ Example { description: "Returns true if all values are true", - example: "[$true $true $true] | dataframe to-series | dataframe all-true", - result: None, + example: "[$true $true $true] | dataframe to-df | dataframe all-true", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![UntaggedValue::boolean(true).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Checks the result from a comparison", - example: r#"let s = ([5 6 2 8] | dataframe to-series); + example: r#"let s = ([5 6 2 8] | dataframe to-df); let res = ($s > 9); $res | dataframe all-true"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "all_true".to_string(), + vec![UntaggedValue::boolean(false).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let bool = series.as_ref().bool().map_err(|e| { + let series = df.as_series(&df_tag.span)?; + let bool = 
series.bool().map_err(|e| { parse_polars_error::<&str>( &e, &tag.span, @@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result { tag: tag.clone(), }; - let mut data = TaggedDictBuilder::new(tag); - data.insert_value("all_true", value); + let df = NuDataFrame::try_from_columns( + vec![Column::new("all_true".to_string(), vec![value])], + &tag.span, + )?; - Ok(OutputStream::one(data.into_value())) + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/arg_max.rs b/crates/nu-command/src/commands/dataframe/series/arg_max.rs index 626107850..050f4534f 100644 --- a/crates/nu-command/src/commands/dataframe/series/arg_max.rs +++ b/crates/nu-command/src/commands/dataframe/series/arg_max.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked}; @@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns index for max value", - example: "[1 3 2] | dataframe to-series | dataframe arg-max", - result: None, + example: "[1 3 2] | dataframe to-df | dataframe arg-max", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "arg_max".to_string(), + vec![UntaggedValue::int(1).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -36,9 +47,10 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let 
series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; - let res = series.as_ref().arg_max(); + let res = series.arg_max(); let chunked = match res { Some(index) => UInt32Chunked::new_from_slice("arg_max", &[index as u32]), @@ -46,6 +58,20 @@ fn command(mut args: CommandArgs) -> Result { }; let res = chunked.into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/arg_min.rs b/crates/nu-command/src/commands/dataframe/series/arg_min.rs index 3db3d9f70..710410bdb 100644 --- a/crates/nu-command/src/commands/dataframe/series/arg_min.rs +++ b/crates/nu-command/src/commands/dataframe/series/arg_min.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked}; @@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns index for min value", - example: "[1 3 2] | dataframe to-series | dataframe arg-min", - result: None, + example: "[1 3 2] | dataframe to-df | dataframe arg-min", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "arg_min".to_string(), + vec![UntaggedValue::int(0).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + 
.into_value(Tag::default())]), }] } } @@ -36,9 +47,9 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().arg_min(); + let res = df.as_series(&df_tag.span)?.arg_min(); let chunked = match res { Some(index) => UInt32Chunked::new_from_slice("arg_min", &[index as u32]), @@ -46,6 +57,20 @@ fn command(mut args: CommandArgs) -> Result { }; let res = chunked.into_series(); + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/arg_sort.rs b/crates/nu-command/src/commands/dataframe/series/arg_sort.rs index 7be88426b..af5cf7a48 100644 --- a/crates/nu-command/src/commands/dataframe/series/arg_sort.rs +++ b/crates/nu-command/src/commands/dataframe/series/arg_sort.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,8 +29,22 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns indexes for a sorted series", - example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-sort", - result: None, + example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-sort", + result: Some(vec![NuDataFrame::try_from_columns( 
+ vec![Column::new( + "arg_sort".to_string(), + vec![ + UntaggedValue::int(0).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(4).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -36,12 +53,24 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let reverse = args.has_flag("reverse"); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().argsort(reverse); + let mut res = df.as_series(&df_tag.span)?.argsort(reverse).into_series(); + res.rename("arg_sort"); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/arg_true.rs b/crates/nu-command/src/commands/dataframe/series/arg_true.rs index d4b5a13ed..1f278f91d 100644 --- a/crates/nu-command/src/commands/dataframe/series/arg_true.rs +++ b/crates/nu-command/src/commands/dataframe/series/arg_true.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,8 +29,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns 
indexes where values are true", - example: "[$false $true $false] | dataframe to-series | dataframe arg-true", - result: None, + example: "[$false $true $false] | dataframe to-df | dataframe arg-true", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "arg_true".to_string(), + vec![UntaggedValue::int(1).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,9 +46,10 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let bool = series.as_ref().bool().map_err(|e| { + let series = df.as_series(&df_tag.span)?; + let bool = series.bool().map_err(|e| { parse_polars_error::<&str>( &e, &tag.span, @@ -46,7 +58,21 @@ fn command(mut args: CommandArgs) -> Result { })?; let mut res = bool.arg_true().into_series(); - res.rename("int"); + res.rename("arg_true"); - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/arg_unique.rs b/crates/nu-command/src/commands/dataframe/series/arg_unique.rs index 2b096f336..dc33e14e2 100644 --- a/crates/nu-command/src/commands/dataframe/series/arg_unique.rs +++ b/crates/nu-command/src/commands/dataframe/series/arg_unique.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use 
nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,8 +29,20 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns indexes for unique values", - example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-unique", - result: None, + example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-unique", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "arg_unique".to_string(), + vec![ + UntaggedValue::int(0).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,15 +50,29 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series - .as_ref() + let mut res = df + .as_series(&df_tag.span)? .arg_unique() - .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))? 
+ .into_series(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + res.rename("arg_unique"); + + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/concatenate.rs b/crates/nu-command/src/commands/dataframe/series/concatenate.rs new file mode 100644 index 000000000..bcad8aea4 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/concatenate.rs @@ -0,0 +1,107 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, Value, +}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe concatenate" + } + + fn usage(&self) -> &str { + "[Series] Concatenates strings with other array" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe concatenate").required( + "other", + SyntaxShape::Any, + "Other array with string to be concatenated", + ) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Concatenate string", + example: r#"let other = ([za xs cd] | dataframe to-df); + [abc abc abc] | dataframe to-df | dataframe concatenate $other"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("abcza").into(), + UntaggedValue::string("abcxs").into(), + UntaggedValue::string("abccd").into(), + ], + )], + &Span::default(), + ) + 
.expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let other: Value = args.req(0)?; + + let other_df = match &other.value { + UntaggedValue::DataFrame(df) => Ok(df), + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only concatenate another series", + other.tag.span, + )), + }?; + + let other_series = other_df.as_series(&other.tag.span)?; + let other_chunked = other_series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &other.tag.span, + Some("The concatenate command can only be used with string columns"), + ) + })?; + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The concatenate command can only be used with string columns"), + ) + })?; + + let mut res = chunked.concat(&other_chunked); + + res.rename(series.name()); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/contains.rs b/crates/nu-command/src/commands/dataframe/series/contains.rs new file mode 100644 index 000000000..914615a3d --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/contains.rs @@ -0,0 +1,90 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; +use nu_source::Tagged; +use 
polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe contains" + } + + fn usage(&self) -> &str { + "[Series] Checks if a pattern is contained in a string" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe contains").required_named( + "pattern", + SyntaxShape::String, + "Regex pattern to be searched", + Some('p'), + ) + } + + fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> { + command(args) + } + + fn examples(&self) -> Vec<Example> { + vec![Example { + description: "Returns boolean indicating if pattern was found", + example: "[abc acb acb] | dataframe to-df | dataframe contains -p ab", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> { + let tag = args.call_info.name_tag.clone(); + let pattern: Tagged<String> = args.req_named("pattern")?; + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The contains command can only be used with string columns"), + ) + })?; + + let res = chunked + .contains(pattern.as_str()) + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git
a/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs b/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs index 14c2038c9..071cb879b 100644 --- a/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs +++ b/crates/nu-command/src/commands/dataframe/series/is_duplicated.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create mask indicating duplicated values", - example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-duplicated", - result: None, + example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-duplicated", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "is_duplicated".to_string(), + vec![ + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series - .as_ref() + let mut res = df + .as_series(&df_tag.span)? 
.is_duplicated() - .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))? + .into_series(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + res.rename("is_duplicated"); + + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/is_in.rs b/crates/nu-command/src/commands/dataframe/series/is_in.rs index 21452b1b1..01f2557a1 100644 --- a/crates/nu-command/src/commands/dataframe/series/is_in.rs +++ b/crates/nu-command/src/commands/dataframe/series/is_in.rs @@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuSeries, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; use polars::prelude::IntoSeries; @@ -29,9 +29,25 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Checks if elements from a series are contained in right series", - example: r#"let other = ([1 3 6] | dataframe to-series); - [5 6 6 6 8 8 8] | dataframe to-series | dataframe is-in $other"#, - result: None, + example: r#"let other = ([1 3 6] | dataframe to-df); + [5 6 6 6 8 8 8] | dataframe to-df | dataframe is-in $other"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "is_in".to_string(), + vec![ + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + 
UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -40,8 +56,8 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let value: Value = args.req(0)?; - let other = match value.value { - UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series), + let other_df = match value.value { + UntaggedValue::DataFrame(df) => Ok(df), _ => Err(ShellError::labeled_error( "Incorrect type", "can only search in a series", @@ -49,15 +65,31 @@ fn command(mut args: CommandArgs) -> Result { )), }?; - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let other = other_df.as_series(&value.tag.span)?; - let res = series - .as_ref() - .is_in(other.as_ref()) - .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let mut res = df + .as_series(&df_tag.span)? + .is_in(&other) + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))? 
+ .into_series(); + + res.rename("is_in"); + + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/is_not_null.rs b/crates/nu-command/src/commands/dataframe/series/is_not_null.rs index ad3d4cb77..6dc6c5e88 100644 --- a/crates/nu-command/src/commands/dataframe/series/is_not_null.rs +++ b/crates/nu-command/src/commands/dataframe/series/is_not_null.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create mask where values are not null", - example: r#"let s = ([5 6 0 8] | dataframe to-series); + example: r#"let s = ([5 6 0 8] | dataframe to-df); let res = ($s / $s); $res | dataframe is-not-null"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "is_not_null".to_string(), + vec![ + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(true).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = 
NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().is_not_null(); + let res = df.as_series(&df_tag.span)?.is_not_null(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/is_null.rs b/crates/nu-command/src/commands/dataframe/series/is_null.rs index 20d41d8b9..dcd8f37d1 100644 --- a/crates/nu-command/src/commands/dataframe/series/is_null.rs +++ b/crates/nu-command/src/commands/dataframe/series/is_null.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create mask where values are null", - example: r#"let s = ([5 6 0 8] | dataframe to-series); + example: r#"let s = ([5 6 0 8] | dataframe to-df); let res = ($s / $s); $res | dataframe is-null"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "is_null".to_string(), + vec![ + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame { fn command(mut 
args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().is_null(); + let res = df.as_series(&df_tag.span)?.is_null(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/is_unique.rs b/crates/nu-command/src/commands/dataframe/series/is_unique.rs index 7c0b73a96..85c539663 100644 --- a/crates/nu-command/src/commands/dataframe/series/is_unique.rs +++ b/crates/nu-command/src/commands/dataframe/series/is_unique.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Create mask indicating unique values", - example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-unique", - result: None, + example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-unique", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "is_unique".to_string(), + vec![ + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + 
UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series - .as_ref() + let mut res = df + .as_series(&df_tag.span)? .is_unique() - .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))? + .into_series(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + res.rename("is_unique"); + + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/mod.rs b/crates/nu-command/src/commands/dataframe/series/mod.rs index 7e4309721..cbd7cf60a 100644 --- a/crates/nu-command/src/commands/dataframe/series/mod.rs +++ b/crates/nu-command/src/commands/dataframe/series/mod.rs @@ -5,6 +5,8 @@ pub mod arg_min; pub mod arg_sort; pub mod arg_true; pub mod arg_unique; +pub mod concatenate; +pub mod contains; pub mod is_duplicated; pub mod is_in; pub mod is_not_null; @@ -15,9 +17,14 @@ pub mod n_unique; pub mod not; pub mod rename; pub mod replace; +pub mod replace_all; pub mod set; pub mod set_with_idx; pub mod shift; +pub mod str_lengths; +pub mod str_slice; +pub mod to_lowercase; +pub mod 
to_uppercase; pub mod unique; pub mod value_counts; @@ -28,6 +35,8 @@ pub use arg_min::DataFrame as DataFrameArgMin; pub use arg_sort::DataFrame as DataFrameArgSort; pub use arg_true::DataFrame as DataFrameArgTrue; pub use arg_unique::DataFrame as DataFrameArgUnique; +pub use concatenate::DataFrame as DataFrameConcatenate; +pub use contains::DataFrame as DataFrameContains; pub use is_duplicated::DataFrame as DataFrameIsDuplicated; pub use is_in::DataFrame as DataFrameIsIn; pub use is_not_null::DataFrame as DataFrameIsNotNull; @@ -38,8 +47,13 @@ pub use n_unique::DataFrame as DataFrameNUnique; pub use not::DataFrame as DataFrameNot; pub use rename::DataFrame as DataFrameSeriesRename; pub use replace::DataFrame as DataFrameReplace; +pub use replace_all::DataFrame as DataFrameReplaceAll; pub use set::DataFrame as DataFrameSet; pub use set_with_idx::DataFrame as DataFrameSetWithIdx; pub use shift::DataFrame as DataFrameShift; +pub use str_lengths::DataFrame as DataFrameStringLengths; +pub use str_slice::DataFrame as DataFrameStringSlice; +pub use to_lowercase::DataFrame as DataFrameToLowercase; +pub use to_uppercase::DataFrame as DataFrameToUppercase; pub use unique::DataFrame as DataFrameUnique; pub use value_counts::DataFrame as DataFrameValueCounts; diff --git a/crates/nu-command/src/commands/dataframe/series/n_null.rs b/crates/nu-command/src/commands/dataframe/series/n_null.rs index 3d3bf66ef..42cac40c2 100644 --- a/crates/nu-command/src/commands/dataframe/series/n_null.rs +++ b/crates/nu-command/src/commands/dataframe/series/n_null.rs @@ -2,7 +2,8 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, + dataframe::{Column, NuDataFrame}, + Primitive, Signature, UntaggedValue, Value, }; pub struct DataFrame; @@ -27,9 +28,17 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Counts 
null values", - example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-series); - ($s / ss) | dataframe count-null"#, - result: None, + example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-df); + ($s / $s) | dataframe count-null"#, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "count_null".to_string(), + vec![UntaggedValue::int(2).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -37,17 +46,32 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().null_count(); + let res = df.as_series(&df_tag.span)?.null_count(); let value = Value { value: UntaggedValue::Primitive(Primitive::Int(res as i64)), tag: tag.clone(), }; - let mut data = TaggedDictBuilder::new(tag); - data.insert_value("count-null", value); + let df = NuDataFrame::try_from_columns( + vec![Column::new("count_null".to_string(), vec![value])], + &tag.span, + )?; - Ok(OutputStream::one(data.into_value())) + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/n_unique.rs b/crates/nu-command/src/commands/dataframe/series/n_unique.rs index adbb22fc6..cc774f77d 100644 --- a/crates/nu-command/src/commands/dataframe/series/n_unique.rs +++ b/crates/nu-command/src/commands/dataframe/series/n_unique.rs @@ -2,7 +2,8 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - 
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value, + dataframe::{Column, NuDataFrame}, + Primitive, Signature, UntaggedValue, Value, }; pub struct DataFrame; @@ -27,8 +28,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Counts unique values", - example: "[1 1 2 2 3 3 4] | dataframe to-series | dataframe count-unique", - result: None, + example: "[1 1 2 2 3 3 4] | dataframe to-df | dataframe count-unique", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "count_unique".to_string(), + vec![UntaggedValue::int(4).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -36,10 +45,10 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series - .as_ref() + let res = df + .as_series(&df_tag.span)? 
.n_unique() .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; @@ -48,8 +57,23 @@ fn command(mut args: CommandArgs) -> Result { tag: tag.clone(), }; - let mut data = TaggedDictBuilder::new(tag); - data.insert_value("count-unique", value); + let df = NuDataFrame::try_from_columns( + vec![Column::new("count_unique".to_string(), vec![value])], + &tag.span, + )?; - Ok(OutputStream::one(data.into_value())) + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/not.rs b/crates/nu-command/src/commands/dataframe/series/not.rs index fe1ef4994..d51fe7e27 100644 --- a/crates/nu-command/src/commands/dataframe/series/not.rs +++ b/crates/nu-command/src/commands/dataframe/series/not.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; use polars::prelude::IntoSeries; use std::ops::Not; @@ -27,8 +30,20 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Inverts boolean mask", - example: "[$true $false $true] | dataframe to-series | dataframe not", - result: None, + example: "[$true $false $true] | dataframe to-df | dataframe not", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::boolean(false).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -36,9 +51,10 @@ impl 
WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; - let bool = series.as_ref().bool().map_err(|e| { + let bool = series.bool().map_err(|e| { parse_polars_error::<&str>( &e, &tag.span, @@ -48,8 +64,19 @@ fn command(mut args: CommandArgs) -> Result { let res = bool.not(); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/rename.rs b/crates/nu-command/src/commands/dataframe/series/rename.rs index b5804f90d..aa74d6d86 100644 --- a/crates/nu-command/src/commands/dataframe/series/rename.rs +++ b/crates/nu-command/src/commands/dataframe/series/rename.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; use nu_source::Tagged; pub struct DataFrame; @@ -30,8 +33,21 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Renames a series", - example: "[5 6 7 8] | dataframe to-series | dataframe rename-series new_name", - result: None, + example: "[5 6 7 8] | dataframe to-df | dataframe rename new_name", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + 
"new_name".to_string(), + vec![ + UntaggedValue::int(5).into(), + UntaggedValue::int(6).into(), + UntaggedValue::int(7).into(), + UntaggedValue::int(8).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -40,9 +56,25 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let name: Tagged = args.req(0)?; - let mut series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - series.as_mut().rename(name.item.as_ref()); + let mut series = df.as_series(&df_tag.span)?; - Ok(OutputStream::one(series.into_value(tag))) + series.rename(name.item.as_ref()); + + let df = NuDataFrame::try_from_series(vec![series], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/replace.rs b/crates/nu-command/src/commands/dataframe/series/replace.rs index b59d957fb..fe2e89a71 100644 --- a/crates/nu-command/src/commands/dataframe/series/replace.rs +++ b/crates/nu-command/src/commands/dataframe/series/replace.rs @@ -1,8 +1,11 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape}; -use nu_source::Tagged; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; +use nu_source::{Span, Tagged}; use polars::prelude::IntoSeries; pub struct DataFrame; @@ -39,8 +42,20 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Replaces string", - example: "[abc 
abc abc] | dataframe to-series | dataframe replace -p ab -r AB", - result: None, + example: "[abc abc abc] | dataframe to-df | dataframe replace -p ab -r AB", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("ABc").into(), + UntaggedValue::string("ABc").into(), + UntaggedValue::string("ABc").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -50,23 +65,36 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> { let pattern: Tagged<String> = args.req_named("pattern")?; let replace: Tagged<String> = args.req_named("replace")?; - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let chunked = series.as_ref().utf8().map_err(|e| { + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { parse_polars_error::<&str>( &e, - &tag.span, + &df_tag.span, Some("The replace command can only be used with string columns"), ) })?; - let res = chunked - .as_ref() + let mut res = chunked .replace(pattern.as_str(), replace.as_str()) .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + res.rename(series.name()); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/replace_all.rs b/crates/nu-command/src/commands/dataframe/series/replace_all.rs new file mode 100644 index
000000000..cd38bc7b8 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/replace_all.rs @@ -0,0 +1,100 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; +use nu_source::Tagged; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe replace-all" + } + + fn usage(&self) -> &str { + "[Series] Replace all (sub)strings by a regex pattern" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe replace") + .required_named( + "pattern", + SyntaxShape::String, + "Regex pattern to be matched", + Some('p'), + ) + .required_named( + "replace", + SyntaxShape::String, + "replacing string", + Some('r'), + ) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Replaces string", + example: "[abac abac abac] | dataframe to-df | dataframe replace-all -p a -r A", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("AbAc").into(), + UntaggedValue::string("AbAc").into(), + UntaggedValue::string("AbAc").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let pattern: Tagged = args.req_named("pattern")?; + let replace: Tagged = args.req_named("replace")?; + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The replace command can only be used with string columns"), + ) + })?; 
+ + let mut res = chunked + .replace_all(pattern.as_str(), replace.as_str()) + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + res.rename(series.name()); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/set.rs b/crates/nu-command/src/commands/dataframe/series/set.rs index 6a21b0440..68b13a125 100644 --- a/crates/nu-command/src/commands/dataframe/series/set.rs +++ b/crates/nu-command/src/commands/dataframe/series/set.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Primitive, Signature, SyntaxShape, UntaggedValue, Value, +}; use polars::prelude::{ChunkSet, DataType, IntoSeries}; pub struct DataFrame; @@ -33,10 +36,24 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Shifts the values by a given period", - example: r#"let s = ([1 2 2 3 3] | dataframe to-series | dataframe shift 2); + example: r#"let s = ([1 2 2 3 3] | dataframe to-df | dataframe shift 2); let mask = ($s | dataframe is-null); $s | dataframe set 0 --mask $mask"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(0).into(), + UntaggedValue::int(0).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(2).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test 
should not fail") + .into_value(Tag::default())]), }] } } @@ -46,20 +63,21 @@ fn command(mut args: CommandArgs) -> Result { let value: Value = args.req(0)?; let mask: Value = args.req_named("mask")?; - let bool_mask = match &mask.value { - UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => { - match series.as_ref().dtype() { - DataType::Boolean => series - .as_ref() - .bool() - .map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)), - _ => Err(ShellError::labeled_error( - "Incorrect type", - "can only use bool series as mask", - value.tag.span, - )), - } - } + let mask_df = match &mask.value { + UntaggedValue::DataFrame(df) => Ok(df), + _ => Err(ShellError::labeled_error( + "Incorrect type", + "can only use a series as mask", + value.tag.span, + )), + }?; + + let mask_series = mask_df.as_series(&mask.tag.span)?; + + let bool_mask = match mask_series.dtype() { + DataType::Boolean => mask_series + .bool() + .map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)), _ => Err(ShellError::labeled_error( "Incorrect type", "can only use bool series as mask", @@ -67,11 +85,12 @@ fn command(mut args: CommandArgs) -> Result { )), }?; - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; match &value.value { UntaggedValue::Primitive(Primitive::Int(val)) => { - let chunked = series.as_ref().i64().map_err(|e| { + let chunked = series.i64().map_err(|e| { parse_polars_error::<&str>( &e, &value.tag.span, @@ -83,10 +102,8 @@ fn command(mut args: CommandArgs) -> Result { .set(bool_mask, Some(*val)) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } 
UntaggedValue::Primitive(Primitive::Decimal(val)) => { let chunked = series.as_ref().f64().map_err(|e| { @@ -107,10 +124,8 @@ fn command(mut args: CommandArgs) -> Result { ) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } UntaggedValue::Primitive(Primitive::String(val)) => { let chunked = series.as_ref().utf8().map_err(|e| { @@ -128,18 +143,29 @@ fn command(mut args: CommandArgs) -> Result { let mut res = res.into_series(); res.rename("string"); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } _ => Err(ShellError::labeled_error( "Incorrect type", format!( "this value cannot be set in a series of type '{}'", - series.as_ref().dtype() + series.dtype() ), value.tag.span, )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs b/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs index 6dea8cdf4..ff582d5cd 100644 --- a/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs +++ b/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs @@ -1,7 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Primitive, Signature, SyntaxShape, 
UntaggedValue, Value, +}; use polars::prelude::{ChunkSet, DataType, IntoSeries}; pub struct DataFrame; @@ -33,10 +36,25 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Set value in selected rows from series", - example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series); - let indices = ([0 2] | dataframe to-series); + example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df); + let indices = ([0 2] | dataframe to-df); $series | dataframe set-with-idx 6 -i $indices"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(6).into(), + UntaggedValue::int(1).into(), + UntaggedValue::int(6).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + UntaggedValue::int(3).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -47,7 +65,7 @@ fn command(mut args: CommandArgs) -> Result { let indices: Value = args.req_named("indices")?; let indices = match &indices.value { - UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => Ok(series), + UntaggedValue::DataFrame(df) => Ok(df), _ => Err(ShellError::labeled_error( "Incorrect type", "can only use a series for set command", @@ -55,7 +73,9 @@ fn command(mut args: CommandArgs) -> Result { )), }?; - let casted = match indices.as_ref().dtype() { + let indices = indices.as_series(&value.tag.span)?; + + let casted = match indices.dtype() { DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices .as_ref() .cast_with_dtype(&DataType::UInt32) @@ -75,11 +95,12 @@ fn command(mut args: CommandArgs) -> Result { .into_iter() .filter_map(|val| val.map(|v| v as usize)); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = df.as_series(&df_tag.span)?; 
match &value.value { UntaggedValue::Primitive(Primitive::Int(val)) => { - let chunked = series.as_ref().i64().map_err(|e| { + let chunked = series.i64().map_err(|e| { parse_polars_error::<&str>( &e, &value.tag.span, @@ -91,10 +112,8 @@ fn command(mut args: CommandArgs) -> Result { .set_at_idx(indices, Some(*val)) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } UntaggedValue::Primitive(Primitive::Decimal(val)) => { let chunked = series.as_ref().f64().map_err(|e| { @@ -115,10 +134,8 @@ fn command(mut args: CommandArgs) -> Result { ) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } UntaggedValue::Primitive(Primitive::String(val)) => { let chunked = series.as_ref().utf8().map_err(|e| { @@ -136,10 +153,8 @@ fn command(mut args: CommandArgs) -> Result { let mut res = res.into_series(); res.rename("string"); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } _ => Err(ShellError::labeled_error( "Incorrect type", @@ -151,3 +166,16 @@ fn command(mut args: CommandArgs) -> Result { )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/shift.rs b/crates/nu-command/src/commands/dataframe/series/shift.rs index 
b02575bc9..6aaa6b60b 100644 --- a/crates/nu-command/src/commands/dataframe/series/shift.rs +++ b/crates/nu-command/src/commands/dataframe/series/shift.rs @@ -1,9 +1,8 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape}; +use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape}; use nu_source::Tagged; -use polars::prelude::IntoSeries; pub struct DataFrame; @@ -27,7 +26,7 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Shifts the values by a given period", - example: "[1 2 2 3 3] | dataframe to-series | dataframe shift 2", + example: "[1 2 2 3 3] | dataframe to-df | dataframe shift 2", result: None, }] } @@ -37,12 +36,10 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let period: Tagged = args.req(0)?; - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series.as_ref().shift(period.item); + let res = df.as_series(&df_tag.span)?.shift(period.item); - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) } diff --git a/crates/nu-command/src/commands/dataframe/series/str_lengths.rs b/crates/nu-command/src/commands/dataframe/series/str_lengths.rs new file mode 100644 index 000000000..a2d31e160 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/str_lengths.rs @@ -0,0 +1,81 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn 
name(&self) -> &str { + "dataframe str-lengths" + } + + fn usage(&self) -> &str { + "[Series] Get lengths of all strings" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe str-lengths") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Returns string lengths", + example: "[a ab abc] | dataframe to-df | dataframe str-lengths", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(3).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The str-lengths command can only be used with string columns"), + ) + })?; + + let res = chunked.as_ref().str_lengths(); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/str_slice.rs b/crates/nu-command/src/commands/dataframe/series/str_slice.rs new file mode 100644 index 000000000..2689348c3 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/str_slice.rs @@ -0,0 +1,92 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use 
nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; +use nu_source::Tagged; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe str-slice" + } + + fn usage(&self) -> &str { + "[Series] Slices the string from the start position until the selected length" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe replace") + .required_named("start", SyntaxShape::Int, "start of slice", Some('s')) + .named("length", SyntaxShape::Int, "optional length", Some('l')) + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Creates slices from the strings", + example: "[abcded abc321 abc123] | dataframe to-df | dataframe str-slice -s 1 -l 2", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("bc").into(), + UntaggedValue::string("bc").into(), + UntaggedValue::string("bc").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + let start: Tagged = args.req_named("start")?; + + let length: Option> = args.get_flag("length")?; + let length = length.map(|v| v.item as u64); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The str-slice command can only be used with string columns"), + ) + })?; + + let mut res = chunked + .str_slice(start.item, length) + .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; + + res.rename(series.name()); + + let df = 
NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/to_lowercase.rs b/crates/nu-command/src/commands/dataframe/series/to_lowercase.rs new file mode 100644 index 000000000..7ea88f3b1 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/to_lowercase.rs @@ -0,0 +1,82 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe to-lowercase" + } + + fn usage(&self) -> &str { + "[Series] Lowercase the strings in the column" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe to-lowercase") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to lowercase", + example: "[Abc aBc abC] | dataframe to-df | dataframe to-lowercase", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("abc").into(), + UntaggedValue::string("abc").into(), + UntaggedValue::string("abc").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = 
df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The to-lowercase command can only be used with string columns"), + ) + })?; + + let mut res = chunked.to_lowercase(); + res.rename(series.name()); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/to_uppercase.rs b/crates/nu-command/src/commands/dataframe/series/to_uppercase.rs new file mode 100644 index 000000000..ab0665572 --- /dev/null +++ b/crates/nu-command/src/commands/dataframe/series/to_uppercase.rs @@ -0,0 +1,82 @@ +use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; +use nu_engine::WholeStreamCommand; +use nu_errors::ShellError; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; +use polars::prelude::IntoSeries; + +pub struct DataFrame; + +impl WholeStreamCommand for DataFrame { + fn name(&self) -> &str { + "dataframe to-uppercase" + } + + fn usage(&self) -> &str { + "[Series] Uppercase the strings in the column" + } + + fn signature(&self) -> Signature { + Signature::build("dataframe to-uppercase") + } + + fn run(&self, args: CommandArgs) -> Result { + command(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Modifies strings to uppercase", + example: "[Abc aBc abC] | dataframe to-df | dataframe to-uppercase", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("ABC").into(), + UntaggedValue::string("ABC").into(), + UntaggedValue::string("ABC").into(), + ], + )], + &Span::default(), + ) + 
.expect("simple df for test should not fail") + .into_value(Tag::default())]), + }] + } +} + +fn command(mut args: CommandArgs) -> Result { + let tag = args.call_info.name_tag.clone(); + + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + + let series = df.as_series(&df_tag.span)?; + let chunked = series.utf8().map_err(|e| { + parse_polars_error::<&str>( + &e, + &df_tag.span, + Some("The to-uppercase command can only be used with string columns"), + ) + })?; + + let mut res = chunked.to_uppercase(); + res.rename(series.name()); + + let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/series/unique.rs b/crates/nu-command/src/commands/dataframe/series/unique.rs index ebf1e8488..eac8994b5 100644 --- a/crates/nu-command/src/commands/dataframe/series/unique.rs +++ b/crates/nu-command/src/commands/dataframe/series/unique.rs @@ -1,8 +1,10 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*}; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature}; -use polars::prelude::IntoSeries; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; pub struct DataFrame; @@ -26,8 +28,16 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Returns unique values from a series", - example: "[1 2 2 3 3] | dataframe to-series | dataframe unique", - result: None, + example: "[2 2 2 2 2] | dataframe to-df | dataframe unique", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + 
vec![UntaggedValue::int(2).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -35,15 +45,26 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let res = series - .as_ref() + let res = df + .as_series(&df_tag.span)? .unique() .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; - Ok(OutputStream::one(NuSeries::series_to_value( - res.into_series(), - tag, - ))) + let df = NuDataFrame::try_from_series(vec![res], &tag.span)?; + Ok(OutputStream::one(df.into_value(df_tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/series/value_counts.rs b/crates/nu-command/src/commands/dataframe/series/value_counts.rs index 843acff1a..a992db6a5 100644 --- a/crates/nu-command/src/commands/dataframe/series/value_counts.rs +++ b/crates/nu-command/src/commands/dataframe/series/value_counts.rs @@ -2,8 +2,8 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuSeries}, - Signature, + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, }; use crate::commands::dataframe::utils::parse_polars_error; @@ -30,8 +30,22 @@ impl WholeStreamCommand for DataFrame { fn examples(&self) -> Vec { vec![Example { description: "Calculates value counts", - example: "[5 5 6 6] | dataframe to-series | dataframe value-counts", - result: None, + example: "[5 5 5 5 6 6] | dataframe to-df | dataframe value-counts", + result: 
Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![UntaggedValue::int(5).into(), UntaggedValue::int(6).into()], + ), + Column::new( + "counts".to_string(), + vec![UntaggedValue::int(4).into(), UntaggedValue::int(2).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -39,12 +53,27 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?; + let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let df = series - .as_ref() + let df_new = df + .as_series(&df_tag.span)? .value_counts() .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?; - Ok(OutputStream::one(NuDataFrame::dataframe_to_value(df, tag))) + Ok(OutputStream::one(NuDataFrame::dataframe_to_value( + df_new, tag, + ))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/shape.rs b/crates/nu-command/src/commands/dataframe/shape.rs index 111c6f56b..29424b034 100644 --- a/crates/nu-command/src/commands/dataframe/shape.rs +++ b/crates/nu-command/src/commands/dataframe/shape.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, Value, +}; pub struct DataFrame; @@ -26,7 +29,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Shows row and column shape", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe shape", - result: None, + result: 
Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]), + Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -34,14 +45,34 @@ impl WholeStreamCommand for DataFrame { fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; - let rows = df.as_ref().height(); - let cols = df.as_ref().width(); + let rows = Value { + value: (df.as_ref().height() as i64).into(), + tag: Tag::default(), + }; - let mut data = TaggedDictBuilder::new(&tag); - data.insert_value("rows", format!("{}", rows)); - data.insert_value("columns", format!("{}", cols)); + let cols = Value { + value: (df.as_ref().width() as i64).into(), + tag: Tag::default(), + }; - Ok(OutputStream::one(data.into_value())) + let rows_col = Column::new("rows".to_string(), vec![rows]); + let cols_col = Column::new("columns".to_string(), vec![cols]); + + let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], &tag.span)?; + Ok(OutputStream::one(df.into_value(tag))) +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } } diff --git a/crates/nu-command/src/commands/dataframe/show.rs b/crates/nu-command/src/commands/dataframe/show.rs index 93cc47e1d..6a21d24da 100644 --- a/crates/nu-command/src/commands/dataframe/show.rs +++ b/crates/nu-command/src/commands/dataframe/show.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let rows: Option> = args.get_flag("n_rows")?; let tail: bool = args.has_flag("tail"); - let df = 
NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let rows = rows.map(|v| v.item); let values = if tail { df.tail(rows)? } else { df.head(rows)? }; diff --git a/crates/nu-command/src/commands/dataframe/slice.rs b/crates/nu-command/src/commands/dataframe/slice.rs index 8734fd339..5a7b5c6f2 100644 --- a/crates/nu-command/src/commands/dataframe/slice.rs +++ b/crates/nu-command/src/commands/dataframe/slice.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, SyntaxShape, UntaggedValue, +}; use nu_source::Tagged; pub struct DataFrame; @@ -29,7 +32,15 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Create new dataframe from a slice of the rows", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe slice 0 1", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -40,8 +51,21 @@ fn command(mut args: CommandArgs) -> Result { let offset: Tagged = args.req(0)?; let size: Tagged = args.req(1)?; - let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let res = df.as_ref().slice(offset.item as i64, size.item); Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} 
diff --git a/crates/nu-command/src/commands/dataframe/sort.rs b/crates/nu-command/src/commands/dataframe/sort.rs index dd9b14b64..e58aaf824 100644 --- a/crates/nu-command/src/commands/dataframe/sort.rs +++ b/crates/nu-command/src/commands/dataframe/sort.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuSeries, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; @@ -33,12 +33,39 @@ impl WholeStreamCommand for DataFrame { Example { description: "Create new sorted dataframe", example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort a", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Create new sorted series", - example: "[3 4 1 2] | dataframe to-series | dataframe sort", - result: None, + example: "[3 4 1 2] | dataframe to-df | dataframe sort", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(2).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(4).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -53,31 +80,38 @@ fn command(mut args: CommandArgs) -> Result { let reverse = args.has_flag("reverse"); - match value.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { - let columns: Vec = args.rest(0)?; - - if !columns.is_empty() { - let (col_string, col_span) = convert_columns(&columns, &tag)?; + match &value.value { + 
UntaggedValue::DataFrame(df) => { + if df.is_series() { + let columns = df.as_ref().get_column_names(); let res = df .as_ref() - .sort(&col_string, reverse) - .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; + .sort(columns, reverse) + .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?; Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } else { - Err(ShellError::labeled_error( - "Missing columns", - "missing column name to perform sort", - &tag.span, - )) + let columns: Vec = args.rest(0)?; + + if !columns.is_empty() { + let (col_string, col_span) = convert_columns(&columns, &tag)?; + + let res = df + .as_ref() + .sort(&col_string, reverse) + .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?; + + Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) + } else { + Err(ShellError::labeled_error( + "Missing columns", + "missing column name to perform sort", + &tag.span, + )) + } } } - UntaggedValue::DataFrame(PolarsData::Series(series)) => { - let res = series.as_ref().sort(reverse); - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) - } _ => Err(ShellError::labeled_error( "Incorrect type", "sort cannot be done with this value", @@ -85,3 +119,16 @@ fn command(mut args: CommandArgs) -> Result { )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/take.rs b/crates/nu-command/src/commands/dataframe/take.rs index e90f6b948..1dcf2a2d5 100644 --- a/crates/nu-command/src/commands/dataframe/take.rs +++ b/crates/nu-command/src/commands/dataframe/take.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, NuSeries, PolarsData}, + dataframe::{Column, 
NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; use polars::prelude::DataType; @@ -37,16 +37,38 @@ impl WholeStreamCommand for DataFrame { Example { description: "Takes selected rows from dataframe", example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dataframe to-df); - let indices = ([0 2] | dataframe to-series); + let indices = ([0 2] | dataframe to-df); $df | dataframe take $indices"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(4).into(), UntaggedValue::int(4).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, Example { description: "Takes selected rows from series", - example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series); - let indices = ([0 2] | dataframe to-series); + example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df); + let indices = ([0 2] | dataframe to-df); $series | dataframe take $indices"#, - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![UntaggedValue::int(4).into(), UntaggedValue::int(5).into()], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }, ] } @@ -56,8 +78,8 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let value: Value = args.req(0)?; - let series = match &value.value { - UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series), + let df = match &value.value { + UntaggedValue::DataFrame(df) => Ok(df), _ => Err(ShellError::labeled_error( "Incorrect type", "can only use a series for take command", @@ -65,7 +87,9 @@ fn command(mut args: CommandArgs) -> Result { )), }?; - let casted = match series.as_ref().dtype() { + let series = df.as_series(&value.tag.span)?; + + 
let casted = match series.dtype() { DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series .as_ref() .cast_with_dtype(&DataType::UInt32) @@ -88,16 +112,11 @@ fn command(mut args: CommandArgs) -> Result { })?; match value.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => { + UntaggedValue::DataFrame(df) => { let res = df.as_ref().take(indices); Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag))) } - UntaggedValue::DataFrame(PolarsData::Series(series)) => { - let res = series.as_ref().take(indices); - - Ok(OutputStream::one(NuSeries::series_to_value(res, tag))) - } _ => Err(ShellError::labeled_error( "No dataframe or series in stream", "no dataframe or series found in input stream", @@ -105,3 +124,16 @@ fn command(mut args: CommandArgs) -> Result { )), } } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/to_csv.rs b/crates/nu-command/src/commands/dataframe/to_csv.rs index b142dc7d7..65aa2e881 100644 --- a/crates/nu-command/src/commands/dataframe/to_csv.rs +++ b/crates/nu-command/src/commands/dataframe/to_csv.rs @@ -64,7 +64,7 @@ fn command(mut args: CommandArgs) -> Result { let delimiter: Option> = args.get_flag("delimiter")?; let no_header: bool = args.has_flag("no_header"); - let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let mut file = File::create(&file_name.item).map_err(|e| { ShellError::labeled_error( diff --git a/crates/nu-command/src/commands/dataframe/to_df.rs b/crates/nu-command/src/commands/dataframe/to_df.rs index 5a1268430..cb85d6b08 100644 --- a/crates/nu-command/src/commands/dataframe/to_df.rs +++ 
b/crates/nu-command/src/commands/dataframe/to_df.rs @@ -1,7 +1,10 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; -use nu_protocol::{dataframe::NuDataFrame, Signature}; +use nu_protocol::{ + dataframe::{Column, NuDataFrame}, + Signature, UntaggedValue, +}; pub struct DataFrame; @@ -11,7 +14,7 @@ impl WholeStreamCommand for DataFrame { } fn usage(&self) -> &str { - "Converts a pipelined Table or List into a polars dataframe" + "Converts a List, Table or Dictionary into a polars dataframe" } fn signature(&self) -> Signature { @@ -27,10 +30,108 @@ impl WholeStreamCommand for DataFrame { } fn examples(&self) -> Vec { - vec![Example { - description: "Takes an input stream and converts it to a polars dataframe", - example: "[[a b];[1 2] [3 4]] | dataframe to-df", - result: None, - }] + vec![ + Example { + description: "Takes a dictionary and creates a dataframe", + example: "[[a b];[1 2] [3 4]] | dataframe to-df", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "a".to_string(), + vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()], + ), + Column::new( + "b".to_string(), + vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + Example { + description: "Takes a list of tables and creates a dataframe", + example: "[[1 2 a] [3 4 b] [5 6 c]] | dataframe to-df", + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new( + "0".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + UntaggedValue::int(5).into(), + ], + ), + Column::new( + "1".to_string(), + vec![ + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + UntaggedValue::int(6).into(), + ], + ), + Column::new( + "2".to_string(), + vec![ + UntaggedValue::string("a").into(), + UntaggedValue::string("b").into(), + UntaggedValue::string("c").into(), + ], + 
), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + Example { + description: "Takes a list and creates a dataframe", + example: "[a b c] | dataframe to-df", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::string("a").into(), + UntaggedValue::string("b").into(), + UntaggedValue::string("c").into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + Example { + description: "Takes a list of booleans and creates a dataframe", + example: "[$true $true $false] | dataframe to-df", + result: Some(vec![NuDataFrame::try_from_columns( + vec![Column::new( + "0".to_string(), + vec![ + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(true).into(), + UntaggedValue::boolean(false).into(), + ], + )], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), + }, + ] + } +} + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) } } diff --git a/crates/nu-command/src/commands/dataframe/to_parquet.rs b/crates/nu-command/src/commands/dataframe/to_parquet.rs index 4982c86a0..e1cf1787b 100644 --- a/crates/nu-command/src/commands/dataframe/to_parquet.rs +++ b/crates/nu-command/src/commands/dataframe/to_parquet.rs @@ -48,7 +48,7 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let file_name: Tagged = args.req(0)?; - let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let file = File::create(&file_name.item).map_err(|e| { ShellError::labeled_error( diff --git 
a/crates/nu-command/src/commands/dataframe/to_series.rs b/crates/nu-command/src/commands/dataframe/to_series.rs deleted file mode 100644 index 060afccb4..000000000 --- a/crates/nu-command/src/commands/dataframe/to_series.rs +++ /dev/null @@ -1,44 +0,0 @@ -use crate::prelude::*; -use nu_engine::WholeStreamCommand; -use nu_errors::ShellError; -use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape}; -use nu_source::Tagged; - -pub struct DataFrame; - -impl WholeStreamCommand for DataFrame { - fn name(&self) -> &str { - "dataframe to-series" - } - - fn usage(&self) -> &str { - "Converts a pipelined List into a polars series" - } - - fn signature(&self) -> Signature { - Signature::build("dataframe to-series").optional( - "name", - SyntaxShape::String, - "Optional series name", - ) - } - - fn run(&self, args: CommandArgs) -> Result { - let tag = args.call_info.name_tag.clone(); - - let name: Option> = args.opt(0)?; - let name = name.map(|v| v.item); - - let series = NuSeries::try_from_iter(args.input, name)?; - - Ok(InputStream::one(series.into_value(tag))) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Takes an input stream and converts it to a polars series", - example: "[1 2 3 4] | dataframe to-series my-col", - result: None, - }] - } -} diff --git a/crates/nu-command/src/commands/dataframe/where_.rs b/crates/nu-command/src/commands/dataframe/where_.rs index 96f6ac71c..ade8fcc39 100644 --- a/crates/nu-command/src/commands/dataframe/where_.rs +++ b/crates/nu-command/src/commands/dataframe/where_.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::{evaluate_baseline_expr, WholeStreamCommand}; use nu_errors::ShellError; use nu_protocol::{ - dataframe::NuDataFrame, + dataframe::{Column, NuDataFrame}, hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression}, Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value, }; @@ -37,7 +37,15 @@ impl WholeStreamCommand for DataFrame { 
vec![Example { description: "Filter dataframe based on column a", example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe where a == 1", - result: None, + result: Some(vec![NuDataFrame::try_from_columns( + vec![ + Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]), + Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -143,7 +151,7 @@ fn filter_dataframe( }?; let span = args.call_info.name_tag.span; - let df = NuDataFrame::try_from_stream(&mut args.input, &span)?; + let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &span)?; let col = df .as_ref() @@ -214,3 +222,16 @@ fn filter_dataframe( args.call_info.name_tag, ))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/dataframe/with_column.rs b/crates/nu-command/src/commands/dataframe/with_column.rs index a0fdef26b..3b6d5e8a8 100644 --- a/crates/nu-command/src/commands/dataframe/with_column.rs +++ b/crates/nu-command/src/commands/dataframe/with_column.rs @@ -2,7 +2,7 @@ use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{ - dataframe::{NuDataFrame, PolarsData}, + dataframe::{Column, NuDataFrame}, Signature, SyntaxShape, UntaggedValue, Value, }; use nu_source::Tagged; @@ -33,8 +33,35 @@ impl WholeStreamCommand for DataFrame { vec![Example { description: "Adds a series to the dataframe", example: - "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-series) --name c", - result: None, + "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-df) --name c", + result: Some(vec![NuDataFrame::try_from_columns( + 
vec![ + Column::new( + "a".to_string(), + vec![ + UntaggedValue::int(1).into(), + UntaggedValue::int(3).into(), + ], + ), + Column::new( + "b".to_string(), + vec![ + UntaggedValue::int(2).into(), + UntaggedValue::int(4).into(), + ], + ), + Column::new( + "c".to_string(), + vec![ + UntaggedValue::int(5).into(), + UntaggedValue::int(6).into(), + ], + ), + ], + &Span::default(), + ) + .expect("simple df for test should not fail") + .into_value(Tag::default())]), }] } } @@ -44,8 +71,8 @@ fn command(mut args: CommandArgs) -> Result { let value: Value = args.req(0)?; let name: Tagged = args.req_named("name")?; - let mut series = match value.value { - UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series), + let df = match value.value { + UntaggedValue::DataFrame(df) => Ok(df), _ => Err(ShellError::labeled_error( "Incorrect type", "can only add a series to a dataframe", @@ -53,9 +80,11 @@ fn command(mut args: CommandArgs) -> Result { )), }?; - let series = series.as_mut().rename(name.item.as_ref()).clone(); + let mut series = df.as_series(&value.tag.span)?; - let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; + let series = series.rename(name.item.as_ref()).clone(); + + let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; df.as_mut() .with_column(series) @@ -63,3 +92,16 @@ fn command(mut args: CommandArgs) -> Result { Ok(OutputStream::one(df.into_value(tag))) } + +#[cfg(test)] +mod tests { + use super::DataFrame; + use super::ShellError; + + #[test] + fn examples_work_as_expected() -> Result<(), ShellError> { + use crate::examples::test_dataframe as test_examples; + + test_examples(DataFrame {}) + } +} diff --git a/crates/nu-command/src/commands/filters/first.rs b/crates/nu-command/src/commands/filters/first.rs index d86378017..c592fda64 100644 --- a/crates/nu-command/src/commands/filters/first.rs +++ b/crates/nu-command/src/commands/filters/first.rs @@ -126,11 +126,13 @@ fn first(args: CommandArgs) -> Result { tag, 
)), #[cfg(all(not(target_arch = "wasm32"), feature = "dataframe"))] - UntaggedValue::DataFrame(_) => Err(ShellError::labeled_error( - "unsure how to handled UntaggedValue::DataFrame", - "found dataframe", - tag, - )), + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { + Err(ShellError::labeled_error( + "unsure how to handled dataframe struct", + "found dataframe", + tag, + )) + } }, None => Ok(input_peek.take(rows_desired).into_output_stream()), } diff --git a/crates/nu-command/src/commands/filters/uniq.rs b/crates/nu-command/src/commands/filters/uniq.rs index ace3c7490..16cf98a76 100644 --- a/crates/nu-command/src/commands/filters/uniq.rs +++ b/crates/nu-command/src/commands/filters/uniq.rs @@ -166,7 +166,7 @@ fn uniq(args: CommandArgs) -> Result { )) } #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => { + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { return Err(ShellError::labeled_error( "uniq -c cannot operate on data structs", "source", diff --git a/crates/nu-command/src/commands/formats/to/json.rs b/crates/nu-command/src/commands/formats/to/json.rs index 758391242..f63288e99 100644 --- a/crates/nu-command/src/commands/formats/to/json.rs +++ b/crates/nu-command/src/commands/formats/to/json.rs @@ -115,7 +115,7 @@ pub fn value_to_json_value(v: &Value) -> Result { serde_json::Value::Null } #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => serde_json::Value::Null, + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_json::Value::Null, UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array( b.iter() .map(|x| { diff --git a/crates/nu-command/src/commands/formats/to/toml.rs b/crates/nu-command/src/commands/formats/to/toml.rs index ab16f6d03..6423af75d 100644 --- a/crates/nu-command/src/commands/formats/to/toml.rs +++ b/crates/nu-command/src/commands/formats/to/toml.rs @@ -74,7 +74,9 @@ fn helper(v: &Value) -> Result { UntaggedValue::Error(e) => return Err(e.clone()), 
UntaggedValue::Block(_) => toml::Value::String("".to_string()), #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => toml::Value::String("".to_string()), + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { + toml::Value::String("".to_string()) + } UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("".to_string()), UntaggedValue::Primitive(Primitive::Binary(b)) => { toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect()) diff --git a/crates/nu-command/src/commands/formats/to/yaml.rs b/crates/nu-command/src/commands/formats/to/yaml.rs index 7ea6270db..c6a23a4d2 100644 --- a/crates/nu-command/src/commands/formats/to/yaml.rs +++ b/crates/nu-command/src/commands/formats/to/yaml.rs @@ -96,7 +96,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result { serde_yaml::Value::Null } #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => serde_yaml::Value::Null, + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_yaml::Value::Null, UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence( b.iter() .map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x))) diff --git a/crates/nu-command/src/commands/mod.rs b/crates/nu-command/src/commands/mod.rs index 329c3dc50..00ee4a16b 100644 --- a/crates/nu-command/src/commands/mod.rs +++ b/crates/nu-command/src/commands/mod.rs @@ -27,15 +27,16 @@ pub use core_commands::*; pub use dataframe::{ DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax, DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn, - DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, - DataFrameFilter, DataFrameFirst, DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, - DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, - DataFrameLast, DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, 
DataFrameNot, - DataFrameOpen, DataFramePivot, DataFrameReplace, DataFrameSample, DataFrameSelect, - DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, - DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTake, DataFrameToCsv, DataFrameToDF, - DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere, - DataFrameWithColumn, + DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop, + DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst, + DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull, + DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt, + DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen, DataFramePivot, + DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect, DataFrameSeriesRename, + DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, DataFrameShow, + DataFrameSlice, DataFrameSort, DataFrameStringLengths, DataFrameStringSlice, DataFrameTake, + DataFrameToCsv, DataFrameToDF, DataFrameToLowercase, DataFrameToParquet, DataFrameToUppercase, + DataFrameUnique, DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn, }; pub use env::*; pub use filesystem::*; diff --git a/crates/nu-command/src/commands/viewers/autoview/command.rs b/crates/nu-command/src/commands/viewers/autoview/command.rs index 3f78bbff6..c829a1983 100644 --- a/crates/nu-command/src/commands/viewers/autoview/command.rs +++ b/crates/nu-command/src/commands/viewers/autoview/command.rs @@ -9,7 +9,7 @@ use nu_protocol::{Primitive, Signature, UntaggedValue, Value}; use nu_table::TextStyle; #[cfg(feature = "dataframe")] -use nu_protocol::dataframe::PolarsData; +use nu_protocol::dataframe::FrameStruct; pub struct Command; @@ -239,7 +239,7 @@ pub fn autoview(args: CommandArgs) -> Result { } #[cfg(feature = "dataframe")] Value { - value: 
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)), + value: UntaggedValue::DataFrame(df), tag, } => { if let Some(table) = table { @@ -253,7 +253,7 @@ pub fn autoview(args: CommandArgs) -> Result { } #[cfg(feature = "dataframe")] Value { - value: UntaggedValue::DataFrame(PolarsData::GroupBy(groupby)), + value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(groupby)), tag, } => { if let Some(table) = table { @@ -265,20 +265,6 @@ pub fn autoview(args: CommandArgs) -> Result { let _ = result.collect::>(); } } - #[cfg(feature = "dataframe")] - Value { - value: UntaggedValue::DataFrame(PolarsData::Series(series)), - tag, - } => { - if let Some(table) = table { - // TODO. Configure the parameter rows from file. It can be - // adjusted to see a certain amount of values in the head - let command_args = - create_default_command_args(&context, series.print()?.into(), tag); - let result = table.run(command_args)?; - let _ = result.collect::>(); - } - } Value { value: UntaggedValue::Primitive(Primitive::Nothing), .. 
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 19a921cc7..4873399a8 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -287,7 +287,6 @@ pub fn create_default_context(interactive: bool) -> Result Result Result<(), ShellError> { Ok(()) } +#[cfg(feature = "dataframe")] +pub fn test_dataframe(cmd: impl WholeStreamCommand + 'static) -> Result<(), ShellError> { + use nu_protocol::UntaggedValue; + + let examples = cmd.examples(); + + let base_context = EvaluationContext::basic(); + + base_context.add_commands(vec![ + whole_stream_command(cmd), + // Commands used with dataframe + whole_stream_command(DataFrameToDF), + whole_stream_command(DataFrameShift), + whole_stream_command(DataFrameIsNull), + whole_stream_command(DataFrameGroupBy), + whole_stream_command(DataFrameWithColumn), + // Base commands for context + whole_stream_command(Math), + whole_stream_command(MathMode {}), + whole_stream_command(Echo {}), + whole_stream_command(BuildString {}), + whole_stream_command(Get {}), + whole_stream_command(Keep {}), + whole_stream_command(Each {}), + whole_stream_command(Let {}), + whole_stream_command(Select), + whole_stream_command(StrCollect), + whole_stream_command(Wrap), + ]); + + for sample_pipeline in examples { + let mut ctx = base_context.clone(); + + println!("{:?}", &sample_pipeline.example); + let block = parse_line(sample_pipeline.example, &ctx)?; + + if let Some(expected) = &sample_pipeline.result { + let start = std::time::Instant::now(); + let result = evaluate_block(block, &mut ctx)?; + + println!("input: {}", sample_pipeline.example); + println!("result: {:?}", result); + println!("done: {:?}", start.elapsed()); + + let value = match result.get(0) { + Some(v) => v, + None => panic!( + "Unable to extract a value after parsing example: {}", + sample_pipeline.example + ), + }; + + let df = match &value.value { + UntaggedValue::DataFrame(df) => df, + _ => 
panic!( + "Unable to extract dataframe from parsed example: {}", + sample_pipeline.example + ), + }; + + let expected = match expected.get(0) { + Some(v) => v, + None => panic!("Empty vector in result example"), + }; + + let df_expected = match &expected.value { + UntaggedValue::DataFrame(df) => df, + _ => panic!("Unable to extract dataframe from example result"), + }; + + println!("expected: {:?}", df_expected); + + assert_eq!(df, df_expected) + } + } + + Ok(()) +} + pub fn test_anchors(cmd: Command) -> Result<(), ShellError> { let examples = cmd.examples(); diff --git a/crates/nu-data/src/base/shape.rs b/crates/nu-data/src/base/shape.rs index e6b39e701..a0e41164f 100644 --- a/crates/nu-data/src/base/shape.rs +++ b/crates/nu-data/src/base/shape.rs @@ -131,7 +131,7 @@ impl InlineShape { UntaggedValue::Error(_) => InlineShape::Error, UntaggedValue::Block(_) => InlineShape::Block, #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => InlineShape::DataFrame, + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => InlineShape::DataFrame, } } diff --git a/crates/nu-data/src/config.rs b/crates/nu-data/src/config.rs index 94237038a..77bd5e43f 100644 --- a/crates/nu-data/src/config.rs +++ b/crates/nu-data/src/config.rs @@ -117,7 +117,9 @@ fn helper(v: &Value) -> Result { UntaggedValue::Error(e) => return Err(e.clone()), UntaggedValue::Block(_) => toml::Value::String("".to_string()), #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => toml::Value::String("".to_string()), + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { + toml::Value::String("".to_string()) + } UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("".to_string()), UntaggedValue::Primitive(Primitive::Binary(b)) => { toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect()) diff --git a/crates/nu-data/src/dataframe.rs b/crates/nu-data/src/dataframe.rs index 7995a3146..f5aeda7c7 100644 --- a/crates/nu-data/src/dataframe.rs +++ 
b/crates/nu-data/src/dataframe.rs @@ -1,218 +1,291 @@ use bigdecimal::BigDecimal; use nu_errors::ShellError; +use nu_protocol::dataframe::NuDataFrame; use nu_protocol::hir::Operator; -use nu_protocol::{ - dataframe::{NuSeries, PolarsData}, - Primitive, ShellTypeName, UntaggedValue, Value, -}; +use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value}; use nu_source::Span; use num_traits::ToPrimitive; use polars::prelude::{ - BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, - NumOpsDispatchChecked, PolarsError, Series, + BooleanType, ChunkCompare, ChunkedArray, DataFrame, DataType, Float64Type, Int64Type, + IntoSeries, NumOpsDispatchChecked, PolarsError, Series, }; use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; -pub fn compute_between_series( +pub fn compute_between_dataframes( operator: Operator, left: &Value, right: &Value, ) -> Result { - if let ( - UntaggedValue::DataFrame(PolarsData::Series(lhs)), - UntaggedValue::DataFrame(PolarsData::Series(rhs)), - ) = (&left.value, &right.value) + if let (UntaggedValue::DataFrame(lhs), UntaggedValue::DataFrame(rhs)) = + (&left.value, &right.value) { - if lhs.as_ref().dtype() != rhs.as_ref().dtype() { - return Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Mixed datatypes", - "this datatype does not match the right hand side datatype", - &left.tag.span, - format!( - "Perhaps you want to change this datatype to '{}'", - lhs.as_ref().dtype() - ), - &right.tag.span, - ), - )); - } + let operation_span = left.tag.span.until(right.tag.span); + match (lhs.is_series(), rhs.is_series()) { + (true, true) => { + let lhs = &lhs + .as_series(&left.tag.span) + .expect("Already checked that is a series"); + let rhs = &rhs + .as_series(&right.tag.span) + .expect("Already checked that is a series"); - if lhs.as_ref().len() != rhs.as_ref().len() { - return Ok(UntaggedValue::Error(ShellError::labeled_error( - "Different length", - "this column length does not match the 
right hand column length", - &left.tag.span, - ))); - } + if lhs.dtype() != rhs.dtype() { + return Ok(UntaggedValue::Error( + ShellError::labeled_error_with_secondary( + "Mixed datatypes", + "this datatype does not match the right hand side datatype", + &left.tag.span, + format!( + "Perhaps you want to change this datatype to '{}'", + lhs.as_ref().dtype() + ), + &right.tag.span, + ), + )); + } - match operator { - Operator::Plus => { - let mut res = lhs.as_ref() + rhs.as_ref(); - let name = format!("sum_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::Minus => { - let mut res = lhs.as_ref() - rhs.as_ref(); - let name = format!("sub_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::Multiply => { - let mut res = lhs.as_ref() * rhs.as_ref(); - let name = format!("mul_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::Divide => { - let res = lhs.as_ref().checked_div(rhs.as_ref()); - match res { - Ok(mut res) => { - let name = format!("div_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error( - "Division error", - format!("{}", e), + if lhs.len() != rhs.len() { + return Ok(UntaggedValue::Error(ShellError::labeled_error( + "Different length", + "this column length does not match the right hand column length", &left.tag.span, - ))), + ))); } - } - Operator::Equal => { - let mut res = Series::eq(lhs.as_ref(), rhs.as_ref()).into_series(); - let name = format!("eq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::NotEqual => { - let mut res = Series::neq(lhs.as_ref(), rhs.as_ref()).into_series(); - let name 
= format!("neq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::LessThan => { - let mut res = Series::lt(lhs.as_ref(), rhs.as_ref()).into_series(); - let name = format!("lt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::LessThanOrEqual => { - let mut res = Series::lt_eq(lhs.as_ref(), rhs.as_ref()).into_series(); - let name = format!("lte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::GreaterThan => { - let mut res = Series::gt(lhs.as_ref(), rhs.as_ref()).into_series(); - let name = format!("gt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::GreaterThanOrEqual => { - let mut res = Series::gt_eq(lhs.as_ref(), rhs.as_ref()).into_series(); - let name = format!("gte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - Operator::And => match lhs.as_ref().dtype() { - DataType::Boolean => { - let lhs_cast = lhs.as_ref().bool(); - let rhs_cast = rhs.as_ref().bool(); - match (lhs_cast, rhs_cast) { - (Ok(l), Ok(r)) => { - let mut res = l.bitand(r).into_series(); - let name = - format!("and_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - _ => Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Casting error", - "unable to cast to boolean", - &left.tag.span, - "unable to cast to boolean", - &right.tag.span, - ), - )), - } + compute_between_series(operator, lhs, rhs, &operation_span) + } + _ => { + if lhs.as_ref().height() != rhs.as_ref().height() { + return Ok(UntaggedValue::Error( + ShellError::labeled_error_with_secondary( + "Mixed datatypes", + "this datatype 
size does not match the right hand side datatype", + &left.tag.span, + "Perhaps you want to select another dataframe with same number of rows", + &right.tag.span, + ), + )); } - _ => Ok(UntaggedValue::Error(ShellError::labeled_error( - "Incorrect datatype", - "And operation can only be done with boolean values", - &left.tag.span, - ))), - }, - Operator::Or => match lhs.as_ref().dtype() { - DataType::Boolean => { - let lhs_cast = lhs.as_ref().bool(); - let rhs_cast = rhs.as_ref().bool(); - match (lhs_cast, rhs_cast) { - (Ok(l), Ok(r)) => { - let mut res = l.bitor(r).into_series(); - let name = - format!("or_{}_{}", lhs.as_ref().name(), rhs.as_ref().name()); - res.rename(name.as_ref()); - Ok(NuSeries::series_to_untagged(res)) - } - _ => Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Casting error", - "unable to cast to boolean", - &left.tag.span, - "unable to cast to boolean", - &right.tag.span, - ), - )), - } - } - _ => Ok(UntaggedValue::Error(ShellError::labeled_error( - "Incorrect datatype", - "And operation can only be done with boolean values", - &left.tag.span, - ))), - }, - _ => Ok(UntaggedValue::Error(ShellError::labeled_error( - "Incorrect datatype", - "unable to use this datatype for this operation", - &left.tag.span, - ))), + between_dataframes(operator, lhs, rhs, &operation_span) + } } } else { Err((left.type_name(), right.type_name())) } } +pub fn between_dataframes( + operator: Operator, + lhs: &NuDataFrame, + rhs: &NuDataFrame, + operation_span: &Span, +) -> Result { + match operator { + Operator::Plus => { + let mut columns: Vec<&str> = Vec::new(); + + let new = lhs + .as_ref() + .get_columns() + .iter() + .chain(rhs.as_ref().get_columns().iter()) + .map(|s| { + let name = if columns.contains(&s.name()) { + format!("{}_{}", s.name(), "x") + } else { + columns.push(s.name()); + s.name().to_string() + }; + + let mut series = s.clone(); + series.rename(name.as_str()); + series + }) + .collect::>(); + + match DataFrame::new(new) { 
+ Ok(df) => Ok(NuDataFrame::dataframe_to_untagged(df)), + Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Appending error", + format!("{}", e), + operation_span, + ))), + } + } + _ => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Incorrect datatype", + "unable to use this datatype for this operation", + operation_span, + ))), + } +} + +pub fn compute_between_series( + operator: Operator, + lhs: &Series, + rhs: &Series, + operation_span: &Span, +) -> Result { + match operator { + Operator::Plus => { + let mut res = lhs + rhs; + let name = format!("sum_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::Minus => { + let mut res = lhs - rhs; + let name = format!("sub_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::Multiply => { + let mut res = lhs * rhs; + let name = format!("mul_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::Divide => { + let res = lhs.checked_div(rhs); + match res { + Ok(mut res) => { + let name = format!("div_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Division error", + format!("{}", e), + operation_span, + ))), + } + } + Operator::Equal => { + let mut res = Series::eq(lhs, rhs).into_series(); + let name = format!("eq_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::NotEqual => { + let mut res = Series::neq(lhs, rhs).into_series(); + let name = format!("neq_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::LessThan => { + let mut res = 
Series::lt(lhs, rhs).into_series(); + let name = format!("lt_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::LessThanOrEqual => { + let mut res = Series::lt_eq(lhs, rhs).into_series(); + let name = format!("lte_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::GreaterThan => { + let mut res = Series::gt(lhs, rhs).into_series(); + let name = format!("gt_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::GreaterThanOrEqual => { + let mut res = Series::gt_eq(lhs, rhs).into_series(); + let name = format!("gte_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, operation_span)) + } + Operator::And => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitand(r).into_series(); + let name = format!("and_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, &operation_span)) + } + _ => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Casting error", + "unable to cast to boolean", + operation_span, + ))), + } + } + _ => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Incorrect datatype", + "And operation can only be done with boolean values", + operation_span, + ))), + }, + Operator::Or => match lhs.dtype() { + DataType::Boolean => { + let lhs_cast = lhs.bool(); + let rhs_cast = rhs.bool(); + + match (lhs_cast, rhs_cast) { + (Ok(l), Ok(r)) => { + let mut res = l.bitor(r).into_series(); + let name = format!("or_{}_{}", lhs.name(), rhs.name()); + res.rename(name.as_ref()); + Ok(NuDataFrame::series_to_untagged(res, &operation_span)) + } + _ => 
Ok(UntaggedValue::Error(ShellError::labeled_error( + "Casting error", + "unable to cast to boolean", + operation_span, + ))), + } + } + _ => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Incorrect datatype", + "And operation can only be done with boolean values", + operation_span, + ))), + }, + _ => Ok(UntaggedValue::Error(ShellError::labeled_error( + "Incorrect datatype", + "unable to use this datatype for this operation", + operation_span, + ))), + } +} + pub fn compute_series_single_value( operator: Operator, left: &Value, right: &Value, ) -> Result { - if let (UntaggedValue::DataFrame(PolarsData::Series(lhs)), UntaggedValue::Primitive(_)) = + if let (UntaggedValue::DataFrame(lhs), UntaggedValue::Primitive(_)) = (&left.value, &right.value) { + let lhs = match lhs.as_series(&left.tag.span) { + Ok(series) => series, + Err(e) => return Ok(UntaggedValue::Error(e)), + }; + match operator { Operator::Plus => match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, val, >::add, &left.tag.span, )), UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), >::add, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal( - lhs.as_ref(), + &lhs, val, >::add, &left.tag.span, @@ -229,20 +302,20 @@ pub fn compute_series_single_value( }, Operator::Minus => match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, val, >::sub, &left.tag.span, )), UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), >::sub, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal( - lhs.as_ref(), + &lhs, val, >::sub, &left.tag.span, @@ 
-259,20 +332,20 @@ pub fn compute_series_single_value( }, Operator::Multiply => match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, val, >::mul, &left.tag.span, )), UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), >::mul, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal( - lhs.as_ref(), + &lhs, val, >::mul, &left.tag.span, @@ -297,7 +370,7 @@ pub fn compute_series_single_value( ))) } else { Ok(compute_series_i64( - lhs.as_ref(), + &lhs, val, >::div, &left.tag.span, @@ -313,7 +386,7 @@ pub fn compute_series_single_value( ))) } else { Ok(compute_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), >::div, @@ -330,7 +403,7 @@ pub fn compute_series_single_value( ))) } else { Ok(compute_series_decimal( - lhs.as_ref(), + &lhs, val, >::div, &left.tag.span, @@ -350,20 +423,20 @@ pub fn compute_series_single_value( Operator::Equal => { match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, val, ChunkedArray::eq, &left.tag.span, )), UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), ChunkedArray::eq, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( - compare_series_decimal(lhs.as_ref(), val, ChunkedArray::eq, &left.tag.span), + compare_series_decimal(&lhs, val, ChunkedArray::eq, &left.tag.span), ), _ => Ok(UntaggedValue::Error( ShellError::labeled_error_with_secondary( @@ -376,53 +449,52 @@ pub fn compute_series_single_value( )), } } - Operator::NotEqual => match &right.value { - UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - 
lhs.as_ref(), - val, - ChunkedArray::neq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), - &val.to_i64() - .expect("Internal error: protocol did not use compatible decimal"), - ChunkedArray::neq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal( - lhs.as_ref(), - val, - ChunkedArray::neq, - &left.tag.span, - )), - _ => Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Operation unavailable", - "unable to compare this value to the series", - &right.tag.span, - "Only primary values are allowed", - &right.tag.span, + Operator::NotEqual => { + match &right.value { + UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( + &lhs, + val, + ChunkedArray::neq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( + &lhs, + &val.to_i64() + .expect("Internal error: protocol did not use compatible decimal"), + ChunkedArray::neq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( + compare_series_decimal(&lhs, val, ChunkedArray::neq, &left.tag.span), ), - )), - }, + _ => Ok(UntaggedValue::Error( + ShellError::labeled_error_with_secondary( + "Operation unavailable", + "unable to compare this value to the series", + &right.tag.span, + "Only primary values are allowed", + &right.tag.span, + ), + )), + } + } Operator::LessThan => { match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, val, ChunkedArray::lt, &left.tag.span, )), UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), ChunkedArray::lt, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( - compare_series_decimal(lhs.as_ref(), val, ChunkedArray::lt, &left.tag.span), + 
compare_series_decimal(&lhs, val, ChunkedArray::lt, &left.tag.span), ), _ => Ok(UntaggedValue::Error( ShellError::labeled_error_with_secondary( @@ -435,53 +507,52 @@ pub fn compute_series_single_value( )), } } - Operator::LessThanOrEqual => match &right.value { - UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - lhs.as_ref(), - val, - ChunkedArray::lt_eq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), - &val.to_i64() - .expect("Internal error: protocol did not use compatible decimal"), - ChunkedArray::lt_eq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal( - lhs.as_ref(), - val, - ChunkedArray::lt_eq, - &left.tag.span, - )), - _ => Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Operation unavailable", - "unable to compare this value to the series", - &right.tag.span, - "Only primary values are allowed", - &right.tag.span, + Operator::LessThanOrEqual => { + match &right.value { + UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( + &lhs, + val, + ChunkedArray::lt_eq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( + &lhs, + &val.to_i64() + .expect("Internal error: protocol did not use compatible decimal"), + ChunkedArray::lt_eq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( + compare_series_decimal(&lhs, val, ChunkedArray::lt_eq, &left.tag.span), ), - )), - }, + _ => Ok(UntaggedValue::Error( + ShellError::labeled_error_with_secondary( + "Operation unavailable", + "unable to compare this value to the series", + &right.tag.span, + "Only primary values are allowed", + &right.tag.span, + ), + )), + } + } Operator::GreaterThan => { match &right.value { UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, val, ChunkedArray::gt, &left.tag.span, )), 
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), + &lhs, &val.to_i64() .expect("Internal error: protocol did not use compatible decimal"), ChunkedArray::gt, &left.tag.span, )), UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( - compare_series_decimal(lhs.as_ref(), val, ChunkedArray::gt, &left.tag.span), + compare_series_decimal(&lhs, val, ChunkedArray::gt, &left.tag.span), ), _ => Ok(UntaggedValue::Error( ShellError::labeled_error_with_secondary( @@ -494,39 +565,38 @@ pub fn compute_series_single_value( )), } } - Operator::GreaterThanOrEqual => match &right.value { - UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( - lhs.as_ref(), - val, - ChunkedArray::gt_eq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( - lhs.as_ref(), - &val.to_i64() - .expect("Internal error: protocol did not use compatible decimal"), - ChunkedArray::gt_eq, - &left.tag.span, - )), - UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal( - lhs.as_ref(), - val, - ChunkedArray::gt_eq, - &left.tag.span, - )), - _ => Ok(UntaggedValue::Error( - ShellError::labeled_error_with_secondary( - "Operation unavailable", - "unable to compare this value to the series", - &right.tag.span, - "Only primary values are allowed", - &right.tag.span, + Operator::GreaterThanOrEqual => { + match &right.value { + UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64( + &lhs, + val, + ChunkedArray::gt_eq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64( + &lhs, + &val.to_i64() + .expect("Internal error: protocol did not use compatible decimal"), + ChunkedArray::gt_eq, + &left.tag.span, + )), + UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok( + compare_series_decimal(&lhs, val, ChunkedArray::gt_eq, &left.tag.span), ), - )), - }, + _ => Ok(UntaggedValue::Error( + 
ShellError::labeled_error_with_secondary( + "Operation unavailable", + "unable to compare this value to the series", + &right.tag.span, + "Only primary values are allowed", + &right.tag.span, + ), + )), + } + } Operator::Contains => match &right.value { UntaggedValue::Primitive(Primitive::String(val)) => { - Ok(contains_series_pat(lhs.as_ref(), val, &left.tag.span)) + Ok(contains_series_pat(&lhs, val, &left.tag.span)) } _ => Ok(UntaggedValue::Error( ShellError::labeled_error_with_secondary( @@ -597,7 +667,7 @@ where Ok(casted) => { let res = f(casted.clone(), val); let res = res.into_series(); - NuSeries::series_to_untagged(res) + NuDataFrame::series_to_untagged(res, span) } Err(e) => UntaggedValue::Error(ShellError::labeled_error( "Casting error", @@ -667,7 +737,7 @@ where Ok(casted) => { let res = f(casted.clone(), val); let res = res.into_series(); - NuSeries::series_to_untagged(res) + NuDataFrame::series_to_untagged(res, span) } Err(e) => UntaggedValue::Error(ShellError::labeled_error( "Casting error", @@ -725,7 +795,7 @@ where Ok(casted) => { let res = f(casted, val); let res = res.into_series(); - NuSeries::series_to_untagged(res) + NuDataFrame::series_to_untagged(res, span) } Err(e) => UntaggedValue::Error(ShellError::labeled_error( "Casting error", @@ -795,7 +865,7 @@ where Ok(casted) => { let res = f(casted, val); let res = res.into_series(); - NuSeries::series_to_untagged(res) + NuDataFrame::series_to_untagged(res, span) } Err(e) => UntaggedValue::Error(ShellError::labeled_error( "Casting error", @@ -814,7 +884,7 @@ fn contains_series_pat(series: &Series, pat: &str, span: &Span) -> UntaggedValue match res { Ok(res) => { let res = res.into_series(); - NuSeries::series_to_untagged(res) + NuDataFrame::series_to_untagged(res, span) } Err(e) => UntaggedValue::Error(ShellError::labeled_error( "Search error", diff --git a/crates/nu-engine/src/evaluate/operator.rs b/crates/nu-engine/src/evaluate/operator.rs index c7b0e3d70..0773462cf 100644 --- 
a/crates/nu-engine/src/evaluate/operator.rs +++ b/crates/nu-engine/src/evaluate/operator.rs @@ -5,9 +5,7 @@ use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value}; use std::ops::Not; #[cfg(feature = "dataframe")] -use nu_data::dataframe::{compute_between_series, compute_series_single_value}; -#[cfg(feature = "dataframe")] -use nu_protocol::dataframe::PolarsData; +use nu_data::dataframe::{compute_between_dataframes, compute_series_single_value}; pub fn apply_operator( op: Operator, @@ -15,13 +13,10 @@ pub fn apply_operator( right: &Value, ) -> Result { #[cfg(feature = "dataframe")] - if let ( - UntaggedValue::DataFrame(PolarsData::Series(_)), - UntaggedValue::DataFrame(PolarsData::Series(_)), - ) = (&left.value, &right.value) + if let (UntaggedValue::DataFrame(_), UntaggedValue::DataFrame(_)) = (&left.value, &right.value) { - return compute_between_series(op, left, right); - } else if let (UntaggedValue::DataFrame(PolarsData::Series(_)), UntaggedValue::Primitive(_)) = + return compute_between_dataframes(op, left, right); + } else if let (UntaggedValue::DataFrame(_), UntaggedValue::Primitive(_)) = (&left.value, &right.value) { return compute_series_single_value(op, left, right); diff --git a/crates/nu-protocol/src/dataframe/mod.rs b/crates/nu-protocol/src/dataframe/mod.rs index 12ca7c9a9..985c11c93 100644 --- a/crates/nu-protocol/src/dataframe/mod.rs +++ b/crates/nu-protocol/src/dataframe/mod.rs @@ -1,15 +1,11 @@ pub mod nu_dataframe; pub mod nu_groupby; -pub mod nu_series; -pub use nu_dataframe::NuDataFrame; +pub use nu_dataframe::{Column, NuDataFrame}; pub use nu_groupby::NuGroupBy; -pub use nu_series::NuSeries; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] -pub enum PolarsData { - EagerDataFrame(NuDataFrame), +pub enum FrameStruct { GroupBy(NuGroupBy), - Series(NuSeries), } diff --git a/crates/nu-protocol/src/dataframe/nu_dataframe.rs 
b/crates/nu-protocol/src/dataframe/nu_dataframe.rs index 85dce0103..7b69fc1e6 100644 --- a/crates/nu-protocol/src/dataframe/nu_dataframe.rs +++ b/crates/nu-protocol/src/dataframe/nu_dataframe.rs @@ -1,54 +1,147 @@ +use indexmap::{map::Entry, IndexMap}; +use std::cmp::Ordering; use std::hash::{Hash, Hasher}; -use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap}; +use std::ops::{Deref, DerefMut}; use bigdecimal::FromPrimitive; use chrono::{DateTime, FixedOffset, NaiveDateTime}; use nu_errors::ShellError; use nu_source::{Span, Tag}; use num_bigint::BigInt; -use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit}; +use polars::prelude::{AnyValue, DataFrame, DataType, NamedFrom, Series, TimeUnit}; use serde::{Deserialize, Serialize}; use crate::{Dictionary, Primitive, UntaggedValue, Value}; -use super::PolarsData; - const SECS_PER_DAY: i64 = 86_400; #[derive(Debug)] -enum InputValue { - Integer, - Decimal, - String, +pub struct Column { + name: String, + values: Vec, +} + +impl Column { + pub fn new(name: String, values: Vec) -> Self { + Self { name, values } + } + + pub fn new_empty(name: String) -> Self { + Self { + name, + values: Vec::new(), + } + } + + pub fn push(&mut self, value: Value) { + self.values.push(value) + } } #[derive(Debug)] -struct ColumnValues { - pub value_type: InputValue, - pub values: Vec, +enum InputType { + Integer, + Decimal, + String, + Boolean, } -impl Default for ColumnValues { - fn default() -> Self { +#[derive(Debug)] +struct TypedColumn { + pub column: Column, + pub column_type: Option, +} + +impl TypedColumn { + fn new_empty(name: String) -> Self { Self { - value_type: InputValue::Integer, - values: Vec::new(), + column: Column::new_empty(name), + column_type: None, } } } -type ColumnMap = HashMap; +impl Deref for TypedColumn { + type Target = Column; + + fn deref(&self) -> &Self::Target { + &self.column + } +} + +impl DerefMut for TypedColumn { + fn deref_mut(&mut self) -> &mut Self::Target { + 
&mut self.column + } +} + +type ColumnMap = IndexMap; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NuDataFrame { dataframe: DataFrame, } -// TODO. Better definition of equality and comparison for a dataframe. -// Probably it make sense to have a name field and use it for comparisons +// Dataframes are considered equal if they have the same shape, column name +// and values impl PartialEq for NuDataFrame { - fn eq(&self, _: &Self) -> bool { - false + fn eq(&self, other: &Self) -> bool { + if self.as_ref().width() == 0 { + // checking for empty dataframe + return false; + } + + if self.as_ref().get_column_names() != other.as_ref().get_column_names() { + // checking both dataframes share the same names + return false; + } + + if self.as_ref().height() != other.as_ref().height() { + // checking both dataframes have the same row size + return false; + } + + // sorting dataframe by the first column + let column_names = self.as_ref().get_column_names(); + let first_col = column_names + .get(0) + .expect("already checked that dataframe is different than 0"); + + // if unable to sort, then unable to compare + let lhs = match self.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return false, + }; + + let rhs = match other.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return false, + }; + + for name in self.as_ref().get_column_names() { + let self_series = lhs.column(name).expect("name from dataframe names"); + + let other_series = rhs + .column(name) + .expect("already checked that name in other"); + + let self_series = match self_series.dtype() { + // Casting needed to compare other numeric types with nushell numeric type. 
+ // In nushell we only have i64 integer numeric types and any array created + // with nushell untagged primitives will be of type i64 + DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) { + Ok(series) => series, + Err(_) => return false, + }, + _ => self_series.clone(), + }; + + if !self_series.series_equal(&other_series) { + return false; + } + } + + true } } @@ -87,14 +180,14 @@ impl NuDataFrame { NuDataFrame { dataframe } } - pub fn try_from_stream(input: &mut T, span: &Span) -> Result + pub fn try_from_stream(input: &mut T, span: &Span) -> Result<(Self, Tag), ShellError> where T: Iterator, { input .next() .and_then(|value| match value.value { - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df), + UntaggedValue::DataFrame(df) => Some((df, value.tag)), _ => None, }) .ok_or_else(|| { @@ -113,41 +206,127 @@ impl NuDataFrame { // Dictionary to store the columnar data extracted from // the input. During the iteration we check if the values // have different type - let mut column_values: ColumnMap = HashMap::new(); + let mut column_values: ColumnMap = IndexMap::new(); for value in iter { match value.value { UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?, UntaggedValue::Table(table) => insert_table(&mut column_values, table)?, + UntaggedValue::Primitive(Primitive::Int(_)) + | UntaggedValue::Primitive(Primitive::Decimal(_)) + | UntaggedValue::Primitive(Primitive::String(_)) + | UntaggedValue::Primitive(Primitive::Boolean(_)) => { + let key = format!("{}", 0); + insert_value(value, key, &mut column_values)? 
+ } _ => { return Err(ShellError::labeled_error_with_secondary( "Format not supported", "Value not supported for conversion", &value.tag, - "Perhaps you want to use a List of Tables or a Dictionary", + "Perhaps you want to use a List, a List of Tables or a Dictionary", &value.tag, )); } } } - from_parsed_columns(column_values, tag) + from_parsed_columns(column_values, &tag.span) + } + + pub fn try_from_series(columns: Vec, span: &Span) -> Result { + let dataframe = DataFrame::new(columns).map_err(|e| { + ShellError::labeled_error( + "DataFrame Creation", + format!("Unable to create DataFrame: {}", e), + span, + ) + })?; + + Ok(Self { dataframe }) + } + + pub fn try_from_columns(columns: Vec, span: &Span) -> Result { + let mut column_values: ColumnMap = IndexMap::new(); + + for column in columns { + for value in column.values { + insert_value(value, column.name.clone(), &mut column_values)?; + } + } + + from_parsed_columns(column_values, span) } pub fn into_value(self, tag: Tag) -> Value { Value { - value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)), + value: Self::into_untagged(self), tag, } } + pub fn into_untagged(self) -> UntaggedValue { + UntaggedValue::DataFrame(self) + } + pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value { Value { - value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))), + value: Self::dataframe_to_untagged(df), tag, } } + pub fn dataframe_to_untagged(df: DataFrame) -> UntaggedValue { + UntaggedValue::DataFrame(Self::new(df)) + } + + pub fn series_to_untagged(series: Series, span: &Span) -> UntaggedValue { + match DataFrame::new(vec![series]) { + Ok(dataframe) => UntaggedValue::DataFrame(Self { dataframe }), + Err(e) => UntaggedValue::Error(ShellError::labeled_error( + "DataFrame Creation", + format!("Unable to create DataFrame: {}", e), + span, + )), + } + } + + pub fn column(&self, column: &str, tag: &Tag) -> Result { + let s = self.as_ref().column(column).map_err(|e| { + 
ShellError::labeled_error("Column not found", format!("{}", e), tag.span) + })?; + + let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| { + ShellError::labeled_error("DataFrame error", format!("{}", e), tag.span) + })?; + + Ok(Self { dataframe }) + } + + pub fn is_series(&self) -> bool { + self.as_ref().width() == 1 + } + + pub fn as_series(&self, span: &Span) -> Result { + if !self.is_series() { + return Err(ShellError::labeled_error_with_secondary( + "Not a Series", + "DataFrame cannot be used as Series", + span, + "Note that a Series is a DataFrame with one column", + span, + )); + } + + let series = self + .as_ref() + .get_columns() + .get(0) + .expect("We have already checked that the width is 1"); + + Ok(series.clone()) + } + // Print is made out a head and if the dataframe is too large, then a tail pub fn print(&self) -> Result, ShellError> { let df = &self.as_ref(); @@ -188,24 +367,17 @@ impl NuDataFrame { pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result, ShellError> { let df = self.as_ref(); - let column_names = df.get_column_names(); + let upper_row = to_row.min(df.height()); let mut values: Vec = Vec::new(); - - let upper_row = to_row.min(df.height()); for i in from_row..upper_row { - let row = df.get_row(i); let mut dictionary_row = Dictionary::default(); - - for (val, name) in row.0.iter().zip(column_names.iter()) { - let untagged_val = anyvalue_to_untagged(val)?; - + for col in df.get_columns() { let dict_val = Value { - value: untagged_val, + value: anyvalue_to_untagged(&col.get(i))?, tag: Tag::unknown(), }; - - dictionary_row.insert(name.to_string(), dict_val); + dictionary_row.insert(col.name().into(), dict_val); } let value = Value { @@ -213,7 +385,7 @@ impl NuDataFrame { tag: Tag::unknown(), }; - values.push(value); + values.push(value) } Ok(values) @@ -336,8 +508,8 @@ fn insert_value( key: String, column_values: &mut ColumnMap, ) -> Result<(), ShellError> { - let col_val = match column_values.entry(key) { - 
Entry::Vacant(entry) => entry.insert(ColumnValues::default()), + let col_val = match column_values.entry(key.clone()) { + Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)), Entry::Occupied(entry) => entry.into_mut(), }; @@ -346,13 +518,16 @@ fn insert_value( if col_val.values.is_empty() { match &value.value { UntaggedValue::Primitive(Primitive::Int(_)) => { - col_val.value_type = InputValue::Integer; + col_val.column_type = Some(InputType::Integer); } UntaggedValue::Primitive(Primitive::Decimal(_)) => { - col_val.value_type = InputValue::Decimal; + col_val.column_type = Some(InputType::Decimal); } UntaggedValue::Primitive(Primitive::String(_)) => { - col_val.value_type = InputValue::String; + col_val.column_type = Some(InputType::String); + } + UntaggedValue::Primitive(Primitive::Boolean(_)) => { + col_val.column_type = Some(InputType::Boolean); } _ => { return Err(ShellError::labeled_error( @@ -378,6 +553,10 @@ fn insert_value( | ( UntaggedValue::Primitive(Primitive::String(_)), UntaggedValue::Primitive(Primitive::String(_)), + ) + | ( + UntaggedValue::Primitive(Primitive::Boolean(_)), + UntaggedValue::Primitive(Primitive::Boolean(_)), ) => col_val.values.push(value), _ => { return Err(ShellError::labeled_error_with_secondary( @@ -397,27 +576,35 @@ fn insert_value( // The ColumnMap has the parsed data from the StreamInput // This data can be used to create a Series object that can initialize // the dataframe based on the type of data that is found -fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result { +fn from_parsed_columns(column_values: ColumnMap, span: &Span) -> Result { let mut df_series: Vec = Vec::new(); for (name, column) in column_values { - match column.value_type { - InputValue::Decimal => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_f64()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputValue::Integer => { - let series_values: Result, _> = - 
column.values.iter().map(|v| v.as_i64()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) - } - InputValue::String => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_string()).collect(); - let series = Series::new(&name, series_values?); - df_series.push(series) + if let Some(column_type) = &column.column_type { + match column_type { + InputType::Decimal => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_f64()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::Integer => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_i64()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::String => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_string()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } + InputType::Boolean => { + let series_values: Result, _> = + column.values.iter().map(|v| v.as_bool()).collect(); + let series = Series::new(&name, series_values?); + df_series.push(series) + } } } } @@ -430,7 +617,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result Some(group), + UntaggedValue::FrameStruct(FrameStruct::GroupBy(group)) => Some(group), _ => None, }) .ok_or_else(|| { diff --git a/crates/nu-protocol/src/dataframe/nu_series.rs b/crates/nu-protocol/src/dataframe/nu_series.rs deleted file mode 100644 index 28301083d..000000000 --- a/crates/nu-protocol/src/dataframe/nu_series.rs +++ /dev/null @@ -1,345 +0,0 @@ -use std::cmp::Ordering; -use std::hash::{Hash, Hasher}; -use std::vec; - -use nu_errors::ShellError; -use nu_source::{Span, Tag}; -use polars::prelude::{DataType, NamedFrom, Series}; -use serde::{Deserialize, Serialize}; - -use crate::{Dictionary, Primitive, UntaggedValue, Value}; - -use super::PolarsData; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub 
struct NuSeries { - series: Series, - dtype: String, -} - -// TODO. Better definition of equality and comparison for a dataframe. -// Probably it make sense to have a name field and use it for comparisons -impl PartialEq for NuSeries { - fn eq(&self, _: &Self) -> bool { - false - } -} - -impl Eq for NuSeries {} - -impl PartialOrd for NuSeries { - fn partial_cmp(&self, _: &Self) -> Option { - Some(Ordering::Equal) - } -} - -impl Ord for NuSeries { - fn cmp(&self, _: &Self) -> Ordering { - Ordering::Equal - } -} - -impl Hash for NuSeries { - fn hash(&self, _: &mut H) {} -} - -impl NuSeries { - pub fn new(series: Series) -> Self { - let dtype = series.dtype().to_string(); - - NuSeries { series, dtype } - } - - pub fn try_from_stream(input: &mut T, span: &Span) -> Result - where - T: Iterator, - { - input - .next() - .and_then(|value| match value.value { - UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series), - _ => None, - }) - .ok_or_else(|| { - ShellError::labeled_error( - "No series in stream", - "no series found in input stream", - span, - ) - }) - } - - pub fn try_from_iter(iter: T, name: Option) -> Result - where - T: Iterator, - { - let mut vec_values: Vec = Vec::new(); - - for value in iter { - match value.value { - UntaggedValue::Primitive(Primitive::Int(_)) - | UntaggedValue::Primitive(Primitive::Decimal(_)) - | UntaggedValue::Primitive(Primitive::String(_)) - | UntaggedValue::Primitive(Primitive::Boolean(_)) => { - insert_value(value, &mut vec_values)? 
- } - _ => { - return Err(ShellError::labeled_error_with_secondary( - "Format not supported", - "Value not supported for conversion", - &value.tag.span, - "Perhaps you want to use a list of primitive values (int, decimal, string, or bool)", - &value.tag.span, - )); - } - } - } - - from_parsed_vector(vec_values, name) - } - - pub fn into_value(self, tag: Tag) -> Value { - Value { - value: UntaggedValue::DataFrame(PolarsData::Series(self)), - tag, - } - } - - pub fn series_to_value(series: Series, tag: Tag) -> Value { - Value { - value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))), - tag, - } - } - - pub fn series_to_untagged(series: Series) -> UntaggedValue { - UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))) - } - - pub fn dtype(&self) -> &str { - &self.dtype - } - - pub fn series(self) -> Series { - self.series - } -} - -impl AsRef for NuSeries { - fn as_ref(&self) -> &Series { - &self.series - } -} - -impl AsMut for NuSeries { - fn as_mut(&mut self) -> &mut Series { - &mut self.series - } -} - -macro_rules! 
series_to_chunked { - ($converter: expr, $self: expr) => {{ - let chunked_array = $converter.map_err(|e| { - ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown()) - })?; - - let size = 20; - - let (head_size, skip, tail_size) = if $self.as_ref().len() > size { - let remaining = $self.as_ref().len() - (size / 2); - let skip = $self.as_ref().len() - remaining; - (size / 2, skip, remaining.min(size / 2)) - } else { - (size, 0, 0) - }; - - let head = chunked_array.into_iter().take(head_size).map(|value| { - let value = match value { - Some(v) => Value { - value: UntaggedValue::Primitive(v.into()), - tag: Tag::unknown(), - }, - None => Value { - value: UntaggedValue::Primitive(Primitive::Nothing), - tag: Tag::unknown(), - }, - }; - - let mut dictionary_row = Dictionary::default(); - let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype()); - dictionary_row.insert(header, value); - - Value { - value: UntaggedValue::Row(dictionary_row), - tag: Tag::unknown(), - } - }); - - let res = if $self.as_ref().len() < size { - head.collect::>() - } else { - let middle = std::iter::once({ - let mut dictionary_row = Dictionary::default(); - - let value = Value { - value: UntaggedValue::Primitive("...".into()), - tag: Tag::unknown(), - }; - - let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype()); - dictionary_row.insert(header, value); - - Value { - value: UntaggedValue::Row(dictionary_row), - tag: Tag::unknown(), - } - }); - - let tail = - chunked_array - .into_iter() - .skip(skip) - .take(tail_size) - .map(|value| match value { - Some(v) => { - let mut dictionary_row = Dictionary::default(); - - let value = Value { - value: UntaggedValue::Primitive(v.into()), - tag: Tag::unknown(), - }; - - let header = format!("{} ({})", $self.as_ref().name(), $self.dtype()); - dictionary_row.insert(header, value); - - Value { - value: UntaggedValue::Row(dictionary_row), - tag: Tag::unknown(), - } - } - None => Value { - value: 
UntaggedValue::Primitive(Primitive::Nothing), - tag: Tag::unknown(), - }, - }); - - head.chain(middle).chain(tail).collect::>() - }; - - Ok(res) - }}; -} - -impl NuSeries { - pub fn print(&self) -> Result, ShellError> { - match self.as_ref().dtype() { - DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self), - DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self), - DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self), - DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self), - DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self), - DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self), - DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self), - DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self), - DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self), - DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self), - DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self), - DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self), - DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self), - DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self), - DataType::Null => Ok(vec![Value { - value: UntaggedValue::Primitive(Primitive::Nothing), - tag: Tag::unknown(), - }]), - //DataType::List(_) => None, - //DataType::Time64(TimeUnit) => None, - //DataType::Duration(TimeUnit) => None, - // DataType::Categorical => None, - _ => unimplemented!(), - } - } -} - -fn insert_value(value: Value, vec_values: &mut Vec) -> Result<(), ShellError> { - // Checking that the type for the value is the same - // for the previous value in the column - if vec_values.is_empty() { - vec_values.push(value); - Ok(()) - } else { - let prev_value = &vec_values[vec_values.len() - 1]; - - match (&prev_value.value, &value.value) { - ( - UntaggedValue::Primitive(Primitive::Int(_)), - UntaggedValue::Primitive(Primitive::Int(_)), - ) - | ( - 
UntaggedValue::Primitive(Primitive::Decimal(_)), - UntaggedValue::Primitive(Primitive::Decimal(_)), - ) - | ( - UntaggedValue::Primitive(Primitive::String(_)), - UntaggedValue::Primitive(Primitive::String(_)), - ) - | ( - UntaggedValue::Primitive(Primitive::Boolean(_)), - UntaggedValue::Primitive(Primitive::Boolean(_)), - ) => { - vec_values.push(value); - Ok(()) - } - _ => Err(ShellError::labeled_error_with_secondary( - "Different values in column", - "Value with different type", - &value.tag, - "Perhaps you want to change it to this value type", - &prev_value.tag, - )), - } - } -} - -fn from_parsed_vector( - vec_values: Vec, - name: Option, -) -> Result { - let series = match &vec_values[0].value { - UntaggedValue::Primitive(Primitive::Int(_)) => { - let series_values: Result, _> = vec_values.iter().map(|v| v.as_i64()).collect(); - let series_name = match &name { - Some(n) => n.as_ref(), - None => "int", - }; - Series::new(series_name, series_values?) - } - UntaggedValue::Primitive(Primitive::Decimal(_)) => { - let series_values: Result, _> = vec_values.iter().map(|v| v.as_f64()).collect(); - let series_name = match &name { - Some(n) => n.as_ref(), - None => "decimal", - }; - Series::new(series_name, series_values?) - } - UntaggedValue::Primitive(Primitive::String(_)) => { - let series_values: Result, _> = - vec_values.iter().map(|v| v.as_string()).collect(); - let series_name = match &name { - Some(n) => n.as_ref(), - None => "string", - }; - Series::new(series_name, series_values?) - } - UntaggedValue::Primitive(Primitive::Boolean(_)) => { - let series_values: Result, _> = vec_values.iter().map(|v| v.as_bool()).collect(); - let series_name = match &name { - Some(n) => n.as_ref(), - None => "string", - }; - Series::new(series_name, series_values?) 
- } - _ => unreachable!("The untagged type is checked while creating vec_values"), - }; - - Ok(NuSeries::new(series)) -} diff --git a/crates/nu-protocol/src/type_shape.rs b/crates/nu-protocol/src/type_shape.rs index a4000434d..497984c02 100644 --- a/crates/nu-protocol/src/type_shape.rs +++ b/crates/nu-protocol/src/type_shape.rs @@ -75,6 +75,10 @@ pub enum Type { /// Dataframe #[cfg(feature = "dataframe")] DataFrame, + + /// Dataframe + #[cfg(feature = "dataframe")] + FrameStruct, } /// A shape representation of the type of a row @@ -192,6 +196,8 @@ impl Type { UntaggedValue::Block(_) => Type::Block, #[cfg(feature = "dataframe")] UntaggedValue::DataFrame(_) => Type::DataFrame, + #[cfg(feature = "dataframe")] + UntaggedValue::FrameStruct(_) => Type::DataFrame, } } } @@ -298,7 +304,7 @@ impl PrettyDebug for Type { } Type::Block => ty("block"), #[cfg(feature = "dataframe")] - Type::DataFrame => ty("data_type_formatter"), + Type::DataFrame | Type::FrameStruct => ty("data_type_formatter"), } } } diff --git a/crates/nu-protocol/src/value.rs b/crates/nu-protocol/src/value.rs index e9e244f76..c21541436 100644 --- a/crates/nu-protocol/src/value.rs +++ b/crates/nu-protocol/src/value.rs @@ -31,7 +31,7 @@ use std::path::PathBuf; use std::time::SystemTime; #[cfg(feature = "dataframe")] -use crate::dataframe::PolarsData; +use crate::dataframe::{FrameStruct, NuDataFrame}; /// The core structured values that flow through a pipeline #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)] @@ -51,10 +51,15 @@ pub enum UntaggedValue { /// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values Block(Box), - /// Data option that holds the polars structs required to to data - /// manipulation and operations using polars dataframes + /// Main nushell dataframe #[cfg(feature = "dataframe")] - DataFrame(PolarsData), + DataFrame(NuDataFrame), + + /// Data option that holds intermediate struct required to do data + /// manipulation and 
operations for dataframes such as groupby, lazy frames + /// and lazy groupby + #[cfg(feature = "dataframe")] + FrameStruct(FrameStruct), } impl UntaggedValue { @@ -685,11 +690,9 @@ impl ShellTypeName for UntaggedValue { UntaggedValue::Error(_) => "error", UntaggedValue::Block(_) => "block", #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe", + UntaggedValue::DataFrame(_) => "dataframe", #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(PolarsData::Series(_)) => "series", - #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby", + UntaggedValue::FrameStruct(FrameStruct::GroupBy(_)) => "groupby", } } } diff --git a/crates/nu-protocol/src/value/debug.rs b/crates/nu-protocol/src/value/debug.rs index d6e3d52b3..6ce7da44e 100644 --- a/crates/nu-protocol/src/value/debug.rs +++ b/crates/nu-protocol/src/value/debug.rs @@ -25,7 +25,9 @@ impl PrettyDebug for Value { UntaggedValue::Error(_) => DbgDocBldr::error("error"), UntaggedValue::Block(_) => DbgDocBldr::opaque("block"), #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"), + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { + DbgDocBldr::opaque("dataframe") + } } } } diff --git a/crates/nu-value-ext/src/lib.rs b/crates/nu-value-ext/src/lib.rs index dbaddf264..a48f37f85 100644 --- a/crates/nu-value-ext/src/lib.rs +++ b/crates/nu-value-ext/src/lib.rs @@ -12,7 +12,7 @@ use nu_source::{ use num_traits::cast::ToPrimitive; #[cfg(feature = "dataframe")] -use nu_protocol::dataframe::{NuSeries, PolarsData}; +use nu_protocol::dataframe::NuDataFrame; pub trait ValueExt { fn into_parts(self) -> (UntaggedValue, Tag); @@ -203,14 +203,14 @@ pub fn get_data_by_member(value: &Value, name: &PathMember) -> Result match &name.unspanned { + UntaggedValue::DataFrame(df) => match &name.unspanned { UnspannedPathMember::String(string) => { - let column = 
df.as_ref().column(string.as_ref()).map_err(|e| { + let column = df.as_ref().select(string.as_str()).map_err(|e| { ShellError::labeled_error("Dataframe error", format!("{}", e), &name.span) })?; - Ok(NuSeries::series_to_value( - column.clone(), + Ok(NuDataFrame::dataframe_to_value( + column, Tag::new(value.anchor(), name.span), )) } @@ -746,7 +746,7 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value, MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value()) } #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => { + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value()) } } diff --git a/crates/nu_plugin_post/src/post.rs b/crates/nu_plugin_post/src/post.rs index bd22fc7dd..86e5e7140 100644 --- a/crates/nu_plugin_post/src/post.rs +++ b/crates/nu_plugin_post/src/post.rs @@ -406,7 +406,7 @@ pub fn value_to_json_value(v: &Value) -> Result { UntaggedValue::Table(l) => serde_json::Value::Array(json_list(l)?), #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => { + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => { return Err(ShellError::labeled_error( "Cannot convert data struct", "Cannot convert data struct", diff --git a/crates/nu_plugin_to_bson/src/to_bson.rs b/crates/nu_plugin_to_bson/src/to_bson.rs index a94ec8046..228aa10b8 100644 --- a/crates/nu_plugin_to_bson/src/to_bson.rs +++ b/crates/nu_plugin_to_bson/src/to_bson.rs @@ -64,7 +64,7 @@ pub fn value_to_bson_value(v: &Value) -> Result { ), UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null, #[cfg(feature = "dataframe")] - UntaggedValue::DataFrame(_) => Bson::Null, + UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => Bson::Null, UntaggedValue::Error(e) => return Err(e.clone()), UntaggedValue::Primitive(Primitive::Binary(b)) => { Bson::Binary(BinarySubtype::Generic, b.clone())