From 29efbee285fd793fd483f196a9569a25ff84f9fb Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Sun, 5 Dec 2021 13:25:37 +0000 Subject: [PATCH] corrected missing shellerror type (#439) --- Cargo.lock | 35 +- crates/nu-command/Cargo.toml | 2 +- crates/nu-command/src/dataframe/README.md | 2 +- crates/nu-command/src/dataframe/mod.rs | 2 +- .../dataframe/nu_dataframe/commands/mod.rs | 12 - .../nu_dataframe/{commands => }/describe.rs | 115 +++-- .../nu_dataframe/{commands => }/dtypes.rs | 10 +- .../src/dataframe/nu_dataframe/mod.rs | 398 +----------------- .../{ => nu_dataframe}/between_values.rs | 136 ++++-- .../{ => nu_dataframe}/conversion.rs | 113 +++-- .../{ => nu_dataframe}/custom_value.rs | 0 .../nu_dataframe/nu_dataframe/mod.rs | 395 +++++++++++++++++ .../{ => nu_dataframe}/operations.rs | 11 +- .../nu_dataframe/{commands => }/open.rs | 70 ++- .../{commands => }/test_dataframe.rs | 0 .../nu_dataframe/{commands => }/to_df.rs | 8 +- .../nu-plugin/src/serializers/plugin_call.rs | 21 + 17 files changed, 723 insertions(+), 607 deletions(-) delete mode 100644 crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs rename crates/nu-command/src/dataframe/nu_dataframe/{commands => }/describe.rs (70%) rename crates/nu-command/src/dataframe/nu_dataframe/{commands => }/dtypes.rs (89%) rename crates/nu-command/src/dataframe/nu_dataframe/{ => nu_dataframe}/between_values.rs (82%) rename crates/nu-command/src/dataframe/nu_dataframe/{ => nu_dataframe}/conversion.rs (83%) rename crates/nu-command/src/dataframe/nu_dataframe/{ => nu_dataframe}/custom_value.rs (100%) create mode 100644 crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs rename crates/nu-command/src/dataframe/nu_dataframe/{ => nu_dataframe}/operations.rs (95%) rename crates/nu-command/src/dataframe/nu_dataframe/{commands => }/open.rs (74%) rename crates/nu-command/src/dataframe/nu_dataframe/{commands => }/test_dataframe.rs (100%) rename crates/nu-command/src/dataframe/nu_dataframe/{commands => }/to_df.rs (92%) diff --git a/Cargo.lock b/Cargo.lock index 394520390..03b074102 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -120,11 +120,10 @@ dependencies = [ [[package]] name = "arrow2" -version = "0.7.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395" +checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44" dependencies = [ - "ahash", "arrow-format", "base64", "chrono", @@ -628,9 +627,9 @@ dependencies = [ [[package]] name = "dirs" -version = "3.0.2" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" +checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" dependencies = [ "dirs-sys", ] @@ -1823,9 +1822,9 @@ dependencies = [ [[package]] name = "parquet2" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d" +checksum = "41051fae4c0fab9040e291b360c6c8037d09d482aa83e94e37f3d080a32a58c3" dependencies = [ "async-stream", "bitpacking", @@ -1916,9 +1915,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "polars" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598" +checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1" dependencies = [ "polars-core", "polars-io", @@ -1927,9 +1926,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99" +checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf" dependencies = [ "arrow2", "num 0.4.0", @@ -1938,9 +1937,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6" +checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1" dependencies = [ "ahash", "anyhow", @@ -1963,15 +1962,15 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957" +checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12" dependencies = [ "ahash", "anyhow", "arrow2", "csv-core", - "dirs 3.0.2", + "dirs 4.0.0", "lazy_static", "lexical", "memchr", @@ -1987,9 +1986,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61" +checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46" dependencies = [ "ahash", "itertools", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index c2f577bcb..c603d7776 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -54,7 +54,7 @@ crossterm = "0.22.1" num = {version="0.4.0", optional=true} [dependencies.polars] -version = "0.17.0" +version = "0.18.0" optional = true features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"] diff --git a/crates/nu-command/src/dataframe/README.md b/crates/nu-command/src/dataframe/README.md index e20fd519f..2a50786a0 100644 --- a/crates/nu-command/src/dataframe/README.md +++ b/crates/nu-command/src/dataframe/README.md @@ -1,3 +1,3 @@ # nu-dataframe -The nu-dataframe crate holds the definitions of the dataframe structure +The nu-dataframe crate holds the definitions of the dataframe structures and commands diff --git a/crates/nu-command/src/dataframe/mod.rs b/crates/nu-command/src/dataframe/mod.rs index f77f2eb31..3b6d78309 100644 --- a/crates/nu-command/src/dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/mod.rs @@ -1,3 +1,3 @@ mod nu_dataframe; -pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame}; +pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame}; diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs b/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs deleted file mode 100644 index c08d34988..000000000 --- a/crates/nu-command/src/dataframe/nu_dataframe/commands/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -mod describe; -mod dtypes; -mod open; -mod to_df; - -pub use describe::DescribeDF; -pub use dtypes::DataTypes; -pub use open::OpenDataFrame; -pub use to_df::ToDataFrame; - -#[cfg(test)] -mod test_dataframe; diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs b/crates/nu-command/src/dataframe/nu_dataframe/describe.rs similarity index 70% rename from crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs rename to crates/nu-command/src/dataframe/nu_dataframe/describe.rs index 1a0a06af1..2aa62f440 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/commands/describe.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/describe.rs @@ -1,6 +1,4 @@ -use crate::dataframe::nu_dataframe::Column; - -use super::super::NuDataFrame; +use super::nu_dataframe::{Column, NuDataFrame}; use nu_protocol::{ ast::Call, @@ -19,7 +17,7 @@ pub struct DescribeDF; impl Command for DescribeDF { fn name(&self) -> &str { - "describe" + "describe-df" } fn usage(&self) -> &str { @@ -27,13 +25,13 @@ impl Command for DescribeDF { } fn signature(&self) -> Signature { - Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { vec![Example { description: "dataframe description", - example: "[[a b]; [1 1] [1 1]] | to df | describe", + example: "[[a b]; [1 1] [1 1]] | to df | describe-df", result: Some( NuDataFrame::try_from_columns(vec![ Column::new( @@ -134,13 +132,14 @@ fn command( .map(|col| { let count = col.len() as f64; - let sum = match col.sum_as_series().cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { + let sum = col + .sum_as_series() + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, - }, - Err(_) => None, - }; + }); let mean = match col.mean_as_series().get(0) { AnyValue::Float64(v) => Some(v), @@ -157,54 +156,50 @@ fn command( _ => None, }; - let min = match col.min_as_series().cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { + let min = col + .min_as_series() + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, - }, - Err(_) => None, - }; + }); - let q_25 = match col.quantile_as_series(0.25) { - Ok(ca) => match ca.cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { - AnyValue::Float64(v) => Some(v), - _ => None, - }, - Err(_) => None, - }, - Err(_) => None, - }; - - let q_50 = match col.quantile_as_series(0.50) { - Ok(ca) => match ca.cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { - AnyValue::Float64(v) => Some(v), - _ => None, - }, - Err(_) => None, - }, - Err(_) => None, - }; - - let q_75 = match col.quantile_as_series(0.75) { - Ok(ca) => match ca.cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { - AnyValue::Float64(v) => Some(v), - _ => None, - }, - Err(_) => None, - }, - Err(_) => None, - }; - - let max = match col.max_as_series().cast(&DataType::Float64) { - Ok(ca) => match ca.get(0) { + let q_25 = col + .quantile_as_series(0.25) + .ok() + .and_then(|ca| ca.cast(&DataType::Float64).ok()) + .and_then(|ca| match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, - }, - Err(_) => None, - }; + }); + + let q_50 = col + .quantile_as_series(0.50) + .ok() + .and_then(|ca| ca.cast(&DataType::Float64).ok()) + .and_then(|ca| match ca.get(0) { + AnyValue::Float64(v) => Some(v), + _ => None, + }); + + let q_75 = col + .quantile_as_series(0.75) + .ok() + .and_then(|ca| ca.cast(&DataType::Float64).ok()) + .and_then(|ca| match ca.get(0) { + AnyValue::Float64(v) => Some(v), + _ => None, + }); + + let max = col + .max_as_series() + .cast(&DataType::Float64) + .ok() + .and_then(|ca| match ca.get(0) { + AnyValue::Float64(v) => Some(v), + _ => None, + }); let name = format!("{} ({})", col.name(), col.dtype()); ChunkedArray::::new_from_opt_slice( @@ -226,12 +221,12 @@ fn command( }); let res = head.chain(tail).collect::>(); - let df = DataFrame::new(res).map_err(|e| { - ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head) - })?; - Ok(PipelineData::Value(NuDataFrame::dataframe_into_value( - df, call.head, - ))) + + DataFrame::new(res) + .map_err(|e| { + ShellError::SpannedLabeledError("Dataframe Error".into(), e.to_string(), call.head) + }) + .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs b/crates/nu-command/src/dataframe/nu_dataframe/dtypes.rs similarity index 89% rename from crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs rename to crates/nu-command/src/dataframe/nu_dataframe/dtypes.rs index db6e7b50f..4212d5418 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/commands/dtypes.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/dtypes.rs @@ -1,4 +1,4 @@ -use super::super::{Column, NuDataFrame}; +use super::nu_dataframe::{Column, NuDataFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, @@ -18,12 +18,12 @@ impl Command for DataTypes { } fn signature(&self) -> Signature { - Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { vec![Example { - description: "drop column a", + description: "Dataframe dtypes", example: "[[a b]; [1 2] [3 4]] | to df | dtypes", result: Some( NuDataFrame::try_from_columns(vec![ @@ -90,8 +90,8 @@ fn command( let names_col = Column::new("column".to_string(), names); let dtypes_col = Column::new("dtype".to_string(), dtypes); - let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?; - Ok(PipelineData::Value(df.into_value(call.head))) + NuDataFrame::try_from_columns(vec![names_col, dtypes_col]) + .map(|df| PipelineData::Value(df.into_value(call.head), None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/nu_dataframe/mod.rs index 8517836df..53dd22871 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/mod.rs @@ -1,390 +1,14 @@ -pub mod commands; +mod nu_dataframe; -mod between_values; -mod conversion; -mod custom_value; -mod operations; +mod describe; +mod dtypes; +mod open; +mod to_df; -use conversion::{Column, ColumnMap}; +pub use describe::DescribeDF; +pub use dtypes::DataTypes; +pub use open::OpenDataFrame; +pub use to_df::ToDataFrame; -use indexmap::map::IndexMap; -use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; -use polars::prelude::{DataFrame, DataType, PolarsObject, Series}; -use serde::{Deserialize, Serialize}; -use std::{cmp::Ordering, fmt::Display, hash::Hasher}; - -// DataFrameValue is an encapsulation of Nushell Value that can be used -// to define the PolarsObject Trait. The polars object trait allows to -// create dataframes with mixed datatypes -#[derive(Clone, Debug)] -pub struct DataFrameValue(Value); - -impl DataFrameValue { - fn new(value: Value) -> Self { - Self(value) - } - - fn get_value(&self) -> Value { - self.0.clone() - } -} - -impl Display for DataFrameValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0.get_type()) - } -} - -impl Default for DataFrameValue { - fn default() -> Self { - Self(Value::Nothing { - span: Span::unknown(), - }) - } -} - -impl PartialEq for DataFrameValue { - fn eq(&self, other: &Self) -> bool { - self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq) - } -} -impl Eq for DataFrameValue {} - -impl std::hash::Hash for DataFrameValue { - fn hash(&self, state: &mut H) { - match &self.0 { - Value::Nothing { .. } => 0.hash(state), - Value::Int { val, .. } => val.hash(state), - Value::String { val, .. } => val.hash(state), - // TODO. Define hash for the rest of types - _ => {} - } - } -} - -impl PolarsObject for DataFrameValue { - fn type_name() -> &'static str { - "object" - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct NuDataFrame(DataFrame); - -impl AsRef for NuDataFrame { - fn as_ref(&self) -> &polars::prelude::DataFrame { - &self.0 - } -} - -impl AsMut for NuDataFrame { - fn as_mut(&mut self) -> &mut polars::prelude::DataFrame { - &mut self.0 - } -} - -impl NuDataFrame { - pub fn new(dataframe: DataFrame) -> Self { - Self(dataframe) - } - - fn default_value(span: Span) -> Value { - let dataframe = DataFrame::default(); - NuDataFrame::dataframe_into_value(dataframe, span) - } - - pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value { - Value::CustomValue { - val: Box::new(Self::new(dataframe)), - span, - } - } - - pub fn into_value(self, span: Span) -> Value { - Value::CustomValue { - val: Box::new(self), - span, - } - } - - pub fn series_to_value(series: Series, span: Span) -> Result { - match DataFrame::new(vec![series]) { - Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)), - Err(e) => Err(ShellError::InternalError(e.to_string())), - } - } - - pub fn try_from_iter(iter: T) -> Result - where - T: Iterator, - { - // Dictionary to store the columnar data extracted from - // the input. During the iteration we check if the values - // have different type - let mut column_values: ColumnMap = IndexMap::new(); - - for value in iter { - match value { - Value::List { vals, .. } => { - let cols = (0..vals.len()) - .map(|i| format!("{}", i)) - .collect::>(); - - conversion::insert_record(&mut column_values, &cols, &vals)? - } - Value::Record { cols, vals, .. } => { - conversion::insert_record(&mut column_values, &cols, &vals)? - } - _ => { - let key = "0".to_string(); - conversion::insert_value(value, key, &mut column_values)? - } - } - } - - conversion::from_parsed_columns(column_values) - } - - //pub fn try_from_series(columns: Vec) -> Result { - // let dataframe = DataFrame::new(columns) - // .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?; - - // Ok(Self::new(dataframe)) - //} - - pub fn try_from_columns(columns: Vec) -> Result { - let mut column_values: ColumnMap = IndexMap::new(); - - for column in columns { - let name = column.name().to_string(); - for value in column { - conversion::insert_value(value, name.clone(), &mut column_values)?; - } - } - - conversion::from_parsed_columns(column_values) - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - match input.into_value(span) { - Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { - Some(df) => Ok(NuDataFrame(df.0.clone())), - None => Err(ShellError::CantConvert( - "Dataframe not found".into(), - "value is not a dataframe".into(), - span, - )), - }, - _ => Err(ShellError::CantConvert( - "Dataframe not found".into(), - "value is not a dataframe".into(), - span, - )), - } - } - - pub fn column(&self, column: &str, span: Span) -> Result { - let s = self.0.column(column).map_err(|_| { - let possibilities = self - .0 - .get_column_names() - .iter() - .map(|name| name.to_string()) - .collect::>(); - - let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string()); - ShellError::DidYouMean(option, span) - })?; - - let dataframe = DataFrame::new(vec![s.clone()]) - .map_err(|e| ShellError::InternalError(e.to_string()))?; - - Ok(Self(dataframe)) - } - - pub fn is_series(&self) -> bool { - self.0.width() == 1 - } - - pub fn as_series(&self, _span: Span) -> Result { - if !self.is_series() { - return Err(ShellError::InternalError( - "DataFrame cannot be used as Series".into(), - )); - } - - let series = self - .0 - .get_columns() - .get(0) - .expect("We have already checked that the width is 1"); - - Ok(series.clone()) - } - - pub fn get_value(&self, row: usize, span: Span) -> Result { - let series = self.as_series(Span::unknown())?; - let column = conversion::create_column(&series, row, row + 1)?; - - if column.len() == 0 { - Err(ShellError::AccessBeyondEnd(series.len(), span)) - } else { - let value = column - .into_iter() - .next() - .expect("already checked there is a value"); - Ok(value) - } - } - - // Print is made out a head and if the dataframe is too large, then a tail - pub fn print(&self) -> Result, ShellError> { - let df = &self.0; - let size: usize = 20; - - if df.height() > size { - let sample_size = size / 2; - let mut values = self.head(Some(sample_size))?; - conversion::add_separator(&mut values, df); - let remaining = df.height() - sample_size; - let tail_size = remaining.min(sample_size); - let mut tail_values = self.tail(Some(tail_size))?; - values.append(&mut tail_values); - - Ok(values) - } else { - Ok(self.head(Some(size))?) - } - } - - pub fn head(&self, rows: Option) -> Result, ShellError> { - let to_row = rows.unwrap_or(5); - let values = self.to_rows(0, to_row)?; - - Ok(values) - } - - pub fn tail(&self, rows: Option) -> Result, ShellError> { - let df = &self.0; - let to_row = df.height(); - let size = rows.unwrap_or(5); - let from_row = to_row.saturating_sub(size); - - let values = self.to_rows(from_row, to_row)?; - - Ok(values) - } - - pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result, ShellError> { - let df = &self.0; - let upper_row = to_row.min(df.height()); - - let mut size: usize = 0; - let columns = self - .0 - .get_columns() - .iter() - .map( - |col| match conversion::create_column(col, from_row, upper_row) { - Ok(col) => { - size = col.len(); - Ok(col) - } - Err(e) => Err(e), - }, - ) - .collect::, ShellError>>()?; - - let mut iterators = columns - .into_iter() - .map(|col| (col.name().to_string(), col.into_iter())) - .collect::)>>(); - - let values = (0..size) - .into_iter() - .map(|_| { - let mut cols = vec![]; - let mut vals = vec![]; - - for (name, col) in &mut iterators { - cols.push(name.clone()); - - match col.next() { - Some(v) => vals.push(v), - None => vals.push(Value::Nothing { - span: Span::unknown(), - }), - }; - } - - Value::Record { - cols, - vals, - span: Span::unknown(), - } - }) - .collect::>(); - - Ok(values) - } - - // Dataframes are considered equal if they have the same shape, column name and values - pub fn is_equal(&self, other: &Self) -> Option { - if self.as_ref().width() == 0 { - // checking for empty dataframe - return None; - } - - if self.as_ref().get_column_names() != other.as_ref().get_column_names() { - // checking both dataframes share the same names - return None; - } - - if self.as_ref().height() != other.as_ref().height() { - // checking both dataframes have the same row size - return None; - } - - // sorting dataframe by the first column - let column_names = self.as_ref().get_column_names(); - let first_col = column_names - .get(0) - .expect("already checked that dataframe is different than 0"); - - // if unable to sort, then unable to compare - let lhs = match self.as_ref().sort(*first_col, false) { - Ok(df) => df, - Err(_) => return None, - }; - - let rhs = match other.as_ref().sort(*first_col, false) { - Ok(df) => df, - Err(_) => return None, - }; - - for name in self.as_ref().get_column_names() { - let self_series = lhs.column(name).expect("name from dataframe names"); - - let other_series = rhs - .column(name) - .expect("already checked that name in other"); - - let self_series = match self_series.dtype() { - // Casting needed to compare other numeric types with nushell numeric type. - // In nushell we only have i64 integer numeric types and any array created - // with nushell untagged primitives will be of type i64 - DataType::UInt32 => match self_series.cast(&DataType::Int64) { - Ok(series) => series, - Err(_) => return None, - }, - _ => self_series.clone(), - }; - - if !self_series.series_equal(other_series) { - return None; - } - } - - Some(Ordering::Equal) - } -} +#[cfg(test)] +mod test_dataframe; diff --git a/crates/nu-command/src/dataframe/nu_dataframe/between_values.rs b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/between_values.rs similarity index 82% rename from crates/nu-command/src/dataframe/nu_dataframe/between_values.rs rename to crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/between_values.rs index 528777bd2..2a6451f08 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/between_values.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/between_values.rs @@ -66,17 +66,21 @@ pub fn compute_between_series( res.rename(&name); NuDataFrame::series_to_value(res, operation_span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Division error".into(), + e.to_string(), + right.span()?, + )), } } Operator::Equal => { - let mut res = Series::eq(lhs, rhs).into_series(); + let mut res = Series::equal(lhs, rhs).into_series(); let name = format!("eq_{}_{}", lhs.name(), rhs.name()); res.rename(&name); NuDataFrame::series_to_value(res, operation_span) } Operator::NotEqual => { - let mut res = Series::neq(lhs, rhs).into_series(); + let mut res = Series::not_equal(lhs, rhs).into_series(); let name = format!("neq_{}_{}", lhs.name(), rhs.name()); res.rename(&name); NuDataFrame::series_to_value(res, operation_span) @@ -117,8 +121,10 @@ pub fn compute_between_series( res.rename(&name); NuDataFrame::series_to_value(res, operation_span) } - _ => Err(ShellError::InternalError( + _ => Err(ShellError::SpannedLabeledError( + "Incompatible types".into(), "unable to cast to boolean".into(), + right.span()?, )), } } @@ -142,8 +148,10 @@ pub fn compute_between_series( res.rename(&name); NuDataFrame::series_to_value(res, operation_span) } - _ => Err(ShellError::InternalError( + _ => Err(ShellError::SpannedLabeledError( + "Incompatible types".into(), "unable to cast to boolean".into(), + right.span()?, )), } } @@ -254,9 +262,9 @@ pub fn compute_series_single_value( }), }, Operator::Equal => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span), + Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span), Value::Float { val, .. } => { - compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span) + compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span) } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, @@ -267,9 +275,11 @@ pub fn compute_series_single_value( }), }, Operator::NotEqual => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span), + Value::Int { val, .. } => { + compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span) + } Value::Float { val, .. } => { - compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span) + compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span) } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, @@ -364,17 +374,25 @@ where let casted = series.i64(); compute_casted_i64(casted, val, f, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to i64".into(), + e.to_string(), + span, + )), } } DataType::Int64 => { let casted = series.i64(); compute_casted_i64(casted, val, f, span) } - _ => Err(ShellError::InternalError(format!( - "Series of type {} can not be used for operations with an i64 value", - series.dtype() - ))), + _ => Err(ShellError::SpannedLabeledError( + "Incorrect type".into(), + format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ), + span, + )), } } @@ -393,7 +411,11 @@ where let res = res.into_series(); NuDataFrame::series_to_value(res, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to i64".into(), + e.to_string(), + span, + )), } } @@ -415,17 +437,25 @@ where let casted = series.f64(); compute_casted_f64(casted, val, f, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to f64".into(), + e.to_string(), + span, + )), } } DataType::Float64 => { let casted = series.f64(); compute_casted_f64(casted, val, f, span) } - _ => Err(ShellError::InternalError(format!( - "Series of type {} can not be used for operations with a decimal value", - series.dtype() - ))), + _ => Err(ShellError::SpannedLabeledError( + "Incorrect type".into(), + format!( + "Series of type {} can not be used for operations with a decimal value", + series.dtype() + ), + span, + )), } } @@ -444,7 +474,11 @@ where let res = res.into_series(); NuDataFrame::series_to_value(res, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to f64".into(), + e.to_string(), + span, + )), } } @@ -461,17 +495,25 @@ where let casted = series.i64(); compare_casted_i64(casted, val, f, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to f64".into(), + e.to_string(), + span, + )), } } DataType::Int64 => { let casted = series.i64(); compare_casted_i64(casted, val, f, span) } - _ => Err(ShellError::InternalError(format!( - "Series of type {} can not be used for operations with an i64 value", - series.dtype() - ))), + _ => Err(ShellError::SpannedLabeledError( + "Incorrect type".into(), + format!( + "Series of type {} can not be used for operations with an i64 value", + series.dtype() + ), + span, + )), } } @@ -490,7 +532,11 @@ where let res = res.into_series(); NuDataFrame::series_to_value(res, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to i64".into(), + e.to_string(), + span, + )), } } @@ -512,17 +558,25 @@ where let casted = series.f64(); compare_casted_f64(casted, val, f, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to i64".into(), + e.to_string(), + span, + )), } } DataType::Float64 => { let casted = series.f64(); compare_casted_f64(casted, val, f, span) } - _ => Err(ShellError::InternalError(format!( - "Series of type {} can not be used for operations with a decimal value", - series.dtype() - ))), + _ => Err(ShellError::SpannedLabeledError( + "Incorrect type".into(), + format!( + "Series of type {} can not be used for operations with a decimal value", + series.dtype() + ), + span, + )), } } @@ -541,7 +595,11 @@ where let res = res.into_series(); NuDataFrame::series_to_value(res, span) } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to f64".into(), + e.to_string(), + span, + )), } } @@ -556,9 +614,17 @@ fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Error using contains".into(), + e.to_string(), + span, + )), } } - Err(e) => Err(ShellError::InternalError(e.to_string())), + Err(e) => Err(ShellError::SpannedLabeledError( + "Unable to cast to string".into(), + e.to_string(), + span, + )), } } diff --git a/crates/nu-command/src/dataframe/nu_dataframe/conversion.rs b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/conversion.rs similarity index 83% rename from crates/nu-command/src/dataframe/nu_dataframe/conversion.rs rename to crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/conversion.rs index daccdedbc..5fe78714a 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/conversion.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/conversion.rs @@ -122,69 +122,69 @@ pub fn create_column( Ok(Column::new(series.name().into(), values)) } DataType::UInt8 => { - let casted = series - .u8() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.u8().map_err(|e| { + ShellError::LabeledError("Error casting column to u8".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::UInt16 => { - let casted = series - .u16() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.u16().map_err(|e| { + ShellError::LabeledError("Error casting column to u16".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::UInt32 => { - let casted = series - .u32() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.u32().map_err(|e| { + ShellError::LabeledError("Error casting column to u32".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::UInt64 => { - let casted = series - .u64() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.u64().map_err(|e| { + ShellError::LabeledError("Error casting column to u64".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Int8 => { - let casted = series - .i8() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.i8().map_err(|e| { + ShellError::LabeledError("Error casting column to i8".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Int16 => { - let casted = series - .i16() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.i16().map_err(|e| { + ShellError::LabeledError("Error casting column to i16".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Int32 => { - let casted = series - .i32() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.i32().map_err(|e| { + ShellError::LabeledError("Error casting column to i32".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Int64 => { - let casted = series - .i64() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.i64().map_err(|e| { + ShellError::LabeledError("Error casting column to i64".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Float32 => { - let casted = series - .f32() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.f32().map_err(|e| { + ShellError::LabeledError("Error casting column to f32".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Float64 => { - let casted = series - .f64() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.f64().map_err(|e| { + ShellError::LabeledError("Error casting column to f64".into(), e.to_string()) + })?; Ok(column_from_casted(casted, from_row, size)) } DataType::Boolean => { - let casted = series - .bool() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.bool().map_err(|e| { + ShellError::LabeledError("Error casting column to bool".into(), e.to_string()) + })?; let values = casted .into_iter() @@ -204,9 +204,9 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } DataType::Utf8 => { - let casted = series - .utf8() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.utf8().map_err(|e| { + ShellError::LabeledError("Error casting column to string".into(), e.to_string()) + })?; let values = casted .into_iter() @@ -231,10 +231,10 @@ pub fn create_column( .downcast_ref::>>(); match casted { - None => Err(ShellError::InternalError(format!( - "Object not supported for conversion: {}", - x - ))), + None => Err(ShellError::LabeledError( + "Error casting object from series".into(), + format!("Object not supported for conversion: {}", x), + )), Some(ca) => { let values = ca .into_iter() @@ -253,9 +253,9 @@ pub fn create_column( } } DataType::Date => { - let casted = series - .date() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.date().map_err(|e| { + ShellError::LabeledError("Error casting column to date".into(), e.to_string()) + })?; let values = casted .into_iter() @@ -285,9 +285,9 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } DataType::Datetime => { - let casted = series - .datetime() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.datetime().map_err(|e| { + ShellError::LabeledError("Error casting column to datetime".into(), e.to_string()) + })?; let values = casted .into_iter() @@ -317,9 +317,9 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } DataType::Time => { - let casted = series - .time() - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let casted = series.time().map_err(|e| { + ShellError::LabeledError("Error casting column to time".into(), e.to_string()) + })?; let values = casted .into_iter() @@ -338,10 +338,10 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } - e => Err(ShellError::InternalError(format!( - "Value not supported in nushell: {}", - e - ))), + e => Err(ShellError::LabeledError( + "Error creating Dataframe".into(), + format!("Value not supported in nushell: {}", e), + )), } } @@ -530,8 +530,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result Ok(NuDataFrame::new(df)), - Err(e) => Err(ShellError::InternalError(e.to_string())), - } + DataFrame::new(df_series) + .map(|df| NuDataFrame::new(df)) + .map_err(|e| ShellError::LabeledError("Error creating dataframe".into(), e.to_string())) } diff --git a/crates/nu-command/src/dataframe/nu_dataframe/custom_value.rs b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/custom_value.rs similarity index 100% rename from crates/nu-command/src/dataframe/nu_dataframe/custom_value.rs rename to crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/custom_value.rs diff --git a/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs new file mode 100644 index 000000000..127450603 --- /dev/null +++ b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs @@ -0,0 +1,395 @@ +mod between_values; +mod conversion; +mod custom_value; +mod operations; + +pub(super) use conversion::{Column, ColumnMap}; + +use indexmap::map::IndexMap; +use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; +use polars::prelude::{DataFrame, DataType, PolarsObject, Series}; +use serde::{Deserialize, Serialize}; +use std::{cmp::Ordering, fmt::Display, hash::Hasher}; + +// DataFrameValue is an encapsulation of Nushell Value that can be used +// to define the PolarsObject Trait. The polars object trait allows to +// create dataframes with mixed datatypes +#[derive(Clone, Debug)] +pub struct DataFrameValue(Value); + +impl DataFrameValue { + fn new(value: Value) -> Self { + Self(value) + } + + fn get_value(&self) -> Value { + self.0.clone() + } +} + +impl Display for DataFrameValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.get_type()) + } +} + +impl Default for DataFrameValue { + fn default() -> Self { + Self(Value::Nothing { + span: Span::unknown(), + }) + } +} + +impl PartialEq for DataFrameValue { + fn eq(&self, other: &Self) -> bool { + self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq) + } +} +impl Eq for DataFrameValue {} + +impl std::hash::Hash for DataFrameValue { + fn hash(&self, state: &mut H) { + match &self.0 { + Value::Nothing { .. } => 0.hash(state), + Value::Int { val, .. } => val.hash(state), + Value::String { val, .. } => val.hash(state), + // TODO. Define hash for the rest of types + _ => {} + } + } +} + +impl PolarsObject for DataFrameValue { + fn type_name() -> &'static str { + "object" + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NuDataFrame(DataFrame); + +impl AsRef for NuDataFrame { + fn as_ref(&self) -> &polars::prelude::DataFrame { + &self.0 + } +} + +impl AsMut for NuDataFrame { + fn as_mut(&mut self) -> &mut polars::prelude::DataFrame { + &mut self.0 + } +} + +impl NuDataFrame { + pub fn new(dataframe: DataFrame) -> Self { + Self(dataframe) + } + + fn default_value(span: Span) -> Value { + let dataframe = DataFrame::default(); + NuDataFrame::dataframe_into_value(dataframe, span) + } + + pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value { + Value::CustomValue { + val: Box::new(Self::new(dataframe)), + span, + } + } + + pub fn into_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn series_to_value(series: Series, span: Span) -> Result { + match DataFrame::new(vec![series]) { + Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)), + Err(e) => Err(ShellError::SpannedLabeledError( + "Error creating dataframe".into(), + e.to_string(), + span, + )), + } + } + + pub fn try_from_iter(iter: T) -> Result + where + T: Iterator, + { + // Dictionary to store the columnar data extracted from + // the input. During the iteration we check if the values + // have different type + let mut column_values: ColumnMap = IndexMap::new(); + + for value in iter { + match value { + Value::List { vals, .. } => { + let cols = (0..vals.len()) + .map(|i| format!("{}", i)) + .collect::>(); + + conversion::insert_record(&mut column_values, &cols, &vals)? + } + Value::Record { cols, vals, .. } => { + conversion::insert_record(&mut column_values, &cols, &vals)? + } + _ => { + let key = "0".to_string(); + conversion::insert_value(value, key, &mut column_values)? + } + } + } + + conversion::from_parsed_columns(column_values) + } + + //pub fn try_from_series(columns: Vec) -> Result { + // let dataframe = DataFrame::new(columns) + // .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?; + + // Ok(Self::new(dataframe)) + //} + + pub fn try_from_columns(columns: Vec) -> Result { + let mut column_values: ColumnMap = IndexMap::new(); + + for column in columns { + let name = column.name().to_string(); + for value in column { + conversion::insert_value(value, name.clone(), &mut column_values)?; + } + } + + conversion::from_parsed_columns(column_values) + } + + pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { + match input.into_value(span) { + Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { + Some(df) => Ok(NuDataFrame(df.0.clone())), + None => Err(ShellError::CantConvert( + "Dataframe not found".into(), + "value is not a dataframe".into(), + span, + )), + }, + _ => Err(ShellError::CantConvert( + "Dataframe not found".into(), + "value is not a dataframe".into(), + span, + )), + } + } + + pub fn column(&self, column: &str, span: Span) -> Result { + let s = self.0.column(column).map_err(|_| { + let possibilities = self + .0 + .get_column_names() + .iter() + .map(|name| name.to_string()) + .collect::>(); + + let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string()); + ShellError::DidYouMean(option, span) + })?; + + let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| { + ShellError::SpannedLabeledError("Error creating dataframe".into(), e.to_string(), span) + })?; + + Ok(Self(dataframe)) + } + + pub fn is_series(&self) -> bool { + self.0.width() == 1 + } + + pub fn as_series(&self, span: Span) -> Result { + if !self.is_series() { + return Err(ShellError::SpannedLabeledError( + "Error using as series".into(), + "dataframe has more than one column".into(), + span, + )); + } + + let series = self + .0 + .get_columns() + .get(0) + .expect("We have already checked that the width is 1"); + + Ok(series.clone()) + } + + pub fn get_value(&self, row: usize, span: Span) -> Result { + let series = self.as_series(Span::unknown())?; + let column = conversion::create_column(&series, row, row + 1)?; + + if column.len() == 0 { + Err(ShellError::AccessBeyondEnd(series.len(), span)) + } else { + let value = column + .into_iter() + .next() + .expect("already checked there is a value"); + Ok(value) + } + } + + // Print is made out a head and if the dataframe is too large, then a tail + pub fn print(&self) -> Result, ShellError> { + let df = &self.0; + let size: usize = 20; + + if df.height() > size { + let sample_size = size / 2; + let mut values = self.head(Some(sample_size))?; + conversion::add_separator(&mut values, df); + let remaining = df.height() - sample_size; + let tail_size = remaining.min(sample_size); + let mut tail_values = self.tail(Some(tail_size))?; + values.append(&mut tail_values); + + Ok(values) + } else { + Ok(self.head(Some(size))?) + } + } + + pub fn head(&self, rows: Option) -> Result, ShellError> { + let to_row = rows.unwrap_or(5); + let values = self.to_rows(0, to_row)?; + + Ok(values) + } + + pub fn tail(&self, rows: Option) -> Result, ShellError> { + let df = &self.0; + let to_row = df.height(); + let size = rows.unwrap_or(5); + let from_row = to_row.saturating_sub(size); + + let values = self.to_rows(from_row, to_row)?; + + Ok(values) + } + + pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result, ShellError> { + let df = &self.0; + let upper_row = to_row.min(df.height()); + + let mut size: usize = 0; + let columns = self + .0 + .get_columns() + .iter() + .map( + |col| match conversion::create_column(col, from_row, upper_row) { + Ok(col) => { + size = col.len(); + Ok(col) + } + Err(e) => Err(e), + }, + ) + .collect::, ShellError>>()?; + + let mut iterators = columns + .into_iter() + .map(|col| (col.name().to_string(), col.into_iter())) + .collect::)>>(); + + let values = (0..size) + .into_iter() + .map(|_| { + let mut cols = vec![]; + let mut vals = vec![]; + + for (name, col) in &mut iterators { + cols.push(name.clone()); + + match col.next() { + Some(v) => vals.push(v), + None => vals.push(Value::Nothing { + span: Span::unknown(), + }), + }; + } + + Value::Record { + cols, + vals, + span: Span::unknown(), + } + }) + .collect::>(); + + Ok(values) + } + + // Dataframes are considered equal if they have the same shape, column name and values + pub fn is_equal(&self, other: &Self) -> Option { + if self.as_ref().width() == 0 { + // checking for empty dataframe + return None; + } + + if self.as_ref().get_column_names() != other.as_ref().get_column_names() { + // checking both dataframes share the same names + return None; + } + + if self.as_ref().height() != other.as_ref().height() { + // checking both dataframes have the same row size + return None; + } + + // sorting dataframe by the first column + let column_names = self.as_ref().get_column_names(); + let first_col = column_names + .get(0) + .expect("already checked that dataframe is different than 0"); + + // if unable to sort, then unable to compare + let lhs = match self.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return None, + }; + + let rhs = match other.as_ref().sort(*first_col, false) { + Ok(df) => df, + Err(_) => return None, + }; + + for name in self.as_ref().get_column_names() { + let self_series = lhs.column(name).expect("name from dataframe names"); + + let other_series = rhs + .column(name) + .expect("already checked that name in other"); + + let self_series = match self_series.dtype() { + // Casting needed to compare other numeric types with nushell numeric type. + // In nushell we only have i64 integer numeric types and any array created + // with nushell untagged primitives will be of type i64 + DataType::UInt32 => match self_series.cast(&DataType::Int64) { + Ok(series) => series, + Err(_) => return None, + }, + _ => self_series.clone(), + }; + + if !self_series.series_equal(other_series) { + return None; + } + } + + Some(Ordering::Equal) + } +} diff --git a/crates/nu-command/src/dataframe/nu_dataframe/operations.rs b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/operations.rs similarity index 95% rename from crates/nu-command/src/dataframe/nu_dataframe/operations.rs rename to crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/operations.rs index c37db6c28..7fbc00c2e 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/operations.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/operations.rs @@ -122,7 +122,7 @@ impl NuDataFrame { &self, other: &NuDataFrame, axis: Axis, - _span: Span, + span: Span, ) -> Result { match axis { Axis::Row => { @@ -147,8 +147,13 @@ impl NuDataFrame { }) .collect::>(); - let df_new = DataFrame::new(new_cols) - .map_err(|e| ShellError::InternalError(e.to_string()))?; + let df_new = DataFrame::new(new_cols).map_err(|e| { + ShellError::SpannedLabeledError( + "Error creating dataframe".into(), + e.to_string(), + span, + ) + })?; Ok(NuDataFrame::new(df_new)) } //Axis::Column => { diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs b/crates/nu-command/src/dataframe/nu_dataframe/open.rs similarity index 74% rename from crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs rename to crates/nu-command/src/dataframe/nu_dataframe/open.rs index d30967642..828ae5097 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/commands/open.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/open.rs @@ -1,4 +1,4 @@ -use super::super::NuDataFrame; +use super::nu_dataframe::NuDataFrame; use nu_engine::CallExt; use nu_protocol::{ ast::Call, @@ -22,7 +22,7 @@ impl Command for OpenDataFrame { } fn signature(&self) -> Signature { - Signature::build(self.name().to_string()) + Signature::build(self.name()) .required( "file", SyntaxShape::Filepath, @@ -54,7 +54,7 @@ impl Command for OpenDataFrame { .named( "columns", SyntaxShape::List(Box::new(SyntaxShape::String)), - "Columns to be selected from csv file. CSV file", + "Columns to be selected from csv file. CSV and Parquet file", None, ) .category(Category::Custom("dataframe".into())) @@ -87,7 +87,7 @@ fn command( let span = call.head; let file: Spanned = call.req(engine_state, stack, 0)?; - let df = match file.item.extension() { + match file.item.extension() { Some(e) => match e.to_str() { Some("csv") => from_csv(engine_state, stack, call), Some("parquet") => from_parquet(engine_state, stack, call), @@ -101,11 +101,8 @@ fn command( "File without extension".into(), file.span, )), - }?; - - Ok(PipelineData::Value(NuDataFrame::dataframe_into_value( - df, span, - ))) + } + .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, span), None)) } fn from_parquet( @@ -114,12 +111,25 @@ fn from_parquet( call: &Call, ) -> Result { let file: Spanned = call.req(engine_state, stack, 0)?; - let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; + let columns: Option> = call.get_flag(engine_state, stack, "columns")?; + + let r = File::open(&file.item).map_err(|e| { + ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span) + })?; let reader = ParquetReader::new(r); - reader - .finish() - .map_err(|e| ShellError::InternalError(format!("{:?}", e))) + let reader = match columns { + None => reader, + Some(columns) => reader.with_columns(Some(columns)), + }; + + reader.finish().map_err(|e| { + ShellError::SpannedLabeledError( + "Parquet reader error".into(), + format!("{:?}", e), + call.head, + ) + }) } fn from_json( @@ -129,13 +139,15 @@ fn from_json( ) -> Result { let file: Spanned = call.req(engine_state, stack, 0)?; - let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; + let r = File::open(&file.item).map_err(|e| { + ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span) + })?; let reader = JsonReader::new(r); - reader - .finish() - .map_err(|e| ShellError::InternalError(e.to_string())) + reader.finish().map_err(|e| { + ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head) + }) } fn from_csv( @@ -151,15 +163,23 @@ fn from_csv( let columns: Option> = call.get_flag(engine_state, stack, "columns")?; let csv_reader = CsvReader::from_path(&file.item) - .map_err(|e| ShellError::InternalError(e.to_string()))? + .map_err(|e| { + ShellError::SpannedLabeledError( + "Error creating CSV reader".into(), + e.to_string(), + file.span, + ) + })? .with_encoding(CsvEncoding::LossyUtf8); let csv_reader = match delimiter { None => csv_reader, Some(d) => { if d.item.len() != 1 { - return Err(ShellError::InternalError( - "Delimiter has to be one char".into(), + return Err(ShellError::SpannedLabeledError( + "Incorrect delimiter".into(), + "Delimiter has to be one character".into(), + d.span, )); } else { let delimiter = match d.item.chars().next() { @@ -188,7 +208,11 @@ fn from_csv( Some(columns) => csv_reader.with_columns(Some(columns)), }; - csv_reader - .finish() - .map_err(|e| ShellError::InternalError(e.to_string())) + csv_reader.finish().map_err(|e| { + ShellError::SpannedLabeledError( + "Parquet reader error".into(), + format!("{:?}", e), + call.head, + ) + }) } diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/test_dataframe.rs b/crates/nu-command/src/dataframe/nu_dataframe/test_dataframe.rs similarity index 100% rename from crates/nu-command/src/dataframe/nu_dataframe/commands/test_dataframe.rs rename to crates/nu-command/src/dataframe/nu_dataframe/test_dataframe.rs diff --git a/crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs b/crates/nu-command/src/dataframe/nu_dataframe/to_df.rs similarity index 92% rename from crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs rename to crates/nu-command/src/dataframe/nu_dataframe/to_df.rs index 6ce8845b8..95df4049c 100644 --- a/crates/nu-command/src/dataframe/nu_dataframe/commands/to_df.rs +++ b/crates/nu-command/src/dataframe/nu_dataframe/to_df.rs @@ -1,4 +1,4 @@ -use super::super::{Column, NuDataFrame}; +use super::nu_dataframe::{Column, NuDataFrame}; use nu_protocol::{ ast::Call, @@ -19,7 +19,7 @@ impl Command for ToDataFrame { } fn signature(&self) -> Signature { - Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe".into())) } fn examples(&self) -> Vec { @@ -94,8 +94,8 @@ impl Command for ToDataFrame { call: &Call, input: PipelineData, ) -> Result { - let df = NuDataFrame::try_from_iter(input.into_iter())?; - Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head))) + NuDataFrame::try_from_iter(input.into_iter()) + .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) } } diff --git a/crates/nu-plugin/src/serializers/plugin_call.rs b/crates/nu-plugin/src/serializers/plugin_call.rs index f9e6cdeba..4216712ff 100644 --- a/crates/nu-plugin/src/serializers/plugin_call.rs +++ b/crates/nu-plugin/src/serializers/plugin_call.rs @@ -382,4 +382,25 @@ mod tests { PluginResponse::Value(_) => panic!("returned wrong call type"), } } + + #[test] + fn response_round_trip_error_none() { + let error = LabeledError { + label: "label".into(), + msg: "msg".into(), + span: None, + }; + let response = PluginResponse::Error(error.clone()); + + let mut buffer: Vec = Vec::new(); + encode_response(&response, &mut buffer).expect("unable to serialize message"); + let returned = + decode_response(&mut buffer.as_slice()).expect("unable to deserialize message"); + + match returned { + PluginResponse::Error(msg) => assert_eq!(error, msg), + PluginResponse::Signature(_) => panic!("returned wrong call type"), + PluginResponse::Value(_) => panic!("returned wrong call type"), + } + } }