corrected missing shellerror type (#439)

This commit is contained in:
Fernando Herrera 2021-12-05 13:25:37 +00:00 committed by GitHub
parent 22469a9cb1
commit 29efbee285
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 723 additions and 607 deletions

35
Cargo.lock generated
View File

@ -120,11 +120,10 @@ dependencies = [
[[package]] [[package]]
name = "arrow2" name = "arrow2"
version = "0.7.0" version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395" checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44"
dependencies = [ dependencies = [
"ahash",
"arrow-format", "arrow-format",
"base64", "base64",
"chrono", "chrono",
@ -628,9 +627,9 @@ dependencies = [
[[package]] [[package]]
name = "dirs" name = "dirs"
version = "3.0.2" version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
dependencies = [ dependencies = [
"dirs-sys", "dirs-sys",
] ]
@ -1823,9 +1822,9 @@ dependencies = [
[[package]] [[package]]
name = "parquet2" name = "parquet2"
version = "0.6.0" version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d" checksum = "41051fae4c0fab9040e291b360c6c8037d09d482aa83e94e37f3d080a32a58c3"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"bitpacking", "bitpacking",
@ -1916,9 +1915,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]] [[package]]
name = "polars" name = "polars"
version = "0.17.0" version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598" checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1"
dependencies = [ dependencies = [
"polars-core", "polars-core",
"polars-io", "polars-io",
@ -1927,9 +1926,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-arrow" name = "polars-arrow"
version = "0.17.0" version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99" checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf"
dependencies = [ dependencies = [
"arrow2", "arrow2",
"num 0.4.0", "num 0.4.0",
@ -1938,9 +1937,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-core" name = "polars-core"
version = "0.17.0" version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6" checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
@ -1963,15 +1962,15 @@ dependencies = [
[[package]] [[package]]
name = "polars-io" name = "polars-io"
version = "0.17.0" version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957" checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
"arrow2", "arrow2",
"csv-core", "csv-core",
"dirs 3.0.2", "dirs 4.0.0",
"lazy_static", "lazy_static",
"lexical", "lexical",
"memchr", "memchr",
@ -1987,9 +1986,9 @@ dependencies = [
[[package]] [[package]]
name = "polars-lazy" name = "polars-lazy"
version = "0.17.0" version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61" checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46"
dependencies = [ dependencies = [
"ahash", "ahash",
"itertools", "itertools",

View File

@ -54,7 +54,7 @@ crossterm = "0.22.1"
num = {version="0.4.0", optional=true} num = {version="0.4.0", optional=true}
[dependencies.polars] [dependencies.polars]
version = "0.17.0" version = "0.18.0"
optional = true optional = true
features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"] features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"]

View File

@ -1,3 +1,3 @@
# nu-dataframe # nu-dataframe
The nu-dataframe crate holds the definitions of the dataframe structure The nu-dataframe crate holds the definitions of the dataframe structures and commands

View File

@ -1,3 +1,3 @@
mod nu_dataframe; mod nu_dataframe;
pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame}; pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};

View File

@ -1,12 +0,0 @@
mod describe;
mod dtypes;
mod open;
mod to_df;
pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
#[cfg(test)]
mod test_dataframe;

View File

@ -1,6 +1,4 @@
use crate::dataframe::nu_dataframe::Column; use super::nu_dataframe::{Column, NuDataFrame};
use super::super::NuDataFrame;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -19,7 +17,7 @@ pub struct DescribeDF;
impl Command for DescribeDF { impl Command for DescribeDF {
fn name(&self) -> &str { fn name(&self) -> &str {
"describe" "describe-df"
} }
fn usage(&self) -> &str { fn usage(&self) -> &str {
@ -27,13 +25,13 @@ impl Command for DescribeDF {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) Signature::build(self.name()).category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "dataframe description", description: "dataframe description",
example: "[[a b]; [1 1] [1 1]] | to df | describe", example: "[[a b]; [1 1] [1 1]] | to df | describe-df",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
Column::new( Column::new(
@ -134,13 +132,14 @@ fn command(
.map(|col| { .map(|col| {
let count = col.len() as f64; let count = col.len() as f64;
let sum = match col.sum_as_series().cast(&DataType::Float64) { let sum = col
Ok(ca) => match ca.get(0) { .sum_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
}, });
Err(_) => None,
};
let mean = match col.mean_as_series().get(0) { let mean = match col.mean_as_series().get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
@ -157,54 +156,50 @@ fn command(
_ => None, _ => None,
}; };
let min = match col.min_as_series().cast(&DataType::Float64) { let min = col
Ok(ca) => match ca.get(0) { .min_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
}, });
Err(_) => None,
};
let q_25 = match col.quantile_as_series(0.25) { let q_25 = col
Ok(ca) => match ca.cast(&DataType::Float64) { .quantile_as_series(0.25)
Ok(ca) => match ca.get(0) { .ok()
AnyValue::Float64(v) => Some(v), .and_then(|ca| ca.cast(&DataType::Float64).ok())
_ => None, .and_then(|ca| match ca.get(0) {
},
Err(_) => None,
},
Err(_) => None,
};
let q_50 = match col.quantile_as_series(0.50) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
},
Err(_) => None,
};
let q_75 = match col.quantile_as_series(0.75) {
Ok(ca) => match ca.cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
},
Err(_) => None,
},
Err(_) => None,
};
let max = match col.max_as_series().cast(&DataType::Float64) {
Ok(ca) => match ca.get(0) {
AnyValue::Float64(v) => Some(v), AnyValue::Float64(v) => Some(v),
_ => None, _ => None,
}, });
Err(_) => None,
}; let q_50 = col
.quantile_as_series(0.50)
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let q_75 = col
.quantile_as_series(0.75)
.ok()
.and_then(|ca| ca.cast(&DataType::Float64).ok())
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let max = col
.max_as_series()
.cast(&DataType::Float64)
.ok()
.and_then(|ca| match ca.get(0) {
AnyValue::Float64(v) => Some(v),
_ => None,
});
let name = format!("{} ({})", col.name(), col.dtype()); let name = format!("{} ({})", col.name(), col.dtype());
ChunkedArray::<Float64Type>::new_from_opt_slice( ChunkedArray::<Float64Type>::new_from_opt_slice(
@ -226,12 +221,12 @@ fn command(
}); });
let res = head.chain(tail).collect::<Vec<Series>>(); let res = head.chain(tail).collect::<Vec<Series>>();
let df = DataFrame::new(res).map_err(|e| {
ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head) DataFrame::new(res)
})?; .map_err(|e| {
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value( ShellError::SpannedLabeledError("Dataframe Error".into(), e.to_string(), call.head)
df, call.head, })
))) .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,4 +1,4 @@
use super::super::{Column, NuDataFrame}; use super::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -18,12 +18,12 @@ impl Command for DataTypes {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) Signature::build(self.name()).category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
vec![Example { vec![Example {
description: "drop column a", description: "Dataframe dtypes",
example: "[[a b]; [1 2] [3 4]] | to df | dtypes", example: "[[a b]; [1 2] [3 4]] | to df | dtypes",
result: Some( result: Some(
NuDataFrame::try_from_columns(vec![ NuDataFrame::try_from_columns(vec![
@ -90,8 +90,8 @@ fn command(
let names_col = Column::new("column".to_string(), names); let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes); let dtypes_col = Column::new("dtype".to_string(), dtypes);
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?; NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
Ok(PipelineData::Value(df.into_value(call.head))) .map(|df| PipelineData::Value(df.into_value(call.head), None))
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,390 +1,14 @@
pub mod commands; mod nu_dataframe;
mod between_values; mod describe;
mod conversion; mod dtypes;
mod custom_value; mod open;
mod operations; mod to_df;
use conversion::{Column, ColumnMap}; pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
use indexmap::map::IndexMap; #[cfg(test)]
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; mod test_dataframe;
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);
impl DataFrameValue {
fn new(value: Value) -> Self {
Self(value)
}
fn get_value(&self) -> Value {
self.0.clone()
}
}
impl Display for DataFrameValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0.get_type())
}
}
impl Default for DataFrameValue {
fn default() -> Self {
Self(Value::Nothing {
span: Span::unknown(),
})
}
}
impl PartialEq for DataFrameValue {
fn eq(&self, other: &Self) -> bool {
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
}
}
impl Eq for DataFrameValue {}
impl std::hash::Hash for DataFrameValue {
fn hash<H: Hasher>(&self, state: &mut H) {
match &self.0 {
Value::Nothing { .. } => 0.hash(state),
Value::Int { val, .. } => val.hash(state),
Value::String { val, .. } => val.hash(state),
// TODO. Define hash for the rest of types
_ => {}
}
}
}
impl PolarsObject for DataFrameValue {
fn type_name() -> &'static str {
"object"
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
}
fn default_value(span: Span) -> Value {
let dataframe = DataFrame::default();
NuDataFrame::dataframe_into_value(dataframe, span)
}
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
span,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
match DataFrame::new(vec![series]) {
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
Err(e) => Err(ShellError::InternalError(e.to_string())),
}
}
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value {
Value::List { vals, .. } => {
let cols = (0..vals.len())
.map(|i| format!("{}", i))
.collect::<Vec<String>>();
conversion::insert_record(&mut column_values, &cols, &vals)?
}
Value::Record { cols, vals, .. } => {
conversion::insert_record(&mut column_values, &cols, &vals)?
}
_ => {
let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)?
}
}
}
conversion::from_parsed_columns(column_values)
}
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns)
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
// Ok(Self::new(dataframe))
//}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
let name = column.name().to_string();
for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?;
}
}
conversion::from_parsed_columns(column_values)
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
match input.into_value(span) {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
},
_ => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
}
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let possibilities = self
.0
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()])
.map_err(|e| ShellError::InternalError(e.to_string()))?;
Ok(Self(dataframe))
}
pub fn is_series(&self) -> bool {
self.0.width() == 1
}
pub fn as_series(&self, _span: Span) -> Result<Series, ShellError> {
if !self.is_series() {
return Err(ShellError::InternalError(
"DataFrame cannot be used as Series".into(),
));
}
let series = self
.0
.get_columns()
.get(0)
.expect("We have already checked that the width is 1");
Ok(series.clone())
}
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
let series = self.as_series(Span::unknown())?;
let column = conversion::create_column(&series, row, row + 1)?;
if column.len() == 0 {
Err(ShellError::AccessBeyondEnd(series.len(), span))
} else {
let value = column
.into_iter()
.next()
.expect("already checked there is a value");
Ok(value)
}
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
conversion::add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
}
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let upper_row = to_row.min(df.height());
let mut size: usize = 0;
let columns = self
.0
.get_columns()
.iter()
.map(
|col| match conversion::create_column(col, from_row, upper_row) {
Ok(col) => {
size = col.len();
Ok(col)
}
Err(e) => Err(e),
},
)
.collect::<Result<Vec<Column>, ShellError>>()?;
let mut iterators = columns
.into_iter()
.map(|col| (col.name().to_string(), col.into_iter()))
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
let values = (0..size)
.into_iter()
.map(|_| {
let mut cols = vec![];
let mut vals = vec![];
for (name, col) in &mut iterators {
cols.push(name.clone());
match col.next() {
Some(v) => vals.push(v),
None => vals.push(Value::Nothing {
span: Span::unknown(),
}),
};
}
Value::Record {
cols,
vals,
span: Span::unknown(),
}
})
.collect::<Vec<Value>>();
Ok(values)
}
// Dataframes are considered equal if they have the same shape, column name and values
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return None;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return None;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return None;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series,
Err(_) => return None,
},
_ => self_series.clone(),
};
if !self_series.series_equal(other_series) {
return None;
}
}
Some(Ordering::Equal)
}
}

View File

@ -66,17 +66,21 @@ pub fn compute_between_series(
res.rename(&name); res.rename(&name);
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Division error".into(),
e.to_string(),
right.span()?,
)),
} }
} }
Operator::Equal => { Operator::Equal => {
let mut res = Series::eq(lhs, rhs).into_series(); let mut res = Series::equal(lhs, rhs).into_series();
let name = format!("eq_{}_{}", lhs.name(), rhs.name()); let name = format!("eq_{}_{}", lhs.name(), rhs.name());
res.rename(&name); res.rename(&name);
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
Operator::NotEqual => { Operator::NotEqual => {
let mut res = Series::neq(lhs, rhs).into_series(); let mut res = Series::not_equal(lhs, rhs).into_series();
let name = format!("neq_{}_{}", lhs.name(), rhs.name()); let name = format!("neq_{}_{}", lhs.name(), rhs.name());
res.rename(&name); res.rename(&name);
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
@ -117,8 +121,10 @@ pub fn compute_between_series(
res.rename(&name); res.rename(&name);
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
_ => Err(ShellError::InternalError( _ => Err(ShellError::SpannedLabeledError(
"Incompatible types".into(),
"unable to cast to boolean".into(), "unable to cast to boolean".into(),
right.span()?,
)), )),
} }
} }
@ -142,8 +148,10 @@ pub fn compute_between_series(
res.rename(&name); res.rename(&name);
NuDataFrame::series_to_value(res, operation_span) NuDataFrame::series_to_value(res, operation_span)
} }
_ => Err(ShellError::InternalError( _ => Err(ShellError::SpannedLabeledError(
"Incompatible types".into(),
"unable to cast to boolean".into(), "unable to cast to boolean".into(),
right.span()?,
)), )),
} }
} }
@ -254,9 +262,9 @@ pub fn compute_series_single_value(
}), }),
}, },
Operator::Equal => match &right { Operator::Equal => match &right {
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span), Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span),
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span)
} }
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
@ -267,9 +275,11 @@ pub fn compute_series_single_value(
}), }),
}, },
Operator::NotEqual => match &right { Operator::NotEqual => match &right {
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span), Value::Int { val, .. } => {
compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span)
}
Value::Float { val, .. } => { Value::Float { val, .. } => {
compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span) compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span)
} }
_ => Err(ShellError::OperatorMismatch { _ => Err(ShellError::OperatorMismatch {
op_span: operator.span, op_span: operator.span,
@ -364,17 +374,25 @@ where
let casted = series.i64(); let casted = series.i64();
compute_casted_i64(casted, val, f, span) compute_casted_i64(casted, val, f, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
} }
} }
DataType::Int64 => { DataType::Int64 => {
let casted = series.i64(); let casted = series.i64();
compute_casted_i64(casted, val, f, span) compute_casted_i64(casted, val, f, span)
} }
_ => Err(ShellError::InternalError(format!( _ => Err(ShellError::SpannedLabeledError(
"Series of type {} can not be used for operations with an i64 value", "Incorrect type".into(),
series.dtype() format!(
))), "Series of type {} can not be used for operations with an i64 value",
series.dtype()
),
span,
)),
} }
} }
@ -393,7 +411,11 @@ where
let res = res.into_series(); let res = res.into_series();
NuDataFrame::series_to_value(res, span) NuDataFrame::series_to_value(res, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
} }
} }
@ -415,17 +437,25 @@ where
let casted = series.f64(); let casted = series.f64();
compute_casted_f64(casted, val, f, span) compute_casted_f64(casted, val, f, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
} }
} }
DataType::Float64 => { DataType::Float64 => {
let casted = series.f64(); let casted = series.f64();
compute_casted_f64(casted, val, f, span) compute_casted_f64(casted, val, f, span)
} }
_ => Err(ShellError::InternalError(format!( _ => Err(ShellError::SpannedLabeledError(
"Series of type {} can not be used for operations with a decimal value", "Incorrect type".into(),
series.dtype() format!(
))), "Series of type {} can not be used for operations with a decimal value",
series.dtype()
),
span,
)),
} }
} }
@ -444,7 +474,11 @@ where
let res = res.into_series(); let res = res.into_series();
NuDataFrame::series_to_value(res, span) NuDataFrame::series_to_value(res, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
} }
} }
@ -461,17 +495,25 @@ where
let casted = series.i64(); let casted = series.i64();
compare_casted_i64(casted, val, f, span) compare_casted_i64(casted, val, f, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
} }
} }
DataType::Int64 => { DataType::Int64 => {
let casted = series.i64(); let casted = series.i64();
compare_casted_i64(casted, val, f, span) compare_casted_i64(casted, val, f, span)
} }
_ => Err(ShellError::InternalError(format!( _ => Err(ShellError::SpannedLabeledError(
"Series of type {} can not be used for operations with an i64 value", "Incorrect type".into(),
series.dtype() format!(
))), "Series of type {} can not be used for operations with an i64 value",
series.dtype()
),
span,
)),
} }
} }
@ -490,7 +532,11 @@ where
let res = res.into_series(); let res = res.into_series();
NuDataFrame::series_to_value(res, span) NuDataFrame::series_to_value(res, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
} }
} }
@ -512,17 +558,25 @@ where
let casted = series.f64(); let casted = series.f64();
compare_casted_f64(casted, val, f, span) compare_casted_f64(casted, val, f, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to i64".into(),
e.to_string(),
span,
)),
} }
} }
DataType::Float64 => { DataType::Float64 => {
let casted = series.f64(); let casted = series.f64();
compare_casted_f64(casted, val, f, span) compare_casted_f64(casted, val, f, span)
} }
_ => Err(ShellError::InternalError(format!( _ => Err(ShellError::SpannedLabeledError(
"Series of type {} can not be used for operations with a decimal value", "Incorrect type".into(),
series.dtype() format!(
))), "Series of type {} can not be used for operations with a decimal value",
series.dtype()
),
span,
)),
} }
} }
@ -541,7 +595,11 @@ where
let res = res.into_series(); let res = res.into_series();
NuDataFrame::series_to_value(res, span) NuDataFrame::series_to_value(res, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to f64".into(),
e.to_string(),
span,
)),
} }
} }
@ -556,9 +614,17 @@ fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result<Value,
let res = res.into_series(); let res = res.into_series();
NuDataFrame::series_to_value(res, span) NuDataFrame::series_to_value(res, span)
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Error using contains".into(),
e.to_string(),
span,
)),
} }
} }
Err(e) => Err(ShellError::InternalError(e.to_string())), Err(e) => Err(ShellError::SpannedLabeledError(
"Unable to cast to string".into(),
e.to_string(),
span,
)),
} }
} }

View File

@ -122,69 +122,69 @@ pub fn create_column(
Ok(Column::new(series.name().into(), values)) Ok(Column::new(series.name().into(), values))
} }
DataType::UInt8 => { DataType::UInt8 => {
let casted = series let casted = series.u8().map_err(|e| {
.u8() ShellError::LabeledError("Error casting column to u8".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::UInt16 => { DataType::UInt16 => {
let casted = series let casted = series.u16().map_err(|e| {
.u16() ShellError::LabeledError("Error casting column to u16".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::UInt32 => { DataType::UInt32 => {
let casted = series let casted = series.u32().map_err(|e| {
.u32() ShellError::LabeledError("Error casting column to u32".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::UInt64 => { DataType::UInt64 => {
let casted = series let casted = series.u64().map_err(|e| {
.u64() ShellError::LabeledError("Error casting column to u64".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Int8 => { DataType::Int8 => {
let casted = series let casted = series.i8().map_err(|e| {
.i8() ShellError::LabeledError("Error casting column to i8".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Int16 => { DataType::Int16 => {
let casted = series let casted = series.i16().map_err(|e| {
.i16() ShellError::LabeledError("Error casting column to i16".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Int32 => { DataType::Int32 => {
let casted = series let casted = series.i32().map_err(|e| {
.i32() ShellError::LabeledError("Error casting column to i32".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Int64 => { DataType::Int64 => {
let casted = series let casted = series.i64().map_err(|e| {
.i64() ShellError::LabeledError("Error casting column to i64".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Float32 => { DataType::Float32 => {
let casted = series let casted = series.f32().map_err(|e| {
.f32() ShellError::LabeledError("Error casting column to f32".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Float64 => { DataType::Float64 => {
let casted = series let casted = series.f64().map_err(|e| {
.f64() ShellError::LabeledError("Error casting column to f64".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
Ok(column_from_casted(casted, from_row, size)) Ok(column_from_casted(casted, from_row, size))
} }
DataType::Boolean => { DataType::Boolean => {
let casted = series let casted = series.bool().map_err(|e| {
.bool() ShellError::LabeledError("Error casting column to bool".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
let values = casted let values = casted
.into_iter() .into_iter()
@ -204,9 +204,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
DataType::Utf8 => { DataType::Utf8 => {
let casted = series let casted = series.utf8().map_err(|e| {
.utf8() ShellError::LabeledError("Error casting column to string".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
let values = casted let values = casted
.into_iter() .into_iter()
@ -231,10 +231,10 @@ pub fn create_column(
.downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>(); .downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>();
match casted { match casted {
None => Err(ShellError::InternalError(format!( None => Err(ShellError::LabeledError(
"Object not supported for conversion: {}", "Error casting object from series".into(),
x format!("Object not supported for conversion: {}", x),
))), )),
Some(ca) => { Some(ca) => {
let values = ca let values = ca
.into_iter() .into_iter()
@ -253,9 +253,9 @@ pub fn create_column(
} }
} }
DataType::Date => { DataType::Date => {
let casted = series let casted = series.date().map_err(|e| {
.date() ShellError::LabeledError("Error casting column to date".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
let values = casted let values = casted
.into_iter() .into_iter()
@ -285,9 +285,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
DataType::Datetime => { DataType::Datetime => {
let casted = series let casted = series.datetime().map_err(|e| {
.datetime() ShellError::LabeledError("Error casting column to datetime".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
let values = casted let values = casted
.into_iter() .into_iter()
@ -317,9 +317,9 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
DataType::Time => { DataType::Time => {
let casted = series let casted = series.time().map_err(|e| {
.time() ShellError::LabeledError("Error casting column to time".into(), e.to_string())
.map_err(|e| ShellError::InternalError(e.to_string()))?; })?;
let values = casted let values = casted
.into_iter() .into_iter()
@ -338,10 +338,10 @@ pub fn create_column(
Ok(Column::new(casted.name().into(), values)) Ok(Column::new(casted.name().into(), values))
} }
e => Err(ShellError::InternalError(format!( e => Err(ShellError::LabeledError(
"Value not supported in nushell: {}", "Error creating Dataframe".into(),
e format!("Value not supported in nushell: {}", e),
))), )),
} }
} }
@ -530,8 +530,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
} }
} }
match DataFrame::new(df_series) { DataFrame::new(df_series)
Ok(df) => Ok(NuDataFrame::new(df)), .map(|df| NuDataFrame::new(df))
Err(e) => Err(ShellError::InternalError(e.to_string())), .map_err(|e| ShellError::LabeledError("Error creating dataframe".into(), e.to_string()))
}
} }

View File

@ -0,0 +1,395 @@
mod between_values;
mod conversion;
mod custom_value;
mod operations;
pub(super) use conversion::{Column, ColumnMap};
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);
impl DataFrameValue {
fn new(value: Value) -> Self {
Self(value)
}
fn get_value(&self) -> Value {
self.0.clone()
}
}
impl Display for DataFrameValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0.get_type())
}
}
impl Default for DataFrameValue {
fn default() -> Self {
Self(Value::Nothing {
span: Span::unknown(),
})
}
}
impl PartialEq for DataFrameValue {
fn eq(&self, other: &Self) -> bool {
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
}
}
impl Eq for DataFrameValue {}
impl std::hash::Hash for DataFrameValue {
fn hash<H: Hasher>(&self, state: &mut H) {
match &self.0 {
Value::Nothing { .. } => 0.hash(state),
Value::Int { val, .. } => val.hash(state),
Value::String { val, .. } => val.hash(state),
// TODO. Define hash for the rest of types
_ => {}
}
}
}
impl PolarsObject for DataFrameValue {
fn type_name() -> &'static str {
"object"
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
}
fn default_value(span: Span) -> Value {
let dataframe = DataFrame::default();
NuDataFrame::dataframe_into_value(dataframe, span)
}
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
span,
}
}
pub fn into_value(self, span: Span) -> Value {
Value::CustomValue {
val: Box::new(self),
span,
}
}
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
match DataFrame::new(vec![series]) {
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
Err(e) => Err(ShellError::SpannedLabeledError(
"Error creating dataframe".into(),
e.to_string(),
span,
)),
}
}
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value {
Value::List { vals, .. } => {
let cols = (0..vals.len())
.map(|i| format!("{}", i))
.collect::<Vec<String>>();
conversion::insert_record(&mut column_values, &cols, &vals)?
}
Value::Record { cols, vals, .. } => {
conversion::insert_record(&mut column_values, &cols, &vals)?
}
_ => {
let key = "0".to_string();
conversion::insert_value(value, key, &mut column_values)?
}
}
}
conversion::from_parsed_columns(column_values)
}
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns)
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
// Ok(Self::new(dataframe))
//}
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
let name = column.name().to_string();
for value in column {
conversion::insert_value(value, name.clone(), &mut column_values)?;
}
}
conversion::from_parsed_columns(column_values)
}
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
match input.into_value(span) {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
},
_ => Err(ShellError::CantConvert(
"Dataframe not found".into(),
"value is not a dataframe".into(),
span,
)),
}
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let possibilities = self
.0
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
ShellError::SpannedLabeledError("Error creating dataframe".into(), e.to_string(), span)
})?;
Ok(Self(dataframe))
}
pub fn is_series(&self) -> bool {
self.0.width() == 1
}
pub fn as_series(&self, span: Span) -> Result<Series, ShellError> {
if !self.is_series() {
return Err(ShellError::SpannedLabeledError(
"Error using as series".into(),
"dataframe has more than one column".into(),
span,
));
}
let series = self
.0
.get_columns()
.get(0)
.expect("We have already checked that the width is 1");
Ok(series.clone())
}
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
let series = self.as_series(Span::unknown())?;
let column = conversion::create_column(&series, row, row + 1)?;
if column.len() == 0 {
Err(ShellError::AccessBeyondEnd(series.len(), span))
} else {
let value = column
.into_iter()
.next()
.expect("already checked there is a value");
Ok(value)
}
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
conversion::add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
}
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let to_row = rows.unwrap_or(5);
let values = self.to_rows(0, to_row)?;
Ok(values)
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
Ok(values)
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = &self.0;
let upper_row = to_row.min(df.height());
let mut size: usize = 0;
let columns = self
.0
.get_columns()
.iter()
.map(
|col| match conversion::create_column(col, from_row, upper_row) {
Ok(col) => {
size = col.len();
Ok(col)
}
Err(e) => Err(e),
},
)
.collect::<Result<Vec<Column>, ShellError>>()?;
let mut iterators = columns
.into_iter()
.map(|col| (col.name().to_string(), col.into_iter()))
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
let values = (0..size)
.into_iter()
.map(|_| {
let mut cols = vec![];
let mut vals = vec![];
for (name, col) in &mut iterators {
cols.push(name.clone());
match col.next() {
Some(v) => vals.push(v),
None => vals.push(Value::Nothing {
span: Span::unknown(),
}),
};
}
Value::Record {
cols,
vals,
span: Span::unknown(),
}
})
.collect::<Vec<Value>>();
Ok(values)
}
// Dataframes are considered equal if they have the same shape, column name and values
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return None;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return None;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return None;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return None,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
Ok(series) => series,
Err(_) => return None,
},
_ => self_series.clone(),
};
if !self_series.series_equal(other_series) {
return None;
}
}
Some(Ordering::Equal)
}
}

View File

@ -122,7 +122,7 @@ impl NuDataFrame {
&self, &self,
other: &NuDataFrame, other: &NuDataFrame,
axis: Axis, axis: Axis,
_span: Span, span: Span,
) -> Result<Self, ShellError> { ) -> Result<Self, ShellError> {
match axis { match axis {
Axis::Row => { Axis::Row => {
@ -147,8 +147,13 @@ impl NuDataFrame {
}) })
.collect::<Vec<Series>>(); .collect::<Vec<Series>>();
let df_new = DataFrame::new(new_cols) let df_new = DataFrame::new(new_cols).map_err(|e| {
.map_err(|e| ShellError::InternalError(e.to_string()))?; ShellError::SpannedLabeledError(
"Error creating dataframe".into(),
e.to_string(),
span,
)
})?;
Ok(NuDataFrame::new(df_new)) Ok(NuDataFrame::new(df_new))
} //Axis::Column => { } //Axis::Column => {

View File

@ -1,4 +1,4 @@
use super::super::NuDataFrame; use super::nu_dataframe::NuDataFrame;
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -22,7 +22,7 @@ impl Command for OpenDataFrame {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name().to_string()) Signature::build(self.name())
.required( .required(
"file", "file",
SyntaxShape::Filepath, SyntaxShape::Filepath,
@ -54,7 +54,7 @@ impl Command for OpenDataFrame {
.named( .named(
"columns", "columns",
SyntaxShape::List(Box::new(SyntaxShape::String)), SyntaxShape::List(Box::new(SyntaxShape::String)),
"Columns to be selected from csv file. CSV file", "Columns to be selected from csv file. CSV and Parquet file",
None, None,
) )
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
@ -87,7 +87,7 @@ fn command(
let span = call.head; let span = call.head;
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?; let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let df = match file.item.extension() { match file.item.extension() {
Some(e) => match e.to_str() { Some(e) => match e.to_str() {
Some("csv") => from_csv(engine_state, stack, call), Some("csv") => from_csv(engine_state, stack, call),
Some("parquet") => from_parquet(engine_state, stack, call), Some("parquet") => from_parquet(engine_state, stack, call),
@ -101,11 +101,8 @@ fn command(
"File without extension".into(), "File without extension".into(),
file.span, file.span,
)), )),
}?; }
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, span), None))
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
df, span,
)))
} }
fn from_parquet( fn from_parquet(
@ -114,12 +111,25 @@ fn from_parquet(
call: &Call, call: &Call,
) -> Result<polars::prelude::DataFrame, ShellError> { ) -> Result<polars::prelude::DataFrame, ShellError> {
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?; let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let r = File::open(&file.item).map_err(|e| {
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
})?;
let reader = ParquetReader::new(r); let reader = ParquetReader::new(r);
reader let reader = match columns {
.finish() None => reader,
.map_err(|e| ShellError::InternalError(format!("{:?}", e))) Some(columns) => reader.with_columns(Some(columns)),
};
reader.finish().map_err(|e| {
ShellError::SpannedLabeledError(
"Parquet reader error".into(),
format!("{:?}", e),
call.head,
)
})
} }
fn from_json( fn from_json(
@ -129,13 +139,15 @@ fn from_json(
) -> Result<polars::prelude::DataFrame, ShellError> { ) -> Result<polars::prelude::DataFrame, ShellError> {
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?; let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?; let r = File::open(&file.item).map_err(|e| {
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
})?;
let reader = JsonReader::new(r); let reader = JsonReader::new(r);
reader reader.finish().map_err(|e| {
.finish() ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head)
.map_err(|e| ShellError::InternalError(e.to_string())) })
} }
fn from_csv( fn from_csv(
@ -151,15 +163,23 @@ fn from_csv(
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?; let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
let csv_reader = CsvReader::from_path(&file.item) let csv_reader = CsvReader::from_path(&file.item)
.map_err(|e| ShellError::InternalError(e.to_string()))? .map_err(|e| {
ShellError::SpannedLabeledError(
"Error creating CSV reader".into(),
e.to_string(),
file.span,
)
})?
.with_encoding(CsvEncoding::LossyUtf8); .with_encoding(CsvEncoding::LossyUtf8);
let csv_reader = match delimiter { let csv_reader = match delimiter {
None => csv_reader, None => csv_reader,
Some(d) => { Some(d) => {
if d.item.len() != 1 { if d.item.len() != 1 {
return Err(ShellError::InternalError( return Err(ShellError::SpannedLabeledError(
"Delimiter has to be one char".into(), "Incorrect delimiter".into(),
"Delimiter has to be one character".into(),
d.span,
)); ));
} else { } else {
let delimiter = match d.item.chars().next() { let delimiter = match d.item.chars().next() {
@ -188,7 +208,11 @@ fn from_csv(
Some(columns) => csv_reader.with_columns(Some(columns)), Some(columns) => csv_reader.with_columns(Some(columns)),
}; };
csv_reader csv_reader.finish().map_err(|e| {
.finish() ShellError::SpannedLabeledError(
.map_err(|e| ShellError::InternalError(e.to_string())) "Parquet reader error".into(),
format!("{:?}", e),
call.head,
)
})
} }

View File

@ -1,4 +1,4 @@
use super::super::{Column, NuDataFrame}; use super::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -19,7 +19,7 @@ impl Command for ToDataFrame {
} }
fn signature(&self) -> Signature { fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into())) Signature::build(self.name()).category(Category::Custom("dataframe".into()))
} }
fn examples(&self) -> Vec<Example> { fn examples(&self) -> Vec<Example> {
@ -94,8 +94,8 @@ impl Command for ToDataFrame {
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let df = NuDataFrame::try_from_iter(input.into_iter())?; NuDataFrame::try_from_iter(input.into_iter())
Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head))) .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
} }
} }

View File

@ -382,4 +382,25 @@ mod tests {
PluginResponse::Value(_) => panic!("returned wrong call type"), PluginResponse::Value(_) => panic!("returned wrong call type"),
} }
} }
#[test]
fn response_round_trip_error_none() {
let error = LabeledError {
label: "label".into(),
msg: "msg".into(),
span: None,
};
let response = PluginResponse::Error(error.clone());
let mut buffer: Vec<u8> = Vec::new();
encode_response(&response, &mut buffer).expect("unable to serialize message");
let returned =
decode_response(&mut buffer.as_slice()).expect("unable to deserialize message");
match returned {
PluginResponse::Error(msg) => assert_eq!(error, msg),
PluginResponse::Signature(_) => panic!("returned wrong call type"),
PluginResponse::Value(_) => panic!("returned wrong call type"),
}
}
} }