forked from extern/nushell
corrected missing shellerror type (#439)
This commit is contained in:
parent
22469a9cb1
commit
29efbee285
35
Cargo.lock
generated
35
Cargo.lock
generated
@ -120,11 +120,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.7.0"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395"
|
||||
checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-format",
|
||||
"base64",
|
||||
"chrono",
|
||||
@ -628,9 +627,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "3.0.2"
|
||||
version = "4.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309"
|
||||
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
|
||||
dependencies = [
|
||||
"dirs-sys",
|
||||
]
|
||||
@ -1823,9 +1822,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet2"
|
||||
version = "0.6.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d"
|
||||
checksum = "41051fae4c0fab9040e291b360c6c8037d09d482aa83e94e37f3d080a32a58c3"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"bitpacking",
|
||||
@ -1916,9 +1915,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598"
|
||||
checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1"
|
||||
dependencies = [
|
||||
"polars-core",
|
||||
"polars-io",
|
||||
@ -1927,9 +1926,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-arrow"
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99"
|
||||
checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"num 0.4.0",
|
||||
@ -1938,9 +1937,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-core"
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6"
|
||||
checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
@ -1963,15 +1962,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-io"
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957"
|
||||
checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"anyhow",
|
||||
"arrow2",
|
||||
"csv-core",
|
||||
"dirs 3.0.2",
|
||||
"dirs 4.0.0",
|
||||
"lazy_static",
|
||||
"lexical",
|
||||
"memchr",
|
||||
@ -1987,9 +1986,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "polars-lazy"
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61"
|
||||
checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"itertools",
|
||||
|
@ -54,7 +54,7 @@ crossterm = "0.22.1"
|
||||
num = {version="0.4.0", optional=true}
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.17.0"
|
||||
version = "0.18.0"
|
||||
optional = true
|
||||
features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"]
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
# nu-dataframe
|
||||
|
||||
The nu-dataframe crate holds the definitions of the dataframe structure
|
||||
The nu-dataframe crate holds the definitions of the dataframe structures and commands
|
||||
|
@ -1,3 +1,3 @@
|
||||
mod nu_dataframe;
|
||||
|
||||
pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
|
||||
pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
|
||||
|
@ -1,12 +0,0 @@
|
||||
mod describe;
|
||||
mod dtypes;
|
||||
mod open;
|
||||
mod to_df;
|
||||
|
||||
pub use describe::DescribeDF;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use open::OpenDataFrame;
|
||||
pub use to_df::ToDataFrame;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_dataframe;
|
@ -1,6 +1,4 @@
|
||||
use crate::dataframe::nu_dataframe::Column;
|
||||
|
||||
use super::super::NuDataFrame;
|
||||
use super::nu_dataframe::{Column, NuDataFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
@ -19,7 +17,7 @@ pub struct DescribeDF;
|
||||
|
||||
impl Command for DescribeDF {
|
||||
fn name(&self) -> &str {
|
||||
"describe"
|
||||
"describe-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -27,13 +25,13 @@ impl Command for DescribeDF {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "dataframe description",
|
||||
example: "[[a b]; [1 1] [1 1]] | to df | describe",
|
||||
example: "[[a b]; [1 1] [1 1]] | to df | describe-df",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
@ -134,13 +132,14 @@ fn command(
|
||||
.map(|col| {
|
||||
let count = col.len() as f64;
|
||||
|
||||
let sum = match col.sum_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let sum = col
|
||||
.sum_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let mean = match col.mean_as_series().get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
@ -157,54 +156,50 @@ fn command(
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let min = match col.min_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let min = col
|
||||
.min_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let q_25 = match col.quantile_as_series(0.25) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let q_25 = col
|
||||
.quantile_as_series(0.25)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let q_50 = match col.quantile_as_series(0.50) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let q_50 = col
|
||||
.quantile_as_series(0.50)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let q_75 = match col.quantile_as_series(0.75) {
|
||||
Ok(ca) => match ca.cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let q_75 = col
|
||||
.quantile_as_series(0.75)
|
||||
.ok()
|
||||
.and_then(|ca| ca.cast(&DataType::Float64).ok())
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let max = match col.max_as_series().cast(&DataType::Float64) {
|
||||
Ok(ca) => match ca.get(0) {
|
||||
let max = col
|
||||
.max_as_series()
|
||||
.cast(&DataType::Float64)
|
||||
.ok()
|
||||
.and_then(|ca| match ca.get(0) {
|
||||
AnyValue::Float64(v) => Some(v),
|
||||
_ => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
};
|
||||
});
|
||||
|
||||
let name = format!("{} ({})", col.name(), col.dtype());
|
||||
ChunkedArray::<Float64Type>::new_from_opt_slice(
|
||||
@ -226,12 +221,12 @@ fn command(
|
||||
});
|
||||
|
||||
let res = head.chain(tail).collect::<Vec<Series>>();
|
||||
let df = DataFrame::new(res).map_err(|e| {
|
||||
ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head)
|
||||
})?;
|
||||
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
|
||||
df, call.head,
|
||||
)))
|
||||
|
||||
DataFrame::new(res)
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Dataframe Error".into(), e.to_string(), call.head)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
@ -1,4 +1,4 @@
|
||||
use super::super::{Column, NuDataFrame};
|
||||
use super::nu_dataframe::{Column, NuDataFrame};
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
@ -18,12 +18,12 @@ impl Command for DataTypes {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
description: "Dataframe dtypes",
|
||||
example: "[[a b]; [1 2] [3 4]] | to df | dtypes",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
@ -90,8 +90,8 @@ fn command(
|
||||
let names_col = Column::new("column".to_string(), names);
|
||||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?;
|
||||
Ok(PipelineData::Value(df.into_value(call.head)))
|
||||
NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
|
||||
.map(|df| PipelineData::Value(df.into_value(call.head), None))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
@ -1,390 +1,14 @@
|
||||
pub mod commands;
|
||||
mod nu_dataframe;
|
||||
|
||||
mod between_values;
|
||||
mod conversion;
|
||||
mod custom_value;
|
||||
mod operations;
|
||||
mod describe;
|
||||
mod dtypes;
|
||||
mod open;
|
||||
mod to_df;
|
||||
|
||||
use conversion::{Column, ColumnMap};
|
||||
pub use describe::DescribeDF;
|
||||
pub use dtypes::DataTypes;
|
||||
pub use open::OpenDataFrame;
|
||||
pub use to_df::ToDataFrame;
|
||||
|
||||
use indexmap::map::IndexMap;
|
||||
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
|
||||
// DataFrameValue is an encapsulation of Nushell Value that can be used
|
||||
// to define the PolarsObject Trait. The polars object trait allows to
|
||||
// create dataframes with mixed datatypes
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DataFrameValue(Value);
|
||||
|
||||
impl DataFrameValue {
|
||||
fn new(value: Value) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
|
||||
fn get_value(&self) -> Value {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for DataFrameValue {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0.get_type())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DataFrameValue {
|
||||
fn default() -> Self {
|
||||
Self(Value::Nothing {
|
||||
span: Span::unknown(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for DataFrameValue {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
|
||||
}
|
||||
}
|
||||
impl Eq for DataFrameValue {}
|
||||
|
||||
impl std::hash::Hash for DataFrameValue {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
match &self.0 {
|
||||
Value::Nothing { .. } => 0.hash(state),
|
||||
Value::Int { val, .. } => val.hash(state),
|
||||
Value::String { val, .. } => val.hash(state),
|
||||
// TODO. Define hash for the rest of types
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PolarsObject for DataFrameValue {
|
||||
fn type_name() -> &'static str {
|
||||
"object"
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NuDataFrame(DataFrame);
|
||||
|
||||
impl AsRef<DataFrame> for NuDataFrame {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<DataFrame> for NuDataFrame {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn new(dataframe: DataFrame) -> Self {
|
||||
Self(dataframe)
|
||||
}
|
||||
|
||||
fn default_value(span: Span) -> Value {
|
||||
let dataframe = DataFrame::default();
|
||||
NuDataFrame::dataframe_into_value(dataframe, span)
|
||||
}
|
||||
|
||||
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(Self::new(dataframe)),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
|
||||
match DataFrame::new(vec![series]) {
|
||||
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
// Dictionary to store the columnar data extracted from
|
||||
// the input. During the iteration we check if the values
|
||||
// have different type
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for value in iter {
|
||||
match value {
|
||||
Value::List { vals, .. } => {
|
||||
let cols = (0..vals.len())
|
||||
.map(|i| format!("{}", i))
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
conversion::insert_record(&mut column_values, &cols, &vals)?
|
||||
}
|
||||
Value::Record { cols, vals, .. } => {
|
||||
conversion::insert_record(&mut column_values, &cols, &vals)?
|
||||
}
|
||||
_ => {
|
||||
let key = "0".to_string();
|
||||
conversion::insert_value(value, key, &mut column_values)?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
conversion::from_parsed_columns(column_values)
|
||||
}
|
||||
|
||||
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
|
||||
// let dataframe = DataFrame::new(columns)
|
||||
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
|
||||
|
||||
// Ok(Self::new(dataframe))
|
||||
//}
|
||||
|
||||
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for column in columns {
|
||||
let name = column.name().to_string();
|
||||
for value in column {
|
||||
conversion::insert_value(value, name.clone(), &mut column_values)?;
|
||||
}
|
||||
}
|
||||
|
||||
conversion::from_parsed_columns(column_values)
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
match input.into_value(span) {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
|
||||
Some(df) => Ok(NuDataFrame(df.0.clone())),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"Dataframe not found".into(),
|
||||
"value is not a dataframe".into(),
|
||||
span,
|
||||
)),
|
||||
},
|
||||
_ => Err(ShellError::CantConvert(
|
||||
"Dataframe not found".into(),
|
||||
"value is not a dataframe".into(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
|
||||
let s = self.0.column(column).map_err(|_| {
|
||||
let possibilities = self
|
||||
.0
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|name| name.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
|
||||
ShellError::DidYouMean(option, span)
|
||||
})?;
|
||||
|
||||
let dataframe = DataFrame::new(vec![s.clone()])
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
|
||||
Ok(Self(dataframe))
|
||||
}
|
||||
|
||||
pub fn is_series(&self) -> bool {
|
||||
self.0.width() == 1
|
||||
}
|
||||
|
||||
pub fn as_series(&self, _span: Span) -> Result<Series, ShellError> {
|
||||
if !self.is_series() {
|
||||
return Err(ShellError::InternalError(
|
||||
"DataFrame cannot be used as Series".into(),
|
||||
));
|
||||
}
|
||||
|
||||
let series = self
|
||||
.0
|
||||
.get_columns()
|
||||
.get(0)
|
||||
.expect("We have already checked that the width is 1");
|
||||
|
||||
Ok(series.clone())
|
||||
}
|
||||
|
||||
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
|
||||
let series = self.as_series(Span::unknown())?;
|
||||
let column = conversion::create_column(&series, row, row + 1)?;
|
||||
|
||||
if column.len() == 0 {
|
||||
Err(ShellError::AccessBeyondEnd(series.len(), span))
|
||||
} else {
|
||||
let value = column
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("already checked there is a value");
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
// Print is made out a head and if the dataframe is too large, then a tail
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let size: usize = 20;
|
||||
|
||||
if df.height() > size {
|
||||
let sample_size = size / 2;
|
||||
let mut values = self.head(Some(sample_size))?;
|
||||
conversion::add_separator(&mut values, df);
|
||||
let remaining = df.height() - sample_size;
|
||||
let tail_size = remaining.min(sample_size);
|
||||
let mut tail_values = self.tail(Some(tail_size))?;
|
||||
values.append(&mut tail_values);
|
||||
|
||||
Ok(values)
|
||||
} else {
|
||||
Ok(self.head(Some(size))?)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
|
||||
let to_row = rows.unwrap_or(5);
|
||||
let values = self.to_rows(0, to_row)?;
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let to_row = df.height();
|
||||
let size = rows.unwrap_or(5);
|
||||
let from_row = to_row.saturating_sub(size);
|
||||
|
||||
let values = self.to_rows(from_row, to_row)?;
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let upper_row = to_row.min(df.height());
|
||||
|
||||
let mut size: usize = 0;
|
||||
let columns = self
|
||||
.0
|
||||
.get_columns()
|
||||
.iter()
|
||||
.map(
|
||||
|col| match conversion::create_column(col, from_row, upper_row) {
|
||||
Ok(col) => {
|
||||
size = col.len();
|
||||
Ok(col)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
)
|
||||
.collect::<Result<Vec<Column>, ShellError>>()?;
|
||||
|
||||
let mut iterators = columns
|
||||
.into_iter()
|
||||
.map(|col| (col.name().to_string(), col.into_iter()))
|
||||
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
|
||||
|
||||
let values = (0..size)
|
||||
.into_iter()
|
||||
.map(|_| {
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
|
||||
for (name, col) in &mut iterators {
|
||||
cols.push(name.clone());
|
||||
|
||||
match col.next() {
|
||||
Some(v) => vals.push(v),
|
||||
None => vals.push(Value::Nothing {
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals,
|
||||
span: Span::unknown(),
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
// Dataframes are considered equal if they have the same shape, column name and values
|
||||
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.as_ref().width() == 0 {
|
||||
// checking for empty dataframe
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
|
||||
// checking both dataframes share the same names
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().height() != other.as_ref().height() {
|
||||
// checking both dataframes have the same row size
|
||||
return None;
|
||||
}
|
||||
|
||||
// sorting dataframe by the first column
|
||||
let column_names = self.as_ref().get_column_names();
|
||||
let first_col = column_names
|
||||
.get(0)
|
||||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
for name in self.as_ref().get_column_names() {
|
||||
let self_series = lhs.column(name).expect("name from dataframe names");
|
||||
|
||||
let other_series = rhs
|
||||
.column(name)
|
||||
.expect("already checked that name in other");
|
||||
|
||||
let self_series = match self_series.dtype() {
|
||||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return None,
|
||||
},
|
||||
_ => self_series.clone(),
|
||||
};
|
||||
|
||||
if !self_series.series_equal(other_series) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod test_dataframe;
|
||||
|
@ -66,17 +66,21 @@ pub fn compute_between_series(
|
||||
res.rename(&name);
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Division error".into(),
|
||||
e.to_string(),
|
||||
right.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
Operator::Equal => {
|
||||
let mut res = Series::eq(lhs, rhs).into_series();
|
||||
let mut res = Series::equal(lhs, rhs).into_series();
|
||||
let name = format!("eq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
Operator::NotEqual => {
|
||||
let mut res = Series::neq(lhs, rhs).into_series();
|
||||
let mut res = Series::not_equal(lhs, rhs).into_series();
|
||||
let name = format!("neq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(&name);
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
@ -117,8 +121,10 @@ pub fn compute_between_series(
|
||||
res.rename(&name);
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incompatible types".into(),
|
||||
"unable to cast to boolean".into(),
|
||||
right.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
@ -142,8 +148,10 @@ pub fn compute_between_series(
|
||||
res.rename(&name);
|
||||
NuDataFrame::series_to_value(res, operation_span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incompatible types".into(),
|
||||
"unable to cast to boolean".into(),
|
||||
right.span()?,
|
||||
)),
|
||||
}
|
||||
}
|
||||
@ -254,9 +262,9 @@ pub fn compute_series_single_value(
|
||||
}),
|
||||
},
|
||||
Operator::Equal => match &right {
|
||||
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span),
|
||||
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span),
|
||||
Value::Float { val, .. } => {
|
||||
compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span)
|
||||
compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span)
|
||||
}
|
||||
_ => Err(ShellError::OperatorMismatch {
|
||||
op_span: operator.span,
|
||||
@ -267,9 +275,11 @@ pub fn compute_series_single_value(
|
||||
}),
|
||||
},
|
||||
Operator::NotEqual => match &right {
|
||||
Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span),
|
||||
Value::Int { val, .. } => {
|
||||
compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span)
|
||||
}
|
||||
Value::Float { val, .. } => {
|
||||
compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span)
|
||||
compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span)
|
||||
}
|
||||
_ => Err(ShellError::OperatorMismatch {
|
||||
op_span: operator.span,
|
||||
@ -364,17 +374,25 @@ where
|
||||
let casted = series.i64();
|
||||
compute_casted_i64(casted, val, f, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to i64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let casted = series.i64();
|
||||
compute_casted_i64(casted, val, f, span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(format!(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect type".into(),
|
||||
format!(
|
||||
"Series of type {} can not be used for operations with an i64 value",
|
||||
series.dtype()
|
||||
))),
|
||||
),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -393,7 +411,11 @@ where
|
||||
let res = res.into_series();
|
||||
NuDataFrame::series_to_value(res, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to i64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -415,17 +437,25 @@ where
|
||||
let casted = series.f64();
|
||||
compute_casted_f64(casted, val, f, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to f64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let casted = series.f64();
|
||||
compute_casted_f64(casted, val, f, span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(format!(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect type".into(),
|
||||
format!(
|
||||
"Series of type {} can not be used for operations with a decimal value",
|
||||
series.dtype()
|
||||
))),
|
||||
),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -444,7 +474,11 @@ where
|
||||
let res = res.into_series();
|
||||
NuDataFrame::series_to_value(res, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to f64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -461,17 +495,25 @@ where
|
||||
let casted = series.i64();
|
||||
compare_casted_i64(casted, val, f, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to f64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let casted = series.i64();
|
||||
compare_casted_i64(casted, val, f, span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(format!(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect type".into(),
|
||||
format!(
|
||||
"Series of type {} can not be used for operations with an i64 value",
|
||||
series.dtype()
|
||||
))),
|
||||
),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -490,7 +532,11 @@ where
|
||||
let res = res.into_series();
|
||||
NuDataFrame::series_to_value(res, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to i64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -512,17 +558,25 @@ where
|
||||
let casted = series.f64();
|
||||
compare_casted_f64(casted, val, f, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to i64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let casted = series.f64();
|
||||
compare_casted_f64(casted, val, f, span)
|
||||
}
|
||||
_ => Err(ShellError::InternalError(format!(
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect type".into(),
|
||||
format!(
|
||||
"Series of type {} can not be used for operations with a decimal value",
|
||||
series.dtype()
|
||||
))),
|
||||
),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -541,7 +595,11 @@ where
|
||||
let res = res.into_series();
|
||||
NuDataFrame::series_to_value(res, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to f64".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -556,9 +614,17 @@ fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result<Value,
|
||||
let res = res.into_series();
|
||||
NuDataFrame::series_to_value(res, span)
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Error using contains".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Unable to cast to string".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
@ -122,69 +122,69 @@ pub fn create_column(
|
||||
Ok(Column::new(series.name().into(), values))
|
||||
}
|
||||
DataType::UInt8 => {
|
||||
let casted = series
|
||||
.u8()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.u8().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to u8".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::UInt16 => {
|
||||
let casted = series
|
||||
.u16()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.u16().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to u16".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::UInt32 => {
|
||||
let casted = series
|
||||
.u32()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.u32().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to u32".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::UInt64 => {
|
||||
let casted = series
|
||||
.u64()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.u64().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to u64".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Int8 => {
|
||||
let casted = series
|
||||
.i8()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.i8().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to i8".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Int16 => {
|
||||
let casted = series
|
||||
.i16()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.i16().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to i16".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Int32 => {
|
||||
let casted = series
|
||||
.i32()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.i32().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to i32".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Int64 => {
|
||||
let casted = series
|
||||
.i64()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.i64().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to i64".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Float32 => {
|
||||
let casted = series
|
||||
.f32()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.f32().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to f32".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let casted = series
|
||||
.f64()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.f64().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to f64".into(), e.to_string())
|
||||
})?;
|
||||
Ok(column_from_casted(casted, from_row, size))
|
||||
}
|
||||
DataType::Boolean => {
|
||||
let casted = series
|
||||
.bool()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.bool().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to bool".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
let values = casted
|
||||
.into_iter()
|
||||
@ -204,9 +204,9 @@ pub fn create_column(
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Utf8 => {
|
||||
let casted = series
|
||||
.utf8()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.utf8().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to string".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
let values = casted
|
||||
.into_iter()
|
||||
@ -231,10 +231,10 @@ pub fn create_column(
|
||||
.downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>();
|
||||
|
||||
match casted {
|
||||
None => Err(ShellError::InternalError(format!(
|
||||
"Object not supported for conversion: {}",
|
||||
x
|
||||
))),
|
||||
None => Err(ShellError::LabeledError(
|
||||
"Error casting object from series".into(),
|
||||
format!("Object not supported for conversion: {}", x),
|
||||
)),
|
||||
Some(ca) => {
|
||||
let values = ca
|
||||
.into_iter()
|
||||
@ -253,9 +253,9 @@ pub fn create_column(
|
||||
}
|
||||
}
|
||||
DataType::Date => {
|
||||
let casted = series
|
||||
.date()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.date().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to date".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
let values = casted
|
||||
.into_iter()
|
||||
@ -285,9 +285,9 @@ pub fn create_column(
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Datetime => {
|
||||
let casted = series
|
||||
.datetime()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.datetime().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to datetime".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
let values = casted
|
||||
.into_iter()
|
||||
@ -317,9 +317,9 @@ pub fn create_column(
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
DataType::Time => {
|
||||
let casted = series
|
||||
.time()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let casted = series.time().map_err(|e| {
|
||||
ShellError::LabeledError("Error casting column to time".into(), e.to_string())
|
||||
})?;
|
||||
|
||||
let values = casted
|
||||
.into_iter()
|
||||
@ -338,10 +338,10 @@ pub fn create_column(
|
||||
|
||||
Ok(Column::new(casted.name().into(), values))
|
||||
}
|
||||
e => Err(ShellError::InternalError(format!(
|
||||
"Value not supported in nushell: {}",
|
||||
e
|
||||
))),
|
||||
e => Err(ShellError::LabeledError(
|
||||
"Error creating Dataframe".into(),
|
||||
format!("Value not supported in nushell: {}", e),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -530,8 +530,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
|
||||
}
|
||||
}
|
||||
|
||||
match DataFrame::new(df_series) {
|
||||
Ok(df) => Ok(NuDataFrame::new(df)),
|
||||
Err(e) => Err(ShellError::InternalError(e.to_string())),
|
||||
}
|
||||
DataFrame::new(df_series)
|
||||
.map(|df| NuDataFrame::new(df))
|
||||
.map_err(|e| ShellError::LabeledError("Error creating dataframe".into(), e.to_string()))
|
||||
}
|
395
crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs
Normal file
395
crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs
Normal file
@ -0,0 +1,395 @@
|
||||
mod between_values;
|
||||
mod conversion;
|
||||
mod custom_value;
|
||||
mod operations;
|
||||
|
||||
pub(super) use conversion::{Column, ColumnMap};
|
||||
|
||||
use indexmap::map::IndexMap;
|
||||
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
|
||||
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
|
||||
|
||||
// DataFrameValue is an encapsulation of Nushell Value that can be used
|
||||
// to define the PolarsObject Trait. The polars object trait allows to
|
||||
// create dataframes with mixed datatypes
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DataFrameValue(Value);
|
||||
|
||||
impl DataFrameValue {
|
||||
fn new(value: Value) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
|
||||
fn get_value(&self) -> Value {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for DataFrameValue {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0.get_type())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DataFrameValue {
|
||||
fn default() -> Self {
|
||||
Self(Value::Nothing {
|
||||
span: Span::unknown(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for DataFrameValue {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
|
||||
}
|
||||
}
|
||||
impl Eq for DataFrameValue {}
|
||||
|
||||
impl std::hash::Hash for DataFrameValue {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
match &self.0 {
|
||||
Value::Nothing { .. } => 0.hash(state),
|
||||
Value::Int { val, .. } => val.hash(state),
|
||||
Value::String { val, .. } => val.hash(state),
|
||||
// TODO. Define hash for the rest of types
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PolarsObject for DataFrameValue {
|
||||
fn type_name() -> &'static str {
|
||||
"object"
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct NuDataFrame(DataFrame);
|
||||
|
||||
impl AsRef<DataFrame> for NuDataFrame {
|
||||
fn as_ref(&self) -> &polars::prelude::DataFrame {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<DataFrame> for NuDataFrame {
|
||||
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl NuDataFrame {
|
||||
pub fn new(dataframe: DataFrame) -> Self {
|
||||
Self(dataframe)
|
||||
}
|
||||
|
||||
fn default_value(span: Span) -> Value {
|
||||
let dataframe = DataFrame::default();
|
||||
NuDataFrame::dataframe_into_value(dataframe, span)
|
||||
}
|
||||
|
||||
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(Self::new(dataframe)),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_value(self, span: Span) -> Value {
|
||||
Value::CustomValue {
|
||||
val: Box::new(self),
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
|
||||
match DataFrame::new(vec![series]) {
|
||||
Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
|
||||
Err(e) => Err(ShellError::SpannedLabeledError(
|
||||
"Error creating dataframe".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
// Dictionary to store the columnar data extracted from
|
||||
// the input. During the iteration we check if the values
|
||||
// have different type
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for value in iter {
|
||||
match value {
|
||||
Value::List { vals, .. } => {
|
||||
let cols = (0..vals.len())
|
||||
.map(|i| format!("{}", i))
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
conversion::insert_record(&mut column_values, &cols, &vals)?
|
||||
}
|
||||
Value::Record { cols, vals, .. } => {
|
||||
conversion::insert_record(&mut column_values, &cols, &vals)?
|
||||
}
|
||||
_ => {
|
||||
let key = "0".to_string();
|
||||
conversion::insert_value(value, key, &mut column_values)?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
conversion::from_parsed_columns(column_values)
|
||||
}
|
||||
|
||||
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
|
||||
// let dataframe = DataFrame::new(columns)
|
||||
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
|
||||
|
||||
// Ok(Self::new(dataframe))
|
||||
//}
|
||||
|
||||
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for column in columns {
|
||||
let name = column.name().to_string();
|
||||
for value in column {
|
||||
conversion::insert_value(value, name.clone(), &mut column_values)?;
|
||||
}
|
||||
}
|
||||
|
||||
conversion::from_parsed_columns(column_values)
|
||||
}
|
||||
|
||||
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
|
||||
match input.into_value(span) {
|
||||
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
|
||||
Some(df) => Ok(NuDataFrame(df.0.clone())),
|
||||
None => Err(ShellError::CantConvert(
|
||||
"Dataframe not found".into(),
|
||||
"value is not a dataframe".into(),
|
||||
span,
|
||||
)),
|
||||
},
|
||||
_ => Err(ShellError::CantConvert(
|
||||
"Dataframe not found".into(),
|
||||
"value is not a dataframe".into(),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
|
||||
let s = self.0.column(column).map_err(|_| {
|
||||
let possibilities = self
|
||||
.0
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|name| name.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
|
||||
ShellError::DidYouMean(option, span)
|
||||
})?;
|
||||
|
||||
let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error creating dataframe".into(), e.to_string(), span)
|
||||
})?;
|
||||
|
||||
Ok(Self(dataframe))
|
||||
}
|
||||
|
||||
pub fn is_series(&self) -> bool {
|
||||
self.0.width() == 1
|
||||
}
|
||||
|
||||
pub fn as_series(&self, span: Span) -> Result<Series, ShellError> {
|
||||
if !self.is_series() {
|
||||
return Err(ShellError::SpannedLabeledError(
|
||||
"Error using as series".into(),
|
||||
"dataframe has more than one column".into(),
|
||||
span,
|
||||
));
|
||||
}
|
||||
|
||||
let series = self
|
||||
.0
|
||||
.get_columns()
|
||||
.get(0)
|
||||
.expect("We have already checked that the width is 1");
|
||||
|
||||
Ok(series.clone())
|
||||
}
|
||||
|
||||
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
|
||||
let series = self.as_series(Span::unknown())?;
|
||||
let column = conversion::create_column(&series, row, row + 1)?;
|
||||
|
||||
if column.len() == 0 {
|
||||
Err(ShellError::AccessBeyondEnd(series.len(), span))
|
||||
} else {
|
||||
let value = column
|
||||
.into_iter()
|
||||
.next()
|
||||
.expect("already checked there is a value");
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
// Print is made out a head and if the dataframe is too large, then a tail
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let size: usize = 20;
|
||||
|
||||
if df.height() > size {
|
||||
let sample_size = size / 2;
|
||||
let mut values = self.head(Some(sample_size))?;
|
||||
conversion::add_separator(&mut values, df);
|
||||
let remaining = df.height() - sample_size;
|
||||
let tail_size = remaining.min(sample_size);
|
||||
let mut tail_values = self.tail(Some(tail_size))?;
|
||||
values.append(&mut tail_values);
|
||||
|
||||
Ok(values)
|
||||
} else {
|
||||
Ok(self.head(Some(size))?)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
|
||||
let to_row = rows.unwrap_or(5);
|
||||
let values = self.to_rows(0, to_row)?;
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let to_row = df.height();
|
||||
let size = rows.unwrap_or(5);
|
||||
let from_row = to_row.saturating_sub(size);
|
||||
|
||||
let values = self.to_rows(from_row, to_row)?;
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.0;
|
||||
let upper_row = to_row.min(df.height());
|
||||
|
||||
let mut size: usize = 0;
|
||||
let columns = self
|
||||
.0
|
||||
.get_columns()
|
||||
.iter()
|
||||
.map(
|
||||
|col| match conversion::create_column(col, from_row, upper_row) {
|
||||
Ok(col) => {
|
||||
size = col.len();
|
||||
Ok(col)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
)
|
||||
.collect::<Result<Vec<Column>, ShellError>>()?;
|
||||
|
||||
let mut iterators = columns
|
||||
.into_iter()
|
||||
.map(|col| (col.name().to_string(), col.into_iter()))
|
||||
.collect::<Vec<(String, std::vec::IntoIter<Value>)>>();
|
||||
|
||||
let values = (0..size)
|
||||
.into_iter()
|
||||
.map(|_| {
|
||||
let mut cols = vec![];
|
||||
let mut vals = vec![];
|
||||
|
||||
for (name, col) in &mut iterators {
|
||||
cols.push(name.clone());
|
||||
|
||||
match col.next() {
|
||||
Some(v) => vals.push(v),
|
||||
None => vals.push(Value::Nothing {
|
||||
span: Span::unknown(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
Value::Record {
|
||||
cols,
|
||||
vals,
|
||||
span: Span::unknown(),
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
// Dataframes are considered equal if they have the same shape, column name and values
|
||||
pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
|
||||
if self.as_ref().width() == 0 {
|
||||
// checking for empty dataframe
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
|
||||
// checking both dataframes share the same names
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.as_ref().height() != other.as_ref().height() {
|
||||
// checking both dataframes have the same row size
|
||||
return None;
|
||||
}
|
||||
|
||||
// sorting dataframe by the first column
|
||||
let column_names = self.as_ref().get_column_names();
|
||||
let first_col = column_names
|
||||
.get(0)
|
||||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
for name in self.as_ref().get_column_names() {
|
||||
let self_series = lhs.column(name).expect("name from dataframe names");
|
||||
|
||||
let other_series = rhs
|
||||
.column(name)
|
||||
.expect("already checked that name in other");
|
||||
|
||||
let self_series = match self_series.dtype() {
|
||||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return None,
|
||||
},
|
||||
_ => self_series.clone(),
|
||||
};
|
||||
|
||||
if !self_series.series_equal(other_series) {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
@ -122,7 +122,7 @@ impl NuDataFrame {
|
||||
&self,
|
||||
other: &NuDataFrame,
|
||||
axis: Axis,
|
||||
_span: Span,
|
||||
span: Span,
|
||||
) -> Result<Self, ShellError> {
|
||||
match axis {
|
||||
Axis::Row => {
|
||||
@ -147,8 +147,13 @@ impl NuDataFrame {
|
||||
})
|
||||
.collect::<Vec<Series>>();
|
||||
|
||||
let df_new = DataFrame::new(new_cols)
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let df_new = DataFrame::new(new_cols).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating dataframe".into(),
|
||||
e.to_string(),
|
||||
span,
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(NuDataFrame::new(df_new))
|
||||
} //Axis::Column => {
|
@ -1,4 +1,4 @@
|
||||
use super::super::NuDataFrame;
|
||||
use super::nu_dataframe::NuDataFrame;
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
@ -22,7 +22,7 @@ impl Command for OpenDataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string())
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"file",
|
||||
SyntaxShape::Filepath,
|
||||
@ -54,7 +54,7 @@ impl Command for OpenDataFrame {
|
||||
.named(
|
||||
"columns",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::String)),
|
||||
"Columns to be selected from csv file. CSV file",
|
||||
"Columns to be selected from csv file. CSV and Parquet file",
|
||||
None,
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
@ -87,7 +87,7 @@ fn command(
|
||||
let span = call.head;
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let df = match file.item.extension() {
|
||||
match file.item.extension() {
|
||||
Some(e) => match e.to_str() {
|
||||
Some("csv") => from_csv(engine_state, stack, call),
|
||||
Some("parquet") => from_parquet(engine_state, stack, call),
|
||||
@ -101,11 +101,8 @@ fn command(
|
||||
"File without extension".into(),
|
||||
file.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
|
||||
df, span,
|
||||
)))
|
||||
}
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, span), None))
|
||||
}
|
||||
|
||||
fn from_parquet(
|
||||
@ -114,12 +111,25 @@ fn from_parquet(
|
||||
call: &Call,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
|
||||
})?;
|
||||
let reader = ParquetReader::new(r);
|
||||
|
||||
reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(format!("{:?}", e)))
|
||||
let reader = match columns {
|
||||
None => reader,
|
||||
Some(columns) => reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
reader.finish().map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{:?}", e),
|
||||
call.head,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn from_json(
|
||||
@ -129,13 +139,15 @@ fn from_json(
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
|
||||
let r = File::open(&file.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
|
||||
})?;
|
||||
|
||||
let reader = JsonReader::new(r);
|
||||
|
||||
reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))
|
||||
reader.finish().map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head)
|
||||
})
|
||||
}
|
||||
|
||||
fn from_csv(
|
||||
@ -151,15 +163,23 @@ fn from_csv(
|
||||
let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
|
||||
|
||||
let csv_reader = CsvReader::from_path(&file.item)
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))?
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error creating CSV reader".into(),
|
||||
e.to_string(),
|
||||
file.span,
|
||||
)
|
||||
})?
|
||||
.with_encoding(CsvEncoding::LossyUtf8);
|
||||
|
||||
let csv_reader = match delimiter {
|
||||
None => csv_reader,
|
||||
Some(d) => {
|
||||
if d.item.len() != 1 {
|
||||
return Err(ShellError::InternalError(
|
||||
"Delimiter has to be one char".into(),
|
||||
return Err(ShellError::SpannedLabeledError(
|
||||
"Incorrect delimiter".into(),
|
||||
"Delimiter has to be one character".into(),
|
||||
d.span,
|
||||
));
|
||||
} else {
|
||||
let delimiter = match d.item.chars().next() {
|
||||
@ -188,7 +208,11 @@ fn from_csv(
|
||||
Some(columns) => csv_reader.with_columns(Some(columns)),
|
||||
};
|
||||
|
||||
csv_reader
|
||||
.finish()
|
||||
.map_err(|e| ShellError::InternalError(e.to_string()))
|
||||
csv_reader.finish().map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Parquet reader error".into(),
|
||||
format!("{:?}", e),
|
||||
call.head,
|
||||
)
|
||||
})
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
use super::super::{Column, NuDataFrame};
|
||||
use super::nu_dataframe::{Column, NuDataFrame};
|
||||
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
@ -19,7 +19,7 @@ impl Command for ToDataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
|
||||
Signature::build(self.name()).category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
@ -94,8 +94,8 @@ impl Command for ToDataFrame {
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let df = NuDataFrame::try_from_iter(input.into_iter())?;
|
||||
Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head)))
|
||||
NuDataFrame::try_from_iter(input.into_iter())
|
||||
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
|
||||
}
|
||||
}
|
||||
|
@ -382,4 +382,25 @@ mod tests {
|
||||
PluginResponse::Value(_) => panic!("returned wrong call type"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn response_round_trip_error_none() {
|
||||
let error = LabeledError {
|
||||
label: "label".into(),
|
||||
msg: "msg".into(),
|
||||
span: None,
|
||||
};
|
||||
let response = PluginResponse::Error(error.clone());
|
||||
|
||||
let mut buffer: Vec<u8> = Vec::new();
|
||||
encode_response(&response, &mut buffer).expect("unable to serialize message");
|
||||
let returned =
|
||||
decode_response(&mut buffer.as_slice()).expect("unable to deserialize message");
|
||||
|
||||
match returned {
|
||||
PluginResponse::Error(msg) => assert_eq!(error, msg),
|
||||
PluginResponse::Signature(_) => panic!("returned wrong call type"),
|
||||
PluginResponse::Value(_) => panic!("returned wrong call type"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user