Mirror of https://github.com/nushell/nushell.git (synced 2024-11-26 02:13:47 +01:00)

Commit 29efbee285: corrected missing shellerror type (#439)
Parent commit: 22469a9cb1
Cargo.lock (generated): 35 changed lines
@@ -120,11 +120,10 @@ dependencies = [

 [[package]]
 name = "arrow2"
-version = "0.7.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d873e2775c3d87a4e8d77aa544cbd43f34a0779d5164c59e7c6a1dd0678eb395"
+checksum = "d3452b2ae9727464a31a726c07ffec0c0da3b87831610d9ac99fc691c78b3a44"
 dependencies = [
- "ahash",
  "arrow-format",
  "base64",
  "chrono",
@@ -628,9 +627,9 @@ dependencies = [

 [[package]]
 name = "dirs"
-version = "3.0.2"
+version = "4.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309"
+checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
 dependencies = [
  "dirs-sys",
 ]
@@ -1823,9 +1822,9 @@ dependencies = [

 [[package]]
 name = "parquet2"
-version = "0.6.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db82df54cdd88931d29b850190915b9069bb93fba8e1aefc0d59d8ca81603d6d"
+checksum = "41051fae4c0fab9040e291b360c6c8037d09d482aa83e94e37f3d080a32a58c3"
 dependencies = [
  "async-stream",
  "bitpacking",
@@ -1916,9 +1915,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

 [[package]]
 name = "polars"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c94a25d46e93b64eac7848c028a545dc08fa01e148e4942c5442b3843c3a598"
+checksum = "3e9211d1bb8d2d81541e4ab80ce9148a8e2a987d6412c2a48017fbbe24231ea1"
 dependencies = [
  "polars-core",
  "polars-io",
@@ -1927,9 +1926,9 @@ dependencies = [

 [[package]]
 name = "polars-arrow"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cc4488d2f2d6b901bb6e5728e58966013a272cae48861070b676215a79b4a99"
+checksum = "fa5ee9c385bf6643893f98efa80ff5a07169b50f65962c7843c0a13e12f0b0cf"
 dependencies = [
  "arrow2",
  "num 0.4.0",
@@ -1938,9 +1937,9 @@ dependencies = [

 [[package]]
 name = "polars-core"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6771524063d742a08163d96875ca5df71dff7113f27da58db5ec5fa164165bf6"
+checksum = "3cb1de44e479ce2764a7a3ad057e16f434efa334feb993284e1a48bb8888c6d1"
 dependencies = [
  "ahash",
  "anyhow",
@@ -1963,15 +1962,15 @@ dependencies = [

 [[package]]
 name = "polars-io"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11a5f5f51525043ee7befd49e586e6919345237826a5f17b53956f8242100957"
+checksum = "8bcb74f52ee9ff84863ae01de6ba25db092a9880302db4bf8f351f65b3ff0d12"
 dependencies = [
  "ahash",
  "anyhow",
  "arrow2",
  "csv-core",
- "dirs 3.0.2",
+ "dirs 4.0.0",
  "lazy_static",
  "lexical",
  "memchr",
@@ -1987,9 +1986,9 @@ dependencies = [

 [[package]]
 name = "polars-lazy"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da3ea647e2fa59d1bbbf90929c5d10ef6a9018aac256d1c6d0e8248211804b61"
+checksum = "43f91022ba6463df71ad6eb80ac2307884578d9959e85e1fe9dac18988291d46"
 dependencies = [
  "ahash",
  "itertools",
@@ -54,7 +54,7 @@ crossterm = "0.22.1"
 num = {version="0.4.0", optional=true}

 [dependencies.polars]
-version = "0.17.0"
+version = "0.18.0"
 optional = true
 features = ["default", "parquet", "json", "serde", "object", "checked_arithmetic", "strings"]

@@ -1,3 +1,3 @@
 # nu-dataframe

-The nu-dataframe crate holds the definitions of the dataframe structure
+The nu-dataframe crate holds the definitions of the dataframe structures and commands
@@ -1,3 +1,3 @@
 mod nu_dataframe;

-pub use nu_dataframe::commands::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
+pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame};
@@ -1,12 +0,0 @@
-mod describe;
-mod dtypes;
-mod open;
-mod to_df;
-
-pub use describe::DescribeDF;
-pub use dtypes::DataTypes;
-pub use open::OpenDataFrame;
-pub use to_df::ToDataFrame;
-
-#[cfg(test)]
-mod test_dataframe;
@@ -1,6 +1,4 @@
-use crate::dataframe::nu_dataframe::Column;
+use super::nu_dataframe::{Column, NuDataFrame};

-use super::super::NuDataFrame;
-
 use nu_protocol::{
     ast::Call,
@@ -19,7 +17,7 @@ pub struct DescribeDF;

 impl Command for DescribeDF {
     fn name(&self) -> &str {
-        "describe"
+        "describe-df"
     }

     fn usage(&self) -> &str {
@@ -27,13 +25,13 @@ impl Command for DescribeDF {
     }

     fn signature(&self) -> Signature {
-        Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
+        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
     }

     fn examples(&self) -> Vec<Example> {
         vec![Example {
             description: "dataframe description",
-            example: "[[a b]; [1 1] [1 1]] | to df | describe",
+            example: "[[a b]; [1 1] [1 1]] | to df | describe-df",
             result: Some(
                 NuDataFrame::try_from_columns(vec![
                     Column::new(
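Note on the signature change above: dropping the explicit `.to_string()` suggests `Signature::build` accepts any value convertible into a `String`. A minimal stand-in sketch of that builder shape (`SigBuilder` is hypothetical, not the real nu_protocol::Signature):

struct SigBuilder {
    name: String,
}

impl SigBuilder {
    // Taking `impl Into<String>` lets callers pass either &str or String.
    fn build(name: impl Into<String>) -> Self {
        SigBuilder { name: name.into() }
    }
}

fn main() {
    let a = SigBuilder::build("describe-df");             // &str works...
    let b = SigBuilder::build("describe-df".to_string()); // ...and so does String
    assert_eq!(a.name, b.name);
}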
@@ -134,13 +132,14 @@ fn command(
         .map(|col| {
             let count = col.len() as f64;

-            let sum = match col.sum_as_series().cast(&DataType::Float64) {
-                Ok(ca) => match ca.get(0) {
+            let sum = col
+                .sum_as_series()
+                .cast(&DataType::Float64)
+                .ok()
+                .and_then(|ca| match ca.get(0) {
                     AnyValue::Float64(v) => Some(v),
                     _ => None,
-                },
-                Err(_) => None,
-            };
+                });

             let mean = match col.mean_as_series().get(0) {
                 AnyValue::Float64(v) => Some(v),
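The refactor above (and the one that follows for min, the quantiles and max) replaces a nested `match` over a `Result` and an enum with `.ok().and_then(..)`. A self-contained sketch of the same flattening; `fetch` and `Payload` are hypothetical stand-ins, not part of nushell or polars:

enum Payload {
    Number(f64),
    Other,
}

fn fetch(ok: bool) -> Result<Payload, String> {
    if ok {
        Ok(Payload::Number(1.5))
    } else {
        Err("no value".to_string())
    }
}

// Before: nested matches over the Result and the enum.
fn nested(ok: bool) -> Option<f64> {
    match fetch(ok) {
        Ok(p) => match p {
            Payload::Number(v) => Some(v),
            Payload::Other => None,
        },
        Err(_) => None,
    }
}

// After: Result::ok() + Option::and_then() express the same logic with one level less nesting.
fn flattened(ok: bool) -> Option<f64> {
    fetch(ok).ok().and_then(|p| match p {
        Payload::Number(v) => Some(v),
        Payload::Other => None,
    })
}

fn main() {
    assert_eq!(nested(true), flattened(true));
    assert_eq!(nested(false), flattened(false));
}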
@@ -157,54 +156,50 @@ fn command(
                 _ => None,
             };

-            let min = match col.min_as_series().cast(&DataType::Float64) {
-                Ok(ca) => match ca.get(0) {
+            let min = col
+                .min_as_series()
+                .cast(&DataType::Float64)
+                .ok()
+                .and_then(|ca| match ca.get(0) {
                     AnyValue::Float64(v) => Some(v),
                     _ => None,
-                },
-                Err(_) => None,
-            };
+                });

-            let q_25 = match col.quantile_as_series(0.25) {
-                Ok(ca) => match ca.cast(&DataType::Float64) {
-                    Ok(ca) => match ca.get(0) {
-                        AnyValue::Float64(v) => Some(v),
-                        _ => None,
-                    },
-                    Err(_) => None,
-                },
-                Err(_) => None,
-            };
-
-            let q_50 = match col.quantile_as_series(0.50) {
-                Ok(ca) => match ca.cast(&DataType::Float64) {
-                    Ok(ca) => match ca.get(0) {
-                        AnyValue::Float64(v) => Some(v),
-                        _ => None,
-                    },
-                    Err(_) => None,
-                },
-                Err(_) => None,
-            };
-
-            let q_75 = match col.quantile_as_series(0.75) {
-                Ok(ca) => match ca.cast(&DataType::Float64) {
-                    Ok(ca) => match ca.get(0) {
-                        AnyValue::Float64(v) => Some(v),
-                        _ => None,
-                    },
-                    Err(_) => None,
-                },
-                Err(_) => None,
-            };
-
-            let max = match col.max_as_series().cast(&DataType::Float64) {
-                Ok(ca) => match ca.get(0) {
+            let q_25 = col
+                .quantile_as_series(0.25)
+                .ok()
+                .and_then(|ca| ca.cast(&DataType::Float64).ok())
+                .and_then(|ca| match ca.get(0) {
                     AnyValue::Float64(v) => Some(v),
                     _ => None,
-                },
-                Err(_) => None,
-            };
+                });
+
+            let q_50 = col
+                .quantile_as_series(0.50)
+                .ok()
+                .and_then(|ca| ca.cast(&DataType::Float64).ok())
+                .and_then(|ca| match ca.get(0) {
+                    AnyValue::Float64(v) => Some(v),
+                    _ => None,
+                });
+
+            let q_75 = col
+                .quantile_as_series(0.75)
+                .ok()
+                .and_then(|ca| ca.cast(&DataType::Float64).ok())
+                .and_then(|ca| match ca.get(0) {
+                    AnyValue::Float64(v) => Some(v),
+                    _ => None,
+                });
+
+            let max = col
+                .max_as_series()
+                .cast(&DataType::Float64)
+                .ok()
+                .and_then(|ca| match ca.get(0) {
+                    AnyValue::Float64(v) => Some(v),
+                    _ => None,
+                });

             let name = format!("{} ({})", col.name(), col.dtype());
             ChunkedArray::<Float64Type>::new_from_opt_slice(
@@ -226,12 +221,12 @@ fn command(
         });

     let res = head.chain(tail).collect::<Vec<Series>>();
-    let df = DataFrame::new(res).map_err(|e| {
-        ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head)
-    })?;
-    Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
-        df, call.head,
-    )))
+    DataFrame::new(res)
+        .map_err(|e| {
+            ShellError::SpannedLabeledError("Dataframe Error".into(), e.to_string(), call.head)
+        })
+        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
 }

 #[cfg(test)]
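This hunk appears to be the fix the commit title refers to: the error carries `call.head`, so it needs the `ShellError` variant that holds a span (`SpannedLabeledError`) rather than `LabeledError`, and the command body now returns the `Result` chain directly instead of binding with `?` and re-wrapping in `Ok`. A sketch of that shape with hypothetical stand-in types (the real code uses `ShellError`, `PipelineData::Value`, which in this commit takes a trailing `Option` argument passed as `None`, and `NuDataFrame::dataframe_into_value`):

#[derive(Debug, Clone, Copy)]
struct Span {
    start: usize,
    end: usize,
}

#[derive(Debug)]
struct Frame(Vec<f64>);

#[derive(Debug)]
enum DfError {
    // label, message, span of the offending expression
    SpannedLabeled(String, String, Span),
}

#[derive(Debug)]
enum Output {
    // stand-in for PipelineData::Value(value, None)
    Value(Frame, Option<()>),
}

fn new_frame(cols: Vec<f64>) -> Result<Frame, String> {
    if cols.is_empty() {
        Err("empty input".to_string())
    } else {
        Ok(Frame(cols))
    }
}

// Map both arms of the Result: errors get the call-site span, values get wrapped.
fn command(cols: Vec<f64>, head: Span) -> Result<Output, DfError> {
    new_frame(cols)
        .map_err(|e| DfError::SpannedLabeled("Dataframe Error".to_string(), e, head))
        .map(|df| Output::Value(df, None))
}

fn main() {
    let head = Span { start: 0, end: 4 };
    println!("{:?}", command(vec![1.0, 2.0], head));
    println!("{:?}", command(vec![], head));
}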
@@ -1,4 +1,4 @@
-use super::super::{Column, NuDataFrame};
+use super::nu_dataframe::{Column, NuDataFrame};
 use nu_protocol::{
     ast::Call,
     engine::{Command, EngineState, Stack},
@@ -18,12 +18,12 @@ impl Command for DataTypes {
     }

     fn signature(&self) -> Signature {
-        Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
+        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
     }

     fn examples(&self) -> Vec<Example> {
         vec![Example {
-            description: "drop column a",
+            description: "Dataframe dtypes",
             example: "[[a b]; [1 2] [3 4]] | to df | dtypes",
             result: Some(
                 NuDataFrame::try_from_columns(vec![
@@ -90,8 +90,8 @@ fn command(
     let names_col = Column::new("column".to_string(), names);
     let dtypes_col = Column::new("dtype".to_string(), dtypes);

-    let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?;
-    Ok(PipelineData::Value(df.into_value(call.head)))
+    NuDataFrame::try_from_columns(vec![names_col, dtypes_col])
+        .map(|df| PipelineData::Value(df.into_value(call.head), None))
 }

 #[cfg(test)]
@@ -1,390 +1,14 @@
-pub mod commands;
-
-mod between_values;
-mod conversion;
-mod custom_value;
-mod operations;
-
-use conversion::{Column, ColumnMap};
-
-use indexmap::map::IndexMap;
-use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
-use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
-use serde::{Deserialize, Serialize};
-use std::{cmp::Ordering, fmt::Display, hash::Hasher};
-[the remaining removed lines are the former DataFrameValue and NuDataFrame implementations; they reappear, with ShellError::InternalError replaced by the spanned/labeled variants, in crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs listed below]
+mod nu_dataframe;
+
+mod describe;
+mod dtypes;
+mod open;
+mod to_df;
+
+pub use describe::DescribeDF;
+pub use dtypes::DataTypes;
+pub use open::OpenDataFrame;
+pub use to_df::ToDataFrame;
+
+#[cfg(test)]
+mod test_dataframe;
@@ -66,17 +66,21 @@ pub fn compute_between_series(
                 res.rename(&name);
                 NuDataFrame::series_to_value(res, operation_span)
             }
-            Err(e) => Err(ShellError::InternalError(e.to_string())),
+            Err(e) => Err(ShellError::SpannedLabeledError(
+                "Division error".into(),
+                e.to_string(),
+                right.span()?,
+            )),
         }
     }
     Operator::Equal => {
-        let mut res = Series::eq(lhs, rhs).into_series();
+        let mut res = Series::equal(lhs, rhs).into_series();
         let name = format!("eq_{}_{}", lhs.name(), rhs.name());
         res.rename(&name);
         NuDataFrame::series_to_value(res, operation_span)
     }
     Operator::NotEqual => {
-        let mut res = Series::neq(lhs, rhs).into_series();
+        let mut res = Series::not_equal(lhs, rhs).into_series();
         let name = format!("neq_{}_{}", lhs.name(), rhs.name());
         res.rename(&name);
         NuDataFrame::series_to_value(res, operation_span)
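Besides the error variant, this hunk tracks the renamed polars comparison methods (`eq`/`neq` become `equal`/`not_equal`). A minimal sketch of the call shape used above, assuming the polars 0.18 API this commit targets (the comparison returns a boolean chunked array that can be turned back into a Series):

use polars::prelude::*;

fn eq_mask(lhs: &Series, rhs: &Series) -> Series {
    // Same call shape as in the hunk: Series::equal(lhs, rhs).into_series()
    let mut res = Series::equal(lhs, rhs).into_series();
    res.rename(&format!("eq_{}_{}", lhs.name(), rhs.name()));
    res
}

fn main() {
    let a = Series::new("a", &[1i64, 2, 3]);
    let b = Series::new("b", &[1i64, 5, 3]);
    println!("{:?}", eq_mask(&a, &b));
}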
@@ -117,8 +121,10 @@ pub fn compute_between_series(
             res.rename(&name);
             NuDataFrame::series_to_value(res, operation_span)
         }
-        _ => Err(ShellError::InternalError(
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incompatible types".into(),
             "unable to cast to boolean".into(),
+            right.span()?,
         )),
     }
 }
@@ -142,8 +148,10 @@ pub fn compute_between_series(
             res.rename(&name);
             NuDataFrame::series_to_value(res, operation_span)
         }
-        _ => Err(ShellError::InternalError(
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incompatible types".into(),
             "unable to cast to boolean".into(),
+            right.span()?,
         )),
     }
 }
@@ -254,9 +262,9 @@ pub fn compute_series_single_value(
         }),
     },
     Operator::Equal => match &right {
-        Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span),
+        Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span),
         Value::Float { val, .. } => {
-            compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span)
+            compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span)
         }
         _ => Err(ShellError::OperatorMismatch {
             op_span: operator.span,
@@ -267,9 +275,11 @@ pub fn compute_series_single_value(
         }),
     },
     Operator::NotEqual => match &right {
-        Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span),
+        Value::Int { val, .. } => {
+            compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span)
+        }
         Value::Float { val, .. } => {
-            compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span)
+            compare_series_decimal(&lhs, *val, ChunkedArray::not_equal, lhs_span)
         }
         _ => Err(ShellError::OperatorMismatch {
             op_span: operator.span,
@@ -364,17 +374,25 @@ where
                     let casted = series.i64();
                     compute_casted_i64(casted, val, f, span)
                 }
-                Err(e) => Err(ShellError::InternalError(e.to_string())),
+                Err(e) => Err(ShellError::SpannedLabeledError(
+                    "Unable to cast to i64".into(),
+                    e.to_string(),
+                    span,
+                )),
             }
         }
         DataType::Int64 => {
             let casted = series.i64();
             compute_casted_i64(casted, val, f, span)
         }
-        _ => Err(ShellError::InternalError(format!(
-            "Series of type {} can not be used for operations with an i64 value",
-            series.dtype()
-        ))),
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incorrect type".into(),
+            format!(
+                "Series of type {} can not be used for operations with an i64 value",
+                series.dtype()
+            ),
+            span,
+        )),
     }
 }

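The same four-line `SpannedLabeledError` block recurs in every casting branch of these helpers. A hedged sketch of a small helper that would factor it out; `Span` and `ShellErrorLike` are hypothetical stand-ins for the nu_protocol types, and the parse call stands in for the polars cast:

use std::fmt::Display;

#[derive(Debug, Clone, Copy)]
struct Span {
    start: usize,
    end: usize,
}

#[derive(Debug)]
enum ShellErrorLike {
    SpannedLabeled(String, String, Span),
}

// Build the label + message + span triple in one place.
fn spanned(label: &str, err: impl Display, span: Span) -> ShellErrorLike {
    ShellErrorLike::SpannedLabeled(label.to_string(), err.to_string(), span)
}

fn cast_to_i64(raw: &str, span: Span) -> Result<i64, ShellErrorLike> {
    raw.parse::<i64>()
        .map_err(|e| spanned("Unable to cast to i64", e, span))
}

fn main() {
    let span = Span { start: 10, end: 14 };
    println!("{:?}", cast_to_i64("42", span));
    println!("{:?}", cast_to_i64("oops", span));
}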
@@ -393,7 +411,11 @@ where
             let res = res.into_series();
             NuDataFrame::series_to_value(res, span)
         }
-        Err(e) => Err(ShellError::InternalError(e.to_string())),
+        Err(e) => Err(ShellError::SpannedLabeledError(
+            "Unable to cast to i64".into(),
+            e.to_string(),
+            span,
+        )),
     }
 }

@@ -415,17 +437,25 @@ where
                     let casted = series.f64();
                     compute_casted_f64(casted, val, f, span)
                 }
-                Err(e) => Err(ShellError::InternalError(e.to_string())),
+                Err(e) => Err(ShellError::SpannedLabeledError(
+                    "Unable to cast to f64".into(),
+                    e.to_string(),
+                    span,
+                )),
             }
         }
         DataType::Float64 => {
             let casted = series.f64();
             compute_casted_f64(casted, val, f, span)
         }
-        _ => Err(ShellError::InternalError(format!(
-            "Series of type {} can not be used for operations with a decimal value",
-            series.dtype()
-        ))),
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incorrect type".into(),
+            format!(
+                "Series of type {} can not be used for operations with a decimal value",
+                series.dtype()
+            ),
+            span,
+        )),
     }
 }

@@ -444,7 +474,11 @@ where
             let res = res.into_series();
             NuDataFrame::series_to_value(res, span)
         }
-        Err(e) => Err(ShellError::InternalError(e.to_string())),
+        Err(e) => Err(ShellError::SpannedLabeledError(
+            "Unable to cast to f64".into(),
+            e.to_string(),
+            span,
+        )),
     }
 }

@@ -461,17 +495,25 @@ where
                     let casted = series.i64();
                     compare_casted_i64(casted, val, f, span)
                 }
-                Err(e) => Err(ShellError::InternalError(e.to_string())),
+                Err(e) => Err(ShellError::SpannedLabeledError(
+                    "Unable to cast to f64".into(),
+                    e.to_string(),
+                    span,
+                )),
             }
         }
         DataType::Int64 => {
             let casted = series.i64();
             compare_casted_i64(casted, val, f, span)
         }
-        _ => Err(ShellError::InternalError(format!(
-            "Series of type {} can not be used for operations with an i64 value",
-            series.dtype()
-        ))),
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incorrect type".into(),
+            format!(
+                "Series of type {} can not be used for operations with an i64 value",
+                series.dtype()
+            ),
+            span,
+        )),
     }
 }

@@ -490,7 +532,11 @@ where
             let res = res.into_series();
             NuDataFrame::series_to_value(res, span)
         }
-        Err(e) => Err(ShellError::InternalError(e.to_string())),
+        Err(e) => Err(ShellError::SpannedLabeledError(
+            "Unable to cast to i64".into(),
+            e.to_string(),
+            span,
+        )),
     }
 }

@@ -512,17 +558,25 @@ where
                     let casted = series.f64();
                     compare_casted_f64(casted, val, f, span)
                 }
-                Err(e) => Err(ShellError::InternalError(e.to_string())),
+                Err(e) => Err(ShellError::SpannedLabeledError(
+                    "Unable to cast to i64".into(),
+                    e.to_string(),
+                    span,
+                )),
             }
         }
         DataType::Float64 => {
             let casted = series.f64();
             compare_casted_f64(casted, val, f, span)
         }
-        _ => Err(ShellError::InternalError(format!(
-            "Series of type {} can not be used for operations with a decimal value",
-            series.dtype()
-        ))),
+        _ => Err(ShellError::SpannedLabeledError(
+            "Incorrect type".into(),
+            format!(
+                "Series of type {} can not be used for operations with a decimal value",
+                series.dtype()
+            ),
+            span,
+        )),
     }
 }

@@ -541,7 +595,11 @@ where
             let res = res.into_series();
             NuDataFrame::series_to_value(res, span)
         }
-        Err(e) => Err(ShellError::InternalError(e.to_string())),
+        Err(e) => Err(ShellError::SpannedLabeledError(
+            "Unable to cast to f64".into(),
+            e.to_string(),
+            span,
+        )),
     }
 }

@@ -556,9 +614,17 @@ fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result<Value,
                 let res = res.into_series();
                 NuDataFrame::series_to_value(res, span)
             }
-            Err(e) => Err(ShellError::InternalError(e.to_string())),
+            Err(e) => Err(ShellError::SpannedLabeledError(
+                "Error using contains".into(),
+                e.to_string(),
+                span,
+            )),
         }
     }
-    Err(e) => Err(ShellError::InternalError(e.to_string())),
+    Err(e) => Err(ShellError::SpannedLabeledError(
+        "Unable to cast to string".into(),
+        e.to_string(),
+        span,
+    )),
     }
 }
@@ -122,69 +122,69 @@ pub fn create_column(
             Ok(Column::new(series.name().into(), values))
         }
         DataType::UInt8 => {
-            let casted = series
-                .u8()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.u8().map_err(|e| {
+                ShellError::LabeledError("Error casting column to u8".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::UInt16 => {
-            let casted = series
-                .u16()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.u16().map_err(|e| {
+                ShellError::LabeledError("Error casting column to u16".into(), e.to_string())
+            })?;
            Ok(column_from_casted(casted, from_row, size))
         }
         DataType::UInt32 => {
-            let casted = series
-                .u32()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.u32().map_err(|e| {
+                ShellError::LabeledError("Error casting column to u32".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::UInt64 => {
-            let casted = series
-                .u64()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.u64().map_err(|e| {
+                ShellError::LabeledError("Error casting column to u64".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Int8 => {
-            let casted = series
-                .i8()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.i8().map_err(|e| {
+                ShellError::LabeledError("Error casting column to i8".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Int16 => {
-            let casted = series
-                .i16()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.i16().map_err(|e| {
+                ShellError::LabeledError("Error casting column to i16".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Int32 => {
-            let casted = series
-                .i32()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.i32().map_err(|e| {
+                ShellError::LabeledError("Error casting column to i32".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Int64 => {
-            let casted = series
-                .i64()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.i64().map_err(|e| {
+                ShellError::LabeledError("Error casting column to i64".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Float32 => {
-            let casted = series
-                .f32()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.f32().map_err(|e| {
+                ShellError::LabeledError("Error casting column to f32".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Float64 => {
-            let casted = series
-                .f64()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.f64().map_err(|e| {
+                ShellError::LabeledError("Error casting column to f64".into(), e.to_string())
+            })?;
             Ok(column_from_casted(casted, from_row, size))
         }
         DataType::Boolean => {
-            let casted = series
-                .bool()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.bool().map_err(|e| {
+                ShellError::LabeledError("Error casting column to bool".into(), e.to_string())
+            })?;

             let values = casted
                 .into_iter()
@@ -204,9 +204,9 @@ pub fn create_column(
             Ok(Column::new(casted.name().into(), values))
         }
         DataType::Utf8 => {
-            let casted = series
-                .utf8()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.utf8().map_err(|e| {
+                ShellError::LabeledError("Error casting column to string".into(), e.to_string())
+            })?;

             let values = casted
                 .into_iter()
@@ -231,10 +231,10 @@ pub fn create_column(
                 .downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>();

             match casted {
-                None => Err(ShellError::InternalError(format!(
-                    "Object not supported for conversion: {}",
-                    x
-                ))),
+                None => Err(ShellError::LabeledError(
+                    "Error casting object from series".into(),
+                    format!("Object not supported for conversion: {}", x),
+                )),
                 Some(ca) => {
                     let values = ca
                         .into_iter()
@@ -253,9 +253,9 @@ pub fn create_column(
                 }
             }
         }
         DataType::Date => {
-            let casted = series
-                .date()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.date().map_err(|e| {
+                ShellError::LabeledError("Error casting column to date".into(), e.to_string())
+            })?;

             let values = casted
                 .into_iter()
@@ -285,9 +285,9 @@ pub fn create_column(
             Ok(Column::new(casted.name().into(), values))
         }
         DataType::Datetime => {
-            let casted = series
-                .datetime()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.datetime().map_err(|e| {
+                ShellError::LabeledError("Error casting column to datetime".into(), e.to_string())
+            })?;

             let values = casted
                 .into_iter()
@@ -317,9 +317,9 @@ pub fn create_column(
             Ok(Column::new(casted.name().into(), values))
         }
         DataType::Time => {
-            let casted = series
-                .time()
-                .map_err(|e| ShellError::InternalError(e.to_string()))?;
+            let casted = series.time().map_err(|e| {
+                ShellError::LabeledError("Error casting column to time".into(), e.to_string())
+            })?;

             let values = casted
                 .into_iter()
@@ -338,10 +338,10 @@ pub fn create_column(

             Ok(Column::new(casted.name().into(), values))
         }
-        e => Err(ShellError::InternalError(format!(
-            "Value not supported in nushell: {}",
-            e
-        ))),
+        e => Err(ShellError::LabeledError(
+            "Error creating Dataframe".into(),
+            format!("Value not supported in nushell: {}", e),
+        )),
     }
 }

@@ -530,8 +530,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, Shel
         }
     }

-    match DataFrame::new(df_series) {
-        Ok(df) => Ok(NuDataFrame::new(df)),
-        Err(e) => Err(ShellError::InternalError(e.to_string())),
-    }
+    DataFrame::new(df_series)
+        .map(|df| NuDataFrame::new(df))
+        .map_err(|e| ShellError::LabeledError("Error creating dataframe".into(), e.to_string()))
 }
crates/nu-command/src/dataframe/nu_dataframe/nu_dataframe/mod.rs (new file): 395 lines
@@ -0,0 +1,395 @@ (new file)
mod between_values;
mod conversion;
mod custom_value;
mod operations;

pub(super) use conversion::{Column, ColumnMap};

use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};

// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);

impl DataFrameValue {
    fn new(value: Value) -> Self {
        Self(value)
    }

    fn get_value(&self) -> Value {
        self.0.clone()
    }
}

impl Display for DataFrameValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0.get_type())
    }
}

impl Default for DataFrameValue {
    fn default() -> Self {
        Self(Value::Nothing {
            span: Span::unknown(),
        })
    }
}

impl PartialEq for DataFrameValue {
    fn eq(&self, other: &Self) -> bool {
        self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
    }
}
impl Eq for DataFrameValue {}

impl std::hash::Hash for DataFrameValue {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match &self.0 {
            Value::Nothing { .. } => 0.hash(state),
            Value::Int { val, .. } => val.hash(state),
            Value::String { val, .. } => val.hash(state),
            // TODO. Define hash for the rest of types
            _ => {}
        }
    }
}

impl PolarsObject for DataFrameValue {
    fn type_name() -> &'static str {
        "object"
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);

impl AsRef<DataFrame> for NuDataFrame {
    fn as_ref(&self) -> &polars::prelude::DataFrame {
        &self.0
    }
}

impl AsMut<DataFrame> for NuDataFrame {
    fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
        &mut self.0
    }
}

impl NuDataFrame {
    pub fn new(dataframe: DataFrame) -> Self {
        Self(dataframe)
    }

    fn default_value(span: Span) -> Value {
        let dataframe = DataFrame::default();
        NuDataFrame::dataframe_into_value(dataframe, span)
    }

    pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
        Value::CustomValue {
            val: Box::new(Self::new(dataframe)),
            span,
        }
    }

    pub fn into_value(self, span: Span) -> Value {
        Value::CustomValue {
            val: Box::new(self),
            span,
        }
    }

    pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
        match DataFrame::new(vec![series]) {
            Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)),
            Err(e) => Err(ShellError::SpannedLabeledError(
                "Error creating dataframe".into(),
                e.to_string(),
                span,
            )),
        }
    }

    pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
    where
        T: Iterator<Item = Value>,
    {
        // Dictionary to store the columnar data extracted from
        // the input. During the iteration we check if the values
        // have different type
        let mut column_values: ColumnMap = IndexMap::new();

        for value in iter {
            match value {
                Value::List { vals, .. } => {
                    let cols = (0..vals.len())
                        .map(|i| format!("{}", i))
                        .collect::<Vec<String>>();

                    conversion::insert_record(&mut column_values, &cols, &vals)?
                }
                Value::Record { cols, vals, .. } => {
                    conversion::insert_record(&mut column_values, &cols, &vals)?
                }
                _ => {
                    let key = "0".to_string();
                    conversion::insert_value(value, key, &mut column_values)?
                }
            }
        }

        conversion::from_parsed_columns(column_values)
    }

    //pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
    //    let dataframe = DataFrame::new(columns)
    //        .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;

    //    Ok(Self::new(dataframe))
    //}

    pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
        let mut column_values: ColumnMap = IndexMap::new();

        for column in columns {
            let name = column.name().to_string();
            for value in column {
                conversion::insert_value(value, name.clone(), &mut column_values)?;
            }
        }

        conversion::from_parsed_columns(column_values)
    }

    pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
        match input.into_value(span) {
            Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
                Some(df) => Ok(NuDataFrame(df.0.clone())),
                None => Err(ShellError::CantConvert(
                    "Dataframe not found".into(),
                    "value is not a dataframe".into(),
                    span,
                )),
            },
            _ => Err(ShellError::CantConvert(
                "Dataframe not found".into(),
                "value is not a dataframe".into(),
                span,
            )),
        }
    }

    pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
        let s = self.0.column(column).map_err(|_| {
            let possibilities = self
                .0
                .get_column_names()
                .iter()
                .map(|name| name.to_string())
                .collect::<Vec<String>>();

            let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
            ShellError::DidYouMean(option, span)
        })?;

        let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
            ShellError::SpannedLabeledError("Error creating dataframe".into(), e.to_string(), span)
        })?;

        Ok(Self(dataframe))
    }

    pub fn is_series(&self) -> bool {
        self.0.width() == 1
    }

    pub fn as_series(&self, span: Span) -> Result<Series, ShellError> {
        if !self.is_series() {
            return Err(ShellError::SpannedLabeledError(
                "Error using as series".into(),
                "dataframe has more than one column".into(),
                span,
            ));
        }

        let series = self
            .0
            .get_columns()
            .get(0)
            .expect("We have already checked that the width is 1");

        Ok(series.clone())
    }

    pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
        let series = self.as_series(Span::unknown())?;
        let column = conversion::create_column(&series, row, row + 1)?;

        if column.len() == 0 {
            Err(ShellError::AccessBeyondEnd(series.len(), span))
        } else {
            let value = column
                .into_iter()
                .next()
                .expect("already checked there is a value");
            Ok(value)
        }
    }

    // Print is made out a head and if the dataframe is too large, then a tail
    pub fn print(&self) -> Result<Vec<Value>, ShellError> {
        let df = &self.0;
        let size: usize = 20;

        if df.height() > size {
            let sample_size = size / 2;
            let mut values = self.head(Some(sample_size))?;
            conversion::add_separator(&mut values, df);
            let remaining = df.height() - sample_size;
            let tail_size = remaining.min(sample_size);
            let mut tail_values = self.tail(Some(tail_size))?;
            values.append(&mut tail_values);

            Ok(values)
        } else {
            Ok(self.head(Some(size))?)
        }
    }

    pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
        let to_row = rows.unwrap_or(5);
        let values = self.to_rows(0, to_row)?;

        Ok(values)
    }

    pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
        let df = &self.0;
        let to_row = df.height();
        let size = rows.unwrap_or(5);
        let from_row = to_row.saturating_sub(size);

        let values = self.to_rows(from_row, to_row)?;

        Ok(values)
    }

    pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
        let df = &self.0;
        let upper_row = to_row.min(df.height());

        let mut size: usize = 0;
        let columns = self
            .0
            .get_columns()
            .iter()
            .map(
                |col| match conversion::create_column(col, from_row, upper_row) {
                    Ok(col) => {
                        size = col.len();
                        Ok(col)
                    }
                    Err(e) => Err(e),
                },
            )
            .collect::<Result<Vec<Column>, ShellError>>()?;

        let mut iterators = columns
            .into_iter()
            .map(|col| (col.name().to_string(), col.into_iter()))
            .collect::<Vec<(String, std::vec::IntoIter<Value>)>>();

        let values = (0..size)
            .into_iter()
            .map(|_| {
                let mut cols = vec![];
                let mut vals = vec![];

                for (name, col) in &mut iterators {
                    cols.push(name.clone());

                    match col.next() {
                        Some(v) => vals.push(v),
                        None => vals.push(Value::Nothing {
                            span: Span::unknown(),
                        }),
                    };
                }

                Value::Record {
                    cols,
                    vals,
                    span: Span::unknown(),
                }
            })
            .collect::<Vec<Value>>();

        Ok(values)
    }

    // Dataframes are considered equal if they have the same shape, column name and values
    pub fn is_equal(&self, other: &Self) -> Option<Ordering> {
        if self.as_ref().width() == 0 {
            // checking for empty dataframe
            return None;
        }

        if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
            // checking both dataframes share the same names
            return None;
        }

        if self.as_ref().height() != other.as_ref().height() {
            // checking both dataframes have the same row size
            return None;
        }

        // sorting dataframe by the first column
        let column_names = self.as_ref().get_column_names();
        let first_col = column_names
            .get(0)
            .expect("already checked that dataframe is different than 0");

        // if unable to sort, then unable to compare
        let lhs = match self.as_ref().sort(*first_col, false) {
            Ok(df) => df,
            Err(_) => return None,
        };

        let rhs = match other.as_ref().sort(*first_col, false) {
            Ok(df) => df,
            Err(_) => return None,
        };

        for name in self.as_ref().get_column_names() {
            let self_series = lhs.column(name).expect("name from dataframe names");
|
let other_series = rhs
|
||||||
|
.column(name)
|
||||||
|
.expect("already checked that name in other");
|
||||||
|
|
||||||
|
let self_series = match self_series.dtype() {
|
||||||
|
// Casting needed to compare other numeric types with nushell numeric type.
|
||||||
|
// In nushell we only have i64 integer numeric types and any array created
|
||||||
|
// with nushell untagged primitives will be of type i64
|
||||||
|
DataType::UInt32 => match self_series.cast(&DataType::Int64) {
|
||||||
|
Ok(series) => series,
|
||||||
|
Err(_) => return None,
|
||||||
|
},
|
||||||
|
_ => self_series.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if !self_series.series_equal(other_series) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Ordering::Equal)
|
||||||
|
}
|
||||||
|
}
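Taken together, the methods above form the small API that dataframe commands call into. A rough usage sketch follows; the function name show_column, its signature, and the column name "a" are assumptions for illustration, while try_from_pipeline, column, and print are the methods defined above.

// Sketch only: a hypothetical command body wiring the methods above together.
// `input` and `span` would come from the engine; the column name "a" is a placeholder.
fn show_column(input: PipelineData, span: Span) -> Result<Vec<Value>, ShellError> {
    // Downcast the pipeline value into a NuDataFrame (anything else is CantConvert)
    let df = NuDataFrame::try_from_pipeline(input, span)?;
    // Select a single column; an unknown name surfaces as ShellError::DidYouMean
    let col = df.column("a", span)?;
    // Render at most 20 rows, split into a head and a tail for larger frames
    col.print()
}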
@@ -122,7 +122,7 @@ impl NuDataFrame {
         &self,
         other: &NuDataFrame,
         axis: Axis,
-        _span: Span,
+        span: Span,
     ) -> Result<Self, ShellError> {
         match axis {
             Axis::Row => {
@@ -147,8 +147,13 @@ impl NuDataFrame {
                     })
                     .collect::<Vec<Series>>();

-                let df_new = DataFrame::new(new_cols)
-                    .map_err(|e| ShellError::InternalError(e.to_string()))?;
+                let df_new = DataFrame::new(new_cols).map_err(|e| {
+                    ShellError::SpannedLabeledError(
+                        "Error creating dataframe".into(),
+                        e.to_string(),
+                        span,
+                    )
+                })?;

                 Ok(NuDataFrame::new(df_new))
             } //Axis::Column => {
@@ -1,4 +1,4 @@
-use super::super::NuDataFrame;
+use super::nu_dataframe::NuDataFrame;

 use nu_engine::CallExt;
 use nu_protocol::{
     ast::Call,
@@ -22,7 +22,7 @@ impl Command for OpenDataFrame {
     }

     fn signature(&self) -> Signature {
-        Signature::build(self.name().to_string())
+        Signature::build(self.name())
             .required(
                 "file",
                 SyntaxShape::Filepath,
@@ -54,7 +54,7 @@ impl Command for OpenDataFrame {
             .named(
                 "columns",
                 SyntaxShape::List(Box::new(SyntaxShape::String)),
-                "Columns to be selected from csv file. CSV file",
+                "Columns to be selected from csv file. CSV and Parquet file",
                 None,
             )
             .category(Category::Custom("dataframe".into()))
@@ -87,7 +87,7 @@ fn command(
     let span = call.head;
     let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

-    let df = match file.item.extension() {
+    match file.item.extension() {
         Some(e) => match e.to_str() {
             Some("csv") => from_csv(engine_state, stack, call),
             Some("parquet") => from_parquet(engine_state, stack, call),
@@ -101,11 +101,8 @@ fn command(
             "File without extension".into(),
             file.span,
         )),
-    }?;
-
-    Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
-        df, span,
-    )))
+    }
+    .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, span), None))
 }

 fn from_parquet(
@@ -114,12 +111,25 @@ fn from_parquet(
     call: &Call,
 ) -> Result<polars::prelude::DataFrame, ShellError> {
     let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
-    let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
+    let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;
+
+    let r = File::open(&file.item).map_err(|e| {
+        ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
+    })?;
     let reader = ParquetReader::new(r);

-    reader
-        .finish()
-        .map_err(|e| ShellError::InternalError(format!("{:?}", e)))
+    let reader = match columns {
+        None => reader,
+        Some(columns) => reader.with_columns(Some(columns)),
+    };
+
+    reader.finish().map_err(|e| {
+        ShellError::SpannedLabeledError(
+            "Parquet reader error".into(),
+            format!("{:?}", e),
+            call.head,
+        )
+    })
 }

 fn from_json(
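Condensed, the parquet path after this change reads roughly as below. This is a sketch, not the committed code: the standalone helper name and parameter list are assumptions, while ParquetReader, with_columns, finish, and SpannedLabeledError are used exactly as in the hunk above.

// Sketch of the new parquet path: open the file, optionally project columns, then read,
// mapping every failure to a SpannedLabeledError that points at a concrete span.
fn read_parquet(
    path: &std::path::Path,
    columns: Option<Vec<String>>,
    file_span: Span,
    call_span: Span,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let r = std::fs::File::open(path).map_err(|e| {
        ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file_span)
    })?;
    let reader = ParquetReader::new(r);
    // None keeps every column; Some restricts the read to the names from the columns flag
    let reader = match columns {
        None => reader,
        Some(columns) => reader.with_columns(Some(columns)),
    };
    reader.finish().map_err(|e| {
        ShellError::SpannedLabeledError("Parquet reader error".into(), format!("{:?}", e), call_span)
    })
}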
@@ -129,13 +139,15 @@ fn from_json(
 ) -> Result<polars::prelude::DataFrame, ShellError> {
     let file: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

-    let r = File::open(&file.item).map_err(|e| ShellError::InternalError(e.to_string()))?;
+    let r = File::open(&file.item).map_err(|e| {
+        ShellError::SpannedLabeledError("Error opening file".into(), e.to_string(), file.span)
+    })?;

     let reader = JsonReader::new(r);

-    reader
-        .finish()
-        .map_err(|e| ShellError::InternalError(e.to_string()))
+    reader.finish().map_err(|e| {
+        ShellError::SpannedLabeledError("Json reader error".into(), format!("{:?}", e), call.head)
+    })
 }

 fn from_csv(
@@ -151,15 +163,23 @@ fn from_csv(
     let columns: Option<Vec<String>> = call.get_flag(engine_state, stack, "columns")?;

     let csv_reader = CsvReader::from_path(&file.item)
-        .map_err(|e| ShellError::InternalError(e.to_string()))?
+        .map_err(|e| {
+            ShellError::SpannedLabeledError(
+                "Error creating CSV reader".into(),
+                e.to_string(),
+                file.span,
+            )
+        })?
         .with_encoding(CsvEncoding::LossyUtf8);

     let csv_reader = match delimiter {
         None => csv_reader,
         Some(d) => {
             if d.item.len() != 1 {
-                return Err(ShellError::InternalError(
-                    "Delimiter has to be one char".into(),
+                return Err(ShellError::SpannedLabeledError(
+                    "Incorrect delimiter".into(),
+                    "Delimiter has to be one character".into(),
+                    d.span,
                 ));
             } else {
                 let delimiter = match d.item.chars().next() {
@@ -188,7 +208,11 @@ fn from_csv(
         Some(columns) => csv_reader.with_columns(Some(columns)),
     };

-    csv_reader
-        .finish()
-        .map_err(|e| ShellError::InternalError(e.to_string()))
+    csv_reader.finish().map_err(|e| {
+        ShellError::SpannedLabeledError(
+            "Parquet reader error".into(),
+            format!("{:?}", e),
+            call.head,
+        )
+    })
 }
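The same three-argument SpannedLabeledError mapping now appears in the parquet, JSON, and CSV readers. If the repetition ever became a concern, it could be factored along these lines; the helper below is a sketch and not part of this commit.

// Hypothetical helper: wrap any displayable error in a SpannedLabeledError with a fixed
// label and the span of the offending input. Not part of this commit.
fn spanned_err<E: std::fmt::Display>(label: &str, span: Span) -> impl Fn(E) -> ShellError + '_ {
    move |e| ShellError::SpannedLabeledError(label.into(), e.to_string(), span)
}

// Usage sketch, mirroring the CSV path above:
// let csv_reader = CsvReader::from_path(&file.item)
//     .map_err(spanned_err("Error creating CSV reader", file.span))?
//     .with_encoding(CsvEncoding::LossyUtf8);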
@@ -1,4 +1,4 @@
-use super::super::{Column, NuDataFrame};
+use super::nu_dataframe::{Column, NuDataFrame};

 use nu_protocol::{
     ast::Call,
@@ -19,7 +19,7 @@ impl Command for ToDataFrame {
     }

     fn signature(&self) -> Signature {
-        Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
+        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
     }

     fn examples(&self) -> Vec<Example> {
@@ -94,8 +94,8 @@ impl Command for ToDataFrame {
         call: &Call,
         input: PipelineData,
     ) -> Result<PipelineData, ShellError> {
-        let df = NuDataFrame::try_from_iter(input.into_iter())?;
-        Ok(PipelineData::Value(NuDataFrame::into_value(df, call.head)))
+        NuDataFrame::try_from_iter(input.into_iter())
+            .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
     }
 }

@@ -382,4 +382,25 @@ mod tests {
             PluginResponse::Value(_) => panic!("returned wrong call type"),
         }
     }
+
+    #[test]
+    fn response_round_trip_error_none() {
+        let error = LabeledError {
+            label: "label".into(),
+            msg: "msg".into(),
+            span: None,
+        };
+        let response = PluginResponse::Error(error.clone());
+
+        let mut buffer: Vec<u8> = Vec::new();
+        encode_response(&response, &mut buffer).expect("unable to serialize message");
+        let returned =
+            decode_response(&mut buffer.as_slice()).expect("unable to deserialize message");
+
+        match returned {
+            PluginResponse::Error(msg) => assert_eq!(error, msg),
+            PluginResponse::Signature(_) => panic!("returned wrong call type"),
+            PluginResponse::Value(_) => panic!("returned wrong call type"),
+        }
+    }
 }