Option to replace command same name (#374)

* option to replace command same name

* moved order of custom value declarations

* arranged dataframe folders and objects

* sort help commands by name

* added dtypes function for debugging

* corrected name for dataframe commands

* command names using function
This commit is contained in:
Fernando Herrera
2021-11-28 19:35:02 +00:00
committed by GitHub
parent e1e7e94261
commit c8b16c14d5
29 changed files with 490 additions and 220 deletions

View File

@ -0,0 +1,3 @@
# nu-dataframe
The nu-dataframe crate holds the definitions of the dataframe structure.

View File

@ -0,0 +1,183 @@
use super::objects::nu_dataframe::NuDataFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature,
};
use polars::{
chunked_array::ChunkedArray,
prelude::{
AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray, Series, Utf8Type,
},
};
// Unit struct implementing the `describe` command, which describes the
// numeric columns of a dataframe.
#[derive(Clone)]
pub struct DescribeDF;
// `Command` trait implementation for `DescribeDF`.
impl Command for DescribeDF {
// Name of the command.
fn name(&self) -> &str {
"describe"
}
// One-line description of the command.
fn usage(&self) -> &str {
"Describes dataframes numeric columns"
}
// Signature built from the command name only, placed in the custom
// "dataframe" category.
fn signature(&self) -> Signature {
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
}
// A single usage example; no expected result is attached to it.
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "dataframe description",
example: "[[a b]; [1 1] [1 1]] | to-df | describe",
result: None,
}]
}
// Forwards every argument to the free function `command` of this file.
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
/// Builds the summary dataframe returned by the `describe` command.
///
/// The first output column, `descriptor`, labels the ten statistics. Each
/// following column holds those statistics for one input column whose dtype is
/// not `Object("object")` and is named `"<column> (<dtype>)"`. A statistic
/// that cannot be read back as a `Float64` is stored as a null.
///
/// # Errors
/// Returns the error from `NuDataFrame::try_from_pipeline` when the input is
/// not a dataframe, and a `LabeledError` when the resulting columns cannot be
/// assembled into a `DataFrame`.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Reads the first element of an aggregate series when it is a Float64.
    fn first_f64(series: &Series) -> Option<f64> {
        match series.get(0) {
            AnyValue::Float64(v) => Some(v),
            _ => None,
        }
    }

    // Same as `first_f64`, but casts the aggregate to Float64 first because
    // sum/min/max/quantile keep the dtype of the source column.
    fn cast_first_f64(series: &Series) -> Option<f64> {
        series
            .cast(&DataType::Float64)
            .ok()
            .and_then(|casted| first_f64(&casted))
    }

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let names = ChunkedArray::<Utf8Type>::new_from_opt_slice(
        "descriptor",
        &[
            Some("count"),
            Some("sum"),
            Some("mean"),
            Some("median"),
            Some("std"),
            Some("min"),
            Some("25%"),
            Some("50%"),
            Some("75%"),
            Some("max"),
        ],
    )
    .into_series();

    let head = std::iter::once(names);

    let tail = df
        .as_ref()
        .get_columns()
        .iter()
        .filter(|col| col.dtype() != &DataType::Object("object"))
        .map(|col| {
            // A failed quantile computation is reported as a null statistic.
            let quantile = |q: f64| {
                col.quantile_as_series(q)
                    .ok()
                    .and_then(|series| cast_first_f64(&series))
            };

            let name = format!("{} ({})", col.name(), col.dtype());
            // The order below must match the labels in `names`.
            ChunkedArray::<Float64Type>::new_from_opt_slice(
                &name,
                &[
                    Some(col.len() as f64),
                    cast_first_f64(&col.sum_as_series()),
                    first_f64(&col.mean_as_series()),
                    first_f64(&col.median_as_series()),
                    first_f64(&col.std_as_series()),
                    cast_first_f64(&col.min_as_series()),
                    quantile(0.25),
                    quantile(0.50),
                    quantile(0.75),
                    cast_first_f64(&col.max_as_series()),
                ],
            )
            .into_series()
        });

    let res = head.chain(tail).collect::<Vec<Series>>();

    let df = DataFrame::new(res).map_err(|e| {
        ShellError::LabeledError("Dataframe Error".into(), e.to_string(), call.head)
    })?;

    Ok(PipelineData::Value(NuDataFrame::dataframe_into_value(
        df, call.head,
    )))
}

View File

@ -0,0 +1,82 @@
use super::objects::nu_dataframe::{Column, NuDataFrame};
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Value,
};
// Unit struct implementing the `dtypes` command, which shows the data types
// of a dataframe.
#[derive(Clone)]
pub struct DataTypes;
/// `Command` trait implementation for `DataTypes`.
impl Command for DataTypes {
    /// Name of the command.
    fn name(&self) -> &str {
        "dtypes"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Show dataframe data types"
    }

    /// Signature built from the command name only, placed in the custom
    /// "dataframe" category.
    fn signature(&self) -> Signature {
        Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
    }

    /// A single usage example; no expected result is attached to it.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            // The previous text ("drop column a") described a different command.
            description: "Dataframe dtypes",
            example: "[[a b]; [1 2] [3 4]] | to-df | dtypes",
            result: None,
        }]
    }

    /// Forwards every argument to the free function `command` of this file.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Builds the two-column dataframe returned by the `dtypes` command.
///
/// The `column` column lists every column name of the input dataframe and the
/// `dtype` column holds the matching data type rendered as a string, in the
/// same order.
///
/// # Errors
/// Returns the error from `NuDataFrame::try_from_pipeline` when the input is
/// not a dataframe, or from `NuDataFrame::try_from_columns` when the result
/// cannot be assembled.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // Produce both value lists in a single pass instead of pushing into an
    // outer vector from inside the `map` closure.
    let (names, dtypes): (Vec<Value>, Vec<Value>) = df
        .as_ref()
        .get_column_names()
        .iter()
        .map(|name| {
            let dtype = df
                .as_ref()
                .column(name)
                .expect("using name from list of names from dataframe")
                .dtype();

            (
                Value::String {
                    val: name.to_string().into(),
                    span: call.head,
                },
                Value::String {
                    val: dtype.to_string().into(),
                    span: call.head,
                },
            )
        })
        .unzip();

    let names_col = Column::new("column".to_string(), names);
    let dtypes_col = Column::new("dtype".to_string(), dtypes);

    let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col])?;
    Ok(PipelineData::Value(df.to_value(call.head)))
}

View File

@ -1,5 +1,10 @@
mod describe;
mod dtypes;
mod objects;
mod open;
mod to_df;
pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;

View File

@ -0,0 +1 @@
pub(super) mod nu_dataframe;

View File

@ -0,0 +1,564 @@
use super::{operations::Axis, NuDataFrame};
use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value};
use num::Zero;
use polars::prelude::{
BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
NumOpsDispatchChecked, PolarsError, Series,
};
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
/// Applies `operator` to two dataframes.
///
/// Only `Operator::Plus` is handled: `rhs` is appended to `lhs` along
/// `Axis::Row` and the result is tagged with the span covering both operands.
/// Any other operator yields `ShellError::OperatorMismatch`.
pub fn between_dataframes(
    operator: Spanned<Operator>,
    left: &Value,
    lhs: &NuDataFrame,
    right: &Value,
    rhs: &NuDataFrame,
) -> Result<Value, ShellError> {
    let operation_span = span(&[left.span()?, right.span()?]);

    if let Operator::Plus = operator.item {
        return lhs
            .append_df(rhs, Axis::Row, operation_span)
            .map(|appended| appended.to_value(operation_span));
    }

    Err(ShellError::OperatorMismatch {
        op_span: operator.span,
        lhs_ty: left.get_type(),
        lhs_span: left.span()?,
        rhs_ty: right.get_type(),
        rhs_span: right.span()?,
    })
}
pub fn compute_between_series(
operator: Spanned<Operator>,
left: &Value,
lhs: &Series,
right: &Value,
rhs: &Series,
) -> Result<Value, ShellError> {
let operation_span = span(&[left.span()?, right.span()?]);
match operator.item {
Operator::Plus => {
let mut res = lhs + rhs;
let name = format!("sum_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::Minus => {
let mut res = lhs - rhs;
let name = format!("sub_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::Multiply => {
let mut res = lhs * rhs;
let name = format!("mul_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::Divide => {
let res = lhs.checked_div(rhs);
match res {
Ok(mut res) => {
let name = format!("div_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Err(e) => Err(ShellError::InternalError(e.to_string())),
}
}
Operator::Equal => {
let mut res = Series::eq(lhs, rhs).into_series();
let name = format!("eq_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::NotEqual => {
let mut res = Series::neq(lhs, rhs).into_series();
let name = format!("neq_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::LessThan => {
let mut res = Series::lt(lhs, rhs).into_series();
let name = format!("lt_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::LessThanOrEqual => {
let mut res = Series::lt_eq(lhs, rhs).into_series();
let name = format!("lte_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::GreaterThan => {
let mut res = Series::gt(lhs, rhs).into_series();
let name = format!("gt_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::GreaterThanOrEqual => {
let mut res = Series::gt_eq(lhs, rhs).into_series();
let name = format!("gte_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
Operator::And => match lhs.dtype() {
DataType::Boolean => {
let lhs_cast = lhs.bool();
let rhs_cast = rhs.bool();
match (lhs_cast, rhs_cast) {
(Ok(l), Ok(r)) => {
let mut res = l.bitand(r).into_series();
let name = format!("and_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
_ => Err(ShellError::InternalError(
"unable to cast to boolean".into(),
)),
}
}
_ => Err(ShellError::IncompatibleParametersSingle(
format!(
"Operation {} can only be done with boolean values",
operator.item
),
operation_span,
)),
},
Operator::Or => match lhs.dtype() {
DataType::Boolean => {
let lhs_cast = lhs.bool();
let rhs_cast = rhs.bool();
match (lhs_cast, rhs_cast) {
(Ok(l), Ok(r)) => {
let mut res = l.bitor(r).into_series();
let name = format!("or_{}_{}", lhs.name(), rhs.name());
res.rename(&name);
NuDataFrame::series_to_value(res, operation_span)
}
_ => Err(ShellError::InternalError(
"unable to cast to boolean".into(),
)),
}
}
_ => Err(ShellError::IncompatibleParametersSingle(
format!(
"Operation {} can only be done with boolean values",
operator.item
),
operation_span,
)),
},
_ => Err(ShellError::OperatorMismatch {
op_span: operator.span,
lhs_ty: left.get_type(),
lhs_span: left.span()?,
rhs_ty: right.get_type(),
rhs_span: right.span()?,
}),
}
}
/// Applies `operator` between a series (`lhs`) and a single value (`right`).
///
/// `lhs` must be a series. Arithmetic and comparison operators accept an
/// `Int` or `Float` right-hand side, `Contains` accepts a `String`. Dividing
/// by an `Int` or `Float` zero yields `ShellError::DivisionByZero`. Every
/// other operator/value combination yields `ShellError::OperatorMismatch`.
pub fn compute_series_single_value(
    operator: Spanned<Operator>,
    left: &Value,
    lhs: &NuDataFrame,
    right: &Value,
) -> Result<Value, ShellError> {
    // Single place where the "operator does not apply" error is built.
    let mismatch = || -> Result<Value, ShellError> {
        Err(ShellError::OperatorMismatch {
            op_span: operator.span,
            lhs_ty: left.get_type(),
            lhs_span: left.span()?,
            rhs_ty: right.get_type(),
            rhs_span: right.span()?,
        })
    };

    if !lhs.is_series() {
        return mismatch();
    }

    let lhs_span = left.span()?;
    let lhs = lhs.as_series(lhs_span)?;

    match (&operator.item, right) {
        // Arithmetic
        (Operator::Plus, Value::Int { val, .. }) => {
            compute_series_i64(&lhs, *val, <ChunkedArray<Int64Type>>::add, lhs_span)
        }
        (Operator::Plus, Value::Float { val, .. }) => {
            compute_series_decimal(&lhs, *val, <ChunkedArray<Float64Type>>::add, lhs_span)
        }
        (Operator::Minus, Value::Int { val, .. }) => {
            compute_series_i64(&lhs, *val, <ChunkedArray<Int64Type>>::sub, lhs_span)
        }
        (Operator::Minus, Value::Float { val, .. }) => {
            compute_series_decimal(&lhs, *val, <ChunkedArray<Float64Type>>::sub, lhs_span)
        }
        (Operator::Multiply, Value::Int { val, .. }) => {
            compute_series_i64(&lhs, *val, <ChunkedArray<Int64Type>>::mul, lhs_span)
        }
        (Operator::Multiply, Value::Float { val, .. }) => {
            compute_series_decimal(&lhs, *val, <ChunkedArray<Float64Type>>::mul, lhs_span)
        }

        // Division rejects a zero divisor before touching the series.
        (Operator::Divide, Value::Int { val, span }) if *val == 0 => {
            Err(ShellError::DivisionByZero(*span))
        }
        (Operator::Divide, Value::Int { val, .. }) => {
            compute_series_i64(&lhs, *val, <ChunkedArray<Int64Type>>::div, lhs_span)
        }
        (Operator::Divide, Value::Float { val, span }) if val.is_zero() => {
            Err(ShellError::DivisionByZero(*span))
        }
        (Operator::Divide, Value::Float { val, .. }) => {
            compute_series_decimal(&lhs, *val, <ChunkedArray<Float64Type>>::div, lhs_span)
        }

        // Comparisons
        (Operator::Equal, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::eq, lhs_span)
        }
        (Operator::Equal, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::eq, lhs_span)
        }
        (Operator::NotEqual, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::neq, lhs_span)
        }
        (Operator::NotEqual, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::neq, lhs_span)
        }
        (Operator::LessThan, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::lt, lhs_span)
        }
        (Operator::LessThan, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::lt, lhs_span)
        }
        (Operator::LessThanOrEqual, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::lt_eq, lhs_span)
        }
        (Operator::LessThanOrEqual, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::lt_eq, lhs_span)
        }
        (Operator::GreaterThan, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::gt, lhs_span)
        }
        (Operator::GreaterThan, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::gt, lhs_span)
        }
        (Operator::GreaterThanOrEqual, Value::Int { val, .. }) => {
            compare_series_i64(&lhs, *val, ChunkedArray::gt_eq, lhs_span)
        }
        (Operator::GreaterThanOrEqual, Value::Float { val, .. }) => {
            compare_series_decimal(&lhs, *val, ChunkedArray::gt_eq, lhs_span)
        }

        // Pattern search
        (Operator::Contains, Value::String { val, .. }) => {
            contains_series_pat(&lhs, val, lhs_span)
        }

        // Unsupported operator, or a right-hand value of the wrong kind.
        _ => mismatch(),
    }
}
/// Applies the arithmetic function `f` between an integer series and `val`.
///
/// `UInt32`, `Int32` and `UInt64` series are cast to `Int64` first; `Int64`
/// series are used directly. Any other dtype is reported as an
/// `InternalError`.
fn compute_series_i64<F>(series: &Series, val: i64, f: F, span: Span) -> Result<Value, ShellError>
where
    F: Fn(ChunkedArray<Int64Type>, i64) -> ChunkedArray<Int64Type>,
{
    match series.dtype() {
        DataType::Int64 => compute_casted_i64(series.i64(), val, f, span),
        DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
            let widened = series
                .cast(&DataType::Int64)
                .map_err(|e| ShellError::InternalError(e.to_string()))?;
            compute_casted_i64(widened.i64(), val, f, span)
        }
        other => Err(ShellError::InternalError(format!(
            "Series of type {} can not be used for operations with an i64 value",
            other
        ))),
    }
}
/// Runs `f` on a clone of the `Int64` chunked array and `val`, then wraps the
/// resulting series into a `Value`. A failed downcast is reported as an
/// `InternalError`.
fn compute_casted_i64<F>(
    casted: Result<&ChunkedArray<Int64Type>, PolarsError>,
    val: i64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(ChunkedArray<Int64Type>, i64) -> ChunkedArray<Int64Type>,
{
    let chunked = casted.map_err(|e| ShellError::InternalError(e.to_string()))?;
    // `f` takes the array by value, hence the clone.
    let computed = f(chunked.clone(), val).into_series();
    NuDataFrame::series_to_value(computed, span)
}
/// Applies the arithmetic function `f` between a float series and `val`.
///
/// `Float32` series are cast to `Float64` first; `Float64` series are used
/// directly. Any other dtype is reported as an `InternalError`.
fn compute_series_decimal<F>(
    series: &Series,
    val: f64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(ChunkedArray<Float64Type>, f64) -> ChunkedArray<Float64Type>,
{
    match series.dtype() {
        DataType::Float64 => compute_casted_f64(series.f64(), val, f, span),
        DataType::Float32 => {
            let widened = series
                .cast(&DataType::Float64)
                .map_err(|e| ShellError::InternalError(e.to_string()))?;
            compute_casted_f64(widened.f64(), val, f, span)
        }
        other => Err(ShellError::InternalError(format!(
            "Series of type {} can not be used for operations with a decimal value",
            other
        ))),
    }
}
/// Runs `f` on a clone of the `Float64` chunked array and `val`, then wraps
/// the resulting series into a `Value`. A failed downcast is reported as an
/// `InternalError`.
fn compute_casted_f64<F>(
    casted: Result<&ChunkedArray<Float64Type>, PolarsError>,
    val: f64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(ChunkedArray<Float64Type>, f64) -> ChunkedArray<Float64Type>,
{
    let chunked = casted.map_err(|e| ShellError::InternalError(e.to_string()))?;
    // `f` takes the array by value, hence the clone.
    let computed = f(chunked.clone(), val).into_series();
    NuDataFrame::series_to_value(computed, span)
}
/// Applies the comparison function `f` between an integer series and `val`.
///
/// `UInt32`, `Int32` and `UInt64` series are cast to `Int64` first; `Int64`
/// series are used directly. Any other dtype is reported as an
/// `InternalError`.
fn compare_series_i64<F>(series: &Series, val: i64, f: F, span: Span) -> Result<Value, ShellError>
where
    F: Fn(&ChunkedArray<Int64Type>, i64) -> ChunkedArray<BooleanType>,
{
    match series.dtype() {
        DataType::Int64 => compare_casted_i64(series.i64(), val, f, span),
        DataType::UInt32 | DataType::Int32 | DataType::UInt64 => {
            let widened = series
                .cast(&DataType::Int64)
                .map_err(|e| ShellError::InternalError(e.to_string()))?;
            compare_casted_i64(widened.i64(), val, f, span)
        }
        other => Err(ShellError::InternalError(format!(
            "Series of type {} can not be used for operations with an i64 value",
            other
        ))),
    }
}
/// Runs the comparison `f` on the `Int64` chunked array and `val`, then wraps
/// the boolean result into a `Value`. A failed downcast is reported as an
/// `InternalError`.
fn compare_casted_i64<F>(
    casted: Result<&ChunkedArray<Int64Type>, PolarsError>,
    val: i64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(&ChunkedArray<Int64Type>, i64) -> ChunkedArray<BooleanType>,
{
    let chunked = casted.map_err(|e| ShellError::InternalError(e.to_string()))?;
    let mask = f(chunked, val).into_series();
    NuDataFrame::series_to_value(mask, span)
}
/// Applies the comparison function `f` between a float series and `val`.
///
/// `Float32` series are cast to `Float64` first; `Float64` series are used
/// directly. Any other dtype is reported as an `InternalError`.
fn compare_series_decimal<F>(
    series: &Series,
    val: f64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(&ChunkedArray<Float64Type>, f64) -> ChunkedArray<BooleanType>,
{
    match series.dtype() {
        DataType::Float64 => compare_casted_f64(series.f64(), val, f, span),
        DataType::Float32 => {
            let widened = series
                .cast(&DataType::Float64)
                .map_err(|e| ShellError::InternalError(e.to_string()))?;
            compare_casted_f64(widened.f64(), val, f, span)
        }
        other => Err(ShellError::InternalError(format!(
            "Series of type {} can not be used for operations with a decimal value",
            other
        ))),
    }
}
/// Runs the comparison `f` on the `Float64` chunked array and `val`, then
/// wraps the boolean result into a `Value`. A failed downcast is reported as
/// an `InternalError`.
fn compare_casted_f64<F>(
    casted: Result<&ChunkedArray<Float64Type>, PolarsError>,
    val: f64,
    f: F,
    span: Span,
) -> Result<Value, ShellError>
where
    F: Fn(&ChunkedArray<Float64Type>, f64) -> ChunkedArray<BooleanType>,
{
    let chunked = casted.map_err(|e| ShellError::InternalError(e.to_string()))?;
    let mask = f(chunked, val).into_series();
    NuDataFrame::series_to_value(mask, span)
}
/// Checks every string of `series` against the pattern `pat` and returns the
/// boolean result wrapped into a `Value`.
///
/// Both a series that cannot be viewed as utf8 and a failing `contains` call
/// are reported as an `InternalError`.
fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result<Value, ShellError> {
    let strings = series
        .utf8()
        .map_err(|e| ShellError::InternalError(e.to_string()))?;
    let mask = strings
        .contains(pat)
        .map_err(|e| ShellError::InternalError(e.to_string()))?;
    NuDataFrame::series_to_value(mask.into_series(), span)
}

View File

@ -0,0 +1,536 @@
use super::{DataFrameValue, NuDataFrame};
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use indexmap::map::{Entry, IndexMap};
use nu_protocol::{ShellError, Span, Value};
use polars::chunked_array::object::builder::ObjectChunkedBuilder;
use polars::chunked_array::ChunkedArray;
use polars::prelude::{
DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray,
ObjectType, PolarsNumericType, Series,
};
use std::ops::{Deref, DerefMut};
// Number of seconds in one day; used to turn the day count of a `Date`
// series into a timestamp in seconds.
const SECS_PER_DAY: i64 = 86_400;
// A named list of nushell values representing one dataframe column.
#[derive(Debug)]
pub struct Column {
// Name of the column.
name: String,
// Values of the column, one per row.
values: Vec<Value>,
}
impl Column {
pub fn new(name: String, values: Vec<Value>) -> Self {
Self { name, values }
}
pub fn new_empty(name: String) -> Self {
Self {
name,
values: Vec::new(),
}
}
pub fn name(&self) -> &str {
self.name.as_str()
}
//pub fn iter(&self) -> impl Iterator<Item = &Value> {
// self.values.iter()
//}
}
// Consuming iteration over a column yields its values; the name is dropped.
impl IntoIterator for Column {
type Item = Value;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.values.into_iter()
}
}
// Lets a `Column` be read as the `Vec<Value>` it wraps.
impl Deref for Column {
type Target = Vec<Value>;
fn deref(&self) -> &Self::Target {
&self.values
}
}
// Lets a `Column` be mutated as the `Vec<Value>` it wraps.
impl DerefMut for Column {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.values
}
}
// Kind of data stored in a column while it is being parsed. `insert_value`
// assigns it and `from_parsed_columns` uses it to pick the series to build.
#[derive(Debug)]
pub enum InputType {
Integer,
Float,
String,
Boolean,
// Used for any other value kind and for columns mixing value kinds.
Object,
Date,
Duration,
}
// A `Column` together with the `InputType` assigned to it.
#[derive(Debug)]
pub struct TypedColumn {
// The column data.
column: Column,
// `None` until a type has been assigned to the column.
column_type: Option<InputType>,
}
impl TypedColumn {
// Creates a column called `name` with no values and no assigned type.
fn new_empty(name: String) -> Self {
Self {
column: Column::new_empty(name),
column_type: None,
}
}
}
// Lets a `TypedColumn` be read as the `Column` it wraps.
impl Deref for TypedColumn {
type Target = Column;
fn deref(&self) -> &Self::Target {
&self.column
}
}
// Lets a `TypedColumn` be mutated as the `Column` it wraps.
impl DerefMut for TypedColumn {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.column
}
}
// Parsed columns keyed by column name.
pub type ColumnMap = IndexMap<String, TypedColumn>;
/// Converts the rows `from_row..to_row` of `series` into a `Column` of
/// nushell values.
///
/// Null entries become `Value::Nothing`. Every produced value carries
/// `Span::unknown()`. When `to_row` is not greater than `from_row` the
/// resulting column is empty.
///
/// # Errors
/// Returns `ShellError::InternalError` when the series cannot be downcast to
/// the chunked array matching its dtype, when an `Object` series does not
/// hold `DataFrameValue`s, or when the dtype has no arm below.
///
/// # Panics
/// `NaiveDateTime::from_timestamp` panics for timestamps outside the range
/// chrono can represent.
pub fn create_column(
    series: &Series,
    from_row: usize,
    to_row: usize,
) -> Result<Column, ShellError> {
    // Wraps any displayable error into the internal error used by this module.
    fn internal<E: ToString>(e: E) -> ShellError {
        ShellError::InternalError(e.to_string())
    }

    // Value standing in for a null entry.
    fn nothing() -> Value {
        Value::Nothing {
            span: Span::unknown(),
        }
    }

    // Builds a date value with a zero-length offset from a unix timestamp.
    fn utc_date(seconds: i64, nanos: u32) -> Value {
        let naive_datetime = NaiveDateTime::from_timestamp(seconds, nanos);
        let offset = FixedOffset::east(0);
        Value::Date {
            val: DateTime::<FixedOffset>::from_utc(naive_datetime, offset),
            span: Span::unknown(),
        }
    }

    // An inverted range is treated as empty instead of underflowing.
    let size = to_row.saturating_sub(from_row);

    match series.dtype() {
        DataType::Null => Ok(Column::new(series.name().into(), vec![nothing(); size])),
        DataType::UInt8 => {
            let casted = series.u8().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::UInt16 => {
            let casted = series.u16().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::UInt32 => {
            let casted = series.u32().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::UInt64 => {
            let casted = series.u64().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Int8 => {
            let casted = series.i8().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Int16 => {
            let casted = series.i16().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Int32 => {
            let casted = series.i32().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Int64 => {
            let casted = series.i64().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Float32 => {
            let casted = series.f32().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Float64 => {
            let casted = series.f64().map_err(internal)?;
            Ok(column_from_casted(casted, from_row, size))
        }
        DataType::Boolean => {
            let casted = series.bool().map_err(internal)?;
            let values = casted
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    Some(a) => Value::Bool {
                        val: a,
                        span: Span::unknown(),
                    },
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(casted.name().into(), values))
        }
        DataType::Utf8 => {
            let casted = series.utf8().map_err(internal)?;
            let values = casted
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    Some(a) => Value::String {
                        val: a.into(),
                        span: Span::unknown(),
                    },
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(casted.name().into(), values))
        }
        DataType::Object(x) => {
            // Only object series built from `DataFrameValue` can be converted.
            let ca = series
                .as_any()
                .downcast_ref::<ChunkedArray<ObjectType<DataFrameValue>>>()
                .ok_or_else(|| {
                    internal(format!("Object not supported for conversion: {}", x))
                })?;
            let values = ca
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    Some(a) => a.get_value(),
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(ca.name().into(), values))
        }
        DataType::Date => {
            let casted = series.date().map_err(internal)?;
            let values = casted
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    // elapsed time in days since 1970-01-01
                    Some(days) => utc_date(days as i64 * SECS_PER_DAY, 0),
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(casted.name().into(), values))
        }
        DataType::Datetime => {
            let casted = series.datetime().map_err(internal)?;
            let values = casted
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    // elapsed time in milliseconds since 1970-01-01; euclidean
                    // division keeps the sub-second part non-negative so that
                    // timestamps before 1970 are not shifted, and the
                    // milliseconds are preserved instead of being dropped.
                    Some(millis) => {
                        let seconds = millis.div_euclid(1000);
                        // rem_euclid(1000) is within 0..=999, so the product
                        // always fits in a u32.
                        let nanos = (millis.rem_euclid(1000) * 1_000_000) as u32;
                        utc_date(seconds, nanos)
                    }
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(casted.name().into(), values))
        }
        DataType::Time => {
            let casted = series.time().map_err(internal)?;
            let values = casted
                .into_iter()
                .skip(from_row)
                .take(size)
                .map(|v| match v {
                    Some(nanoseconds) => Value::Duration {
                        val: nanoseconds,
                        span: Span::unknown(),
                    },
                    None => nothing(),
                })
                .collect::<Vec<Value>>();
            Ok(Column::new(casted.name().into(), values))
        }
        e => Err(ShellError::InternalError(format!(
            "Value not supported in nushell: {}",
            e
        ))),
    }
}
/// Converts `size` entries of a numeric chunked array, starting at
/// `from_row`, into a `Column` that keeps the array's name. Null entries
/// become `Value::Nothing` with an unknown span.
fn column_from_casted<T>(casted: &ChunkedArray<T>, from_row: usize, size: usize) -> Column
where
    T: PolarsNumericType,
    T::Native: Into<Value>,
{
    let values: Vec<Value> = casted
        .into_iter()
        .skip(from_row)
        .take(size)
        .map(|entry| {
            entry.map_or_else(
                || Value::Nothing {
                    span: Span::unknown(),
                },
                |number| number.into(),
            )
        })
        .collect();

    Column::new(casted.name().into(), values)
}
/// Appends a separator row to `values`: a record with one field per column
/// of `df`, each holding the string `"..."`.
pub fn add_separator(values: &mut Vec<Value>, df: &DataFrame) {
    let (cols, vals): (Vec<String>, Vec<Value>) = df
        .get_column_names()
        .into_iter()
        .map(|name| {
            let filler = Value::String {
                val: "...".into(),
                span: Span::unknown(),
            };
            (name.to_string(), filler)
        })
        .unzip();

    values.push(Value::Record {
        cols,
        vals,
        span: Span::unknown(),
    });
}
/// Inserts every value of a record into the column map, pairing `cols` and
/// `values` by position. Pairing stops at the shorter of the two slices and
/// the first failing insertion aborts the whole operation.
pub fn insert_record(
    column_values: &mut ColumnMap,
    cols: &[String],
    values: &[Value],
) -> Result<(), ShellError> {
    cols.iter()
        .zip(values)
        .try_for_each(|(col, value)| insert_value(value.clone(), col.clone(), column_values))
}
/// Appends `value` to the column stored under `key`, creating the column
/// when it does not exist yet.
///
/// The first value of a column decides its `InputType`. A later value whose
/// kind differs from the previously inserted one (or whose kind is not one of
/// the six primitive kinds) turns the column into `InputType::Object`.
pub fn insert_value(
    value: Value,
    key: String,
    column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
    let typed = match column_values.entry(key.clone()) {
        Entry::Occupied(slot) => slot.into_mut(),
        Entry::Vacant(slot) => slot.insert(TypedColumn::new_empty(key)),
    };

    // `Some(kind)` when the column type has to be (re)assigned, `None` when
    // the value agrees with the previous one and the type is left untouched.
    let reassigned = match typed.values.last() {
        None => Some(match &value {
            Value::Int { .. } => InputType::Integer,
            Value::Float { .. } => InputType::Float,
            Value::String { .. } => InputType::String,
            Value::Bool { .. } => InputType::Boolean,
            Value::Date { .. } => InputType::Date,
            Value::Duration { .. } => InputType::Duration,
            _ => InputType::Object,
        }),
        Some(previous) => {
            let same_kind = matches!(
                (previous, &value),
                (Value::Int { .. }, Value::Int { .. })
                    | (Value::Float { .. }, Value::Float { .. })
                    | (Value::String { .. }, Value::String { .. })
                    | (Value::Bool { .. }, Value::Bool { .. })
                    | (Value::Date { .. }, Value::Date { .. })
                    | (Value::Duration { .. }, Value::Duration { .. })
            );
            if same_kind {
                None
            } else {
                Some(InputType::Object)
            }
        }
    };

    if let Some(kind) = reassigned {
        typed.column_type = Some(kind);
    }
    typed.values.push(value);

    Ok(())
}
/// Builds a `NuDataFrame` out of the parsed columns of a `ColumnMap`.
///
/// Each column becomes one series whose construction depends on the column's
/// `InputType`; columns without an assigned type are skipped. Value
/// conversion errors are propagated and a failure to assemble the dataframe
/// is reported as an `InternalError`.
pub fn from_parsed_columns(column_values: ColumnMap) -> Result<NuDataFrame, ShellError> {
    let mut df_series: Vec<Series> = Vec::new();

    for (name, column) in column_values {
        let kind = match &column.column_type {
            Some(kind) => kind,
            None => continue,
        };

        let series = match kind {
            InputType::Float => {
                let floats = column
                    .values
                    .iter()
                    .map(|v| v.as_f64())
                    .collect::<Result<Vec<_>, _>>()?;
                Series::new(&name, floats)
            }
            InputType::Integer => {
                let integers = column
                    .values
                    .iter()
                    .map(|v| v.as_i64())
                    .collect::<Result<Vec<_>, _>>()?;
                Series::new(&name, integers)
            }
            InputType::String => {
                let strings = column
                    .values
                    .iter()
                    .map(|v| v.as_string())
                    .collect::<Result<Vec<_>, _>>()?;
                Series::new(&name, strings)
            }
            InputType::Boolean => {
                let booleans = column
                    .values
                    .iter()
                    .map(|v| v.as_bool())
                    .collect::<Result<Vec<_>, _>>()?;
                Series::new(&name, booleans)
            }
            InputType::Object => {
                // Mixed or unsupported kinds are stored as wrapped values.
                let mut builder =
                    ObjectChunkedBuilder::<DataFrameValue>::new(&name, column.values.len());
                for v in column.values.iter() {
                    builder.append_value(DataFrameValue::new(v.clone()));
                }
                builder.finish().into_series()
            }
            InputType::Date => {
                // Dates are stored as millisecond timestamps; anything that
                // is not a date becomes a null.
                let millis = column.values.iter().map(|v| match v {
                    Value::Date { val, .. } => Some(val.timestamp_millis()),
                    _ => None,
                });
                let res: DatetimeChunked =
                    ChunkedArray::<Int64Type>::new_from_opt_iter(&name, millis).into();
                res.into_series()
            }
            InputType::Duration => {
                // Anything that is not a duration becomes a null.
                let durations = column.values.iter().map(|v| match v {
                    Value::Duration { val, .. } => Some(*val),
                    _ => None,
                });
                ChunkedArray::<Int64Type>::new_from_opt_iter(&name, durations).into_series()
            }
        };

        df_series.push(series);
    }

    let df = DataFrame::new(df_series).map_err(|e| ShellError::InternalError(e.to_string()))?;
    Ok(NuDataFrame::new(df))
}

View File

@ -0,0 +1,63 @@
use super::NuDataFrame;
use nu_protocol::{ast::Operator, Category, CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuDataFrame. Most methods delegate to
// inherent NuDataFrame methods defined elsewhere in this module.
impl CustomValue for NuDataFrame {
// Type tag identifying this custom value.
fn typetag_name(&self) -> &'static str {
"dataframe"
}
// Not implemented: calling this panics through `unimplemented!`.
fn typetag_deserialize(&self) {
unimplemented!("typetag_deserialize")
}
// Clones the wrapped value into a new boxed custom value tagged with `span`.
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuDataFrame(self.0.clone());
Value::CustomValue {
val: Box::new(cloned),
span,
}
}
// Custom category named after the type tag.
fn category(&self) -> Category {
Category::Custom(self.typetag_name().into())
}
// String form of the value: just the type tag.
fn value_string(&self) -> String {
self.typetag_name().to_string()
}
// Converts to a list value made of whatever `print` returns.
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let vals = self.print()?;
Ok(Value::List { vals, span })
}
// Always returns JSON null.
fn to_json(&self) -> nu_json::Value {
nu_json::Value::Null
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
// Integer path access is forwarded to `get_value`.
fn follow_path_int(&self, count: usize, span: Span) -> Result<Value, ShellError> {
self.get_value(count, span)
}
// String path access looks the column up by name and returns it as a value.
fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError> {
let column = self.column(&column_name, span)?;
Ok(column.to_value(span))
}
// Operator handling is forwarded to `compute_with_value`.
fn operation(
&self,
lhs_span: Span,
operator: Operator,
op: Span,
right: &Value,
) -> Result<Value, ShellError> {
self.compute_with_value(lhs_span, operator, op, right)
}
}

View File

@ -0,0 +1,328 @@
mod between_values;
mod conversion;
mod custom_value;
mod operations;
use std::{cmp::Ordering, fmt::Display, hash::Hasher};
pub use conversion::{Column, ColumnMap};
use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
use polars::prelude::{DataFrame, PolarsObject, Series};
use serde::{Deserialize, Serialize};
// DataFrameValue wraps a Nushell `Value` so that the `PolarsObject` trait
// can be implemented for it. The `PolarsObject` trait makes it possible to
// create dataframes with mixed datatypes.
#[derive(Clone, Debug)]
pub struct DataFrameValue(Value);
impl DataFrameValue {
fn new(value: Value) -> Self {
Self(value)
}
fn get_value(&self) -> Value {
self.0.clone()
}
}
impl Display for DataFrameValue {
    /// Writes the type of the wrapped value (as returned by `get_type`), not its contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let type_label = self.0.get_type();
        write!(f, "{}", type_label)
    }
}
impl Default for DataFrameValue {
fn default() -> Self {
Self(Value::Nothing {
span: Span::unknown(),
})
}
}
impl PartialEq for DataFrameValue {
    /// Two wrappers are equal only when `partial_cmp` on the inner values
    /// yields `Some(Ordering::Equal)`; values that cannot be ordered are unequal.
    fn eq(&self, other: &Self) -> bool {
        matches!(self.0.partial_cmp(&other.0), Some(Ordering::Equal))
    }
}
// Marker impl declaring the equality of `DataFrameValue` a full equivalence relation.
// NOTE(review): `eq` is built on `partial_cmp`, which may return `None` (treated as
// "not equal"); confirm that a value always compares equal to itself, as `Eq` requires.
impl Eq for DataFrameValue {}
impl std::hash::Hash for DataFrameValue {
    /// Feeds the wrapped value into `state`.
    ///
    /// Only `Nothing`, `Int` and `String` contribute to the hash; every other
    /// variant currently writes nothing to the hasher.
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self.0 {
            Value::Int { ref val, .. } => std::hash::Hash::hash(val, state),
            Value::String { ref val, .. } => std::hash::Hash::hash(val, state),
            // `Nothing` hashes as the constant 0 (an `i32`, the default integer type).
            Value::Nothing { .. } => std::hash::Hash::hash(&0_i32, state),
            // TODO. Define hash for the rest of types
            _ => {}
        }
    }
}
impl PolarsObject for DataFrameValue {
    /// Name reported to polars for this object type.
    fn type_name() -> &'static str {
        const TYPE_NAME: &str = "object";
        TYPE_NAME
    }
}
// NuDataFrame is a newtype wrapper around a polars `DataFrame`; the
// `CustomValue` trait is implemented for it in the `custom_value` module.
#[derive(Debug, Serialize, Deserialize)]
pub struct NuDataFrame(DataFrame);
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.0
}
}
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.0
}
}
impl NuDataFrame {
pub fn new(dataframe: DataFrame) -> Self {
Self(dataframe)
}
fn default_value(span: Span) -> Value {
let dataframe = DataFrame::default();
NuDataFrame::dataframe_into_value(dataframe, span)
}
pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value {
Value::CustomValue {
val: Box::new(Self::new(dataframe)),
span,
}
}
/// Consumes the dataframe and boxes it into a `Value::CustomValue` carrying `span`.
pub fn to_value(self, span: Span) -> Value {
    let val = Box::new(self);
    Value::CustomValue { val, span }
}
/// Builds a single-column dataframe from `series` and wraps it in a `Value`.
///
/// A failure reported by `DataFrame::new` is returned as
/// `ShellError::InternalError` carrying the polars error text.
pub fn series_to_value(series: Series, span: Span) -> Result<Value, ShellError> {
    DataFrame::new(vec![series])
        .map(|dataframe| NuDataFrame::dataframe_into_value(dataframe, span))
        .map_err(|e| ShellError::InternalError(e.to_string()))
}
/// Builds a dataframe from a stream of values.
///
/// * `Value::Record` items contribute one entry per named column.
/// * `Value::List` items are treated as a record whose column names are the
///   element positions (`"0"`, `"1"`, ...).
/// * Any other value is stored under the column `"0"`.
///
/// Errors from the `conversion` helpers are propagated unchanged.
pub fn try_from_iter<T>(iter: T) -> Result<Self, ShellError>
where
    T: Iterator<Item = Value>,
{
    // Column name -> collected values, in insertion order. The conversion
    // helpers receive every value as it is inserted.
    let mut column_values: ColumnMap = IndexMap::new();

    for value in iter {
        match value {
            Value::Record { cols, vals, .. } => {
                conversion::insert_record(&mut column_values, &cols, &vals)?;
            }
            Value::List { vals, .. } => {
                // Positional names stand in for the missing column headers.
                let cols: Vec<String> = (0..vals.len()).map(|idx| idx.to_string()).collect();
                conversion::insert_record(&mut column_values, &cols, &vals)?;
            }
            other => {
                conversion::insert_value(other, "0".to_string(), &mut column_values)?;
            }
        }
    }

    conversion::from_parsed_columns(column_values)
}
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns)
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?;
// Ok(Self::new(dataframe))
//}
/// Builds a dataframe from already separated columns.
///
/// Every value of every column is inserted under that column's name; the
/// first insertion error stops the process and is returned.
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
    let mut column_values: ColumnMap = IndexMap::new();

    for column in columns {
        let name = column.name().to_string();
        column.into_iter().try_for_each(|value| {
            conversion::insert_value(value, name.clone(), &mut column_values)
        })?;
    }

    conversion::from_parsed_columns(column_values)
}
/// Extracts a dataframe from pipeline input.
///
/// The input is collapsed into a single value; if that value is a custom
/// value holding a `NuDataFrame`, a clone of the dataframe is returned.
/// Otherwise `ShellError::CantConvert` is returned, pointing at the custom
/// value's own span when the downcast fails and at `span` for any other value.
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
    let not_a_dataframe = |at: Span| {
        ShellError::CantConvert(
            "Dataframe not found".into(),
            "value is not a dataframe".into(),
            at,
        )
    };

    match input.into_value(span) {
        Value::CustomValue {
            val,
            span: value_span,
        } => val
            .as_any()
            .downcast_ref::<NuDataFrame>()
            .map(|df| NuDataFrame(df.0.clone()))
            .ok_or_else(|| not_a_dataframe(value_span)),
        _ => Err(not_a_dataframe(span)),
    }
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| {
let possibilities = self
.0
.get_column_names()
.iter()
.map(|name| name.to_string())
.collect::<Vec<String>>();
let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string());
ShellError::DidYouMean(option, span)
})?;
let dataframe = DataFrame::new(vec![s.clone()])
.map_err(|e| ShellError::InternalError(e.to_string()))?;
Ok(Self(dataframe))
}
/// A dataframe counts as a series when it has exactly one column.
pub fn is_series(&self) -> bool {
    matches!(self.0.width(), 1)
}
/// Returns a clone of the only column when the dataframe is a series.
///
/// Fails with `ShellError::InternalError` when the dataframe does not have
/// exactly one column. The span argument is currently unused.
pub fn as_series(&self, _span: Span) -> Result<Series, ShellError> {
    if self.is_series() {
        let only_column = self
            .0
            .get_columns()
            .get(0)
            .expect("We have already checked that the width is 1");
        Ok(only_column.clone())
    } else {
        Err(ShellError::InternalError(
            "DataFrame cannot be used as Series".into(),
        ))
    }
}
/// Returns the value stored at `row` of a single-column dataframe.
///
/// Fails if the dataframe is not a series, if the column conversion fails,
/// or with `ShellError::AccessBeyondEnd` (carrying the series length) when
/// the requested row yields no value.
pub fn get_value(&self, row: usize, span: Span) -> Result<Value, ShellError> {
    let series = self.as_series(Span::unknown())?;
    // Convert only the one-row window [row, row + 1).
    let cell = conversion::create_column(&series, row, row + 1)?;

    if cell.len() != 0 {
        return Ok(cell
            .into_iter()
            .next()
            .expect("already checked there is a value"));
    }

    Err(ShellError::AccessBeyondEnd(series.len(), span))
}
/// Rows used to display the dataframe.
///
/// Up to 20 rows are returned as is. Taller dataframes are shown as the
/// first 10 rows, a separator added by `conversion::add_separator`, and the
/// last 10 rows.
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
    let df = &self.0;
    let size: usize = 20;

    if df.height() <= size {
        return self.head(Some(size));
    }

    let sample_size = size / 2;
    let mut rows = self.head(Some(sample_size))?;
    conversion::add_separator(&mut rows, df);

    // Never show more tail rows than remain after the head sample.
    let remaining = df.height() - sample_size;
    let tail_size = remaining.min(sample_size);
    rows.extend(self.tail(Some(tail_size))?);

    Ok(rows)
}
/// First `rows` rows of the dataframe (5 when `rows` is `None`).
pub fn head(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
    self.to_rows(0, rows.unwrap_or(5))
}
/// Last `rows` rows of the dataframe (5 when `rows` is `None`).
///
/// The start index saturates at 0 when more rows are requested than exist.
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
    let height = self.0.height();
    let start = height.saturating_sub(rows.unwrap_or(5));
    self.to_rows(start, height)
}
/// Converts the rows in `[from_row, to_row)` into `Value::Record`s, one per row.
///
/// `to_row` is clamped to the dataframe height. Each record lists the column
/// names in dataframe order; a column that runs out of values contributes
/// `Value::Nothing`. Records and filler values carry an unknown span.
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
    let upper_row = to_row.min(self.0.height());

    let columns = self
        .0
        .get_columns()
        .iter()
        .map(|series| conversion::create_column(series, from_row, upper_row))
        .collect::<Result<Vec<Column>, ShellError>>()?;

    // The number of rows emitted is the length of the last converted column
    // (0 when the dataframe has no columns).
    let size: usize = columns.iter().map(|col| col.len()).last().unwrap_or(0);

    let mut iterators: Vec<(String, std::vec::IntoIter<Value>)> = columns
        .into_iter()
        .map(|col| (col.name().to_string(), col.into_iter()))
        .collect();

    let mut rows: Vec<Value> = Vec::with_capacity(size);
    for _ in 0..size {
        let mut cols = Vec::with_capacity(iterators.len());
        let mut vals = Vec::with_capacity(iterators.len());

        for (name, remaining) in iterators.iter_mut() {
            cols.push(name.clone());
            vals.push(remaining.next().unwrap_or_else(|| Value::Nothing {
                span: Span::unknown(),
            }));
        }

        rows.push(Value::Record {
            cols,
            vals,
            span: Span::unknown(),
        });
    }

    Ok(rows)
}
}

View File

@ -0,0 +1,210 @@
use nu_protocol::{ast::Operator, ShellError, Span, Spanned, Value};
use polars::prelude::{DataFrame, Series};
use super::between_values::{
between_dataframes, compute_between_series, compute_series_single_value,
};
use super::NuDataFrame;
// Direction used by `NuDataFrame::append_df` when combining two dataframes.
// Only `Row` is available; the `Column` variant is currently commented out.
pub enum Axis {
Row,
//Column,
}
//impl Axis {
// pub fn try_from_str(axis: &str, span: Span) -> Result<Axis, ShellError> {
// match axis {
// "row" => Ok(Axis::Row),
// "col" => Ok(Axis::Column),
// _ => Err(ShellError::DidYouMean("'row' or 'col'".into(), span)),
// }
// }
//}
impl NuDataFrame {
/// Applies `operator` with this dataframe on the left and `right` on the right.
///
/// * `right` is a custom value holding a `NuDataFrame`:
///   * both sides are single-column dataframes: their dtypes and lengths must
///     match, then the operation is computed between the two series;
///   * otherwise both dataframes must have the same number of rows and the
///     operation is computed between dataframes.
/// * `right` is any other value: the operation is computed between this
///   dataframe and that single value.
///
/// # Errors
///
/// * `ShellError::DowncastNotPossible` when `right` is a custom value that is
///   not a `NuDataFrame`.
/// * `ShellError::IncompatibleParameters` when dtypes, lengths or row counts
///   differ; each message describes its own side.
/// * Any error returned by the underlying compute helpers.
pub fn compute_with_value(
    &self,
    lhs_span: Span,
    operator: Operator,
    op_span: Span,
    right: &Value,
) -> Result<Value, ShellError> {
    // Every branch needs the operator together with its span.
    let op = Spanned {
        item: operator,
        span: op_span,
    };

    match right {
        Value::CustomValue {
            val: rhs,
            span: rhs_span,
        } => {
            let rhs = rhs.as_any().downcast_ref::<NuDataFrame>().ok_or_else(|| {
                ShellError::DowncastNotPossible(
                    "Unable to create dataframe".to_string(),
                    *rhs_span,
                )
            })?;

            match (self.is_series(), rhs.is_series()) {
                (true, true) => {
                    let lhs = self.as_series(lhs_span)?;
                    let rhs = rhs.as_series(*rhs_span)?;

                    if lhs.dtype() != rhs.dtype() {
                        return Err(ShellError::IncompatibleParameters {
                            left_message: format!("datatype {}", lhs.dtype()),
                            left_span: lhs_span,
                            // Report the right-hand dtype, not the left one again.
                            right_message: format!("datatype {}", rhs.dtype()),
                            right_span: *rhs_span,
                        });
                    }

                    if lhs.len() != rhs.len() {
                        return Err(ShellError::IncompatibleParameters {
                            left_message: format!("len {}", lhs.len()),
                            left_span: lhs_span,
                            right_message: format!("len {}", rhs.len()),
                            right_span: *rhs_span,
                        });
                    }

                    compute_between_series(
                        op,
                        &NuDataFrame::default_value(lhs_span),
                        &lhs,
                        right,
                        &rhs,
                    )
                }
                _ => {
                    if self.0.height() != rhs.0.height() {
                        return Err(ShellError::IncompatibleParameters {
                            left_message: format!("rows {}", self.0.height()),
                            left_span: lhs_span,
                            right_message: format!("rows {}", rhs.0.height()),
                            right_span: *rhs_span,
                        });
                    }

                    between_dataframes(
                        op,
                        &NuDataFrame::default_value(lhs_span),
                        self,
                        right,
                        rhs,
                    )
                }
            }
        }
        _ => compute_series_single_value(
            op,
            &NuDataFrame::default_value(lhs_span),
            self,
            right,
        ),
    }
}
/// Combines this dataframe with `other` along `axis`.
///
/// For `Axis::Row` the columns of `self` are followed by the columns of
/// `other` in a new dataframe. A column whose name was already seen is
/// renamed with an `_x` suffix. A failure from `DataFrame::new` is returned
/// as `ShellError::InternalError`. The span argument is currently unused.
pub fn append_df(
    &self,
    other: &NuDataFrame,
    axis: Axis,
    _span: Span,
) -> Result<Self, ShellError> {
    match axis {
        Axis::Row => {
            // Names already used by an earlier column.
            let mut seen: Vec<&str> = Vec::new();
            let mut new_cols: Vec<Series> = Vec::new();

            for s in self.0.get_columns().iter().chain(other.0.get_columns()) {
                let name = if seen.contains(&s.name()) {
                    format!("{}_{}", s.name(), "x")
                } else {
                    seen.push(s.name());
                    s.name().to_string()
                };

                let mut series = s.clone();
                series.rename(&name);
                new_cols.push(series);
            }

            DataFrame::new(new_cols)
                .map(NuDataFrame::new)
                .map_err(|e| ShellError::InternalError(e.to_string()))
        } //Axis::Column => {
          //    if self.0.width() != other.0.width() {
          //        return Err(ShellError::IncompatibleParametersSingle(
          //            "Dataframes with different number of columns".into(),
          //            span,
          //        ));
          //    }
          //    if !self
          //        .0
          //        .get_column_names()
          //        .iter()
          //        .all(|col| other.0.get_column_names().contains(col))
          //    {
          //        return Err(ShellError::IncompatibleParametersSingle(
          //            "Dataframes with different columns names".into(),
          //            span,
          //        ));
          //    }
          //    let new_cols = self
          //        .0
          //        .get_columns()
          //        .iter()
          //        .map(|s| {
          //            let other_col = other
          //                .0
          //                .column(s.name())
          //                .expect("Already checked that dataframes have same columns");
          //            let mut tmp = s.clone();
          //            let res = tmp.append(other_col);
          //            match res {
          //                Ok(s) => Ok(s.clone()),
          //                Err(e) => Err({
          //                    ShellError::InternalError(format!(
          //                        "Unable to append dataframes: {}",
          //                        e
          //                    ))
          //                }),
          //            }
          //        })
          //        .collect::<Result<Vec<Series>, ShellError>>()?;
          //    let df_new = DataFrame::new(new_cols).map_err(|e| {
          //        ShellError::InternalError(format!(
          //            "Unable to append dataframes: {}",
          //            e.to_string()
          //        ))
          //    })?;
          //    Ok(NuDataFrame::new(df_new))
          //}
    }
}
}

View File

@ -1,6 +1,6 @@
use std::{fs::File, path::PathBuf};
use nu_dataframe::NuDataFrame;
use super::objects::nu_dataframe::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
@ -23,7 +23,7 @@ impl Command for OpenDataFrame {
}
fn signature(&self) -> Signature {
Signature::build("open-df")
Signature::build(self.name().to_string())
.required(
"file",
SyntaxShape::Filepath,
@ -64,7 +64,7 @@ impl Command for OpenDataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes a file name and creates a dataframe",
example: "dataframe open test.csv",
example: "open-df test.csv",
result: None,
}]
}

View File

@ -1,4 +1,4 @@
use nu_dataframe::NuDataFrame;
use super::objects::nu_dataframe::NuDataFrame;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
@ -10,7 +10,7 @@ pub struct ToDataFrame;
impl Command for ToDataFrame {
fn name(&self) -> &str {
"to-df"
"to df"
}
fn usage(&self) -> &str {
@ -18,29 +18,29 @@ impl Command for ToDataFrame {
}
fn signature(&self) -> Signature {
Signature::build("to-df").category(Category::Custom("dataframe".into()))
Signature::build(self.name().to_string()).category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | to-df",
example: "[[a b];[1 2] [3 4]] | to df",
result: None,
},
Example {
description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | to-df",
example: "[[1 2 a] [3 4 b] [5 6 c]] | to df",
result: None,
},
Example {
description: "Takes a list and creates a dataframe",
example: "[a b c] | to-df",
example: "[a b c] | to df",
result: None,
},
Example {
description: "Takes a list of booleans and creates a dataframe",
example: "[$true $true $false] | to-df",
example: "[$true $true $false] | to df",
result: None,
},
]