Batch of dataframe commands (#442)

* corrected missing shellerror type

* batch dataframe commands

* removed option to find declaration with input

* ordered dataframe folders
This commit is contained in:
Fernando Herrera 2021-12-06 04:09:49 +00:00 committed by GitHub
parent fdde95f675
commit 1fd26727c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 471 additions and 127 deletions

View File

@ -0,0 +1,109 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use super::values::{Axis, Column, NuDataFrame};
// Nushell command `append-df`: appends another dataframe to the one coming
// in through the pipeline, either row-wise (default) or column-wise (`--col`).
#[derive(Clone)]
pub struct AppendDF;

impl Command for AppendDF {
    fn name(&self) -> &str {
        "append-df"
    }

    fn usage(&self) -> &str {
        "Appends a new dataframe"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Declared as Any; validated as a dataframe in `command` via
            // `NuDataFrame::try_from_value`.
            .required("other", SyntaxShape::Any, "dataframe to be appended")
            .switch("col", "appends in col orientation", Some('c'))
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                // Row-wise append: duplicate column names get an `_x` suffix.
                description: "Appends a dataframe as new columns",
                example: r#"let a = ([[a b]; [1 2] [3 4]] | to df);
$a | append-df $a"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new("a".to_string(), vec![1.into(), 3.into()]),
                        Column::new("b".to_string(), vec![2.into(), 4.into()]),
                        Column::new("a_x".to_string(), vec![1.into(), 3.into()]),
                        Column::new("b_x".to_string(), vec![2.into(), 4.into()]),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::unknown()),
                ),
            },
            Example {
                // Column-wise append: values are concatenated under the
                // existing column names.
                description: "Appends a dataframe merging at the end of columns",
                example: r#"let a = ([[a b]; [1 2] [3 4]] | to df);
$a | append-df $a --col"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a".to_string(),
                            vec![1.into(), 3.into(), 1.into(), 3.into()],
                        ),
                        Column::new(
                            "b".to_string(),
                            vec![2.into(), 4.into(), 2.into(), 4.into()],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::unknown()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        // Delegates to the free `command` function below.
        command(engine_state, stack, call, input)
    }
}
// Implementation of `append-df`: resolves the argument and the orientation
// flag, then delegates the actual append to `NuDataFrame::append_df`.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // First positional argument: the dataframe to append.
    let other: Value = call.req(engine_state, stack, 0)?;

    // `--col` selects a column-wise append; rows otherwise.
    let axis = match call.has_flag("col") {
        true => Axis::Column,
        false => Axis::Row,
    };

    let rhs = NuDataFrame::try_from_value(other)?;
    let lhs = NuDataFrame::try_from_pipeline(input, call.head)?;

    let appended = lhs.append_df(&rhs, axis, call.head)?;
    Ok(PipelineData::Value(
        NuDataFrame::into_value(appended, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        // Evaluates every example declared in `examples()` and checks each
        // result against its declared `result` value.
        test_dataframe(AppendDF {})
    }
}

View File

@ -0,0 +1,81 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
};
use super::values::{Column, NuDataFrame};
// Nushell command `column`: selects a single column from the dataframe in
// the pipeline and returns it as a one-column dataframe (series).
#[derive(Clone)]
pub struct ColumnDF;

impl Command for ColumnDF {
    fn name(&self) -> &str {
        "column"
    }

    fn usage(&self) -> &str {
        "Returns the selected column"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("column", SyntaxShape::String, "column name")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Returns the selected column as series",
            example: "[[a b]; [1 2] [3 4]] | to df | column a",
            result: Some(
                NuDataFrame::try_from_columns(vec![Column::new(
                    "a".to_string(),
                    vec![1.into(), 3.into()],
                )])
                .expect("simple df for test should not fail")
                .into_value(Span::unknown()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        // Delegates to the free `command` function below.
        command(engine_state, stack, call, input)
    }
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let column: Spanned<String> = call.req(engine_state, stack, 0)?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
let res = df.as_ref().column(&column.item).map_err(|e| {
ShellError::SpannedLabeledError("Error selecting column".into(), e.to_string(), column.span)
})?;
NuDataFrame::try_from_series(vec![res.clone()], call.head)
.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
}
#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        // Evaluates every example declared in `examples()` and checks each
        // result against its declared `result` value.
        test_dataframe(ColumnDF {})
    }
}

View File

@ -0,0 +1,111 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use super::values::utils::convert_columns;
use super::values::{Column, NuDataFrame};
// Nushell command `drop-df`: produces a new dataframe without the named
// columns.
#[derive(Clone)]
pub struct DropDF;

impl Command for DropDF {
    fn name(&self) -> &str {
        "drop-df"
    }

    fn usage(&self) -> &str {
        "Creates a new dataframe by dropping the selected columns"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Accepts any number of column names; at least one is required
            // at runtime (checked in `command`).
            .rest("rest", SyntaxShape::Any, "column names to be dropped")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "drop column a",
            example: "[[a b]; [1 2] [3 4]] | to df | drop-df a",
            result: Some(
                NuDataFrame::try_from_columns(vec![Column::new(
                    "b".to_string(),
                    vec![2.into(), 4.into()],
                )])
                .expect("simple df for test should not fail")
                .into_value(Span::unknown()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        // Delegates to the free `command` function below.
        command(engine_state, stack, call, input)
    }
}
// Implementation of `drop-df`: drops every named column from the pipeline's
// dataframe, failing if the name list is empty or a name does not exist.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
    // `col_span` covers all supplied names, for error reporting.
    let (col_string, col_span) = convert_columns(columns, call.head)?;
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // polars' `drop` removes a single column per call, so drop the first
    // name from the source dataframe here and fold the remaining names over
    // the intermediate results below. An empty name list is an error.
    let new_df = col_string
        .first()
        .ok_or_else(|| {
            ShellError::SpannedLabeledError(
                "Empty names list".into(),
                "No column names were found".into(),
                col_span,
            )
        })
        .and_then(|col| {
            df.as_ref().drop(&col.item).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error dropping column".into(),
                    e.to_string(),
                    col.span,
                )
            })
        })?;

    // If there are more columns in the drop selection list, these
    // are dropped from the resulting dataframe one at a time.
    col_string
        .iter()
        .skip(1)
        .try_fold(new_df, |new_df, col| {
            new_df.drop(&col.item).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error dropping column".into(),
                    e.to_string(),
                    col.span,
                )
            })
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        // Evaluates every example declared in `examples()` and checks each
        // result against its declared `result` value.
        test_dataframe(DropDF {})
    }
}

View File

@ -1,3 +1,43 @@
mod nu_dataframe; mod values;
pub use nu_dataframe::{DataTypes, DescribeDF, OpenDataFrame, ToDataFrame}; mod append;
mod column;
mod describe;
mod drop;
mod dtypes;
mod open;
mod to_df;
pub use append::AppendDF;
pub use column::ColumnDF;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
use nu_protocol::engine::StateWorkingSet;
/// Registers every dataframe command declaration into the working set.
/// Called from `create_default_context` when the `dataframe` feature is on.
pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) {
    // Local mirror of the `bind_command!` helper used when building the
    // default context: boxes each command and adds its declaration.
    macro_rules! bind_command {
        ( $command:expr ) => {
            working_set.add_decl(Box::new($command));
        };
        ( $( $command:expr ),* ) => {
            $( working_set.add_decl(Box::new($command)); )*
        };
    }

    bind_command!(
        AppendDF,
        ColumnDF,
        DataTypes,
        DescribeDF,
        DropDF,
        OpenDataFrame,
        ToDataFrame
    );
}
#[cfg(test)]
mod test_dataframe;

View File

@ -1,14 +0,0 @@
mod values;
mod describe;
mod dtypes;
mod open;
mod to_df;
pub use describe::DescribeDF;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
#[cfg(test)]
mod test_dataframe;

View File

@ -6,6 +6,7 @@ use nu_protocol::{
}; };
use super::ToDataFrame; use super::ToDataFrame;
use crate::Let;
pub fn test_dataframe(cmd: impl Command + 'static) { pub fn test_dataframe(cmd: impl Command + 'static) {
let examples = cmd.examples(); let examples = cmd.examples();
@ -15,6 +16,7 @@ pub fn test_dataframe(cmd: impl Command + 'static) {
// Base functions that are needed for testing // Base functions that are needed for testing
// Try to keep this working set small to keep tests running as fast as possible // Try to keep this working set small to keep tests running as fast as possible
let mut working_set = StateWorkingSet::new(&*engine_state); let mut working_set = StateWorkingSet::new(&*engine_state);
working_set.add_decl(Box::new(Let));
working_set.add_decl(Box::new(ToDataFrame)); working_set.add_decl(Box::new(ToDataFrame));
// Adding the command that is being tested to the working set // Adding the command that is being tested to the working set

View File

@ -0,0 +1,4 @@
mod nu_dataframe;
pub mod utils;
pub use nu_dataframe::{Axis, Column, NuDataFrame};

View File

@ -8,7 +8,7 @@ use polars::prelude::{
}; };
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
pub fn between_dataframes( pub(super) fn between_dataframes(
operator: Spanned<Operator>, operator: Spanned<Operator>,
left: &Value, left: &Value,
lhs: &NuDataFrame, lhs: &NuDataFrame,
@ -31,7 +31,7 @@ pub fn between_dataframes(
} }
} }
pub fn compute_between_series( pub(super) fn compute_between_series(
operator: Spanned<Operator>, operator: Spanned<Operator>,
left: &Value, left: &Value,
lhs: &Series, lhs: &Series,
@ -173,7 +173,7 @@ pub fn compute_between_series(
} }
} }
pub fn compute_series_single_value( pub(super) fn compute_series_single_value(
operator: Spanned<Operator>, operator: Spanned<Operator>,
left: &Value, left: &Value,
lhs: &NuDataFrame, lhs: &NuDataFrame,

View File

@ -3,7 +3,8 @@ mod conversion;
mod custom_value; mod custom_value;
mod operations; mod operations;
pub(super) use conversion::{Column, ColumnMap}; pub use conversion::{Column, ColumnMap};
pub use operations::Axis;
use indexmap::map::IndexMap; use indexmap::map::IndexMap;
use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value};
@ -147,12 +148,17 @@ impl NuDataFrame {
conversion::from_parsed_columns(column_values) conversion::from_parsed_columns(column_values)
} }
//pub fn try_from_series(columns: Vec<Series>) -> Result<Self, ShellError> { pub fn try_from_series(columns: Vec<Series>, span: Span) -> Result<Self, ShellError> {
// let dataframe = DataFrame::new(columns) let dataframe = DataFrame::new(columns).map_err(|e| {
// .map_err(|e| ShellError::InternalError(format!("Unable to create DataFrame: {}", e)))?; ShellError::SpannedLabeledError(
"Error creating dataframe".into(),
format!("Unable to create DataFrame: {}", e),
span,
)
})?;
// Ok(Self::new(dataframe)) Ok(Self::new(dataframe))
//} }
pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> { pub fn try_from_columns(columns: Vec<Column>) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new(); let mut column_values: ColumnMap = IndexMap::new();
@ -167,8 +173,8 @@ impl NuDataFrame {
conversion::from_parsed_columns(column_values) conversion::from_parsed_columns(column_values)
} }
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> { pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
match input.into_value(span) { match value {
Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() { Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuDataFrame>() {
Some(df) => Ok(NuDataFrame(df.0.clone())), Some(df) => Ok(NuDataFrame(df.0.clone())),
None => Err(ShellError::CantConvert( None => Err(ShellError::CantConvert(
@ -180,11 +186,16 @@ impl NuDataFrame {
_ => Err(ShellError::CantConvert( _ => Err(ShellError::CantConvert(
"Dataframe not found".into(), "Dataframe not found".into(),
"value is not a dataframe".into(), "value is not a dataframe".into(),
span, value.span()?,
)), )),
} }
} }
pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<Self, ShellError> {
let value = input.into_value(span);
NuDataFrame::try_from_value(value)
}
pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> { pub fn column(&self, column: &str, span: Span) -> Result<Self, ShellError> {
let s = self.0.column(column).map_err(|_| { let s = self.0.column(column).map_err(|_| {
let possibilities = self let possibilities = self

View File

@ -9,19 +9,9 @@ use super::NuDataFrame;
pub enum Axis { pub enum Axis {
Row, Row,
//Column, Column,
} }
//impl Axis {
// pub fn try_from_str(axis: &str, span: Span) -> Result<Axis, ShellError> {
// match axis {
// "row" => Ok(Axis::Row),
// "col" => Ok(Axis::Column),
// _ => Err(ShellError::DidYouMean("'row' or 'col'".into(), span)),
// }
// }
//}
impl NuDataFrame { impl NuDataFrame {
pub fn compute_with_value( pub fn compute_with_value(
&self, &self,
@ -156,60 +146,63 @@ impl NuDataFrame {
})?; })?;
Ok(NuDataFrame::new(df_new)) Ok(NuDataFrame::new(df_new))
} //Axis::Column => { }
// if self.0.width() != other.0.width() { Axis::Column => {
// return Err(ShellError::IncompatibleParametersSingle( if self.0.width() != other.0.width() {
// "Dataframes with different number of columns".into(), return Err(ShellError::IncompatibleParametersSingle(
// span, "Dataframes with different number of columns".into(),
// )); span,
// } ));
}
// if !self if !self
// .0 .0
// .get_column_names() .get_column_names()
// .iter() .iter()
// .all(|col| other.0.get_column_names().contains(col)) .all(|col| other.0.get_column_names().contains(col))
// { {
// return Err(ShellError::IncompatibleParametersSingle( return Err(ShellError::IncompatibleParametersSingle(
// "Dataframes with different columns names".into(), "Dataframes with different columns names".into(),
// span, span,
// )); ));
// } }
// let new_cols = self let new_cols = self
// .0 .0
// .get_columns() .get_columns()
// .iter() .iter()
// .map(|s| { .map(|s| {
// let other_col = other let other_col = other
// .0 .0
// .column(s.name()) .column(s.name())
// .expect("Already checked that dataframes have same columns"); .expect("Already checked that dataframes have same columns");
// let mut tmp = s.clone(); let mut tmp = s.clone();
// let res = tmp.append(other_col); let res = tmp.append(other_col);
// match res { match res {
// Ok(s) => Ok(s.clone()), Ok(s) => Ok(s.clone()),
// Err(e) => Err({ Err(e) => Err({
// ShellError::InternalError(format!( ShellError::SpannedLabeledError(
// "Unable to append dataframes: {}", "Error appending dataframe".into(),
// e format!("Unable to append: {}", e),
// )) span,
// }), )
// } }),
// }) }
// .collect::<Result<Vec<Series>, ShellError>>()?; })
.collect::<Result<Vec<Series>, ShellError>>()?;
// let df_new = DataFrame::new(new_cols).map_err(|e| { let df_new = DataFrame::new(new_cols).map_err(|e| {
// ShellError::InternalError(format!( ShellError::SpannedLabeledError(
// "Unable to append dataframes: {}", "Error appending dataframe".into(),
// e.to_string() format!("Unable to append dataframes: {}", e.to_string()),
// )) span,
// })?; )
})?;
// Ok(NuDataFrame::new(df_new)) Ok(NuDataFrame::new(df_new))
//} }
} }
} }
} }

View File

@ -0,0 +1,37 @@
use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value};
// Converts a Vec<Value> to a Vec<Spanned<String>>, together with a Span
// covering the whole location of the column names for error referencing.
// Errors if the list is empty or contains a non-string value.
pub(crate) fn convert_columns(
    columns: Vec<Value>,
    span: Span,
) -> Result<(Vec<Spanned<String>>, Span), ShellError> {
    // Seed the joined span with the first value's span; it is widened below
    // as each name is consumed. An empty list is an error.
    let mut col_span = columns
        .first()
        .ok_or_else(|| {
            ShellError::SpannedLabeledError(
                "Empty column list".into(),
                "Empty list found for command".into(),
                span,
            )
        })
        .and_then(|v| v.span())?;

    let res = columns
        .into_iter()
        .map(|value| match value {
            Value::String { val, span } => {
                // Widen the accumulated span to cover this name as well.
                col_span = span_join(&[col_span, span]);
                Ok(Spanned { item: val, span })
            }
            // Only plain strings are accepted as column names.
            _ => Err(ShellError::SpannedLabeledError(
                "Incorrect column format".into(),
                "Only string as column name".into(),
                span,
            )),
        })
        .collect::<Result<Vec<Spanned<String>>, _>>()?;

    Ok((res, col_span))
}

View File

@ -22,7 +22,7 @@ pub fn create_default_context() -> EngineState {
// them only accessible if the correct input value category is used with the // them only accessible if the correct input value category is used with the
// declaration // declaration
#[cfg(feature = "dataframe")] #[cfg(feature = "dataframe")]
bind_command!(DataTypes, DescribeDF, OpenDataFrame, ToDataFrame); add_dataframe_decls(&mut working_set);
// TODO: sort default context items categorically // TODO: sort default context items categorically
bind_command!( bind_command!(

View File

@ -17,8 +17,6 @@ mod strings;
mod system; mod system;
mod viewers; mod viewers;
#[cfg(feature = "dataframe")]
mod dataframe;
pub use calendar::*; pub use calendar::*;
pub use conversions::*; pub use conversions::*;
pub use core_commands::*; pub use core_commands::*;
@ -38,5 +36,8 @@ pub use strings::*;
pub use system::*; pub use system::*;
pub use viewers::*; pub use viewers::*;
#[cfg(feature = "dataframe")]
mod dataframe;
#[cfg(feature = "dataframe")] #[cfg(feature = "dataframe")]
pub use dataframe::*; pub use dataframe::*;

View File

@ -26,7 +26,7 @@ fn eval_call(
call: &Call, call: &Call,
input: PipelineData, input: PipelineData,
) -> Result<PipelineData, ShellError> { ) -> Result<PipelineData, ShellError> {
let decl = engine_state.get_decl_with_input(call.decl_id, &input); let decl = engine_state.get_decl(call.decl_id);
if call.named.iter().any(|(flag, _)| flag.item == "help") { if call.named.iter().any(|(flag, _)| flag.item == "help") {
let full_help = get_full_help(&decl.signature(), &decl.examples(), engine_state); let full_help = get_full_help(&decl.signature(), &decl.examples(), engine_state);

View File

@ -1,7 +1,7 @@
use super::Command; use super::Command;
use crate::{ use crate::{
ast::Block, BlockId, DeclId, Example, Overlay, OverlayId, PipelineData, ShellError, Signature, ast::Block, BlockId, DeclId, Example, Overlay, OverlayId, ShellError, Signature, Span, Type,
Span, Type, Value, VarId, VarId,
}; };
use core::panic; use core::panic;
use std::{ use std::{
@ -370,39 +370,6 @@ impl EngineState {
.expect("internal error: missing declaration") .expect("internal error: missing declaration")
} }
#[allow(clippy::borrowed_box)]
pub fn get_decl_with_input(&self, decl_id: DeclId, input: &PipelineData) -> &Box<dyn Command> {
let decl = self.get_decl(decl_id);
match input {
PipelineData::Stream(..) => decl,
PipelineData::Value(value, ..) => match value {
Value::CustomValue { val, .. } => {
// This filter works because the custom definitions were declared
// before the default nushell declarations. This means that the custom
// declarations that get overridden by the default declarations can only
// be accessed if the input value has the required category
let decls = self
.decls
.iter()
.enumerate()
.filter(|(_, decl_inner)| {
decl.name() == decl_inner.name()
&& decl_inner.signature().category == val.category()
})
.map(|(index, _)| index)
.collect::<Vec<usize>>();
match decls.first() {
Some(index) => self.get_decl(*index),
None => decl,
}
}
_ => decl,
},
}
}
/// Get all IDs of all commands within scope, sorted by the commads' names /// Get all IDs of all commands within scope, sorted by the commads' names
pub fn get_decl_ids_sorted(&self, include_hidden: bool) -> impl Iterator<Item = DeclId> { pub fn get_decl_ids_sorted(&self, include_hidden: bool) -> impl Iterator<Item = DeclId> {
let mut decls_map = HashMap::new(); let mut decls_map = HashMap::new();

View File

@ -30,7 +30,9 @@ pub trait CustomValue: fmt::Debug + Send + Sync {
fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError>; fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError>;
// ordering with other value // ordering with other value
fn partial_cmp(&self, other: &Value) -> Option<Ordering>; fn partial_cmp(&self, _other: &Value) -> Option<Ordering> {
None
}
// Definition of an operation between the object that implements the trait // Definition of an operation between the object that implements the trait
// and another Value. // and another Value.