Dataframe commands (#542)

* groupby object

* aggregate command

* eager commands

* rest of dataframe commands
This commit is contained in:
Fernando Herrera 2021-12-21 18:32:09 +00:00 committed by GitHub
parent c3a16902fe
commit 6a35e6b7b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 2936 additions and 130 deletions

19
Cargo.lock generated
View File

@ -1363,6 +1363,12 @@ dependencies = [
"pkg-config", "pkg-config",
] ]
[[package]]
name = "libm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a"
[[package]] [[package]]
name = "libssh2-sys" name = "libssh2-sys"
version = "0.2.23" version = "0.2.23"
@ -1982,6 +1988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"libm",
] ]
[[package]] [[package]]
@ -2249,6 +2256,8 @@ dependencies = [
"num_cpus", "num_cpus",
"polars-arrow", "polars-arrow",
"prettytable-rs", "prettytable-rs",
"rand",
"rand_distr",
"rayon", "rayon",
"regex", "regex",
"serde", "serde",
@ -2434,6 +2443,16 @@ dependencies = [
"getrandom 0.2.3", "getrandom 0.2.3",
] ]
[[package]]
name = "rand_distr"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f"
dependencies = [
"num-traits",
"rand",
]
[[package]] [[package]]
name = "rand_hc" name = "rand_hc"
version = "0.3.1" version = "0.3.1"

View File

@ -74,7 +74,7 @@ optional = true
features = [ features = [
"default", "parquet", "json", "serde", "object", "default", "parquet", "json", "serde", "object",
"checked_arithmetic", "strings", "cum_agg", "is_in", "checked_arithmetic", "strings", "cum_agg", "is_in",
"rolling_window", "strings" "rolling_window", "strings", "pivot", "random"
] ]
[features] [features]

View File

@ -0,0 +1,375 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
did_you_mean,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use polars::{frame::groupby::GroupBy, prelude::PolarsError};
use crate::dataframe::values::NuGroupBy;
use super::super::values::{Column, NuDataFrame};
/// Aggregation operations supported by `dfr aggregate`.
///
/// `from_tagged` parses these from the command's string argument; which
/// subset is valid depends on the input type (see
/// `perform_dataframe_aggregation` vs `perform_groupby_aggregation`).
enum Operation {
    Mean,
    Sum,
    Min,
    Max,
    First,
    Last,
    Nunique,
    /// Carries the quantile value, validated to lie in [0.0, 1.0].
    Quantile(f64),
    Median,
    Var,
    Std,
    Count,
}
impl Operation {
    /// Parses the operation name (plus the optional `--quantile` value) into
    /// an `Operation`.
    ///
    /// Validates that the quantile value, when required, is present and lies
    /// in [0.0, 1.0]. Unknown names produce a `DidYouMean` suggestion when a
    /// close match exists, otherwise a help message listing all operations.
    fn from_tagged(
        name: &Spanned<String>,
        quantile: Option<Spanned<f64>>,
    ) -> Result<Operation, ShellError> {
        match name.item.as_ref() {
            "mean" => Ok(Operation::Mean),
            "sum" => Ok(Operation::Sum),
            "min" => Ok(Operation::Min),
            "max" => Ok(Operation::Max),
            "first" => Ok(Operation::First),
            "last" => Ok(Operation::Last),
            "nunique" => Ok(Operation::Nunique),
            "quantile" => match quantile {
                None => Err(ShellError::SpannedLabeledError(
                    // Fixed typo: "fount" -> "found"
                    "Quantile value not found".into(),
                    "Quantile operation requires quantile value".into(),
                    name.span,
                )),
                Some(value) => {
                    // `||` instead of bitwise `|`: same result on bools, but
                    // idiomatic and short-circuiting.
                    if value.item < 0.0 || value.item > 1.0 {
                        Err(ShellError::SpannedLabeledError(
                            "Inappropriate quantile".into(),
                            "Quantile value should be between 0.0 and 1.0".into(),
                            value.span,
                        ))
                    } else {
                        Ok(Operation::Quantile(value.item))
                    }
                }
            },
            "median" => Ok(Operation::Median),
            "var" => Ok(Operation::Var),
            "std" => Ok(Operation::Std),
            "count" => Ok(Operation::Count),
            selection => {
                let possibilities = [
                    "mean".to_string(),
                    "sum".to_string(),
                    "min".to_string(),
                    "max".to_string(),
                    "first".to_string(),
                    "last".to_string(),
                    "nunique".to_string(),
                    "quantile".to_string(),
                    "median".to_string(),
                    "var".to_string(),
                    "std".to_string(),
                    "count".to_string(),
                ];

                match did_you_mean(&possibilities, selection) {
                    Some(suggestion) => Err(ShellError::DidYouMean(suggestion, name.span)),
                    None => Err(ShellError::SpannedLabeledErrorHelp(
                        // Fixed typo: "fount" -> "found"
                        "Operation not found".into(),
                        "Operation does not exist".into(),
                        name.span,
                        "Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count".into(),
                    ))
                }
            }
        }
    }

    /// Returns the operation's canonical name, used for error reporting and
    /// for matching the suffix polars appends to aggregated column names.
    fn to_str(&self) -> &'static str {
        match self {
            Self::Mean => "mean",
            Self::Sum => "sum",
            Self::Min => "min",
            Self::Max => "max",
            Self::First => "first",
            Self::Last => "last",
            Self::Nunique => "nunique",
            Self::Quantile(_) => "quantile",
            Self::Median => "median",
            Self::Var => "var",
            Self::Std => "std",
            Self::Count => "count",
        }
    }
}
/// `dfr aggregate`: applies an aggregation operation to either a dataframe
/// or a groupby object taken from the pipeline.
#[derive(Clone)]
pub struct Aggregate;

impl Command for Aggregate {
    fn name(&self) -> &str {
        "dfr aggregate"
    }

    fn usage(&self) -> &str {
        "Performs an aggregation operation on a dataframe and groupby object"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "operation-name",
                SyntaxShape::String,
                "\n\tDataframes: mean, sum, min, max, quantile, median, var, std
    \tGroupBy: mean, sum, min, max, first, last, nunique, quantile, median, var, std, count",
            )
            .named(
                "quantile",
                SyntaxShape::Number,
                "quantile value for quantile operation",
                Some('q'),
            )
            .switch(
                "explicit",
                "returns explicit names for groupby aggregations",
                Some('e'),
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            // Groupby input: aggregation applied per group.
            Example {
                description: "Aggregate sum by grouping by column a and summing on col b",
                example:
                    "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new("a".to_string(), vec![Value::test_string("one")]),
                        Column::new("b".to_string(), vec![Value::test_int(3)]),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            // Dataframe input: aggregation applied to each column.
            Example {
                description: "Aggregate sum in dataframe columns",
                example: "[[a b]; [4 1] [5 2]] | dfr to-df | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new("a".to_string(), vec![Value::test_int(9)]),
                        Column::new("b".to_string(), vec![Value::test_int(3)]),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            // Series input: a one-column dataframe aggregates the same way.
            Example {
                description: "Aggregate sum in series",
                example: "[4 1 5 6] | dfr to-df | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![Value::test_int(16)],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr aggregate`: parses the operation, then
/// dispatches on whether the pipeline value is a dataframe or a groupby.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let operation: Spanned<String> = call.req(engine_state, stack, 0)?;
    let quantile: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "quantile")?;
    let op = Operation::from_tagged(&operation, quantile)?;

    match input {
        PipelineData::Value(Value::CustomValue { val, span }, _) => {
            // The custom value may be either dataframe type; attempt both
            // downcasts and dispatch on whichever one succeeded.
            let df = val.as_any().downcast_ref::<NuDataFrame>();
            let groupby = val.as_any().downcast_ref::<NuGroupBy>();

            match (df, groupby) {
                (Some(df), None) => {
                    let df = df.as_ref();
                    let res = perform_dataframe_aggregation(df, op, operation.span)?;

                    Ok(PipelineData::Value(
                        NuDataFrame::dataframe_into_value(res, span),
                        None,
                    ))
                }
                (None, Some(nu_groupby)) => {
                    // Rebuild the polars GroupBy from the stored groups.
                    let groupby = nu_groupby.to_groupby()?;

                    let res = perform_groupby_aggregation(
                        groupby,
                        op,
                        operation.span,
                        call.head,
                        call.has_flag("explicit"),
                    )?;

                    Ok(PipelineData::Value(
                        NuDataFrame::dataframe_into_value(res, span),
                        None,
                    ))
                }
                // Custom value of some other kind.
                _ => Err(ShellError::SpannedLabeledError(
                    "Incorrect datatype".into(),
                    "no groupby or dataframe found in input stream".into(),
                    call.head,
                )),
            }
        }
        // Pipeline value that is not a custom value at all.
        _ => Err(ShellError::SpannedLabeledError(
            "Incorrect datatype".into(),
            "no groupby or dataframe found in input stream".into(),
            call.head,
        )),
    }
}
/// Applies an aggregation over a polars `GroupBy`.
///
/// When `explicit` is false, the suffix polars appends to aggregated column
/// names (e.g. "_mean", "_agg_var") is stripped so the result keeps the
/// original column names.
fn perform_groupby_aggregation(
    groupby: GroupBy,
    operation: Operation,
    operation_span: Span,
    agg_span: Span,
    explicit: bool,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let mut res = match operation {
        Operation::Mean => groupby.mean(),
        Operation::Sum => groupby.sum(),
        Operation::Min => groupby.min(),
        Operation::Max => groupby.max(),
        Operation::First => groupby.first(),
        Operation::Last => groupby.last(),
        Operation::Nunique => groupby.n_unique(),
        Operation::Quantile(quantile) => groupby.quantile(quantile),
        Operation::Median => groupby.median(),
        Operation::Var => groupby.var(),
        Operation::Std => groupby.std(),
        Operation::Count => groupby.count(),
    }
    .map_err(|e| {
        // Point a NotFound error at the aggregation call site; any other
        // polars error points at the operation-name argument.
        let span = match &e {
            PolarsError::NotFound(_) => agg_span,
            _ => operation_span,
        };

        ShellError::SpannedLabeledError("Error calculating aggregation".into(), e.to_string(), span)
    })?;

    if !explicit {
        // Collect the names up front: renaming while iterating over
        // `get_column_names` would require borrowing `res` twice.
        let col_names = res
            .get_column_names()
            .iter()
            .map(|name| name.to_string())
            .collect::<Vec<String>>();

        for col in col_names {
            // Suffix polars uses for this particular aggregation.
            let from = match operation {
                Operation::Mean => "_mean",
                Operation::Sum => "_sum",
                Operation::Min => "_min",
                Operation::Max => "_max",
                Operation::First => "_first",
                Operation::Last => "_last",
                Operation::Nunique => "_n_unique",
                Operation::Quantile(_) => "_quantile",
                Operation::Median => "_median",
                Operation::Var => "_agg_var",
                Operation::Std => "_agg_std",
                Operation::Count => "_count",
            };

            // Truncate at the suffix; columns without it (e.g. the grouping
            // key column) are left untouched.
            let new_col = match col.find(from) {
                Some(index) => &col[..index],
                None => &col[..],
            };

            res.rename(&col, new_col)
                .expect("Column is always there. Looping with known names");
        }
    }

    Ok(res)
}
/// Applies an aggregation directly over a whole dataframe.
///
/// Only a subset of `Operation` is valid for plain dataframes; groupby-only
/// operations (first, last, nunique, count) fall into the catch-all arm,
/// which reports the valid alternatives.
fn perform_dataframe_aggregation(
    dataframe: &polars::prelude::DataFrame,
    operation: Operation,
    operation_span: Span,
) -> Result<polars::prelude::DataFrame, ShellError> {
    match operation {
        Operation::Mean => Ok(dataframe.mean()),
        Operation::Sum => Ok(dataframe.sum()),
        Operation::Min => Ok(dataframe.min()),
        Operation::Max => Ok(dataframe.max()),
        // The only fallible polars call in this set, hence the map_err.
        Operation::Quantile(quantile) => dataframe.quantile(quantile).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error calculating quantile".into(),
                e.to_string(),
                operation_span,
            )
        }),
        Operation::Median => Ok(dataframe.median()),
        Operation::Var => Ok(dataframe.var()),
        Operation::Std => Ok(dataframe.std()),
        operation => {
            let possibilities = [
                "mean".to_string(),
                "sum".to_string(),
                "min".to_string(),
                "max".to_string(),
                "quantile".to_string(),
                "median".to_string(),
                "var".to_string(),
                "std".to_string(),
            ];

            match did_you_mean(&possibilities, operation.to_str()) {
                Some(suggestion) => Err(ShellError::DidYouMean(suggestion, operation_span)),
                None => Err(ShellError::SpannedLabeledErrorHelp(
                    // Fixed typo: "fount" -> "found"
                    "Operation not found".into(),
                    "Operation does not exist".into(),
                    operation_span,
                    "Perhaps you want: mean, sum, min, max, quantile, median, var, or std".into(),
                )),
            }
        }
    }
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::super::CreateGroupBy;
    use super::*;

    // Runs the declared examples through a real engine; `CreateGroupBy` is
    // registered because the first example pipes through `dfr group-by`.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(Aggregate {}), Box::new(CreateGroupBy {})])
    }
}

View File

@ -5,7 +5,7 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use super::values::{Axis, Column, NuDataFrame}; use super::super::values::{Axis, Column, NuDataFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct AppendDF; pub struct AppendDF;
@ -120,7 +120,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -5,7 +5,7 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
}; };
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct ColumnDF; pub struct ColumnDF;
@ -71,7 +71,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -231,7 +231,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -5,8 +5,8 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use super::values::utils::convert_columns; use super::super::values::utils::convert_columns;
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct DropDF; pub struct DropDF;
@ -101,7 +101,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -5,8 +5,8 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
}; };
use super::values::utils::convert_columns; use super::super::values::utils::convert_columns_string;
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct DropNulls; pub struct DropNulls;
@ -101,11 +101,7 @@ fn command(
let (subset, col_span) = match columns { let (subset, col_span) = match columns {
Some(cols) => { Some(cols) => {
let (agg_string, col_span) = convert_columns(cols, call.head)?; let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
let agg_string = agg_string
.into_iter()
.map(|col| col.item)
.collect::<Vec<String>>();
(Some(agg_string), col_span) (Some(agg_string), col_span)
} }
None => (None, call.head), None => (None, call.head),
@ -123,7 +119,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::super::WithColumn; use super::super::WithColumn;
use super::*; use super::*;

View File

@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
engine::{Command, EngineState, Stack}, engine::{Command, EngineState, Stack},
@ -96,7 +96,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -0,0 +1,137 @@
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Value,
};
use super::super::values::{Column, NuDataFrame};
/// `dfr to-dummies`: converts a dataframe into its dummy-variable (one-hot)
/// encoding.
#[derive(Clone)]
pub struct Dummies;

impl Command for Dummies {
    fn name(&self) -> &str {
        "dfr to-dummies"
    }

    fn usage(&self) -> &str {
        "Creates a new dataframe with dummy variables"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            // Each distinct value of each column becomes a 0/1 indicator
            // column named "<column>_<value>".
            Example {
                description: "Create new dataframe with dummy variables from a dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-dummies",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a_1".to_string(),
                            vec![Value::test_int(1), Value::test_int(0)],
                        ),
                        Column::new(
                            "a_3".to_string(),
                            vec![Value::test_int(0), Value::test_int(1)],
                        ),
                        Column::new(
                            "b_2".to_string(),
                            vec![Value::test_int(1), Value::test_int(0)],
                        ),
                        Column::new(
                            "b_4".to_string(),
                            vec![Value::test_int(0), Value::test_int(1)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Create new dataframe with dummy variables from a series",
                example: "[1 2 2 3 3] | dfr to-df | dfr to-dummies",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "0_1".to_string(),
                            vec![
                                Value::test_int(1),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                            ],
                        ),
                        Column::new(
                            "0_2".to_string(),
                            vec![
                                Value::test_int(0),
                                Value::test_int(1),
                                Value::test_int(1),
                                Value::test_int(0),
                                Value::test_int(0),
                            ],
                        ),
                        Column::new(
                            "0_3".to_string(),
                            vec![
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(1),
                                Value::test_int(1),
                            ],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr to-dummies`: delegates the one-hot
/// encoding to polars and wraps the result back into a pipeline value.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // Wrap any polars failure with a hint about the supported column types.
    let dummies = df.as_ref().to_dummies().map_err(|e| {
        ShellError::SpannedLabeledErrorHelp(
            "Error calculating dummies".into(),
            e.to_string(),
            call.head,
            "The only allowed column types for dummies are String or Int".into(),
        )
    })?;

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(dummies, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    // Runs the examples declared in `examples()` against a real engine.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(Dummies {})])
    }
}

View File

@ -0,0 +1,98 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use super::super::values::{Column, NuDataFrame};
/// `dfr filter-with`: filters a dataframe's rows using a boolean mask
/// (a one-column dataframe of booleans) as reference.
#[derive(Clone)]
pub struct FilterWith;

impl Command for FilterWith {
    fn name(&self) -> &str {
        "dfr filter-with"
    }

    fn usage(&self) -> &str {
        "Filters dataframe using a mask as reference"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("mask", SyntaxShape::Any, "boolean mask used to filter data")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Filter dataframe using a bool mask",
            example: r#"let mask = ([$true $false] | dfr to-df);
    [[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with $mask"#,
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr filter-with`: reads the mask argument,
/// casts it to a boolean series, and applies it to the piped dataframe.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let mask_value: Value = call.req(engine_state, stack, 0)?;
    let mask_span = mask_value.span()?;

    // The mask must be a single-column dataframe (series) of booleans.
    let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
    let mask = mask.bool().map_err(|e| {
        ShellError::SpannedLabeledErrorHelp(
            "Error casting to bool".into(),
            e.to_string(),
            mask_span,
            "Perhaps you want to use a series with booleans as mask".into(),
        )
    })?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_ref()
        .filter(mask)
        .map_err(|e| {
            // Fixed copy-pasted error text: this is the filter operation,
            // not `to-dummies`.
            ShellError::SpannedLabeledErrorHelp(
                "Error filtering dataframe".into(),
                e.to_string(),
                call.head,
                "Perhaps you want to use a mask with the same length as the dataframe".into(),
            )
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    // Runs the examples declared in `examples()` against a real engine.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(FilterWith {})])
    }
}

View File

@ -0,0 +1,80 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
/// `dfr first`: creates a new dataframe from the leading rows of the input.
#[derive(Clone)]
pub struct FirstDF;

impl Command for FirstDF {
    fn name(&self) -> &str {
        "dfr first"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe with first rows"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Optional: `command` falls back to DEFAULT_ROWS when absent.
            .optional("rows", SyntaxShape::Int, "Number of rows for head")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Create new dataframe with head rows",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr first`: takes the head of the piped
/// dataframe, defaulting the row count when no argument is given.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Optional positional argument; fall back to the shared default count.
    let rows = call
        .opt::<usize>(engine_state, stack, 0)?
        .unwrap_or(DEFAULT_ROWS);

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
    let head = df.as_ref().head(Some(rows));

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(head, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    // Runs the examples declared in `examples()` against a real engine.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(FirstDF {})])
    }
}

View File

@ -0,0 +1,88 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use crate::dataframe::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
/// `dfr get`: creates a new dataframe containing only the selected columns.
#[derive(Clone)]
pub struct GetDF;

impl Command for GetDF {
    fn name(&self) -> &str {
        "dfr get"
    }

    fn usage(&self) -> &str {
        "Creates dataframe with the selected columns"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // NOTE(review): description says "sort" but the command selects
            // columns — likely copied from another command; confirm intent.
            .rest("rest", SyntaxShape::Any, "column names to sort dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Creates dataframe with selected columns",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr get a",
            result: Some(
                NuDataFrame::try_from_columns(vec![Column::new(
                    "a".to_string(),
                    vec![Value::test_int(1), Value::test_int(3)],
                )])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr get`: converts the rest arguments into
/// column names and selects them from the piped dataframe.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
    let (col_string, col_span) = convert_columns_string(columns, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // Missing columns surface as a spanned "Error selecting columns".
    let selected = df.as_ref().select(&col_string).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error selecting columns".into(),
            e.to_string(),
            col_span,
        )
    })?;

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(selected, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    // Runs the examples declared in `examples()` against a real engine.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(GetDF {})])
    }
}

View File

@ -0,0 +1,71 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};
use super::super::values::{utils::convert_columns_string, NuDataFrame, NuGroupBy};
/// `dfr group-by`: builds a `NuGroupBy` object that later aggregations
/// (e.g. `dfr aggregate`) can consume.
#[derive(Clone)]
pub struct CreateGroupBy;

impl Command for CreateGroupBy {
    fn name(&self) -> &str {
        "dfr group-by"
    }

    fn usage(&self) -> &str {
        "Creates a groupby object that can be used for other aggregations"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .rest("rest", SyntaxShape::Any, "groupby columns")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        // A groupby object has no stable displayable value, so no expected
        // result is declared.
        vec![Example {
            description: "Grouping by column a",
            example: "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a",
            result: None,
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr group-by`: computes the groups once and
/// stores them in a `NuGroupBy` for later aggregation.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Extracting the names of the columns to perform the groupby
    let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
    let (col_string, col_span) = convert_columns_string(columns, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // This is the expensive part of the groupby; to create the
    // groups that will be used for grouping the data in the
    // dataframe. Once it has been done these values can be stored
    // in a NuGroupBy
    let groupby = df.as_ref().groupby(&col_string).map_err(|e| {
        ShellError::SpannedLabeledError("Error creating groupby".into(), e.to_string(), col_span)
    })?;

    // Persist the computed group indices together with a copy of the
    // dataframe so the groupby can be rebuilt without regrouping.
    let groups = groupby.get_groups().to_vec();
    let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups);

    Ok(PipelineData::Value(groupby.into_value(call.head), None))
}

View File

@ -0,0 +1,226 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use polars::prelude::JoinType;
use crate::dataframe::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};
/// `dfr join`: joins the piped dataframe with another using columns as
/// reference (inner/outer/left, inner by default).
#[derive(Clone)]
pub struct JoinDF;

impl Command for JoinDF {
    fn name(&self) -> &str {
        "dfr join"
    }

    fn usage(&self) -> &str {
        "Joins a dataframe using columns as reference"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("dataframe", SyntaxShape::Any, "right dataframe to join")
            // Both column lists are required named flags; `command` relies
            // on the parser enforcing their presence.
            .required_named(
                "left",
                SyntaxShape::Table,
                "left column names to perform join",
                Some('l'),
            )
            .required_named(
                "right",
                SyntaxShape::Table,
                "right column names to perform join",
                Some('r'),
            )
            .named(
                "type",
                SyntaxShape::String,
                "type of join. Inner by default",
                Some('t'),
            )
            .named(
                "suffix",
                SyntaxShape::String,
                "suffix for the columns of the right dataframe",
                Some('s'),
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        // Self-join on [a b]: the non-key column from the right side gets
        // the "_right" suffix.
        vec![Example {
            description: "inner join dataframe",
            example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dfr to-df);
    $right | dfr join $right -l [a b] -r [a b]"#,
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new(
                        "a".to_string(),
                        vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
                    ),
                    Column::new(
                        "b".to_string(),
                        vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
                    ),
                    Column::new(
                        "c".to_string(),
                        vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
                    ),
                    Column::new(
                        "c_right".to_string(),
                        vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
                    ),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr join`: parses arguments, validates the
/// join columns on both sides, then delegates to polars.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let r_df: Value = call.req(engine_state, stack, 0)?;
    // `left`/`right` are declared as required_named, so the parser
    // guarantees their presence; `expect` documents that invariant.
    let l_col: Vec<Value> = call
        .get_flag(engine_state, stack, "left")?
        .expect("required value in syntax");
    let r_col: Vec<Value> = call
        .get_flag(engine_state, stack, "right")?
        .expect("required value in syntax");
    let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;

    // Inner join unless an explicit (and valid) type flag was given.
    let join_type_op: Option<Spanned<String>> = call.get_flag(engine_state, stack, "type")?;
    let join_type = match join_type_op {
        None => JoinType::Inner,
        Some(val) => match val.item.as_ref() {
            "inner" => JoinType::Inner,
            "outer" => JoinType::Outer,
            "left" => JoinType::Left,
            _ => {
                return Err(ShellError::SpannedLabeledErrorHelp(
                    "Incorrect join type".into(),
                    "Invalid join type".into(),
                    val.span,
                    "Options: inner, outer or left".into(),
                ))
            }
        },
    };

    let (l_col_string, l_col_span) = convert_columns_string(l_col, call.head)?;
    let (r_col_string, r_col_span) = convert_columns_string(r_col, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
    let r_df = NuDataFrame::try_from_value(r_df)?;

    // Fail early with a targeted message instead of a raw polars error.
    check_column_datatypes(
        df.as_ref(),
        r_df.as_ref(),
        &l_col_string,
        l_col_span,
        &r_col_string,
        r_col_span,
    )?;

    df.as_ref()
        .join(
            r_df.as_ref(),
            &l_col_string,
            &r_col_string,
            join_type,
            suffix,
        )
        .map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error joining dataframes".into(),
                e.to_string(),
                l_col_span,
            )
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
/// Validates join columns before calling polars: both sides must name the
/// same number of columns, every named column must exist in its dataframe,
/// and each left/right pair must have matching datatypes.
fn check_column_datatypes<T: AsRef<str>>(
    df_l: &polars::prelude::DataFrame,
    df_r: &polars::prelude::DataFrame,
    l_cols: &[T],
    l_col_span: Span,
    r_cols: &[T],
    r_col_span: Span,
) -> Result<(), ShellError> {
    // A join needs exactly one right column per left column.
    if l_cols.len() != r_cols.len() {
        return Err(ShellError::SpannedLabeledErrorHelp(
            "Mismatched number of column names".into(),
            format!(
                "found {} left names vs {} right names",
                l_cols.len(),
                r_cols.len()
            ),
            l_col_span,
            "perhaps you need to change the number of columns to join".into(),
        ));
    }

    for (l, r) in l_cols.iter().zip(r_cols) {
        let l_series = df_l.column(l.as_ref()).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error selecting the columns".into(),
                e.to_string(),
                l_col_span,
            )
        })?;

        let r_series = df_r.column(r.as_ref()).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error selecting the columns".into(),
                e.to_string(),
                r_col_span,
            )
        })?;

        if l_series.dtype() != r_series.dtype() {
            return Err(ShellError::SpannedLabeledErrorHelp(
                "Mismatched datatypes".into(),
                // Fixed garbled wording ("... doesn't match '{}' right
                // column match").
                format!(
                    "left column type '{}' doesn't match right column type '{}'",
                    l_series.dtype(),
                    r_series.dtype()
                ),
                l_col_span,
                "perhaps you need to select other column to match".into(),
            ));
        }
    }

    Ok(())
}
#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    // Runs the examples declared in `examples()` against a real engine.
    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(JoinDF {})])
    }
}

View File

@ -0,0 +1,80 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
/// `dfr last`: creates a new dataframe from the trailing rows of the input.
#[derive(Clone)]
pub struct LastDF;

impl Command for LastDF {
    fn name(&self) -> &str {
        "dfr last"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe with tail rows"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Optional: `command` falls back to DEFAULT_ROWS when absent.
            .optional("rows", SyntaxShape::Int, "Number of rows for tail")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Create new dataframe with last rows",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(3)]),
                    Column::new("b".to_string(), vec![Value::test_int(4)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Shared implementation for `dfr last`: takes the tail of the piped
/// dataframe, defaulting the row count when no argument is given.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Optional positional argument; fall back to the shared default count.
    let rows = call
        .opt::<usize>(engine_state, stack, 0)?
        .unwrap_or(DEFAULT_ROWS);

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
    let tail = df.as_ref().tail(Some(rows));

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(tail, call.head),
        None,
    ))
}
#[cfg(test)]
mod test {
use super::super::super::test_dataframe::test_dataframe;
use super::*;
#[test]
fn test_examples() {
test_dataframe(vec![Box::new(LastDF {})])
}
}

View File

@ -0,0 +1,243 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};

use crate::dataframe::values::utils::convert_columns_string;

use super::super::values::{Column, NuDataFrame};

/// `dfr melt`: unpivots a dataframe from wide to long format, keeping the
/// `--columns` as identifiers and stacking the `--values` columns into
/// variable/value pairs.
#[derive(Clone)]
pub struct MeltDF;

impl Command for MeltDF {
    fn name(&self) -> &str {
        "dfr melt"
    }

    fn usage(&self) -> &str {
        "Unpivot a DataFrame from wide to long format"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Identifier columns that stay as-is in the output.
            .required_named(
                "columns",
                SyntaxShape::Table,
                "column names for melting",
                Some('c'),
            )
            // Columns whose contents are stacked into the value column.
            .required_named(
                "values",
                SyntaxShape::Table,
                "column names used as value columns",
                Some('v'),
            )
            .named(
                "variable-name",
                SyntaxShape::String,
                "optional name for variable column",
                Some('r'),
            )
            .named(
                "value-name",
                SyntaxShape::String,
                "optional name for value column",
                Some('l'),
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "melt dataframe",
            example:
                "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dfr to-df | dfr melt -c [b c] -v [a d]",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new(
                        "b".to_string(),
                        vec![
                            Value::test_int(1),
                            Value::test_int(2),
                            Value::test_int(3),
                            Value::test_int(1),
                            Value::test_int(2),
                            Value::test_int(3),
                        ],
                    ),
                    Column::new(
                        "c".to_string(),
                        vec![
                            Value::test_int(4),
                            Value::test_int(5),
                            Value::test_int(6),
                            Value::test_int(4),
                            Value::test_int(5),
                            Value::test_int(6),
                        ],
                    ),
                    Column::new(
                        "variable".to_string(),
                        vec![
                            Value::test_string("a"),
                            Value::test_string("a"),
                            Value::test_string("a"),
                            Value::test_string("d"),
                            Value::test_string("d"),
                            Value::test_string("d"),
                        ],
                    ),
                    Column::new(
                        "value".to_string(),
                        vec![
                            Value::test_string("x"),
                            Value::test_string("y"),
                            Value::test_string("z"),
                            Value::test_string("a"),
                            Value::test_string("b"),
                            Value::test_string("c"),
                        ],
                    ),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: validates both column lists, melts via polars, then applies
/// the optional renames to the generated "variable" and "value" columns.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Both flags are declared `required_named`, so the parser guarantees
    // they are present; `expect` documents that invariant.
    let id_col: Vec<Value> = call
        .get_flag(engine_state, stack, "columns")?
        .expect("required value");
    let val_col: Vec<Value> = call
        .get_flag(engine_state, stack, "values")?
        .expect("required value");

    let value_name: Option<Spanned<String>> = call.get_flag(engine_state, stack, "value-name")?;
    let variable_name: Option<Spanned<String>> =
        call.get_flag(engine_state, stack, "variable-name")?;

    let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
    let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // polars' melt requires homogeneous types within each column list.
    check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
    check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;

    let mut res = df
        .as_ref()
        .melt(&id_col_string, &val_col_string)
        .map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error calculating melt".into(),
                e.to_string(),
                call.head,
            )
        })?;

    // Optionally rename the auto-generated output columns in place.
    if let Some(name) = &variable_name {
        res.rename("variable", &name.item).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error renaming column".into(),
                e.to_string(),
                name.span,
            )
        })?;
    }

    if let Some(name) = &value_name {
        res.rename("value", &name.item).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error renaming column".into(),
                e.to_string(),
                name.span,
            )
        })?;
    }

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(res, call.head),
        None,
    ))
}

/// Ensures `cols` is non-empty and that all listed columns share a single
/// dtype (checked pairwise over adjacent columns), which melt requires.
fn check_column_datatypes<T: AsRef<str>>(
    df: &polars::prelude::DataFrame,
    cols: &[T],
    col_span: Span,
) -> Result<(), ShellError> {
    if cols.is_empty() {
        return Err(ShellError::SpannedLabeledError(
            "Merge error".into(),
            "empty column list".into(),
            col_span,
        ));
    }

    // Checking if they are same type
    if cols.len() > 1 {
        for w in cols.windows(2) {
            let l_series = df.column(w[0].as_ref()).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error selecting columns".into(),
                    e.to_string(),
                    col_span,
                )
            })?;

            let r_series = df.column(w[1].as_ref()).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error selecting columns".into(),
                    e.to_string(),
                    col_span,
                )
            })?;

            if l_series.dtype() != r_series.dtype() {
                return Err(ShellError::SpannedLabeledErrorHelp(
                    "Merge error".into(),
                    "found different column types in list".into(),
                    col_span,
                    format!(
                        "datatypes {} and {} are incompatible",
                        l_series.dtype(),
                        r_series.dtype()
                    ),
                ));
            }
        }
    }

    Ok(())
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(MeltDF {})])
    }
}

View File

@ -0,0 +1,105 @@
// Eager dataframe commands: each submodule implements one `dfr` subcommand
// that operates directly (non-lazily) on a NuDataFrame or NuGroupBy value.
mod aggregate;
mod append;
mod column;
mod command;
mod describe;
mod drop;
mod drop_nulls;
mod dtypes;
mod dummies;
mod filter_with;
mod first;
mod get;
mod groupby;
mod join;
mod last;
mod melt;
mod open;
mod pivot;
mod rename;
mod sample;
mod shape;
mod slice;
mod sort;
mod take;
mod to_csv;
mod to_df;
mod to_nu;
mod to_parquet;
mod with_column;

use nu_protocol::engine::StateWorkingSet;

pub use aggregate::Aggregate;
pub use append::AppendDF;
pub use column::ColumnDF;
pub use command::Dataframe;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use dummies::Dummies;
pub use filter_with::FilterWith;
pub use first::FirstDF;
pub use get::GetDF;
pub use groupby::CreateGroupBy;
pub use join::JoinDF;
pub use last::LastDF;
pub use melt::MeltDF;
pub use open::OpenDataFrame;
pub use pivot::PivotDF;
pub use rename::RenameDF;
pub use sample::SampleDF;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sort::SortDF;
pub use take::TakeDF;
pub use to_csv::ToCSV;
pub use to_df::ToDataFrame;
pub use to_nu::ToNu;
pub use to_parquet::ToParquet;
pub use with_column::WithColumn;

/// Registers every eager dataframe command declaration into the engine's
/// working set so they become callable from the shell.
pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
    // Local helper macro: boxes each command and adds it as a declaration;
    // the second arm accepts a comma-separated list of commands.
    macro_rules! bind_command {
        ( $command:expr ) => {
            working_set.add_decl(Box::new($command));
        };
        ( $( $command:expr ),* ) => {
            $( working_set.add_decl(Box::new($command)); )*
        };
    }

    // Dataframe commands
    bind_command!(
        Aggregate,
        AppendDF,
        ColumnDF,
        CreateGroupBy,
        Dataframe,
        DataTypes,
        DescribeDF,
        DropDF,
        DropNulls,
        Dummies,
        FilterWith,
        FirstDF,
        GetDF,
        JoinDF,
        LastDF,
        MeltDF,
        OpenDataFrame,
        PivotDF,
        RenameDF,
        SampleDF,
        ShapeDF,
        SliceDF,
        SortDF,
        TakeDF,
        ToCSV,
        ToDataFrame,
        ToNu,
        ToParquet,
        WithColumn
    );
}

View File

@ -1,4 +1,4 @@
use super::values::NuDataFrame; use super::super::values::NuDataFrame;
use nu_engine::CallExt; use nu_engine::CallExt;
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,

View File

@ -0,0 +1,175 @@
use nu_engine::CallExt;
use nu_protocol::{
ast::Call,
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
};
use polars::prelude::DataType;
use crate::dataframe::values::NuGroupBy;
use super::super::values::NuDataFrame;
/// Aggregations supported by `dfr pivot`.
enum Operation {
    First,
    Sum,
    Min,
    Max,
    Mean,
    Median,
}

impl Operation {
    /// Parses the operation name supplied as the third positional argument.
    ///
    /// # Errors
    /// Returns a spanned error (with the valid option list as help text)
    /// when `name` is not a recognized aggregation.
    fn from_tagged(name: Spanned<String>) -> Result<Operation, ShellError> {
        match name.item.as_ref() {
            "first" => Ok(Operation::First),
            "sum" => Ok(Operation::Sum),
            "min" => Ok(Operation::Min),
            "max" => Ok(Operation::Max),
            "mean" => Ok(Operation::Mean),
            "median" => Ok(Operation::Median),
            // Fix typo in the user-facing error title: "fount" -> "found".
            _ => Err(ShellError::SpannedLabeledErrorHelp(
                "Operation not found".into(),
                "Operation does not exist for pivot".into(),
                name.span,
                "Options: first, sum, min, max, mean, median".into(),
            )),
        }
    }
}
/// `dfr pivot`: pivots a groupby object on a pivot column, aggregating a
/// value column with the requested operation.
#[derive(Clone)]
pub struct PivotDF;

impl Command for PivotDF {
    fn name(&self) -> &str {
        "dfr pivot"
    }

    fn usage(&self) -> &str {
        "Performs a pivot operation on a groupby object"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "pivot-column",
                SyntaxShape::String,
                "pivot column to perform pivot",
            )
            .required(
                "value-column",
                SyntaxShape::String,
                "value column to perform pivot",
            )
            .required("operation", SyntaxShape::String, "aggregate operation")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Pivot a dataframe on b and aggregation on col c",
            example:
                "[[a b c]; [one x 1] [two y 2]] | dfr to-df | dfr group-by a | dfr pivot b c sum",
            result: None,
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: validates column dtypes up front (so polars errors are
/// reported with useful spans), then runs the pivot with the chosen aggregation.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let pivot_col: Spanned<String> = call.req(engine_state, stack, 0)?;
    let value_col: Spanned<String> = call.req(engine_state, stack, 1)?;
    let operation: Spanned<String> = call.req(engine_state, stack, 2)?;
    let op = Operation::from_tagged(operation)?;

    // Input must be a groupby object produced by `dfr group-by`.
    let nu_groupby = NuGroupBy::try_from_pipeline(input, call.head)?;
    let df_ref = nu_groupby.as_ref();

    check_pivot_column(df_ref, &pivot_col)?;
    check_value_column(df_ref, &value_col)?;

    let mut groupby = nu_groupby.to_groupby()?;

    let pivot = groupby.pivot(&pivot_col.item, &value_col.item);

    match op {
        Operation::Mean => pivot.mean(),
        Operation::Sum => pivot.sum(),
        Operation::Min => pivot.min(),
        Operation::Max => pivot.max(),
        Operation::First => pivot.first(),
        Operation::Median => pivot.median(),
    }
    .map_err(|e| {
        ShellError::SpannedLabeledError("Error creating pivot".into(), e.to_string(), call.head)
    })
    .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}

/// The pivot column must be integer or string typed; other dtypes are rejected
/// with a spanned error before polars is invoked.
fn check_pivot_column(
    df: &polars::prelude::DataFrame,
    col: &Spanned<String>,
) -> Result<(), ShellError> {
    let series = df.column(&col.item).map_err(|e| {
        ShellError::SpannedLabeledError("Column not found".into(), e.to_string(), col.span)
    })?;

    match series.dtype() {
        DataType::UInt8
        | DataType::UInt16
        | DataType::UInt32
        | DataType::UInt64
        | DataType::Int8
        | DataType::Int16
        | DataType::Int32
        | DataType::Int64
        | DataType::Utf8 => Ok(()),
        _ => Err(ShellError::SpannedLabeledError(
            "Pivot error".into(),
            format!("Unsupported datatype {}", series.dtype()),
            col.span,
        )),
    }
}

/// The value column must be numeric (integer or float) so it can be aggregated.
fn check_value_column(
    df: &polars::prelude::DataFrame,
    col: &Spanned<String>,
) -> Result<(), ShellError> {
    let series = df.column(&col.item).map_err(|e| {
        ShellError::SpannedLabeledError("Column not found".into(), e.to_string(), col.span)
    })?;

    match series.dtype() {
        DataType::UInt8
        | DataType::UInt16
        | DataType::UInt32
        | DataType::UInt64
        | DataType::Int8
        | DataType::Int16
        | DataType::Int32
        | DataType::Int64
        | DataType::Float32
        | DataType::Float64 => Ok(()),
        _ => Err(ShellError::SpannedLabeledError(
            "Pivot error".into(),
            format!("Unsupported datatype {}", series.dtype()),
            col.span,
        )),
    }
}

View File

@ -0,0 +1,94 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::super::values::{Column, NuDataFrame};

/// `dfr rename-col`: renames a single column of the input dataframe.
#[derive(Clone)]
pub struct RenameDF;

impl Command for RenameDF {
    fn name(&self) -> &str {
        "dfr rename-col"
    }

    fn usage(&self) -> &str {
        "rename a dataframe column"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("from", SyntaxShape::String, "column name to be renamed")
            .required("to", SyntaxShape::String, "new column name")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Renames a dataframe column",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename-col a a_new",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new(
                        "a_new".to_string(),
                        vec![Value::test_int(1), Value::test_int(3)],
                    ),
                    Column::new(
                        "b".to_string(),
                        vec![Value::test_int(2), Value::test_int(4)],
                    ),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: renames the column in place and returns the dataframe.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let from: String = call.req(engine_state, stack, 0)?;
    let to: String = call.req(engine_state, stack, 1)?;

    let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_mut()
        .rename(&from, &to)
        .map_err(|e| {
            ShellError::SpannedLabeledError("Error renaming".into(), e.to_string(), call.head)
        })
        .map(|df| {
            // rename returns &mut DataFrame; clone to produce an owned value
            // for the output pipeline.
            PipelineData::Value(
                NuDataFrame::dataframe_into_value(df.clone(), call.head),
                None,
            )
        })
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(RenameDF {})])
    }
}

View File

@ -0,0 +1,106 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
};

use super::super::values::NuDataFrame;

/// `dfr sample`: draws a random sample of rows from the input dataframe,
/// either by row count (`-n`) or by fraction (`-f`), optionally with
/// replacement (`-e`). Exactly one of `-n`/`-f` must be given.
#[derive(Clone)]
pub struct SampleDF;

impl Command for SampleDF {
    fn name(&self) -> &str {
        "dfr sample"
    }

    fn usage(&self) -> &str {
        "Create sample dataframe"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .named(
                "n-rows",
                SyntaxShape::Int,
                "number of rows to be taken from dataframe",
                Some('n'),
            )
            .named(
                "fraction",
                SyntaxShape::Number,
                "fraction of dataframe to be taken",
                Some('f'),
            )
            .switch("replace", "sample with replace", Some('e'))
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Sample rows from dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr sample -n 1",
                result: None, // No expected value because sampling is random
            },
            Example {
                description: "Shows sample row using fraction and replace",
                example: "[[a b]; [1 2] [3 4] [5 6]] | dfr to-df | dfr sample -f 0.5 -e",
                result: None, // No expected value because sampling is random
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: dispatches to polars' sample_n / sample_frac based on
/// which flag was supplied; rejects both-given and neither-given.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
    let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
    let replace: bool = call.has_flag("replace");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    match (rows, fraction) {
        (Some(rows), None) => df.as_ref().sample_n(rows.item, replace).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error creating sample".into(),
                e.to_string(),
                rows.span,
            )
        }),
        (None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error creating sample".into(),
                e.to_string(),
                frac.span,
            )
        }),
        (Some(_), Some(_)) => Err(ShellError::SpannedLabeledError(
            "Incompatible flags".into(),
            "Only one selection criterion allowed".into(),
            call.head,
        )),
        (None, None) => Err(ShellError::SpannedLabeledErrorHelp(
            "No selection".into(),
            "No selection criterion was found".into(),
            call.head,
            "Perhaps you want to use the flag -n or -f".into(),
        )),
    }
    .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}

View File

@ -0,0 +1,87 @@
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, Value,
};

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

/// `dfr shape`: returns a one-row dataframe with the input's row and
/// column counts.
#[derive(Clone)]
pub struct ShapeDF;

impl Command for ShapeDF {
    fn name(&self) -> &str {
        "dfr shape"
    }

    fn usage(&self) -> &str {
        "Shows column and row size for a dataframe"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Shows row and column shape",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr shape",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("rows".to_string(), vec![Value::test_int(2)]),
                    Column::new("columns".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: reads height/width from the dataframe and packages them
/// as two single-value columns. Takes no arguments, hence the unused params.
fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let rows = Value::Int {
        val: df.as_ref().height() as i64,
        span: call.head,
    };

    let cols = Value::Int {
        val: df.as_ref().width() as i64,
        span: call.head,
    };

    let rows_col = Column::new("rows".to_string(), vec![rows]);
    let cols_col = Column::new("columns".to_string(), vec![cols]);

    NuDataFrame::try_from_columns(vec![rows_col, cols_col])
        .map(|df| PipelineData::Value(df.into_value(call.head), None))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(ShapeDF {})])
    }
}

View File

@ -0,0 +1,85 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

/// `dfr slice`: creates a new dataframe from a contiguous row range of the
/// input, given a start offset and a length.
#[derive(Clone)]
pub struct SliceDF;

impl Command for SliceDF {
    fn name(&self) -> &str {
        "dfr slice"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe from a slice of rows"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("offset", SyntaxShape::Int, "start of slice")
            .required("size", SyntaxShape::Int, "size of slice")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Create new dataframe from a slice of the rows",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr slice 0 1",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: delegates directly to polars' `slice(offset, size)`.
/// Note offset is i64 (polars allows negative offsets from the end).
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let offset: i64 = call.req(engine_state, stack, 0)?;
    let size: usize = call.req(engine_state, stack, 1)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let res = df.as_ref().slice(offset, size);

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(res, call.head),
        None,
    ))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SliceDF {})])
    }
}

View File

@ -0,0 +1,142 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use crate::dataframe::values::{utils::convert_columns_string, Column};

use super::super::values::NuDataFrame;

/// `dfr sort`: sorts a dataframe by the given columns, or a series by its
/// values; `--reverse` inverts the order.
#[derive(Clone)]
pub struct SortDF;

impl Command for SortDF {
    fn name(&self) -> &str {
        "dfr sort"
    }

    fn usage(&self) -> &str {
        "Creates new sorted dataframe or series"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .switch("reverse", "invert sort", Some('r'))
            .rest("rest", SyntaxShape::Any, "column names to sort dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Create new sorted dataframe",
                example: "[[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a".to_string(),
                            vec![Value::test_int(1), Value::test_int(3)],
                        ),
                        Column::new(
                            "b".to_string(),
                            vec![Value::test_int(2), Value::test_int(4)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Create new sorted series",
                example: "[3 4 1 2] | dfr to-df | dfr sort",
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![
                            Value::test_int(1),
                            Value::test_int(2),
                            Value::test_int(3),
                            Value::test_int(4),
                        ],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: a series is sorted by its single column without needing
/// arguments; a dataframe requires at least one column name in `rest`.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let reverse = call.has_flag("reverse");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    if df.is_series() {
        // A series has exactly one column, so sort by all of its names.
        let columns = df.as_ref().get_column_names();

        df.as_ref()
            .sort(columns, reverse)
            .map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error sorting dataframe".into(),
                    e.to_string(),
                    call.head,
                )
            })
            .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
    } else {
        let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;

        if !columns.is_empty() {
            let (col_string, col_span) = convert_columns_string(columns, call.head)?;

            df.as_ref()
                .sort(&col_string, reverse)
                .map_err(|e| {
                    ShellError::SpannedLabeledError(
                        "Error sorting dataframe".into(),
                        e.to_string(),
                        col_span,
                    )
                })
                .map(|df| {
                    PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)
                })
        } else {
            // Sorting a multi-column dataframe without naming columns is
            // ambiguous; require an explicit column list.
            Err(ShellError::SpannedLabeledError(
                "Missing columns".into(),
                "missing column name to perform sort".into(),
                call.head,
            ))
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SortDF {})])
    }
}

View File

@ -0,0 +1,144 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::DataType;

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

/// `dfr take`: selects rows from a dataframe or series using an index
/// series supplied as the first argument.
#[derive(Clone)]
pub struct TakeDF;

impl Command for TakeDF {
    fn name(&self) -> &str {
        "dfr take"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe using the given indices"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "indices",
                SyntaxShape::Any,
                "list of indices used to take data",
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Takes selected rows from dataframe",
                example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dfr to-df);
    let indices = ([0 2] | dfr to-df);
    $df | dfr take $indices"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a".to_string(),
                            vec![Value::test_int(4), Value::test_int(4)],
                        ),
                        Column::new(
                            "b".to_string(),
                            vec![Value::test_int(1), Value::test_int(3)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Takes selected rows from series",
                example: r#"let series = ([4 1 5 2 4 3] | dfr to-df);
    let indices = ([0 2] | dfr to-df);
    $series | dfr take $indices"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![Value::test_int(4), Value::test_int(5)],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: converts the index argument into a UInt32 series (the
/// index type polars' `take` expects), then gathers the selected rows.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let index_value: Value = call.req(engine_state, stack, 0)?;
    let index_span = index_value.span()?;
    let index = NuDataFrame::try_from_value(index_value)?.as_series(index_span)?;

    // Only integer-typed index series are accepted; cast them all to UInt32.
    let casted = match index.dtype() {
        DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => {
            index.cast(&DataType::UInt32).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error casting index list".into(),
                    e.to_string(),
                    index_span,
                )
            })
        }
        _ => Err(ShellError::SpannedLabeledErrorHelp(
            "Incorrect type".into(),
            "Series with incorrect type".into(),
            call.head,
            "Consider using a Series with type int type".into(),
        )),
    }?;

    let indices = casted.u32().map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error casting index list".into(),
            e.to_string(),
            index_span,
        )
    })?;

    NuDataFrame::try_from_pipeline(input, call.head).and_then(|df| {
        df.as_ref()
            .take(indices)
            .map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error taking values".into(),
                    e.to_string(),
                    call.head,
                )
            })
            .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
    })
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(TakeDF {})])
    }
}

View File

@ -0,0 +1,132 @@
use std::{fs::File, path::PathBuf};

use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Value,
};
use polars::prelude::{CsvWriter, SerWriter};

use super::super::values::NuDataFrame;

/// `dfr to-csv`: writes the input dataframe to a CSV file, with optional
/// custom delimiter and an optional header-suppression switch.
#[derive(Clone)]
pub struct ToCSV;

impl Command for ToCSV {
    fn name(&self) -> &str {
        "dfr to-csv"
    }

    fn usage(&self) -> &str {
        "Saves dataframe to csv file"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("file", SyntaxShape::Filepath, "file path to save dataframe")
            .named(
                "delimiter",
                SyntaxShape::String,
                "file delimiter character",
                Some('d'),
            )
            .switch("no-header", "Indicates if file doesn't have header", None)
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Saves dataframe to csv file",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-csv test.csv",
                result: None,
            },
            Example {
                description: "Saves dataframe to csv file using other delimiter",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-csv test.csv -d '|'",
                result: None,
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}
/// Writes the piped-in dataframe to the CSV file given as the first argument,
/// honoring the `--delimiter` flag and the `--no-header` switch, and returns
/// a confirmation message as the pipeline output.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
    let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
    // Bug fix: the switch is declared as "no-header" in the signature, but it
    // was queried here as "no_header", so the flag could never take effect.
    let no_header: bool = call.has_flag("no-header");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let mut file = File::create(&file_name.item).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error with file name".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    let writer = CsvWriter::new(&mut file);

    // has_header is the inverse of the --no-header switch.
    let writer = if no_header {
        writer.has_header(false)
    } else {
        writer.has_header(true)
    };

    let writer = match delimiter {
        None => writer,
        Some(d) => {
            // The CSV writer takes a single-byte delimiter.
            if d.item.len() != 1 {
                return Err(ShellError::SpannedLabeledError(
                    "Incorrect delimiter".into(),
                    "Delimiter has to be one char".into(),
                    d.span,
                ));
            } else {
                let delimiter = match d.item.chars().next() {
                    Some(d) => d as u8,
                    None => unreachable!(),
                };

                writer.with_delimiter(delimiter)
            }
        }
    };

    writer.finish(df.as_ref()).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error writing to file".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    // Echo the saved path back as confirmation.
    let file_value = Value::String {
        val: format!("saved {:?}", &file_name.item),
        span: file_name.span,
    };

    Ok(PipelineData::Value(
        Value::List {
            vals: vec![file_value],
            span: call.head,
        },
        None,
    ))
}

View File

@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
use nu_protocol::{ use nu_protocol::{
ast::Call, ast::Call,
@ -117,7 +117,7 @@ impl Command for ToDataFrame {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -0,0 +1,83 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};

use super::super::values::NuDataFrame;

/// `dfr to-nu`: converts the head (or tail, with `-t`) of a dataframe into
/// a plain Nushell table value.
#[derive(Clone)]
pub struct ToNu;

impl Command for ToNu {
    fn name(&self) -> &str {
        "dfr to-nu"
    }

    fn usage(&self) -> &str {
        "Converts a section of the dataframe to Nushell Table"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            // Consistency fix: the flag is decoded as usize, and sibling
            // commands (e.g. `dfr sample -n`) declare it as Int; Number
            // would accept floats that could never convert.
            .named(
                "n-rows",
                SyntaxShape::Int,
                "number of rows to be shown",
                Some('n'),
            )
            .switch("tail", "shows tail rows", Some('t'))
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Shows head rows from dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-nu",
                result: None,
            },
            Example {
                description: "Shows tail rows from dataframe",
                example: "[[a b]; [1 2] [3 4] [5 6]] | dfr to-df | dfr to-nu -t -n 1",
                result: None,
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: reads the optional row count, picks head or tail based on
/// the `--tail` switch, and wraps the rows in a Nushell list value.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let rows: Option<usize> = call.get_flag(engine_state, stack, "n-rows")?;
    let tail: bool = call.has_flag("tail");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let values = if tail {
        df.tail(rows, call.head)?
    } else {
        df.head(rows, call.head)?
    };

    let value = Value::List {
        vals: values,
        span: call.head,
    };

    Ok(PipelineData::Value(value, None))
}

View File

@ -0,0 +1,84 @@
use std::{fs::File, path::PathBuf};

use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Value,
};
use polars::prelude::ParquetWriter;

use super::super::values::NuDataFrame;

/// `dfr to-parquet`: writes the input dataframe to a parquet file.
#[derive(Clone)]
pub struct ToParquet;

impl Command for ToParquet {
    fn name(&self) -> &str {
        "dfr to-parquet"
    }

    fn usage(&self) -> &str {
        "Saves dataframe to parquet file"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("file", SyntaxShape::Filepath, "file path to save dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Saves dataframe to csv file",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-parquet test.parquet",
            result: None,
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

/// Implementation: creates the target file and hands it to polars'
/// ParquetWriter; returns a confirmation message as the pipeline output.
fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let file = File::create(&file_name.item).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error with file name".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    ParquetWriter::new(file).finish(df.as_ref()).map_err(|e| {
        ShellError::SpannedLabeledError("Error saving file".into(), e.to_string(), file_name.span)
    })?;

    // Echo the saved path back as confirmation.
    let file_value = Value::String {
        val: format!("saved {:?}", &file_name.item),
        span: file_name.span,
    };

    Ok(PipelineData::Value(
        Value::List {
            vals: vec![file_value],
            span: call.head,
        },
        None,
    ))
}

View File

@ -5,7 +5,7 @@ use nu_protocol::{
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value, Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
}; };
use super::values::{Column, NuDataFrame}; use super::super::values::{Column, NuDataFrame};
#[derive(Clone)] #[derive(Clone)]
pub struct WithColumn; pub struct WithColumn;
@ -99,7 +99,7 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::*; use super::*;
#[test] #[test]

View File

@ -1,101 +1,15 @@
mod eager;
mod series; mod series;
mod values; mod values;
mod append; pub use eager::add_eager_decls;
mod column; pub use series::add_series_decls;
mod command;
mod describe;
mod drop;
mod drop_nulls;
mod dtypes;
mod open;
mod to_df;
mod with_column;
pub use series::*;
pub use append::AppendDF;
pub use column::ColumnDF;
pub use command::Dataframe;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
pub use with_column::WithColumn;
use nu_protocol::engine::StateWorkingSet; use nu_protocol::engine::StateWorkingSet;
pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) { pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) {
macro_rules! bind_command { add_series_decls(working_set);
( $command:expr ) => { add_eager_decls(working_set);
working_set.add_decl(Box::new($command));
};
( $( $command:expr ),* ) => {
$( working_set.add_decl(Box::new($command)); )*
};
}
// Series commands
bind_command!(
AllFalse,
AllTrue,
ArgMax,
ArgMin,
ArgSort,
ArgTrue,
ArgUnique,
Concatenate,
Contains,
Cumulative,
GetDay,
GetHour,
GetMinute,
GetMonth,
GetNanosecond,
GetOrdinal,
GetSecond,
GetWeek,
GetWeekDay,
GetYear,
IsDuplicated,
IsIn,
IsNotNull,
IsNull,
IsUnique,
NNull,
NUnique,
NotSeries,
Rename,
Replace,
ReplaceAll,
Rolling,
SetSeries,
SetWithIndex,
Shift,
StrLengths,
StrSlice,
StrFTime,
ToLowerCase,
ToUpperCase,
Unique,
ValueCount
);
// Dataframe commands
bind_command!(
AppendDF,
ColumnDF,
Dataframe,
DataTypes,
DescribeDF,
DropDF,
DropNulls,
OpenDataFrame,
ToDataFrame,
WithColumn
);
} }
#[cfg(test)] #[cfg(test)]

View File

@ -23,6 +23,8 @@ mod shift;
mod unique; mod unique;
mod value_counts; mod value_counts;
use nu_protocol::engine::StateWorkingSet;
pub use all_false::AllFalse; pub use all_false::AllFalse;
pub use all_true::AllTrue; pub use all_true::AllTrue;
pub use arg_max::ArgMax; pub use arg_max::ArgMax;
@ -35,3 +37,60 @@ pub use rolling::Rolling;
pub use shift::Shift; pub use shift::Shift;
pub use unique::Unique; pub use unique::Unique;
pub use value_counts::ValueCount; pub use value_counts::ValueCount;
// Registers every series-related `dfr` command into the engine's working set
// so the parser/evaluator can resolve them by name.
pub fn add_series_decls(working_set: &mut StateWorkingSet) {
// Helper macro: boxes each command and adds it as a declaration; the
// variadic arm lets the whole command list be registered in one call.
macro_rules! bind_command {
( $command:expr ) => {
working_set.add_decl(Box::new($command));
};
( $( $command:expr ),* ) => {
$( working_set.add_decl(Box::new($command)); )*
};
}
// Series commands
bind_command!(
AllFalse,
AllTrue,
ArgMax,
ArgMin,
ArgSort,
ArgTrue,
ArgUnique,
Concatenate,
Contains,
Cumulative,
GetDay,
GetHour,
GetMinute,
GetMonth,
GetNanosecond,
GetOrdinal,
GetSecond,
GetWeek,
GetWeekDay,
GetYear,
IsDuplicated,
IsIn,
IsNotNull,
IsNull,
IsUnique,
NNull,
NUnique,
NotSeries,
Rename,
Replace,
ReplaceAll,
Rolling,
SetSeries,
SetWithIndex,
Shift,
StrLengths,
StrSlice,
StrFTime,
ToLowerCase,
ToUpperCase,
Unique,
ValueCount
);
}

View File

@ -162,8 +162,8 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::eager::DropNulls;
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::super::super::DropNulls;
use super::*; use super::*;
#[test] #[test]

View File

@ -68,8 +68,8 @@ fn command(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::super::super::eager::DropNulls;
use super::super::super::test_dataframe::test_dataframe; use super::super::super::test_dataframe::test_dataframe;
use super::super::super::DropNulls;
use super::*; use super::*;
#[test] #[test]

View File

@ -5,7 +5,7 @@ use nu_protocol::{
PipelineData, Span, Value, CONFIG_VARIABLE_ID, PipelineData, Span, Value, CONFIG_VARIABLE_ID,
}; };
use super::ToDataFrame; use super::eager::ToDataFrame;
use crate::Let; use crate::Let;
pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) { pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {

View File

@ -1,4 +1,6 @@
mod nu_dataframe; mod nu_dataframe;
mod nu_groupby;
pub mod utils; pub mod utils;
pub use nu_dataframe::{Axis, Column, NuDataFrame}; pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_groupby::NuGroupBy;

View File

@ -1,5 +1,5 @@
use super::NuDataFrame; use super::NuDataFrame;
use nu_protocol::{ast::Operator, Category, CustomValue, ShellError, Span, Value}; use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuDataFrame // CustomValue implementation for NuDataFrame
impl CustomValue for NuDataFrame { impl CustomValue for NuDataFrame {
@ -20,10 +20,6 @@ impl CustomValue for NuDataFrame {
} }
} }
fn category(&self) -> Category {
Category::Custom(self.typetag_name().into())
}
fn value_string(&self) -> String { fn value_string(&self) -> String {
self.typetag_name().to_string() self.typetag_name().to_string()
} }

View File

@ -12,6 +12,8 @@ use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher}; use std::{cmp::Ordering, fmt::Display, hash::Hasher};
use super::utils::DEFAULT_ROWS;
// DataFrameValue is an encapsulation of Nushell Value that can be used // DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows to // to define the PolarsObject Trait. The polars object trait allows to
// create dataframes with mixed datatypes // create dataframes with mixed datatypes
@ -283,7 +285,7 @@ impl NuDataFrame {
pub fn tail(&self, rows: Option<usize>, span: Span) -> Result<Vec<Value>, ShellError> { pub fn tail(&self, rows: Option<usize>, span: Span) -> Result<Vec<Value>, ShellError> {
let df = &self.0; let df = &self.0;
let to_row = df.height(); let to_row = df.height();
let size = rows.unwrap_or(5); let size = rows.unwrap_or(DEFAULT_ROWS);
let from_row = to_row.saturating_sub(size); let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row, span)?; let values = self.to_rows(from_row, to_row, span)?;

View File

@ -0,0 +1,44 @@
use super::NuGroupBy;
use nu_protocol::{CustomValue, ShellError, Span, Value};
// CustomValue implementation for NuGroupBy
// (fixed comment: previously said NuDataFrame, copy-pasted from that impl)
impl CustomValue for NuGroupBy {
fn typetag_name(&self) -> &'static str {
"groupby"
}
fn typetag_deserialize(&self) {
// Deserialization of the custom value is not supported.
unimplemented!("typetag_deserialize")
}
// Deep-clones the groupby (dataframe, key columns and group tuples) and
// wraps it back into a custom value with the requested span.
fn clone_value(&self, span: nu_protocol::Span) -> Value {
let cloned = NuGroupBy {
dataframe: self.dataframe.clone(),
by: self.by.clone(),
groups: self.groups.clone(),
};
Value::CustomValue {
val: Box::new(cloned),
span,
}
}
fn value_string(&self) -> String {
self.typetag_name().to_string()
}
// Base representation shown to the user: a list of records naming the
// grouping columns (see NuGroupBy::print).
fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
let vals = self.print(span)?;
Ok(Value::List { vals, span })
}
// No JSON representation is provided for a groupby.
fn to_json(&self) -> nu_json::Value {
nu_json::Value::Null
}
fn as_any(&self) -> &dyn std::any::Any {
self
}
}

View File

@ -0,0 +1,89 @@
mod custom_value;
use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::frame::groupby::{GroupBy, GroupTuples};
use polars::prelude::DataFrame;
use serde::{Deserialize, Serialize};
/// A grouped dataframe: stores the source dataframe together with the
/// grouping-key column names and the precomputed polars group tuples, so a
/// polars `GroupBy` can be rebuilt on demand (see `to_groupby`).
#[derive(Debug, Serialize, Deserialize)]
pub struct NuGroupBy {
// Dataframe the grouping was computed from.
dataframe: DataFrame,
// Names of the columns used as grouping keys.
by: Vec<String>,
// Precomputed group tuples (row indices per group).
groups: GroupTuples,
}
impl NuGroupBy {
    /// Wraps a dataframe, its grouping-key columns, and precomputed group
    /// tuples into a NuGroupBy.
    pub fn new(dataframe: DataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
        NuGroupBy {
            dataframe,
            by,
            groups,
        }
    }

    /// Converts this groupby into a nushell `Value::CustomValue`.
    pub fn into_value(self, span: Span) -> Value {
        Value::CustomValue {
            val: Box::new(self),
            span,
        }
    }

    /// Extracts a NuGroupBy from a nushell Value.
    ///
    /// Errors with `CantConvert` when the value is not a custom value or the
    /// custom value is not a groupby.
    pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
        match value {
            Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuGroupBy>() {
                Some(groupby) => Ok(NuGroupBy {
                    dataframe: groupby.dataframe.clone(),
                    by: groupby.by.clone(),
                    groups: groupby.groups.clone(),
                }),
                // Fixed error text: this previously said "non-dataframe",
                // copy-pasted from the NuDataFrame implementation, but the
                // failed downcast here means the value is not a groupby.
                None => Err(ShellError::CantConvert(
                    "groupby".into(),
                    "non-groupby".into(),
                    span,
                )),
            },
            x => Err(ShellError::CantConvert(
                "groupby".into(),
                x.get_type().to_string(),
                x.span()?,
            )),
        }
    }

    /// Consumes the pipeline and tries to read a NuGroupBy out of it.
    pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<NuGroupBy, ShellError> {
        let value = input.into_value(span);
        NuGroupBy::try_from_value(value)
    }

    /// Rebuilds a polars `GroupBy` from the stored dataframe, key columns and
    /// group tuples. Fails if a key column no longer exists in the dataframe.
    pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
        let by = self.dataframe.select_series(&self.by).map_err(|e| {
            ShellError::LabeledError("Error creating groupby".into(), e.to_string())
        })?;

        Ok(GroupBy::new(&self.dataframe, by, self.groups.clone(), None))
    }

    /// Renders the grouping-key column names as a list of records for display.
    pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
        let values = self
            .by
            .iter()
            .map(|col| {
                let cols = vec!["group by".to_string()];
                let vals = vec![Value::String {
                    val: col.into(),
                    span,
                }];

                Value::Record { cols, vals, span }
            })
            .collect::<Vec<Value>>();

        Ok(values)
    }
}
impl AsRef<DataFrame> for NuGroupBy {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.dataframe
}
}

View File

@ -1,6 +1,9 @@
use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value}; use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value};
// Converts a Vec<Value> to a Vec<String> with a Span marking the whole // Default value used when selecting rows from dataframe
pub const DEFAULT_ROWS: usize = 5;
// Converts a Vec<Value> to a Vec<Spanned<String>> with a Span marking the whole
// location of the columns for error referencing // location of the columns for error referencing
pub(crate) fn convert_columns( pub(crate) fn convert_columns(
columns: Vec<Value>, columns: Vec<Value>,
@ -35,3 +38,39 @@ pub(crate) fn convert_columns(
Ok((res, col_span)) Ok((res, col_span))
} }
// Converts a Vec<Value> of string values into a Vec<String> of column names,
// together with a single Span covering all the column names so errors can
// reference the whole list.
pub(crate) fn convert_columns_string(
    columns: Vec<Value>,
    span: Span,
) -> Result<(Vec<String>, Span), ShellError> {
    // Seed the joined span with the span of the first column; an empty list
    // is an error because there is nothing to anchor the span to.
    let mut col_span = columns
        .get(0)
        .ok_or_else(|| {
            ShellError::SpannedLabeledError(
                "Empty column list".into(),
                "Empty list found for command".into(),
                span,
            )
        })
        .and_then(|v| v.span())?;

    let mut names: Vec<String> = Vec::with_capacity(columns.len());
    for value in columns {
        match value {
            Value::String { val, span } => {
                // Grow the joined span to cover this column name too.
                col_span = span_join(&[col_span, span]);
                names.push(val);
            }
            // Short-circuit on the first non-string entry, like the
            // fallible collect did.
            _ => {
                return Err(ShellError::SpannedLabeledError(
                    "Incorrect column format".into(),
                    "Only string as column name".into(),
                    span,
                ))
            }
        }
    }

    Ok((names, col_span))
}

View File

@ -1,13 +1,13 @@
use std::{cmp::Ordering, fmt}; use std::{cmp::Ordering, fmt};
use crate::{ast::Operator, Category, ShellError, Span, Value}; use crate::{ast::Operator, ShellError, Span, Value};
// Trait definition for a custom value // Trait definition for a custom value
#[typetag::serde(tag = "type")] #[typetag::serde(tag = "type")]
pub trait CustomValue: fmt::Debug + Send + Sync { pub trait CustomValue: fmt::Debug + Send + Sync {
fn clone_value(&self, span: Span) -> Value; fn clone_value(&self, span: Span) -> Value;
fn category(&self) -> Category; //fn category(&self) -> Category;
// Define string representation of the custom value // Define string representation of the custom value
fn value_string(&self) -> String; fn value_string(&self) -> String;
@ -26,8 +26,19 @@ pub trait CustomValue: fmt::Debug + Send + Sync {
fn as_any(&self) -> &dyn std::any::Any; fn as_any(&self) -> &dyn std::any::Any;
// Follow cell path functions // Follow cell path functions
fn follow_path_int(&self, count: usize, span: Span) -> Result<Value, ShellError>; fn follow_path_int(&self, _count: usize, span: Span) -> Result<Value, ShellError> {
fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError>; Err(ShellError::IncompatiblePathAccess(
format!("{} does't support path access", self.value_string()),
span,
))
}
fn follow_path_string(&self, _column_name: String, span: Span) -> Result<Value, ShellError> {
Err(ShellError::IncompatiblePathAccess(
format!("{} does't support path access", self.value_string()),
span,
))
}
// ordering with other value // ordering with other value
fn partial_cmp(&self, _other: &Value) -> Option<Ordering> { fn partial_cmp(&self, _other: &Value) -> Option<Ordering> {