Dataframe commands (#542)

* groupby object
* aggregate command
* eager commands
* rest of dataframe commands

parent c3a16902fe
commit 6a35e6b7b6
19  Cargo.lock (generated)

@@ -1363,6 +1363,12 @@ dependencies = [
 "pkg-config",
]

[[package]]
name = "libm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a"

[[package]]
name = "libssh2-sys"
version = "0.2.23"

@@ -1982,6 +1988,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
 "autocfg",
 "libm",
]

[[package]]

@@ -2249,6 +2256,8 @@ dependencies = [
 "num_cpus",
 "polars-arrow",
 "prettytable-rs",
 "rand",
 "rand_distr",
 "rayon",
 "regex",
 "serde",

@@ -2434,6 +2443,16 @@ dependencies = [
 "getrandom 0.2.3",
]

[[package]]
name = "rand_distr"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f"
dependencies = [
 "num-traits",
 "rand",
]

[[package]]
name = "rand_hc"
version = "0.3.1"
@@ -74,7 +74,7 @@ optional = true
features = [
 "default", "parquet", "json", "serde", "object",
 "checked_arithmetic", "strings", "cum_agg", "is_in",
 "rolling_window", "strings"
 "rolling_window", "strings", "pivot", "random"
]

[features]
375  crates/nu-command/src/dataframe/eager/aggregate.rs (new file)
@@ -0,0 +1,375 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    did_you_mean,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use polars::{frame::groupby::GroupBy, prelude::PolarsError};

use crate::dataframe::values::NuGroupBy;

use super::super::values::{Column, NuDataFrame};

enum Operation {
    Mean,
    Sum,
    Min,
    Max,
    First,
    Last,
    Nunique,
    Quantile(f64),
    Median,
    Var,
    Std,
    Count,
}

impl Operation {
    fn from_tagged(
        name: &Spanned<String>,
        quantile: Option<Spanned<f64>>,
    ) -> Result<Operation, ShellError> {
        match name.item.as_ref() {
            "mean" => Ok(Operation::Mean),
            "sum" => Ok(Operation::Sum),
            "min" => Ok(Operation::Min),
            "max" => Ok(Operation::Max),
            "first" => Ok(Operation::First),
            "last" => Ok(Operation::Last),
            "nunique" => Ok(Operation::Nunique),
            "quantile" => match quantile {
                None => Err(ShellError::SpannedLabeledError(
                    "Quantile value not found".into(),
                    "Quantile operation requires quantile value".into(),
                    name.span,
                )),
                Some(value) => {
                    if (value.item < 0.0) | (value.item > 1.0) {
                        Err(ShellError::SpannedLabeledError(
                            "Inappropriate quantile".into(),
                            "Quantile value should be between 0.0 and 1.0".into(),
                            value.span,
                        ))
                    } else {
                        Ok(Operation::Quantile(value.item))
                    }
                }
            },
            "median" => Ok(Operation::Median),
            "var" => Ok(Operation::Var),
            "std" => Ok(Operation::Std),
            "count" => Ok(Operation::Count),
            selection => {
                let possibilities = [
                    "mean".to_string(),
                    "sum".to_string(),
                    "min".to_string(),
                    "max".to_string(),
                    "first".to_string(),
                    "last".to_string(),
                    "nunique".to_string(),
                    "quantile".to_string(),
                    "median".to_string(),
                    "var".to_string(),
                    "std".to_string(),
                    "count".to_string(),
                ];

                match did_you_mean(&possibilities, selection) {
                    Some(suggestion) => Err(ShellError::DidYouMean(suggestion, name.span)),
                    None => Err(ShellError::SpannedLabeledErrorHelp(
                        "Operation not found".into(),
                        "Operation does not exist".into(),
                        name.span,
                        "Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count".into(),
                    ))
                }
            }
        }
    }

    fn to_str(&self) -> &'static str {
        match self {
            Self::Mean => "mean",
            Self::Sum => "sum",
            Self::Min => "min",
            Self::Max => "max",
            Self::First => "first",
            Self::Last => "last",
            Self::Nunique => "nunique",
            Self::Quantile(_) => "quantile",
            Self::Median => "median",
            Self::Var => "var",
            Self::Std => "std",
            Self::Count => "count",
        }
    }
}

#[derive(Clone)]
pub struct Aggregate;

impl Command for Aggregate {
    fn name(&self) -> &str {
        "dfr aggregate"
    }

    fn usage(&self) -> &str {
        "Performs an aggregation operation on a dataframe and groupby object"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "operation-name",
                SyntaxShape::String,
                "\n\tDataframes: mean, sum, min, max, quantile, median, var, std
\tGroupBy: mean, sum, min, max, first, last, nunique, quantile, median, var, std, count",
            )
            .named(
                "quantile",
                SyntaxShape::Number,
                "quantile value for quantile operation",
                Some('q'),
            )
            .switch(
                "explicit",
                "returns explicit names for groupby aggregations",
                Some('e'),
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Aggregate sum by grouping by column a and summing on col b",
                example:
                    "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new("a".to_string(), vec![Value::test_string("one")]),
                        Column::new("b".to_string(), vec![Value::test_int(3)]),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Aggregate sum in dataframe columns",
                example: "[[a b]; [4 1] [5 2]] | dfr to-df | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new("a".to_string(), vec![Value::test_int(9)]),
                        Column::new("b".to_string(), vec![Value::test_int(3)]),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Aggregate sum in series",
                example: "[4 1 5 6] | dfr to-df | dfr aggregate sum",
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![Value::test_int(16)],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let operation: Spanned<String> = call.req(engine_state, stack, 0)?;
    let quantile: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "quantile")?;
    let op = Operation::from_tagged(&operation, quantile)?;

    match input {
        PipelineData::Value(Value::CustomValue { val, span }, _) => {
            let df = val.as_any().downcast_ref::<NuDataFrame>();
            let groupby = val.as_any().downcast_ref::<NuGroupBy>();

            match (df, groupby) {
                (Some(df), None) => {
                    let df = df.as_ref();
                    let res = perform_dataframe_aggregation(df, op, operation.span)?;

                    Ok(PipelineData::Value(
                        NuDataFrame::dataframe_into_value(res, span),
                        None,
                    ))
                }
                (None, Some(nu_groupby)) => {
                    let groupby = nu_groupby.to_groupby()?;

                    let res = perform_groupby_aggregation(
                        groupby,
                        op,
                        operation.span,
                        call.head,
                        call.has_flag("explicit"),
                    )?;

                    Ok(PipelineData::Value(
                        NuDataFrame::dataframe_into_value(res, span),
                        None,
                    ))
                }
                _ => Err(ShellError::SpannedLabeledError(
                    "Incorrect datatype".into(),
                    "no groupby or dataframe found in input stream".into(),
                    call.head,
                )),
            }
        }
        _ => Err(ShellError::SpannedLabeledError(
            "Incorrect datatype".into(),
            "no groupby or dataframe found in input stream".into(),
            call.head,
        )),
    }
}

fn perform_groupby_aggregation(
    groupby: GroupBy,
    operation: Operation,
    operation_span: Span,
    agg_span: Span,
    explicit: bool,
) -> Result<polars::prelude::DataFrame, ShellError> {
    let mut res = match operation {
        Operation::Mean => groupby.mean(),
        Operation::Sum => groupby.sum(),
        Operation::Min => groupby.min(),
        Operation::Max => groupby.max(),
        Operation::First => groupby.first(),
        Operation::Last => groupby.last(),
        Operation::Nunique => groupby.n_unique(),
        Operation::Quantile(quantile) => groupby.quantile(quantile),
        Operation::Median => groupby.median(),
        Operation::Var => groupby.var(),
        Operation::Std => groupby.std(),
        Operation::Count => groupby.count(),
    }
    .map_err(|e| {
        let span = match &e {
            PolarsError::NotFound(_) => agg_span,
            _ => operation_span,
        };

        ShellError::SpannedLabeledError("Error calculating aggregation".into(), e.to_string(), span)
    })?;

    if !explicit {
        let col_names = res
            .get_column_names()
            .iter()
            .map(|name| name.to_string())
            .collect::<Vec<String>>();

        for col in col_names {
            let from = match operation {
                Operation::Mean => "_mean",
                Operation::Sum => "_sum",
                Operation::Min => "_min",
                Operation::Max => "_max",
                Operation::First => "_first",
                Operation::Last => "_last",
                Operation::Nunique => "_n_unique",
                Operation::Quantile(_) => "_quantile",
                Operation::Median => "_median",
                Operation::Var => "_agg_var",
                Operation::Std => "_agg_std",
                Operation::Count => "_count",
            };

            let new_col = match col.find(from) {
                Some(index) => &col[..index],
                None => &col[..],
            };

            res.rename(&col, new_col)
                .expect("Column is always there. Looping with known names");
        }
    }

    Ok(res)
}

fn perform_dataframe_aggregation(
    dataframe: &polars::prelude::DataFrame,
    operation: Operation,
    operation_span: Span,
) -> Result<polars::prelude::DataFrame, ShellError> {
    match operation {
        Operation::Mean => Ok(dataframe.mean()),
        Operation::Sum => Ok(dataframe.sum()),
        Operation::Min => Ok(dataframe.min()),
        Operation::Max => Ok(dataframe.max()),
        Operation::Quantile(quantile) => dataframe.quantile(quantile).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error calculating quantile".into(),
                e.to_string(),
                operation_span,
            )
        }),
        Operation::Median => Ok(dataframe.median()),
        Operation::Var => Ok(dataframe.var()),
        Operation::Std => Ok(dataframe.std()),
        operation => {
            let possibilities = [
                "mean".to_string(),
                "sum".to_string(),
                "min".to_string(),
                "max".to_string(),
                "quantile".to_string(),
                "median".to_string(),
                "var".to_string(),
                "std".to_string(),
            ];

            match did_you_mean(&possibilities, operation.to_str()) {
                Some(suggestion) => Err(ShellError::DidYouMean(suggestion, operation_span)),
                None => Err(ShellError::SpannedLabeledErrorHelp(
                    "Operation not found".into(),
                    "Operation does not exist".into(),
                    operation_span,
                    "Perhaps you want: mean, sum, min, max, quantile, median, var, or std".into(),
                )),
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::super::CreateGroupBy;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(Aggregate {}), Box::new(CreateGroupBy {})])
    }
}
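For readers skimming the aggregation code above, the least obvious step is the implicit renaming done when --explicit is not passed: polars names groupby outputs like "b_sum", and the command trims that suffix back to the original column name. Below is a minimal, dependency-free sketch of that rule; trim_agg_suffix is an illustrative helper name, not something defined in the commit, which does the same find-and-slice inline.

// Sketch only: trim the aggregation suffix the way `dfr aggregate` does
// when the --explicit flag is not set.
fn trim_agg_suffix<'a>(col: &'a str, suffix: &str) -> &'a str {
    match col.find(suffix) {
        Some(index) => &col[..index],
        None => col,
    }
}

fn main() {
    assert_eq!(trim_agg_suffix("b_sum", "_sum"), "b");
    // Columns without the suffix are left untouched.
    assert_eq!(trim_agg_suffix("b", "_sum"), "b");
}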
@@ -5,7 +5,7 @@ use nu_protocol::{
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::values::{Axis, Column, NuDataFrame};
use super::super::values::{Axis, Column, NuDataFrame};

#[derive(Clone)]
pub struct AppendDF;
@@ -120,7 +120,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
@@ -5,7 +5,7 @@ use nu_protocol::{
    Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};

use super::values::{Column, NuDataFrame};
use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct ColumnDF;
@@ -71,7 +71,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
@@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame};
use super::super::values::{Column, NuDataFrame};

use nu_protocol::{
    ast::Call,
@@ -231,7 +231,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
@@ -5,8 +5,8 @@ use nu_protocol::{
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::values::utils::convert_columns;
use super::values::{Column, NuDataFrame};
use super::super::values::utils::convert_columns;
use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct DropDF;
@@ -101,7 +101,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
@@ -5,8 +5,8 @@ use nu_protocol::{
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::values::utils::convert_columns;
use super::values::{Column, NuDataFrame};
use super::super::values::utils::convert_columns_string;
use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct DropNulls;
@@ -101,11 +101,7 @@ fn command(

    let (subset, col_span) = match columns {
        Some(cols) => {
            let (agg_string, col_span) = convert_columns(cols, call.head)?;
            let agg_string = agg_string
                .into_iter()
                .map(|col| col.item)
                .collect::<Vec<String>>();
            let (agg_string, col_span) = convert_columns_string(cols, call.head)?;
            (Some(agg_string), col_span)
        }
        None => (None, call.head),
@@ -123,7 +119,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::super::WithColumn;
    use super::*;

@@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame};
use super::super::values::{Column, NuDataFrame};
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
@@ -96,7 +96,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
137  crates/nu-command/src/dataframe/eager/dummies.rs (new file)
@@ -0,0 +1,137 @@
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, Value,
};

use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct Dummies;

impl Command for Dummies {
    fn name(&self) -> &str {
        "dfr to-dummies"
    }

    fn usage(&self) -> &str {
        "Creates a new dataframe with dummy variables"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Create new dataframe with dummy variables from a dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-dummies",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a_1".to_string(),
                            vec![Value::test_int(1), Value::test_int(0)],
                        ),
                        Column::new(
                            "a_3".to_string(),
                            vec![Value::test_int(0), Value::test_int(1)],
                        ),
                        Column::new(
                            "b_2".to_string(),
                            vec![Value::test_int(1), Value::test_int(0)],
                        ),
                        Column::new(
                            "b_4".to_string(),
                            vec![Value::test_int(0), Value::test_int(1)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Create new dataframe with dummy variables from a series",
                example: "[1 2 2 3 3] | dfr to-df | dfr to-dummies",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "0_1".to_string(),
                            vec![
                                Value::test_int(1),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                            ],
                        ),
                        Column::new(
                            "0_2".to_string(),
                            vec![
                                Value::test_int(0),
                                Value::test_int(1),
                                Value::test_int(1),
                                Value::test_int(0),
                                Value::test_int(0),
                            ],
                        ),
                        Column::new(
                            "0_3".to_string(),
                            vec![
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(0),
                                Value::test_int(1),
                                Value::test_int(1),
                            ],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_ref()
        .to_dummies()
        .map_err(|e| {
            ShellError::SpannedLabeledErrorHelp(
                "Error calculating dummies".into(),
                e.to_string(),
                call.head,
                "The only allowed column types for dummies are String or Int".into(),
            )
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(Dummies {})])
    }
}
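The example output above ("0_1", "0_2", "0_3") shows the naming scheme used for dummy columns: one indicator column per distinct value, named <column>_<value>. A rough, polars-free sketch of that layout follows, for orientation only; the real work happens inside polars' to_dummies, and to_dummies here is just an illustrative stand-in.

use std::collections::BTreeSet;

// Sketch: build one 0/1 indicator column per distinct value of a series.
fn to_dummies(name: &str, values: &[i64]) -> Vec<(String, Vec<i64>)> {
    let distinct: BTreeSet<i64> = values.iter().copied().collect();
    distinct
        .into_iter()
        .map(|v| {
            let col: Vec<i64> = values.iter().map(|x| (*x == v) as i64).collect();
            (format!("{}_{}", name, v), col)
        })
        .collect()
}

fn main() {
    // Mirrors the series example: [1 2 2 3 3] -> columns 0_1, 0_2, 0_3.
    let cols = to_dummies("0", &[1, 2, 2, 3, 3]);
    assert_eq!(cols[0].0, "0_1");
    assert_eq!(cols[0].1, vec![1, 0, 0, 0, 0]);
}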
98  crates/nu-command/src/dataframe/eager/filter_with.rs (new file)
@@ -0,0 +1,98 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct FilterWith;

impl Command for FilterWith {
    fn name(&self) -> &str {
        "dfr filter-with"
    }

    fn usage(&self) -> &str {
        "Filters dataframe using a mask as reference"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("mask", SyntaxShape::Any, "boolean mask used to filter data")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Filter dataframe using a bool mask",
            example: r#"let mask = ([$true $false] | dfr to-df);
    [[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with $mask"#,
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let mask_value: Value = call.req(engine_state, stack, 0)?;

    let mask_span = mask_value.span()?;
    let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?;
    let mask = mask.bool().map_err(|e| {
        ShellError::SpannedLabeledErrorHelp(
            "Error casting to bool".into(),
            e.to_string(),
            mask_span,
            "Perhaps you want to use a series with booleans as mask".into(),
        )
    })?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_ref()
        .filter(mask)
        .map_err(|e| {
            ShellError::SpannedLabeledErrorHelp(
                "Error filtering dataframe".into(),
                e.to_string(),
                call.head,
                "Perhaps the mask size doesn't match the number of rows in the dataframe".into(),
            )
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(FilterWith {})])
    }
}
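dfr filter-with delegates the row selection to polars' DataFrame::filter, which keeps the rows whose mask entry is true. A plain-Rust sketch of that semantics, matching the example above (mask [true false] keeps only the [1 2] row); filter_rows is an illustrative name, not part of the commit.

// Sketch: keep only the rows whose corresponding mask entry is true.
fn filter_rows<T: Clone>(rows: &[T], mask: &[bool]) -> Vec<T> {
    rows.iter()
        .zip(mask)
        .filter(|(_, keep)| **keep)
        .map(|(row, _)| row.clone())
        .collect()
}

fn main() {
    let rows = vec![(1, 2), (3, 4)];
    let mask = vec![true, false];
    assert_eq!(filter_rows(&rows, &mask), vec![(1, 2)]);
}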
80  crates/nu-command/src/dataframe/eager/first.rs (new file)
@@ -0,0 +1,80 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};

#[derive(Clone)]
pub struct FirstDF;

impl Command for FirstDF {
    fn name(&self) -> &str {
        "dfr first"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe with first rows"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .optional("rows", SyntaxShape::Int, "Number of rows for head")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Create new dataframe with head rows",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr first 1",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
    let rows = rows.unwrap_or(DEFAULT_ROWS);

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
    let res = df.as_ref().head(Some(rows));
    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(res, call.head),
        None,
    ))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(FirstDF {})])
    }
}
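The only subtlety in dfr first (and dfr last below) is defaulting the optional row count before calling head or tail. A small sketch of that behaviour; the concrete value of DEFAULT_ROWS lives in values::utils and is assumed here to be 5.

// Sketch: fall back to a default row count, then clamp to what is available.
const DEFAULT_ROWS: usize = 5;

fn head<T>(data: &[T], rows: Option<usize>) -> &[T] {
    let rows = rows.unwrap_or(DEFAULT_ROWS).min(data.len());
    &data[..rows]
}

fn main() {
    let data = [1, 2, 3, 4];
    assert_eq!(head(&data, Some(1)), &[1]);
    assert_eq!(head(&data, None), &[1, 2, 3, 4]);
}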
88  crates/nu-command/src/dataframe/eager/get.rs (new file)
@@ -0,0 +1,88 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use crate::dataframe::values::utils::convert_columns_string;

use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct GetDF;

impl Command for GetDF {
    fn name(&self) -> &str {
        "dfr get"
    }

    fn usage(&self) -> &str {
        "Creates dataframe with the selected columns"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .rest("rest", SyntaxShape::Any, "column names to select from the dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Creates dataframe with selected columns",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr get a",
            result: Some(
                NuDataFrame::try_from_columns(vec![Column::new(
                    "a".to_string(),
                    vec![Value::test_int(1), Value::test_int(3)],
                )])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
    let (col_string, col_span) = convert_columns_string(columns, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_ref()
        .select(&col_string)
        .map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error selecting columns".into(),
                e.to_string(),
                col_span,
            )
        })
        .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(GetDF {})])
    }
}
71  crates/nu-command/src/dataframe/eager/groupby.rs (new file)
@@ -0,0 +1,71 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};

use super::super::values::{utils::convert_columns_string, NuDataFrame, NuGroupBy};

#[derive(Clone)]
pub struct CreateGroupBy;

impl Command for CreateGroupBy {
    fn name(&self) -> &str {
        "dfr group-by"
    }

    fn usage(&self) -> &str {
        "Creates a groupby object that can be used for other aggregations"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .rest("rest", SyntaxShape::Any, "groupby columns")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Grouping by column a",
            example: "[[a b]; [one 1] [one 2]] | dfr to-df | dfr group-by a",
            result: None,
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    // Extracting the names of the columns to perform the groupby
    let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;
    let (col_string, col_span) = convert_columns_string(columns, call.head)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    // This is the expensive part of the groupby; to create the
    // groups that will be used for grouping the data in the
    // dataframe. Once it has been done these values can be stored
    // in a NuGroupBy
    let groupby = df.as_ref().groupby(&col_string).map_err(|e| {
        ShellError::SpannedLabeledError("Error creating groupby".into(), e.to_string(), col_span)
    })?;

    let groups = groupby.get_groups().to_vec();
    let groupby = NuGroupBy::new(df.as_ref().clone(), col_string, groups);

    Ok(PipelineData::Value(groupby.into_value(call.head), None))
}
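The comment in command above is the key design note: the group indices are computed once by the dataframe groupby call and stored in a NuGroupBy, so that later dfr aggregate or dfr pivot calls can reuse them without regrouping. A rough, polars-free sketch of that compute-once/reuse idea; CachedGroups is purely illustrative and not the NuGroupBy API.

use std::collections::HashMap;

// Sketch: group row indices by a key once, then reuse them for several aggregations.
struct CachedGroups {
    groups: HashMap<String, Vec<usize>>,
}

impl CachedGroups {
    fn new(keys: &[&str]) -> Self {
        let mut groups: HashMap<String, Vec<usize>> = HashMap::new();
        for (row, key) in keys.iter().enumerate() {
            groups.entry(key.to_string()).or_default().push(row);
        }
        CachedGroups { groups }
    }

    // One of many aggregations that can reuse the cached group indices.
    fn sum(&self, values: &[i64]) -> HashMap<String, i64> {
        self.groups
            .iter()
            .map(|(k, rows)| (k.clone(), rows.iter().map(|&r| values[r]).sum()))
            .collect()
    }
}

fn main() {
    // Mirrors the example data: [[a b]; [one 1] [one 2]] summed over group "one".
    let cached = CachedGroups::new(&["one", "one"]);
    let sums = cached.sum(&[1, 2]);
    assert_eq!(sums["one"], 3);
}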
226  crates/nu-command/src/dataframe/eager/join.rs (new file)
@@ -0,0 +1,226 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
use polars::prelude::JoinType;
|
||||
|
||||
use crate::dataframe::values::utils::convert_columns_string;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct JoinDF;
|
||||
|
||||
impl Command for JoinDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr join"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Joins a dataframe using columns as reference"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required("dataframe", SyntaxShape::Any, "right dataframe to join")
|
||||
.required_named(
|
||||
"left",
|
||||
SyntaxShape::Table,
|
||||
"left column names to perform join",
|
||||
Some('l'),
|
||||
)
|
||||
.required_named(
|
||||
"right",
|
||||
SyntaxShape::Table,
|
||||
"right column names to perform join",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"type",
|
||||
SyntaxShape::String,
|
||||
"type of join. Inner by default",
|
||||
Some('t'),
|
||||
)
|
||||
.named(
|
||||
"suffix",
|
||||
SyntaxShape::String,
|
||||
"suffix for the columns of the right dataframe",
|
||||
Some('s'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "inner join dataframe",
|
||||
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dfr to-df);
|
||||
$right | dfr join $right -l [a b] -r [a b]"#,
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
Column::new(
|
||||
"c_right".to_string(),
|
||||
vec![Value::test_int(5), Value::test_int(5), Value::test_int(6)],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let r_df: Value = call.req(engine_state, stack, 0)?;
|
||||
let l_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "left")?
|
||||
.expect("required value in syntax");
|
||||
let r_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "right")?
|
||||
.expect("required value in syntax");
|
||||
let suffix: Option<String> = call.get_flag(engine_state, stack, "suffix")?;
|
||||
let join_type_op: Option<Spanned<String>> = call.get_flag(engine_state, stack, "type")?;
|
||||
|
||||
let join_type = match join_type_op {
|
||||
None => JoinType::Inner,
|
||||
Some(val) => match val.item.as_ref() {
|
||||
"inner" => JoinType::Inner,
|
||||
"outer" => JoinType::Outer,
|
||||
"left" => JoinType::Left,
|
||||
_ => {
|
||||
return Err(ShellError::SpannedLabeledErrorHelp(
|
||||
"Incorrect join type".into(),
|
||||
"Invalid join type".into(),
|
||||
val.span,
|
||||
"Options: inner, outer or left".into(),
|
||||
))
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let (l_col_string, l_col_span) = convert_columns_string(l_col, call.head)?;
|
||||
let (r_col_string, r_col_span) = convert_columns_string(r_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let r_df = NuDataFrame::try_from_value(r_df)?;
|
||||
|
||||
check_column_datatypes(
|
||||
df.as_ref(),
|
||||
r_df.as_ref(),
|
||||
&l_col_string,
|
||||
l_col_span,
|
||||
&r_col_string,
|
||||
r_col_span,
|
||||
)?;
|
||||
|
||||
df.as_ref()
|
||||
.join(
|
||||
r_df.as_ref(),
|
||||
&l_col_string,
|
||||
&r_col_string,
|
||||
join_type,
|
||||
suffix,
|
||||
)
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error joining dataframes".into(),
|
||||
e.to_string(),
|
||||
l_col_span,
|
||||
)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df_l: &polars::prelude::DataFrame,
|
||||
df_r: &polars::prelude::DataFrame,
|
||||
l_cols: &[T],
|
||||
l_col_span: Span,
|
||||
r_cols: &[T],
|
||||
r_col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if l_cols.len() != r_cols.len() {
|
||||
return Err(ShellError::SpannedLabeledErrorHelp(
|
||||
"Mismatched number of column names".into(),
|
||||
format!(
|
||||
"found {} left names vs {} right names",
|
||||
l_cols.len(),
|
||||
r_cols.len()
|
||||
),
|
||||
l_col_span,
|
||||
"perhaps you need to change the number of columns to join".into(),
|
||||
));
|
||||
}
|
||||
|
||||
for (l, r) in l_cols.iter().zip(r_cols) {
|
||||
let l_series = df_l.column(l.as_ref()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error selecting the columns".into(),
|
||||
e.to_string(),
|
||||
l_col_span,
|
||||
)
|
||||
})?;
|
||||
|
||||
let r_series = df_r.column(r.as_ref()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error selecting the columns".into(),
|
||||
e.to_string(),
|
||||
r_col_span,
|
||||
)
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::SpannedLabeledErrorHelp(
|
||||
"Mismatched datatypes".into(),
|
||||
format!(
|
||||
"left column type '{}' doesn't match '{}' right column match",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
),
|
||||
l_col_span,
|
||||
"perhaps you need to select other column to match".into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(JoinDF {})])
|
||||
}
|
||||
}
|
80  crates/nu-command/src/dataframe/eager/last.rs (new file)
@@ -0,0 +1,80 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use super::super::values::{utils::DEFAULT_ROWS, Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct LastDF;
|
||||
|
||||
impl Command for LastDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr last"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new dataframe with tail rows"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.optional("rows", SyntaxShape::Int, "Number of rows for tail")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr last 1",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new("a".to_string(), vec![Value::test_int(3)]),
|
||||
Column::new("b".to_string(), vec![Value::test_int(4)]),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let rows: Option<usize> = call.opt(engine_state, stack, 0)?;
|
||||
let rows = rows.unwrap_or(DEFAULT_ROWS);
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(LastDF {})])
|
||||
}
|
||||
}
|
243  crates/nu-command/src/dataframe/eager/melt.rs (new file)
@@ -0,0 +1,243 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
|
||||
};
|
||||
|
||||
use crate::dataframe::values::utils::convert_columns_string;
|
||||
|
||||
use super::super::values::{Column, NuDataFrame};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MeltDF;
|
||||
|
||||
impl Command for MeltDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr melt"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Unpivot a DataFrame from wide to long format"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table,
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table,
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
.named(
|
||||
"variable-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for variable column",
|
||||
Some('r'),
|
||||
)
|
||||
.named(
|
||||
"value-name",
|
||||
SyntaxShape::String,
|
||||
"optional name for value column",
|
||||
Some('l'),
|
||||
)
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dfr to-df | dfr melt -c [b c] -v [a d]",
|
||||
result: Some(
|
||||
NuDataFrame::try_from_columns(vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
Value::test_int(1),
|
||||
Value::test_int(2),
|
||||
Value::test_int(3),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
Value::test_int(4),
|
||||
Value::test_int(5),
|
||||
Value::test_int(6),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
Value::test_string("d"),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
Value::test_string("x"),
|
||||
Value::test_string("y"),
|
||||
Value::test_string("z"),
|
||||
Value::test_string("a"),
|
||||
Value::test_string("b"),
|
||||
Value::test_string("c"),
|
||||
],
|
||||
),
|
||||
])
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Span::test_data()),
|
||||
),
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let id_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "columns")?
|
||||
.expect("required value");
|
||||
let val_col: Vec<Value> = call
|
||||
.get_flag(engine_state, stack, "values")?
|
||||
.expect("required value");
|
||||
|
||||
let value_name: Option<Spanned<String>> = call.get_flag(engine_state, stack, "value-name")?;
|
||||
let variable_name: Option<Spanned<String>> =
|
||||
call.get_flag(engine_state, stack, "variable-name")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?;
|
||||
let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?;
|
||||
|
||||
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?;
|
||||
|
||||
let mut res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error calculating melt".into(),
|
||||
e.to_string(),
|
||||
call.head,
|
||||
)
|
||||
})?;
|
||||
|
||||
if let Some(name) = &variable_name {
|
||||
res.rename("variable", &name.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error renaming column".into(),
|
||||
e.to_string(),
|
||||
name.span,
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Some(name) = &value_name {
|
||||
res.rename("value", &name.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error renaming column".into(),
|
||||
e.to_string(),
|
||||
name.span,
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(PipelineData::Value(
|
||||
NuDataFrame::dataframe_into_value(res, call.head),
|
||||
None,
|
||||
))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
df: &polars::prelude::DataFrame,
|
||||
cols: &[T],
|
||||
col_span: Span,
|
||||
) -> Result<(), ShellError> {
|
||||
if cols.is_empty() {
|
||||
return Err(ShellError::SpannedLabeledError(
|
||||
"Merge error".into(),
|
||||
"empty column list".into(),
|
||||
col_span,
|
||||
));
|
||||
}
|
||||
|
||||
// Checking if they are same type
|
||||
if cols.len() > 1 {
|
||||
for w in cols.windows(2) {
|
||||
let l_series = df.column(w[0].as_ref()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error selecting columns".into(),
|
||||
e.to_string(),
|
||||
col_span,
|
||||
)
|
||||
})?;
|
||||
|
||||
let r_series = df.column(w[1].as_ref()).map_err(|e| {
|
||||
ShellError::SpannedLabeledError(
|
||||
"Error selecting columns".into(),
|
||||
e.to_string(),
|
||||
col_span,
|
||||
)
|
||||
})?;
|
||||
|
||||
if l_series.dtype() != r_series.dtype() {
|
||||
return Err(ShellError::SpannedLabeledErrorHelp(
|
||||
"Merge error".into(),
|
||||
"found different column types in list".into(),
|
||||
col_span,
|
||||
format!(
|
||||
"datatypes {} and {} are incompatible",
|
||||
l_series.dtype(),
|
||||
r_series.dtype()
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::super::super::test_dataframe::test_dataframe;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_examples() {
|
||||
test_dataframe(vec![Box::new(MeltDF {})])
|
||||
}
|
||||
}
|
105  crates/nu-command/src/dataframe/eager/mod.rs (new file)
@@ -0,0 +1,105 @@
mod aggregate;
mod append;
mod column;
mod command;
mod describe;
mod drop;
mod drop_nulls;
mod dtypes;
mod dummies;
mod filter_with;
mod first;
mod get;
mod groupby;
mod join;
mod last;
mod melt;
mod open;
mod pivot;
mod rename;
mod sample;
mod shape;
mod slice;
mod sort;
mod take;
mod to_csv;
mod to_df;
mod to_nu;
mod to_parquet;
mod with_column;

use nu_protocol::engine::StateWorkingSet;

pub use aggregate::Aggregate;
pub use append::AppendDF;
pub use column::ColumnDF;
pub use command::Dataframe;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use dummies::Dummies;
pub use filter_with::FilterWith;
pub use first::FirstDF;
pub use get::GetDF;
pub use groupby::CreateGroupBy;
pub use join::JoinDF;
pub use last::LastDF;
pub use melt::MeltDF;
pub use open::OpenDataFrame;
pub use pivot::PivotDF;
pub use rename::RenameDF;
pub use sample::SampleDF;
pub use shape::ShapeDF;
pub use slice::SliceDF;
pub use sort::SortDF;
pub use take::TakeDF;
pub use to_csv::ToCSV;
pub use to_df::ToDataFrame;
pub use to_nu::ToNu;
pub use to_parquet::ToParquet;
pub use with_column::WithColumn;

pub fn add_eager_decls(working_set: &mut StateWorkingSet) {
    macro_rules! bind_command {
        ( $command:expr ) => {
            working_set.add_decl(Box::new($command));
        };
        ( $( $command:expr ),* ) => {
            $( working_set.add_decl(Box::new($command)); )*
        };
    }

    // Dataframe commands
    bind_command!(
        Aggregate,
        AppendDF,
        ColumnDF,
        CreateGroupBy,
        Dataframe,
        DataTypes,
        DescribeDF,
        DropDF,
        DropNulls,
        Dummies,
        FilterWith,
        FirstDF,
        GetDF,
        JoinDF,
        LastDF,
        MeltDF,
        OpenDataFrame,
        PivotDF,
        RenameDF,
        SampleDF,
        ShapeDF,
        SliceDF,
        SortDF,
        TakeDF,
        ToCSV,
        ToDataFrame,
        ToNu,
        ToParquet,
        WithColumn
    );
}
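add_eager_decls registers every command through the local bind_command! macro, which expands to one working_set.add_decl(Box::new(...)) per listed command. Below is a compact, self-contained illustration of the same macro pattern; Registry is a stand-in for StateWorkingSet, which is what actually owns add_decl in nushell.

trait Command {
    fn name(&self) -> &str;
}

// Stand-in for StateWorkingSet: it only collects boxed command declarations.
struct Registry {
    decls: Vec<Box<dyn Command>>,
}

// Same shape as the commit's macro: repeat the push for every listed command.
macro_rules! bind_command {
    ( $registry:expr, $( $command:expr ),* $(,)? ) => {
        $( $registry.decls.push(Box::new($command)); )*
    };
}

struct FirstDF;
impl Command for FirstDF {
    fn name(&self) -> &str {
        "dfr first"
    }
}

struct LastDF;
impl Command for LastDF {
    fn name(&self) -> &str {
        "dfr last"
    }
}

fn main() {
    let mut registry = Registry { decls: Vec::new() };
    bind_command!(registry, FirstDF, LastDF);
    assert_eq!(registry.decls.len(), 2);
    assert_eq!(registry.decls[0].name(), "dfr first");
}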
@@ -1,4 +1,4 @@
use super::values::NuDataFrame;
use super::super::values::NuDataFrame;
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
175  crates/nu-command/src/dataframe/eager/pivot.rs (new file)
@@ -0,0 +1,175 @@
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::{
|
||||
ast::Call,
|
||||
engine::{Command, EngineState, Stack},
|
||||
Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
|
||||
};
|
||||
use polars::prelude::DataType;
|
||||
|
||||
use crate::dataframe::values::NuGroupBy;
|
||||
|
||||
use super::super::values::NuDataFrame;
|
||||
|
||||
enum Operation {
|
||||
First,
|
||||
Sum,
|
||||
Min,
|
||||
Max,
|
||||
Mean,
|
||||
Median,
|
||||
}
|
||||
|
||||
impl Operation {
|
||||
fn from_tagged(name: Spanned<String>) -> Result<Operation, ShellError> {
|
||||
match name.item.as_ref() {
|
||||
"first" => Ok(Operation::First),
|
||||
"sum" => Ok(Operation::Sum),
|
||||
"min" => Ok(Operation::Min),
|
||||
"max" => Ok(Operation::Max),
|
||||
"mean" => Ok(Operation::Mean),
|
||||
"median" => Ok(Operation::Median),
|
||||
_ => Err(ShellError::SpannedLabeledErrorHelp(
|
||||
"Operation not fount".into(),
|
||||
"Operation does not exist for pivot".into(),
|
||||
name.span,
|
||||
"Options: first, sum, min, max, mean, median".into(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct PivotDF;
|
||||
|
||||
impl Command for PivotDF {
|
||||
fn name(&self) -> &str {
|
||||
"dfr pivot"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs a pivot operation on a groupby object"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build(self.name())
|
||||
.required(
|
||||
"pivot-column",
|
||||
SyntaxShape::String,
|
||||
"pivot column to perform pivot",
|
||||
)
|
||||
.required(
|
||||
"value-column",
|
||||
SyntaxShape::String,
|
||||
"value column to perform pivot",
|
||||
)
|
||||
.required("operation", SyntaxShape::String, "aggregate operation")
|
||||
.category(Category::Custom("dataframe".into()))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Pivot a dataframe on b and aggregation on col c",
|
||||
example:
|
||||
"[[a b c]; [one x 1] [two y 2]] | dfr to-df | dfr group-by a | dfr pivot b c sum",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
command(engine_state, stack, call, input)
|
||||
}
|
||||
}
|
||||
|
||||
fn command(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let pivot_col: Spanned<String> = call.req(engine_state, stack, 0)?;
|
||||
let value_col: Spanned<String> = call.req(engine_state, stack, 1)?;
|
||||
let operation: Spanned<String> = call.req(engine_state, stack, 2)?;
|
||||
let op = Operation::from_tagged(operation)?;
|
||||
|
||||
let nu_groupby = NuGroupBy::try_from_pipeline(input, call.head)?;
|
||||
let df_ref = nu_groupby.as_ref();
|
||||
|
||||
check_pivot_column(df_ref, &pivot_col)?;
|
||||
check_value_column(df_ref, &value_col)?;
|
||||
|
||||
let mut groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let pivot = groupby.pivot(&pivot_col.item, &value_col.item);
|
||||
|
||||
match op {
|
||||
Operation::Mean => pivot.mean(),
|
||||
Operation::Sum => pivot.sum(),
|
||||
Operation::Min => pivot.min(),
|
||||
Operation::Max => pivot.max(),
|
||||
Operation::First => pivot.first(),
|
||||
Operation::Median => pivot.median(),
|
||||
}
|
||||
.map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Error creating pivot".into(), e.to_string(), call.head)
|
||||
})
|
||||
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
|
||||
}
|
||||
|
||||
fn check_pivot_column(
|
||||
df: &polars::prelude::DataFrame,
|
||||
col: &Spanned<String>,
|
||||
) -> Result<(), ShellError> {
|
||||
let series = df.column(&col.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Column not found".into(), e.to_string(), col.span)
|
||||
})?;
|
||||
|
||||
match series.dtype() {
|
||||
DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::Utf8 => Ok(()),
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Pivot error".into(),
|
||||
format!("Unsupported datatype {}", series.dtype()),
|
||||
col.span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_value_column(
|
||||
df: &polars::prelude::DataFrame,
|
||||
col: &Spanned<String>,
|
||||
) -> Result<(), ShellError> {
|
||||
let series = df.column(&col.item).map_err(|e| {
|
||||
ShellError::SpannedLabeledError("Column not found".into(), e.to_string(), col.span)
|
||||
})?;
|
||||
|
||||
match series.dtype() {
|
||||
DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::Float32
|
||||
| DataType::Float64 => Ok(()),
|
||||
_ => Err(ShellError::SpannedLabeledError(
|
||||
"Pivot error".into(),
|
||||
format!("Unsupported datatype {}", series.dtype()),
|
||||
col.span,
|
||||
)),
|
||||
}
|
||||
}
|
94  crates/nu-command/src/dataframe/eager/rename.rs (new file)
@@ -0,0 +1,94 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct RenameDF;

impl Command for RenameDF {
    fn name(&self) -> &str {
        "dfr rename-col"
    }

    fn usage(&self) -> &str {
        "Rename a dataframe column"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("from", SyntaxShape::String, "column name to be renamed")
            .required("to", SyntaxShape::String, "new column name")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Renames a dataframe column",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename-col a a_new",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new(
                        "a_new".to_string(),
                        vec![Value::test_int(1), Value::test_int(3)],
                    ),
                    Column::new(
                        "b".to_string(),
                        vec![Value::test_int(2), Value::test_int(4)],
                    ),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let from: String = call.req(engine_state, stack, 0)?;
    let to: String = call.req(engine_state, stack, 1)?;

    let mut df = NuDataFrame::try_from_pipeline(input, call.head)?;

    df.as_mut()
        .rename(&from, &to)
        .map_err(|e| {
            ShellError::SpannedLabeledError("Error renaming".into(), e.to_string(), call.head)
        })
        .map(|df| {
            PipelineData::Value(
                NuDataFrame::dataframe_into_value(df.clone(), call.head),
                None,
            )
        })
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(RenameDF {})])
    }
}
106
crates/nu-command/src/dataframe/eager/sample.rs
Normal file
@ -0,0 +1,106 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape,
};

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct SampleDF;

impl Command for SampleDF {
    fn name(&self) -> &str {
        "dfr sample"
    }

    fn usage(&self) -> &str {
        "Create sample dataframe"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .named(
                "n-rows",
                SyntaxShape::Int,
                "number of rows to be taken from dataframe",
                Some('n'),
            )
            .named(
                "fraction",
                SyntaxShape::Number,
                "fraction of dataframe to be taken",
                Some('f'),
            )
            .switch("replace", "sample with replacement", Some('e'))
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Sample rows from dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr sample -n 1",
                result: None, // No expected value because sampling is random
            },
            Example {
                description: "Shows a sample row using fraction and replacement",
                example: "[[a b]; [1 2] [3 4] [5 6]] | dfr to-df | dfr sample -f 0.5 -e",
                result: None, // No expected value because sampling is random
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let rows: Option<Spanned<usize>> = call.get_flag(engine_state, stack, "n-rows")?;
    let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
    let replace: bool = call.has_flag("replace");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    match (rows, fraction) {
        (Some(rows), None) => df.as_ref().sample_n(rows.item, replace).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error creating sample".into(),
                e.to_string(),
                rows.span,
            )
        }),
        (None, Some(frac)) => df.as_ref().sample_frac(frac.item, replace).map_err(|e| {
            ShellError::SpannedLabeledError(
                "Error creating sample".into(),
                e.to_string(),
                frac.span,
            )
        }),
        (Some(_), Some(_)) => Err(ShellError::SpannedLabeledError(
            "Incompatible flags".into(),
            "Only one selection criterion allowed".into(),
            call.head,
        )),
        (None, None) => Err(ShellError::SpannedLabeledErrorHelp(
            "No selection".into(),
            "No selection criterion was found".into(),
            call.head,
            "Perhaps you want to use the flag -n or -f".into(),
        )),
    }
    .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}
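For quick reference, the sampling flags declared above are mutually exclusive: -n takes a fixed number of rows, -f takes a fraction, and -e samples with replacement. The pipelines below simply restate the examples declared in the file above; output is omitted because sampling is random:

    [[a b]; [1 2] [3 4]] | dfr to-df | dfr sample -n 1
    [[a b]; [1 2] [3 4] [5 6]] | dfr to-df | dfr sample -f 0.5 -e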
87
crates/nu-command/src/dataframe/eager/shape.rs
Normal file
@ -0,0 +1,87 @@
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, Value,
};

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct ShapeDF;

impl Command for ShapeDF {
    fn name(&self) -> &str {
        "dfr shape"
    }

    fn usage(&self) -> &str {
        "Shows column and row size for a dataframe"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name()).category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Shows row and column shape",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr shape",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("rows".to_string(), vec![Value::test_int(2)]),
                    Column::new("columns".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    _engine_state: &EngineState,
    _stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let rows = Value::Int {
        val: df.as_ref().height() as i64,
        span: call.head,
    };

    let cols = Value::Int {
        val: df.as_ref().width() as i64,
        span: call.head,
    };

    let rows_col = Column::new("rows".to_string(), vec![rows]);
    let cols_col = Column::new("columns".to_string(), vec![cols]);

    NuDataFrame::try_from_columns(vec![rows_col, cols_col])
        .map(|df| PipelineData::Value(df.into_value(call.head), None))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(ShapeDF {})])
    }
}
85
crates/nu-command/src/dataframe/eager/slice.rs
Normal file
@ -0,0 +1,85 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct SliceDF;

impl Command for SliceDF {
    fn name(&self) -> &str {
        "dfr slice"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe from a slice of rows"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("offset", SyntaxShape::Int, "start of slice")
            .required("size", SyntaxShape::Int, "size of slice")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Create new dataframe from a slice of the rows",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr slice 0 1",
            result: Some(
                NuDataFrame::try_from_columns(vec![
                    Column::new("a".to_string(), vec![Value::test_int(1)]),
                    Column::new("b".to_string(), vec![Value::test_int(2)]),
                ])
                .expect("simple df for test should not fail")
                .into_value(Span::test_data()),
            ),
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let offset: i64 = call.req(engine_state, stack, 0)?;
    let size: usize = call.req(engine_state, stack, 1)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let res = df.as_ref().slice(offset, size);

    Ok(PipelineData::Value(
        NuDataFrame::dataframe_into_value(res, call.head),
        None,
    ))
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SliceDF {})])
    }
}
142
crates/nu-command/src/dataframe/eager/sort.rs
Normal file
@ -0,0 +1,142 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};

use crate::dataframe::values::{utils::convert_columns_string, Column};

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct SortDF;

impl Command for SortDF {
    fn name(&self) -> &str {
        "dfr sort"
    }

    fn usage(&self) -> &str {
        "Creates new sorted dataframe or series"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .switch("reverse", "invert sort", Some('r'))
            .rest("rest", SyntaxShape::Any, "column names to sort dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Create new sorted dataframe",
                example: "[[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a",
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a".to_string(),
                            vec![Value::test_int(1), Value::test_int(3)],
                        ),
                        Column::new(
                            "b".to_string(),
                            vec![Value::test_int(2), Value::test_int(4)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Create new sorted series",
                example: "[3 4 1 2] | dfr to-df | dfr sort",
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![
                            Value::test_int(1),
                            Value::test_int(2),
                            Value::test_int(3),
                            Value::test_int(4),
                        ],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let reverse = call.has_flag("reverse");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    if df.is_series() {
        let columns = df.as_ref().get_column_names();

        df.as_ref()
            .sort(columns, reverse)
            .map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error sorting dataframe".into(),
                    e.to_string(),
                    call.head,
                )
            })
            .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
    } else {
        let columns: Vec<Value> = call.rest(engine_state, stack, 0)?;

        if !columns.is_empty() {
            let (col_string, col_span) = convert_columns_string(columns, call.head)?;

            df.as_ref()
                .sort(&col_string, reverse)
                .map_err(|e| {
                    ShellError::SpannedLabeledError(
                        "Error sorting dataframe".into(),
                        e.to_string(),
                        col_span,
                    )
                })
                .map(|df| {
                    PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)
                })
        } else {
            Err(ShellError::SpannedLabeledError(
                "Missing columns".into(),
                "missing column name to perform sort".into(),
                call.head,
            ))
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(SortDF {})])
    }
}
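As a usage note, dfr sort sorts a series directly, while a dataframe is sorted by the column names passed as rest arguments (an error is raised when none are given). The pipelines below restate the examples declared in the file above:

    [[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a
    [3 4 1 2] | dfr to-df | dfr sort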
144
crates/nu-command/src/dataframe/eager/take.rs
Normal file
@ -0,0 +1,144 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
};
use polars::prelude::DataType;

use crate::dataframe::values::Column;

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct TakeDF;

impl Command for TakeDF {
    fn name(&self) -> &str {
        "dfr take"
    }

    fn usage(&self) -> &str {
        "Creates new dataframe using the given indices"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required(
                "indices",
                SyntaxShape::Any,
                "list of indices used to take data",
            )
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Takes selected rows from dataframe",
                example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dfr to-df);
    let indices = ([0 2] | dfr to-df);
    $df | dfr take $indices"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![
                        Column::new(
                            "a".to_string(),
                            vec![Value::test_int(4), Value::test_int(4)],
                        ),
                        Column::new(
                            "b".to_string(),
                            vec![Value::test_int(1), Value::test_int(3)],
                        ),
                    ])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
            Example {
                description: "Takes selected rows from series",
                example: r#"let series = ([4 1 5 2 4 3] | dfr to-df);
    let indices = ([0 2] | dfr to-df);
    $series | dfr take $indices"#,
                result: Some(
                    NuDataFrame::try_from_columns(vec![Column::new(
                        "0".to_string(),
                        vec![Value::test_int(4), Value::test_int(5)],
                    )])
                    .expect("simple df for test should not fail")
                    .into_value(Span::test_data()),
                ),
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let index_value: Value = call.req(engine_state, stack, 0)?;
    let index_span = index_value.span()?;
    let index = NuDataFrame::try_from_value(index_value)?.as_series(index_span)?;

    let casted = match index.dtype() {
        DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => {
            index.cast(&DataType::UInt32).map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error casting index list".into(),
                    e.to_string(),
                    index_span,
                )
            })
        }
        _ => Err(ShellError::SpannedLabeledErrorHelp(
            "Incorrect type".into(),
            "Series with incorrect type".into(),
            call.head,
            "Consider using a Series of type int".into(),
        )),
    }?;

    let indices = casted.u32().map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error casting index list".into(),
            e.to_string(),
            index_span,
        )
    })?;

    NuDataFrame::try_from_pipeline(input, call.head).and_then(|df| {
        df.as_ref()
            .take(indices)
            .map_err(|e| {
                ShellError::SpannedLabeledError(
                    "Error taking values".into(),
                    e.to_string(),
                    call.head,
                )
            })
            .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
    })
}

#[cfg(test)]
mod test {
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
    fn test_examples() {
        test_dataframe(vec![Box::new(TakeDF {})])
    }
}
132
crates/nu-command/src/dataframe/eager/to_csv.rs
Normal file
@ -0,0 +1,132 @@
use std::{fs::File, path::PathBuf};

use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Value,
};
use polars::prelude::{CsvWriter, SerWriter};

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct ToCSV;

impl Command for ToCSV {
    fn name(&self) -> &str {
        "dfr to-csv"
    }

    fn usage(&self) -> &str {
        "Saves dataframe to csv file"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("file", SyntaxShape::Filepath, "file path to save dataframe")
            .named(
                "delimiter",
                SyntaxShape::String,
                "file delimiter character",
                Some('d'),
            )
            .switch("no-header", "Indicates if file doesn't have header", None)
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Saves dataframe to csv file",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-csv test.csv",
                result: None,
            },
            Example {
                description: "Saves dataframe to csv file using other delimiter",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-csv test.csv -d '|'",
                result: None,
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;
    let delimiter: Option<Spanned<String>> = call.get_flag(engine_state, stack, "delimiter")?;
    let no_header: bool = call.has_flag("no-header");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let mut file = File::create(&file_name.item).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error with file name".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    let writer = CsvWriter::new(&mut file);

    let writer = if no_header {
        writer.has_header(false)
    } else {
        writer.has_header(true)
    };

    let writer = match delimiter {
        None => writer,
        Some(d) => {
            if d.item.len() != 1 {
                return Err(ShellError::SpannedLabeledError(
                    "Incorrect delimiter".into(),
                    "Delimiter has to be one character".into(),
                    d.span,
                ));
            } else {
                let delimiter = match d.item.chars().next() {
                    Some(d) => d as u8,
                    None => unreachable!(),
                };

                writer.with_delimiter(delimiter)
            }
        }
    };

    writer.finish(df.as_ref()).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error writing to file".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    let file_value = Value::String {
        val: format!("saved {:?}", &file_name.item),
        span: file_name.span,
    };

    Ok(PipelineData::Value(
        Value::List {
            vals: vec![file_value],
            span: call.head,
        },
        None,
    ))
}
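Note that the delimiter passed with -d must be a single character and that --no-header writes the file without a header row. A usage sketch based on the examples declared in the file above (test.csv is only an illustrative file name):

    [[a b]; [1 2] [3 4]] | dfr to-df | dfr to-csv test.csv -d '|'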
@ -1,4 +1,4 @@
use super::values::{Column, NuDataFrame};
use super::super::values::{Column, NuDataFrame};

use nu_protocol::{
    ast::Call,
@ -117,7 +117,7 @@ impl Command for ToDataFrame {

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
83
crates/nu-command/src/dataframe/eager/to_nu.rs
Normal file
@ -0,0 +1,83 @@
use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value,
};

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct ToNu;

impl Command for ToNu {
    fn name(&self) -> &str {
        "dfr to-nu"
    }

    fn usage(&self) -> &str {
        "Converts a section of the dataframe to a Nushell table"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .named(
                "n-rows",
                SyntaxShape::Number,
                "number of rows to be shown",
                Some('n'),
            )
            .switch("tail", "shows tail rows", Some('t'))
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Shows head rows from dataframe",
                example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-nu",
                result: None,
            },
            Example {
                description: "Shows tail rows from dataframe",
                example: "[[a b]; [1 2] [3 4] [5 6]] | dfr to-df | dfr to-nu -t -n 1",
                result: None,
            },
        ]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let rows: Option<usize> = call.get_flag(engine_state, stack, "n-rows")?;
    let tail: bool = call.has_flag("tail");

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let values = if tail {
        df.tail(rows, call.head)?
    } else {
        df.head(rows, call.head)?
    };

    let value = Value::List {
        vals: values,
        span: call.head,
    };

    Ok(PipelineData::Value(value, None))
}
84
crates/nu-command/src/dataframe/eager/to_parquet.rs
Normal file
@ -0,0 +1,84 @@
use std::{fs::File, path::PathBuf};

use nu_engine::CallExt;
use nu_protocol::{
    ast::Call,
    engine::{Command, EngineState, Stack},
    Category, Example, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Value,
};
use polars::prelude::ParquetWriter;

use super::super::values::NuDataFrame;

#[derive(Clone)]
pub struct ToParquet;

impl Command for ToParquet {
    fn name(&self) -> &str {
        "dfr to-parquet"
    }

    fn usage(&self) -> &str {
        "Saves dataframe to parquet file"
    }

    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .required("file", SyntaxShape::Filepath, "file path to save dataframe")
            .category(Category::Custom("dataframe".into()))
    }

    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Saves dataframe to parquet file",
            example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr to-parquet test.parquet",
            result: None,
        }]
    }

    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        command(engine_state, stack, call, input)
    }
}

fn command(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let file_name: Spanned<PathBuf> = call.req(engine_state, stack, 0)?;

    let df = NuDataFrame::try_from_pipeline(input, call.head)?;

    let file = File::create(&file_name.item).map_err(|e| {
        ShellError::SpannedLabeledError(
            "Error with file name".into(),
            e.to_string(),
            file_name.span,
        )
    })?;

    ParquetWriter::new(file).finish(df.as_ref()).map_err(|e| {
        ShellError::SpannedLabeledError("Error saving file".into(), e.to_string(), file_name.span)
    })?;

    let file_value = Value::String {
        val: format!("saved {:?}", &file_name.item),
        span: file_name.span,
    };

    Ok(PipelineData::Value(
        Value::List {
            vals: vec![file_value],
            span: call.head,
        },
        None,
    ))
}
@ -5,7 +5,7 @@ use nu_protocol::{
    Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};

use super::values::{Column, NuDataFrame};
use super::super::values::{Column, NuDataFrame};

#[derive(Clone)]
pub struct WithColumn;
@ -99,7 +99,7 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::test_dataframe::test_dataframe;
    use super::super::super::test_dataframe::test_dataframe;
    use super::*;

    #[test]
@ -1,101 +1,15 @@
mod eager;
mod series;
mod values;

mod append;
mod column;
mod command;
mod describe;
mod drop;
mod drop_nulls;
mod dtypes;
mod open;
mod to_df;
mod with_column;

pub use series::*;

pub use append::AppendDF;
pub use column::ColumnDF;
pub use command::Dataframe;
pub use describe::DescribeDF;
pub use drop::DropDF;
pub use drop_nulls::DropNulls;
pub use dtypes::DataTypes;
pub use open::OpenDataFrame;
pub use to_df::ToDataFrame;
pub use with_column::WithColumn;
pub use eager::add_eager_decls;
pub use series::add_series_decls;

use nu_protocol::engine::StateWorkingSet;

pub fn add_dataframe_decls(working_set: &mut StateWorkingSet) {
    macro_rules! bind_command {
        ( $command:expr ) => {
            working_set.add_decl(Box::new($command));
        };
        ( $( $command:expr ),* ) => {
            $( working_set.add_decl(Box::new($command)); )*
        };
    }

    // Series commands
    bind_command!(
        AllFalse,
        AllTrue,
        ArgMax,
        ArgMin,
        ArgSort,
        ArgTrue,
        ArgUnique,
        Concatenate,
        Contains,
        Cumulative,
        GetDay,
        GetHour,
        GetMinute,
        GetMonth,
        GetNanosecond,
        GetOrdinal,
        GetSecond,
        GetWeek,
        GetWeekDay,
        GetYear,
        IsDuplicated,
        IsIn,
        IsNotNull,
        IsNull,
        IsUnique,
        NNull,
        NUnique,
        NotSeries,
        Rename,
        Replace,
        ReplaceAll,
        Rolling,
        SetSeries,
        SetWithIndex,
        Shift,
        StrLengths,
        StrSlice,
        StrFTime,
        ToLowerCase,
        ToUpperCase,
        Unique,
        ValueCount
    );

    // Dataframe commands
    bind_command!(
        AppendDF,
        ColumnDF,
        Dataframe,
        DataTypes,
        DescribeDF,
        DropDF,
        DropNulls,
        OpenDataFrame,
        ToDataFrame,
        WithColumn
    );
    add_series_decls(working_set);
    add_eager_decls(working_set);
}

#[cfg(test)]
@ -23,6 +23,8 @@ mod shift;
mod unique;
mod value_counts;

use nu_protocol::engine::StateWorkingSet;

pub use all_false::AllFalse;
pub use all_true::AllTrue;
pub use arg_max::ArgMax;
@ -35,3 +37,60 @@ pub use rolling::Rolling;
pub use shift::Shift;
pub use unique::Unique;
pub use value_counts::ValueCount;

pub fn add_series_decls(working_set: &mut StateWorkingSet) {
    macro_rules! bind_command {
        ( $command:expr ) => {
            working_set.add_decl(Box::new($command));
        };
        ( $( $command:expr ),* ) => {
            $( working_set.add_decl(Box::new($command)); )*
        };
    }

    // Series commands
    bind_command!(
        AllFalse,
        AllTrue,
        ArgMax,
        ArgMin,
        ArgSort,
        ArgTrue,
        ArgUnique,
        Concatenate,
        Contains,
        Cumulative,
        GetDay,
        GetHour,
        GetMinute,
        GetMonth,
        GetNanosecond,
        GetOrdinal,
        GetSecond,
        GetWeek,
        GetWeekDay,
        GetYear,
        IsDuplicated,
        IsIn,
        IsNotNull,
        IsNull,
        IsUnique,
        NNull,
        NUnique,
        NotSeries,
        Rename,
        Replace,
        ReplaceAll,
        Rolling,
        SetSeries,
        SetWithIndex,
        Shift,
        StrLengths,
        StrSlice,
        StrFTime,
        ToLowerCase,
        ToUpperCase,
        Unique,
        ValueCount
    );
}
@ -162,8 +162,8 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::super::eager::DropNulls;
    use super::super::super::test_dataframe::test_dataframe;
    use super::super::super::DropNulls;
    use super::*;

    #[test]
@ -68,8 +68,8 @@ fn command(

#[cfg(test)]
mod test {
    use super::super::super::eager::DropNulls;
    use super::super::super::test_dataframe::test_dataframe;
    use super::super::super::DropNulls;
    use super::*;

    #[test]
@ -5,7 +5,7 @@ use nu_protocol::{
    PipelineData, Span, Value, CONFIG_VARIABLE_ID,
};

use super::ToDataFrame;
use super::eager::ToDataFrame;
use crate::Let;

pub fn test_dataframe(cmds: Vec<Box<dyn Command + 'static>>) {
@ -1,4 +1,6 @@
mod nu_dataframe;
mod nu_groupby;
pub mod utils;

pub use nu_dataframe::{Axis, Column, NuDataFrame};
pub use nu_groupby::NuGroupBy;
@ -1,5 +1,5 @@
use super::NuDataFrame;
use nu_protocol::{ast::Operator, Category, CustomValue, ShellError, Span, Value};
use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Value};

// CustomValue implementation for NuDataFrame
impl CustomValue for NuDataFrame {
@ -20,10 +20,6 @@ impl CustomValue for NuDataFrame {
        }
    }

    fn category(&self) -> Category {
        Category::Custom(self.typetag_name().into())
    }

    fn value_string(&self) -> String {
        self.typetag_name().to_string()
    }
@ -12,6 +12,8 @@ use polars::prelude::{DataFrame, DataType, PolarsObject, Series};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, hash::Hasher};

use super::utils::DEFAULT_ROWS;

// DataFrameValue is an encapsulation of Nushell Value that can be used
// to define the PolarsObject Trait. The polars object trait allows the
// creation of dataframes with mixed datatypes
@ -283,7 +285,7 @@ impl NuDataFrame {
    pub fn tail(&self, rows: Option<usize>, span: Span) -> Result<Vec<Value>, ShellError> {
        let df = &self.0;
        let to_row = df.height();
        let size = rows.unwrap_or(5);
        let size = rows.unwrap_or(DEFAULT_ROWS);
        let from_row = to_row.saturating_sub(size);

        let values = self.to_rows(from_row, to_row, span)?;
@ -0,0 +1,44 @@
use super::NuGroupBy;
use nu_protocol::{CustomValue, ShellError, Span, Value};

// CustomValue implementation for NuGroupBy
impl CustomValue for NuGroupBy {
    fn typetag_name(&self) -> &'static str {
        "groupby"
    }

    fn typetag_deserialize(&self) {
        unimplemented!("typetag_deserialize")
    }

    fn clone_value(&self, span: nu_protocol::Span) -> Value {
        let cloned = NuGroupBy {
            dataframe: self.dataframe.clone(),
            by: self.by.clone(),
            groups: self.groups.clone(),
        };

        Value::CustomValue {
            val: Box::new(cloned),
            span,
        }
    }

    fn value_string(&self) -> String {
        self.typetag_name().to_string()
    }

    fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
        let vals = self.print(span)?;

        Ok(Value::List { vals, span })
    }

    fn to_json(&self) -> nu_json::Value {
        nu_json::Value::Null
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
}
89
crates/nu-command/src/dataframe/values/nu_groupby/mod.rs
Normal file
@ -0,0 +1,89 @@
mod custom_value;

use nu_protocol::{PipelineData, ShellError, Span, Value};
use polars::frame::groupby::{GroupBy, GroupTuples};
use polars::prelude::DataFrame;
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
pub struct NuGroupBy {
    dataframe: DataFrame,
    by: Vec<String>,
    groups: GroupTuples,
}

impl NuGroupBy {
    pub fn new(dataframe: DataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
        NuGroupBy {
            dataframe,
            by,
            groups,
        }
    }

    pub fn into_value(self, span: Span) -> Value {
        Value::CustomValue {
            val: Box::new(self),
            span,
        }
    }

    pub fn try_from_value(value: Value) -> Result<Self, ShellError> {
        match value {
            Value::CustomValue { val, span } => match val.as_any().downcast_ref::<NuGroupBy>() {
                Some(groupby) => Ok(NuGroupBy {
                    dataframe: groupby.dataframe.clone(),
                    by: groupby.by.clone(),
                    groups: groupby.groups.clone(),
                }),
                None => Err(ShellError::CantConvert(
                    "groupby".into(),
                    "non-dataframe".into(),
                    span,
                )),
            },
            x => Err(ShellError::CantConvert(
                "groupby".into(),
                x.get_type().to_string(),
                x.span()?,
            )),
        }
    }

    pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result<NuGroupBy, ShellError> {
        let value = input.into_value(span);
        NuGroupBy::try_from_value(value)
    }

    pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
        let by = self.dataframe.select_series(&self.by).map_err(|e| {
            ShellError::LabeledError("Error creating groupby".into(), e.to_string())
        })?;

        Ok(GroupBy::new(&self.dataframe, by, self.groups.clone(), None))
    }

    pub fn print(&self, span: Span) -> Result<Vec<Value>, ShellError> {
        let values = self
            .by
            .iter()
            .map(|col| {
                let cols = vec!["group by".to_string()];
                let vals = vec![Value::String {
                    val: col.into(),
                    span,
                }];

                Value::Record { cols, vals, span }
            })
            .collect::<Vec<Value>>();

        Ok(values)
    }
}

impl AsRef<DataFrame> for NuGroupBy {
    fn as_ref(&self) -> &polars::prelude::DataFrame {
        &self.dataframe
    }
}
@ -1,6 +1,9 @@
use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value};

// Converts a Vec<Value> to a Vec<String> with a Span marking the whole
// Default value used when selecting rows from dataframe
pub const DEFAULT_ROWS: usize = 5;

// Converts a Vec<Value> to a Vec<Spanned<String>> with a Span marking the whole
// location of the columns for error referencing
pub(crate) fn convert_columns(
    columns: Vec<Value>,
@ -35,3 +38,39 @@ pub(crate) fn convert_columns(

    Ok((res, col_span))
}

// Converts a Vec<Value> to a Vec<String> with a Span marking the whole
// location of the columns for error referencing
pub(crate) fn convert_columns_string(
    columns: Vec<Value>,
    span: Span,
) -> Result<(Vec<String>, Span), ShellError> {
    // First column span
    let mut col_span = columns
        .get(0)
        .ok_or_else(|| {
            ShellError::SpannedLabeledError(
                "Empty column list".into(),
                "Empty list found for command".into(),
                span,
            )
        })
        .and_then(|v| v.span())?;

    let res = columns
        .into_iter()
        .map(|value| match value {
            Value::String { val, span } => {
                col_span = span_join(&[col_span, span]);
                Ok(val)
            }
            _ => Err(ShellError::SpannedLabeledError(
                "Incorrect column format".into(),
                "Only string as column name".into(),
                span,
            )),
        })
        .collect::<Result<Vec<String>, _>>()?;

    Ok((res, col_span))
}
@ -1,13 +1,13 @@
use std::{cmp::Ordering, fmt};

use crate::{ast::Operator, Category, ShellError, Span, Value};
use crate::{ast::Operator, ShellError, Span, Value};

// Trait definition for a custom value
#[typetag::serde(tag = "type")]
pub trait CustomValue: fmt::Debug + Send + Sync {
    fn clone_value(&self, span: Span) -> Value;

    fn category(&self) -> Category;
    //fn category(&self) -> Category;

    // Define string representation of the custom value
    fn value_string(&self) -> String;
@ -26,8 +26,19 @@ pub trait CustomValue: fmt::Debug + Send + Sync {
    fn as_any(&self) -> &dyn std::any::Any;

    // Follow cell path functions
    fn follow_path_int(&self, count: usize, span: Span) -> Result<Value, ShellError>;
    fn follow_path_string(&self, column_name: String, span: Span) -> Result<Value, ShellError>;
    fn follow_path_int(&self, _count: usize, span: Span) -> Result<Value, ShellError> {
        Err(ShellError::IncompatiblePathAccess(
            format!("{} doesn't support path access", self.value_string()),
            span,
        ))
    }

    fn follow_path_string(&self, _column_name: String, span: Span) -> Result<Value, ShellError> {
        Err(ShellError::IncompatiblePathAccess(
            format!("{} doesn't support path access", self.value_string()),
            span,
        ))
    }

    // ordering with other value
    fn partial_cmp(&self, _other: &Value) -> Option<Ordering> {