All is a DataFrame (#3812)

* nuframe in its own type in UntaggedValue

* Removed eager dataframe from enum

* Dataframe created from list of values

* Corrected order in dataframe columns

* Returned tag from stream collection

* Removed series from dataframe commands

* Arithmetic operators

* forced push

* forced push

* Replace all command

* String commands

* appending operations with dfs

* Testing suite for dataframes

* Unit test for dataframe commands

* improved equality for dataframes
This commit is contained in:
Fernando Herrera 2021-07-25 11:01:54 +01:00 committed by GitHub
parent 9120a64cfb
commit f1ee9113ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
83 changed files with 3293 additions and 1422 deletions

View File

@ -2,14 +2,11 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
dataframe::{Column, FrameStruct, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::{
frame::groupby::GroupBy,
prelude::{DataType, PolarsError, Series},
};
use polars::{frame::groupby::GroupBy, prelude::PolarsError};
enum Operation {
Mean,
@ -111,17 +108,40 @@ impl WholeStreamCommand for DataFrame {
description: "Aggregate sum by grouping by column a and summing on col b",
example:
"[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe group-by a | dataframe aggregate sum",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::string("one").into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Aggregate sum in dataframe columns",
example: "[[a b]; [4 1] [5 2]] | dataframe to-df | dataframe aggregate sum",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(9).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Aggregate sum in series",
example: "[4 1 5 6] | dataframe to-series | dataframe aggregate sum",
result: None,
example: "[4 1 5 6] | dataframe to-df | dataframe aggregate sum",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("0".to_string(), vec![UntaggedValue::int(16).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -139,7 +159,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => {
UntaggedValue::FrameStruct(FrameStruct::GroupBy(nu_groupby)) => {
let groupby = nu_groupby.to_groupby()?;
let res = perform_groupby_aggregation(
@ -152,18 +172,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
UntaggedValue::DataFrame(df) => {
let df = df.as_ref();
let res = perform_dataframe_aggregation(&df, op, &operation.tag)?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let value = perform_series_aggregation(series.as_ref(), op, &operation.tag)?;
Ok(OutputStream::one(value))
}
_ => Err(ShellError::labeled_error(
"No groupby, dataframe or series in stream",
"no groupby, dataframe or series found in input stream",
@ -264,162 +279,15 @@ fn perform_dataframe_aggregation(
}
}
fn perform_series_aggregation(
series: &Series,
operation: Operation,
operation_tag: &Tag,
) -> Result<Value, ShellError> {
match operation {
Operation::Mean => {
let res = match series.mean() {
Some(val) => UntaggedValue::Primitive(val.into()),
None => UntaggedValue::Primitive(0.into()),
};
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
let value = Value {
value: res,
tag: operation_tag.clone(),
};
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value(series.name(), value);
Ok(data.into_value())
}
Operation::Median => {
let res = match series.median() {
Some(val) => UntaggedValue::Primitive(val.into()),
None => UntaggedValue::Primitive(0.into()),
};
let value = Value {
value: res,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value(series.name(), value);
Ok(data.into_value())
}
Operation::Sum => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.sum().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.sum().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value(series.name(), value);
Ok(data.into_value())
}
Operation::Max => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.max().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.max().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value(series.name(), value);
Ok(data.into_value())
}
Operation::Min => {
let untagged = match series.dtype() {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => {
let res: i64 = series.min().unwrap_or(0);
Ok(UntaggedValue::Primitive(res.into()))
}
DataType::Float32 | DataType::Float64 => {
let res: f64 = series.min().unwrap_or(0.0);
Ok(UntaggedValue::Primitive(res.into()))
}
_ => Err(ShellError::labeled_error(
"Not valid type",
format!(
"this operation can not be performed with series of type {}",
series.dtype()
),
&operation_tag.span,
)),
}?;
let value = Value {
value: untagged,
tag: operation_tag.clone(),
};
let mut data = TaggedDictBuilder::new(operation_tag.clone());
data.insert_value(series.name(), value);
Ok(data.into_value())
}
_ => Err(ShellError::labeled_error_with_secondary(
"Not valid operation",
"operation not valid for series",
&operation_tag.span,
"Perhaps you want: mean, median, sum, max, min",
&operation_tag.span,
)),
test_examples(DataFrame {})
}
}

View File

@ -2,8 +2,8 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries},
Signature, SyntaxShape,
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
@ -32,7 +32,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Returns the selected column as series",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe column a",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -41,15 +49,26 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let column: Tagged<String> = args.req(0)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df
.as_ref()
.column(column.item.as_ref())
.map_err(|e| parse_polars_error::<&str>(&e, &column.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.clone(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.clone()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
// Unit tests for the `dataframe column` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Passes this command to the shared `test_dataframe` helper and returns its
// result, so any `ShellError` it reports fails the test.
// NOTE(review): presumably the helper runs each entry of `examples()` and
// compares the output with the declared `result` — confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe drop a",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"b".to_string(),
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -39,7 +50,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let columns: Vec<Value> = args.rest(0)?;
let (col_string, col_span) = convert_columns(&columns, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let new_df = match col_string.get(0) {
Some(col) => df
@ -63,3 +74,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe drop` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Hands the command struct to `crate::examples::test_dataframe` and
// propagates whatever `Result` that helper returns.
// NOTE(review): the helper is assumed to check the command's `examples()`
// against their expected `result` values — verify in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
@ -34,7 +37,21 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "drop duplicates",
example: "[[a b]; [1 2] [3 4] [1 2]] | dataframe to-df | dataframe drop-duplicates",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
),
Column::new(
"b".to_string(),
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -52,7 +69,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
None => (None, Span::unknown()),
};
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
@ -63,3 +80,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe drop-duplicates` command; test builds only.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Delegates to the shared `test_dataframe` helper; the test fails when the
// helper returns a `ShellError`.
// NOTE(review): presumably validates every `Example` declared by this
// command — confirm against the helper's implementation.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
@ -38,15 +38,45 @@ impl WholeStreamCommand for DataFrame {
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df);
let res = ($df.b / $df.b);
let df = ($df | dataframe with-column $res --name res);
$df | dataframe drop-nulls
"#,
result: None,
$df | dataframe drop-nulls"#,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()],
),
Column::new(
"b".to_string(),
vec![UntaggedValue::int(2).into(), UntaggedValue::int(2).into()],
),
Column::new(
"res".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "drop null values in dataframe",
example: r#"let s = ([1 2 0 0 3 4] | dataframe to-series);
example: r#"let s = ([1 2 0 0 3 4] | dataframe to-df);
($s / $s) | dataframe drop-nulls"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"div_0_0".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(1).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -60,7 +90,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
UntaggedValue::DataFrame(df) => {
// Extracting the selection columns of the columns to perform the aggregation
let columns: Option<Vec<Value>> = args.opt(0)?;
let (subset, col_span) = match columns {
@ -80,10 +110,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().drop_nulls();
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"drop nulls cannot be done with this value",
@ -91,3 +117,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}
}
// Unit tests for the `dataframe drop-nulls` command; test builds only.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Calls `test_dataframe` (imported locally as `test_examples`) with a fresh
// `DataFrame {}` command value and returns its `Result` unchanged.
// NOTE(review): assumed to execute the command's documented examples and
// compare results — confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -26,7 +29,27 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe dtypes",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"column".to_string(),
vec![
UntaggedValue::string("a").into(),
UntaggedValue::string("b").into(),
],
),
Column::new(
"dtype".to_string(),
vec![
UntaggedValue::string("i64").into(),
UntaggedValue::string("i64").into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,26 +58,49 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let col_names = df
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let mut dtypes: Vec<Value> = Vec::new();
let names: Vec<Value> = df
.as_ref()
.get_column_names()
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>();
.map(|v| {
let dtype = df
.as_ref()
.column(v)
.expect("using name from list of names from dataframe")
.dtype();
let values = df
.as_ref()
.dtypes()
.into_iter()
.zip(col_names.into_iter())
.map(move |(dtype, name)| {
let mut data = TaggedDictBuilder::new(tag.clone());
data.insert_value("column", name.as_ref());
data.insert_value("dtype", format!("{}", dtype));
let dtype_str = format!("{}", dtype);
dtypes.push(Value {
value: dtype_str.into(),
tag: Tag::default(),
});
data.into_value()
});
Value {
value: v.to_string().into(),
tag: Tag::default(),
}
})
.collect();
Ok(OutputStream::from_stream(values))
let names_col = Column::new("column".to_string(), names);
let dtypes_col = Column::new("dtype".to_string(), dtypes);
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], &tag.span)?;
Ok(OutputStream::one(df.into_value(tag)))
}
// Unit tests for the `dataframe dtypes` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Forwards the command to the shared `test_dataframe` helper; an `Err`
// returned by the helper becomes the test failure.
// NOTE(review): presumably the helper checks `examples()` output against the
// expected `result` — verify in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
@ -32,12 +32,70 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Create new dataframe with dummy variables from a dataframe",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a_1".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()],
),
Column::new(
"a_3".to_string(),
vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()],
),
Column::new(
"b_2".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()],
),
Column::new(
"b_4".to_string(),
vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Create new dataframe with dummy variables from a series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe to-dummies",
result: None,
example: "[1 2 2 3 3] | dataframe to-df | dataframe to-dummies",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"0_1".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
],
),
Column::new(
"0_2".to_string(),
vec![
UntaggedValue::int(0).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
],
),
Column::new(
"0_3".to_string(),
vec![
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(1).into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -51,7 +109,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
UntaggedValue::DataFrame(df) => {
let res = df.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
@ -62,17 +120,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
})?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"dummies cannot be done with this value",
@ -80,3 +127,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}
}
// Unit tests for the `dataframe to-dummies` command; test builds only.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Passes `DataFrame {}` to `crate::examples::test_dataframe` and returns the
// helper's `Result`, so a `ShellError` fails the test.
// NOTE(review): the helper is assumed to run each declared example and
// compare it with its `result` — confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
@ -34,13 +34,21 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Filter dataframe using a bool mask",
example: r#"let mask = ([$true $false] | dataframe to-series);
example: r#"let mask = ([$true $false] | dataframe to-df);
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Filter dataframe by creating a mask from operation",
example: r#"let mask = (([5 6] | dataframe to-series) > 5);
example: r#"let mask = (([5 6] | dataframe to-df) > 5);
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
result: None,
},
@ -53,16 +61,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let value: Value = args.req(0)?;
let series_span = value.tag.span;
let series = match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
let df = match value.value {
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only add a series to a dataframe",
value.tag.span,
)),
}?;
let casted = series.as_ref().bool().map_err(|e| {
let series = df.as_series(&series_span)?;
let casted = series.bool().map_err(|e| {
parse_polars_error(
&e,
&&series_span,
@ -70,12 +78,25 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)
})?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df
.as_ref()
.filter(&casted)
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe filter-with` command; test builds only.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Delegates to the shared `test_dataframe` helper and propagates its result.
// NOTE(review): presumably only examples with `result: Some(..)` are
// compared; how `result: None` examples are treated is not visible here —
// confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
@ -31,8 +34,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with head rows",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first",
result: None,
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first 1",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -46,8 +57,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
None => 5,
};
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().head(Some(rows));
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe first` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Runs the shared `test_dataframe` helper on this command and returns its
// `Result`; any `ShellError` fails the test.
// NOTE(review): assumed to evaluate the command's `examples()` and compare
// with the declared `result` — verify in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
pub struct DataFrame;
@ -27,7 +30,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Creates dataframe with selected columns",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe get a",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -38,7 +49,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df
.as_ref()
@ -47,3 +58,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe get` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Hands the command to `test_dataframe` (aliased `test_examples`) and
// returns the helper's outcome as the test result.
// NOTE(review): presumably checks each documented example against its
// expected value — confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuGroupBy, PolarsData},
dataframe::{FrameStruct, NuDataFrame, NuGroupBy},
Signature, SyntaxShape, UntaggedValue, Value,
};
@ -43,7 +43,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let by_columns: Vec<Value> = args.rest(0)?;
let (columns_string, col_span) = convert_columns(&by_columns, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
// This is the expensive part of the groupby; to create the
// groups that will be used for grouping the data in the
@ -57,7 +57,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let groups = groupby.get_groups().to_vec();
let groupby = Value {
tag,
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(NuGroupBy::new(
NuDataFrame::new(df.as_ref().clone()),
columns_string,
groups,

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
@ -51,20 +51,50 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "inner join dataframe",
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dataframe to-df);
vec![Example {
description: "inner join dataframe",
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dataframe to-df);
$right | dataframe join $right -l [a b] -r [a b]"#,
result: None,
},
Example {
description: "right join dataframe",
example: r#"let right = ([[a b c]; [1 2 3] [3 4 5] [5 6 7]] | dataframe to-df);
$right | dataframe join $right -l [a c] -r [a c] -t inner"#,
result: None,
},
]
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(3).into(),
UntaggedValue::int(5).into(),
],
),
Column::new(
"b".to_string(),
vec![
UntaggedValue::int(2).into(),
UntaggedValue::int(4).into(),
UntaggedValue::int(6).into(),
],
),
Column::new(
"c".to_string(),
vec![
UntaggedValue::int(5).into(),
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
],
),
Column::new(
"c_right".to_string(),
vec![
UntaggedValue::int(5).into(),
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -97,10 +127,10 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = match r_df.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
UntaggedValue::DataFrame(r_df) => {
// Checking the column types before performing the join
check_column_datatypes(
df.as_ref(),
@ -173,3 +203,16 @@ fn check_column_datatypes<T: AsRef<str>>(
Ok(())
}
// Unit tests for the `dataframe join` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Forwards `DataFrame {}` to the shared `test_dataframe` helper; the test
// fails if the helper returns a `ShellError`.
// NOTE(review): the helper is assumed to run the command's examples and
// compare with their `result` — verify in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
pub struct DataFrame;
@ -30,8 +33,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with last rows",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last",
result: None,
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last 1",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(3).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(4).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -45,9 +56,22 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
None => 5,
};
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().tail(Some(rows));
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
// Unit tests for the `dataframe last` command; compiled only for test builds.
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
// Calls the shared `test_dataframe` helper with this command and returns
// its `Result` directly.
// NOTE(review): presumably validates `examples()` output against the
// declared `result` values — confirm in `crate::examples`.
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::PolarsData, Signature, TaggedDictBuilder, UntaggedValue};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -19,46 +22,94 @@ impl WholeStreamCommand for DataFrame {
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
let values = args
let data = args
.context
.scope
.get_vars()
.into_iter()
.filter_map(|(name, value)| {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = &value.value {
let mut data = TaggedDictBuilder::new(value.tag.clone());
if let UntaggedValue::DataFrame(df) = &value.value {
let rows = Value {
value: (df.as_ref().height() as i64).into(),
tag: Tag::default(),
};
let rows = df.as_ref().height();
let cols = df.as_ref().width();
let cols = Value {
value: (df.as_ref().width() as i64).into(),
tag: Tag::default(),
};
data.insert_value("name", name.as_ref());
data.insert_value("rows", format!("{}", rows));
data.insert_value("columns", format!("{}", cols));
let location = match value.tag.anchor {
Some(AnchorLocation::File(name)) => name,
Some(AnchorLocation::Url(name)) => name,
Some(AnchorLocation::Source(text)) => text.slice(0..text.end).text,
None => "stream".to_string(),
};
match value.tag.anchor {
Some(AnchorLocation::File(name)) => data.insert_value("location", name),
Some(AnchorLocation::Url(name)) => data.insert_value("location", name),
Some(AnchorLocation::Source(text)) => {
let loc_name = text.slice(0..text.end);
data.insert_value("location", loc_name.text)
}
None => data.insert_value("location", "stream"),
}
let location = Value {
value: location.into(),
tag: Tag::default(),
};
Some(data.into_value())
let name = Value {
value: name.into(),
tag: Tag::default(),
};
Some((name, rows, cols, location))
} else {
None
}
});
Ok(OutputStream::from_stream(values))
let mut name = Column::new_empty("name".to_string());
let mut rows = Column::new_empty("rows".to_string());
let mut cols = Column::new_empty("columns".to_string());
let mut location = Column::new_empty("location".to_string());
for tuple in data {
name.push(tuple.0);
rows.push(tuple.1);
cols.push(tuple.2);
location.push(tuple.3);
}
let tag = args.call_info.name_tag;
let df = NuDataFrame::try_from_columns(vec![name, rows, cols, location], &tag.span)?;
Ok(OutputStream::one(df.into_value(tag)))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Lists loaded dataframes in current scope",
example: "dataframe list",
result: None,
example: "let a = ([[a b];[1 2] [3 4]] | dataframe to-df); dataframe list",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("name".to_string(), vec![UntaggedValue::string("$a").into()]),
Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]),
Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]),
Column::new(
"location".to_string(),
vec![UntaggedValue::string("stream").into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::convert_columns;
@ -18,8 +21,18 @@ impl WholeStreamCommand for DataFrame {
fn signature(&self) -> Signature {
Signature::build("dataframe melt")
.required("id_columns", SyntaxShape::Table, "Id columns for melting")
.rest(SyntaxShape::Any, "columns used as value columns")
.required_named(
"columns",
SyntaxShape::Table,
"column names for melting",
Some('c'),
)
.required_named(
"values",
SyntaxShape::Table,
"column names used as value columns",
Some('v'),
)
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -29,8 +42,59 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "melt dataframe",
example: "[[a b]; [a 2] [b 4] [a 6]] | dataframe to-df | dataframe melt a b",
result: None,
example:
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dataframe to-df | dataframe melt -c [b c] -v [a d]",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"b".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(3).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(3).into(),
],
),
Column::new(
"c".to_string(),
vec![
UntaggedValue::int(4).into(),
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
UntaggedValue::int(4).into(),
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
],
),
Column::new(
"variable".to_string(),
vec![
UntaggedValue::string("a").into(),
UntaggedValue::string("a").into(),
UntaggedValue::string("a").into(),
UntaggedValue::string("d").into(),
UntaggedValue::string("d").into(),
UntaggedValue::string("d").into(),
],
),
Column::new(
"value".to_string(),
vec![
UntaggedValue::string("x").into(),
UntaggedValue::string("y").into(),
UntaggedValue::string("z").into(),
UntaggedValue::string("a").into(),
UntaggedValue::string("b").into(),
UntaggedValue::string("c").into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -38,13 +102,13 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let id_col: Vec<Value> = args.req(0)?;
let val_col: Vec<Value> = args.rest(1)?;
let id_col: Vec<Value> = args.req_named("columns")?;
let val_col: Vec<Value> = args.req_named("values")?;
let (id_col_string, id_col_span) = convert_columns(&id_col, &tag)?;
let (val_col_string, val_col_span) = convert_columns(&val_col, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
@ -99,3 +163,16 @@ fn check_column_datatypes<T: AsRef<str>>(
Ok(())
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -26,7 +26,6 @@ pub mod take;
pub mod to_csv;
pub mod to_df;
pub mod to_parquet;
pub mod to_series;
pub(crate) mod utils;
pub mod where_;
pub mod with_column;
@ -59,7 +58,6 @@ pub use take::DataFrame as DataFrameTake;
pub use to_csv::DataFrame as DataFrameToCsv;
pub use to_df::DataFrame as DataFrameToDF;
pub use to_parquet::DataFrame as DataFrameToParquet;
pub use to_series::DataFrame as DataFrameToSeries;
pub use where_::DataFrame as DataFrameWhere;
pub use with_column::DataFrame as DataFrameWithColumn;
@ -71,6 +69,8 @@ pub use series::DataFrameArgMin;
pub use series::DataFrameArgSort;
pub use series::DataFrameArgTrue;
pub use series::DataFrameArgUnique;
pub use series::DataFrameConcatenate;
pub use series::DataFrameContains;
pub use series::DataFrameIsDuplicated;
pub use series::DataFrameIsIn;
pub use series::DataFrameIsNotNull;
@ -80,9 +80,14 @@ pub use series::DataFrameNNull;
pub use series::DataFrameNUnique;
pub use series::DataFrameNot;
pub use series::DataFrameReplace;
pub use series::DataFrameReplaceAll;
pub use series::DataFrameSeriesRename;
pub use series::DataFrameSet;
pub use series::DataFrameSetWithIdx;
pub use series::DataFrameShift;
pub use series::DataFrameStringLengths;
pub use series::DataFrameStringSlice;
pub use series::DataFrameToLowercase;
pub use series::DataFrameToUppercase;
pub use series::DataFrameUnique;
pub use series::DataFrameValueCounts;

View File

@ -73,7 +73,7 @@ impl WholeStreamCommand for DataFrame {
description: "Pivot a dataframe on b and aggregation on col c",
example:
"[[a b c]; [one x 1] [two y 2]] | dataframe to-df | dataframe group-by a | dataframe pivot b c sum",
result: None,
result: None, // No sample because there are nulls in the result dataframe
}]
}
}

View File

@ -42,13 +42,13 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Sample rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe sample -r 1",
result: None,
result: None, // No expected value because sampling is random
},
Example {
description: "Shows sample row using fraction and replace",
example:
"[[a b]; [1 2] [3 4] [5 6]] | dataframe to-df | dataframe sample -f 0.5 -e",
result: None,
result: None, // No expected value because sampling is random
},
]
}
@ -61,7 +61,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let fraction: Option<Tagged<f64>> = args.get_flag("fraction")?;
let replace: bool = args.has_flag("replace");
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = match (rows, fraction) {
(Some(rows), None) => df

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::{convert_columns, parse_polars_error};
@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Create new dataframe with column a",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe select a",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -40,7 +51,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df
.as_ref()
@ -49,3 +60,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Returns true if all values are false",
example: "[$false $false $false] | dataframe to-series | dataframe all-false",
result: None,
example: "[$false $false $false] | dataframe to-df | dataframe all-false",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![UntaggedValue::boolean(true).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | dataframe to-series);
example: r#"let s = ([5 6 2 10] | dataframe to-df);
let res = ($s > 9);
$res | dataframe all-false"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"all_false".to_string(),
vec![UntaggedValue::boolean(false).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
let series = df.as_series(&df_tag.span)?;
let bool = series.bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("all_false", value);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_false".to_string(), vec![value])],
&tag.span,
)?;
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Returns true if all values are true",
example: "[$true $true $true] | dataframe to-series | dataframe all-true",
result: None,
example: "[$true $true $true] | dataframe to-df | dataframe all-true",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![UntaggedValue::boolean(true).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Checks the result from a comparison",
example: r#"let s = ([5 6 2 8] | dataframe to-series);
example: r#"let s = ([5 6 2 8] | dataframe to-df);
let res = ($s > 9);
$res | dataframe all-true"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"all_true".to_string(),
vec![UntaggedValue::boolean(false).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
let series = df.as_series(&df_tag.span)?;
let bool = series.bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("all_true", value);
let df = NuDataFrame::try_from_columns(
vec![Column::new("all_true".to_string(), vec![value])],
&tag.span,
)?;
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for max value",
example: "[1 3 2] | dataframe to-series | dataframe arg-max",
result: None,
example: "[1 3 2] | dataframe to-df | dataframe arg-max",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"arg_max".to_string(),
vec![UntaggedValue::int(1).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -36,9 +47,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
let res = series.as_ref().arg_max();
let res = series.arg_max();
let chunked = match res {
Some(index) => UInt32Chunked::new_from_slice("arg_max", &[index as u32]),
@ -46,6 +58,20 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns index for min value",
example: "[1 3 2] | dataframe to-series | dataframe arg-min",
result: None,
example: "[1 3 2] | dataframe to-df | dataframe arg-min",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"arg_min".to_string(),
vec![UntaggedValue::int(0).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -36,9 +47,9 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().arg_min();
let res = df.as_series(&df_tag.span)?.arg_min();
let chunked = match res {
Some(index) => UInt32Chunked::new_from_slice("arg_min", &[index as u32]),
@ -46,6 +57,20 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
};
let res = chunked.into_series();
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,8 +29,22 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for a sorted series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-sort",
result: None,
example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-sort",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"arg_sort".to_string(),
vec![
UntaggedValue::int(0).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(3).into(),
UntaggedValue::int(4).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -36,12 +53,24 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let reverse = args.has_flag("reverse");
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().argsort(reverse);
let mut res = df.as_series(&df_tag.span)?.argsort(reverse).into_series();
res.rename("arg_sort");
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,8 +29,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes where values are true",
example: "[$false $true $false] | dataframe to-series | dataframe arg-true",
result: None,
example: "[$false $true $false] | dataframe to-df | dataframe arg-true",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"arg_true".to_string(),
vec![UntaggedValue::int(1).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,9 +46,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
let series = df.as_series(&df_tag.span)?;
let bool = series.bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
@ -46,7 +58,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
})?;
let mut res = bool.arg_true().into_series();
res.rename("int");
res.rename("arg_true");
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,8 +29,20 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns indexes for unique values",
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-unique",
result: None,
example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-unique",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"arg_unique".to_string(),
vec![
UntaggedValue::int(0).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(3).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,15 +50,29 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
let mut res = df
.as_series(&df_tag.span)?
.arg_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
.into_series();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
res.rename("arg_unique");
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -0,0 +1,107 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::IntoSeries;
/// Command type behind `dataframe concatenate`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    fn name(&self) -> &str {
        "dataframe concatenate"
    }

    fn usage(&self) -> &str {
        "[Series] Concatenates strings with other array"
    }

    /// One required positional argument, `other`, accepted with any syntax shape.
    fn signature(&self) -> Signature {
        let builder = Signature::build("dataframe concatenate");
        builder.required(
            "other",
            SyntaxShape::Any,
            "Other array with string to be concatenated",
        )
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column dataframe named "0".
    fn examples(&self) -> Vec<Example> {
        let joined = vec![
            UntaggedValue::string("abcza").into(),
            UntaggedValue::string("abcxs").into(),
            UntaggedValue::string("abccd").into(),
        ];

        let expected =
            NuDataFrame::try_from_columns(vec![Column::new("0".to_string(), joined)], &Span::default())
                .expect("simple df for test should not fail")
                .into_value(Tag::default());

        vec![Example {
            description: "Concatenate string",
            example: r#"let other = ([za xs cd] | dataframe to-df);
[abc abc abc] | dataframe to-df | dataframe concatenate $other"#,
            result: Some(vec![expected]),
        }]
    }
}
/// Implements `dataframe concatenate`.
///
/// The positional argument must hold a dataframe that converts to a string
/// series; the dataframe read from the input stream must convert to a string
/// series as well. The concatenated result is renamed after the input series
/// and returned as a single dataframe value carrying the input's tag.
///
/// The argument is validated before the input stream is read, so an invalid
/// argument is reported first.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    // Hint attached to the error when either operand is not a string series.
    const ONLY_STRINGS: &str = "The concatenate command can only be used with string columns";

    let tag = args.call_info.name_tag.clone();

    // Right-hand operand: the positional argument.
    let other: Value = args.req(0)?;
    let rhs_df = match &other.value {
        UntaggedValue::DataFrame(df) => df,
        _ => {
            return Err(ShellError::labeled_error(
                "Incorrect type",
                "can only concatenate another series",
                other.tag.span,
            ))
        }
    };
    let rhs_series = rhs_df.as_series(&other.tag.span)?;
    let rhs_strings = rhs_series
        .utf8()
        .map_err(|e| parse_polars_error::<&str>(&e, &other.tag.span, Some(ONLY_STRINGS)))?;

    // Left-hand operand: the dataframe coming through the pipeline.
    let (lhs_df, lhs_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
    let lhs_series = lhs_df.as_series(&lhs_tag.span)?;
    let lhs_strings = lhs_series
        .utf8()
        .map_err(|e| parse_polars_error::<&str>(&e, &lhs_tag.span, Some(ONLY_STRINGS)))?;

    // The output column keeps the name of the input series.
    let mut joined = lhs_strings.concat(rhs_strings);
    joined.rename(lhs_series.name());

    let out = NuDataFrame::try_from_series(vec![joined.into_series()], &tag.span)?;
    Ok(OutputStream::one(out.into_value(lhs_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -0,0 +1,90 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::prelude::IntoSeries;
/// Command type behind `dataframe contains`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    fn name(&self) -> &str {
        "dataframe contains"
    }

    // User-facing help text: "patter" was a typo for "pattern".
    fn usage(&self) -> &str {
        "[Series] Checks if a pattern is contained in a string"
    }

    /// One required named argument, `--pattern` / `-p`, taking a string.
    fn signature(&self) -> Signature {
        Signature::build("dataframe contains").required_named(
            "pattern",
            SyntaxShape::String,
            "Regex pattern to be searched",
            Some('p'),
        )
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column boolean
    /// dataframe named "0".
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Returns boolean indicating if pattern was found",
            example: "[abc acb acb] | dataframe to-df | dataframe contains -p ab",
            result: Some(vec![NuDataFrame::try_from_columns(
                vec![Column::new(
                    "0".to_string(),
                    vec![
                        UntaggedValue::boolean(true).into(),
                        UntaggedValue::boolean(false).into(),
                        UntaggedValue::boolean(false).into(),
                    ],
                )],
                &Span::default(),
            )
            .expect("simple df for test should not fail")
            .into_value(Tag::default())]),
        }]
    }
}
/// Implements `dataframe contains`.
///
/// Reads the required `pattern` flag, converts the input dataframe to a
/// series, and requires that series to be a string column. The mask produced
/// by matching the pattern is returned as a single dataframe value carrying
/// the input's tag.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let call_tag = args.call_info.name_tag.clone();
    let pattern: Tagged<String> = args.req_named("pattern")?;

    let (input_df, input_tag) = NuDataFrame::try_from_stream(&mut args.input, &call_tag.span)?;
    let column = input_df.as_series(&input_tag.span)?;

    // A non-string column is reported against the span of the input value.
    let strings = column.utf8().map_err(|err| {
        let hint = Some("The contains command can only be used with string columns");
        parse_polars_error::<&str>(&err, &input_tag.span, hint)
    })?;

    // Errors raised while matching are reported against the command's own span.
    let matches = strings
        .contains(pattern.as_str())
        .map_err(|err| parse_polars_error::<&str>(&err, &call_tag.span, None))?
        .into_series();

    let result = NuDataFrame::try_from_series(vec![matches], &call_tag.span)?;
    Ok(OutputStream::one(result.into_value(input_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating duplicated values",
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-duplicated",
result: None,
example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-duplicated",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"is_duplicated".to_string(),
vec![
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
let mut res = df
.as_series(&df_tag.span)?
.is_duplicated()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
.into_series();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
res.rename("is_duplicated");
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared `test_dataframe` example harness and
    /// propagates whatever error it reports.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuSeries, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::IntoSeries;
@ -29,9 +29,25 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Checks if elements from a series are contained in right series",
example: r#"let other = ([1 3 6] | dataframe to-series);
[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-in $other"#,
result: None,
example: r#"let other = ([1 3 6] | dataframe to-df);
[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-in $other"#,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"is_in".to_string(),
vec![
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -40,8 +56,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let other = match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
let other_df = match value.value {
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only search in a series",
@ -49,15 +65,31 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let other = other_df.as_series(&value.tag.span)?;
let res = series
.as_ref()
.is_in(other.as_ref())
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let mut res = df
.as_series(&df_tag.span)?
.is_in(&other)
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
.into_series();
res.rename("is_in");
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask where values are not null",
example: r#"let s = ([5 6 0 8] | dataframe to-series);
example: r#"let s = ([5 6 0 8] | dataframe to-df);
let res = ($s / $s);
$res | dataframe is-not-null"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"is_not_null".to_string(),
vec![
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(true).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().is_not_null();
let res = df.as_series(&df_tag.span)?.is_not_null();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask where values are null",
example: r#"let s = ([5 6 0 8] | dataframe to-series);
example: r#"let s = ([5 6 0 8] | dataframe to-df);
let res = ($s / $s);
$res | dataframe is-null"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"is_null".to_string(),
vec![
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().is_null();
let res = df.as_series(&df_tag.span)?.is_null();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create mask indicating unique values",
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-unique",
result: None,
example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-unique",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"is_unique".to_string(),
vec![
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(false).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
let mut res = df
.as_series(&df_tag.span)?
.is_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
.into_series();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
res.rename("is_unique");
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -5,6 +5,8 @@ pub mod arg_min;
pub mod arg_sort;
pub mod arg_true;
pub mod arg_unique;
pub mod concatenate;
pub mod contains;
pub mod is_duplicated;
pub mod is_in;
pub mod is_not_null;
@ -15,9 +17,14 @@ pub mod n_unique;
pub mod not;
pub mod rename;
pub mod replace;
pub mod replace_all;
pub mod set;
pub mod set_with_idx;
pub mod shift;
pub mod str_lengths;
pub mod str_slice;
pub mod to_lowercase;
pub mod to_uppercase;
pub mod unique;
pub mod value_counts;
@ -28,6 +35,8 @@ pub use arg_min::DataFrame as DataFrameArgMin;
pub use arg_sort::DataFrame as DataFrameArgSort;
pub use arg_true::DataFrame as DataFrameArgTrue;
pub use arg_unique::DataFrame as DataFrameArgUnique;
pub use concatenate::DataFrame as DataFrameConcatenate;
pub use contains::DataFrame as DataFrameContains;
pub use is_duplicated::DataFrame as DataFrameIsDuplicated;
pub use is_in::DataFrame as DataFrameIsIn;
pub use is_not_null::DataFrame as DataFrameIsNotNull;
@ -38,8 +47,13 @@ pub use n_unique::DataFrame as DataFrameNUnique;
pub use not::DataFrame as DataFrameNot;
pub use rename::DataFrame as DataFrameSeriesRename;
pub use replace::DataFrame as DataFrameReplace;
pub use replace_all::DataFrame as DataFrameReplaceAll;
pub use set::DataFrame as DataFrameSet;
pub use set_with_idx::DataFrame as DataFrameSetWithIdx;
pub use shift::DataFrame as DataFrameShift;
pub use str_lengths::DataFrame as DataFrameStringLengths;
pub use str_slice::DataFrame as DataFrameStringSlice;
pub use to_lowercase::DataFrame as DataFrameToLowercase;
pub use to_uppercase::DataFrame as DataFrameToUppercase;
pub use unique::DataFrame as DataFrameUnique;
pub use value_counts::DataFrame as DataFrameValueCounts;

View File

@ -2,7 +2,8 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
dataframe::{Column, NuDataFrame},
Primitive, Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -27,9 +28,17 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Counts null values",
example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-series);
($s / ss) | dataframe count-null"#,
result: None,
example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-df);
($s / $s) | dataframe count-null"#,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"count_null".to_string(),
vec![UntaggedValue::int(2).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -37,17 +46,32 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().null_count();
let res = df.as_series(&df_tag.span)?.null_count();
let value = Value {
value: UntaggedValue::Primitive(Primitive::Int(res as i64)),
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("count-null", value);
let df = NuDataFrame::try_from_columns(
vec![Column::new("count_null".to_string(), vec![value])],
&tag.span,
)?;
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -2,7 +2,8 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
dataframe::{Column, NuDataFrame},
Primitive, Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -27,8 +28,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Counts unique values",
example: "[1 1 2 2 3 3 4] | dataframe to-series | dataframe count-unique",
result: None,
example: "[1 1 2 2 3 3 4] | dataframe to-df | dataframe count-unique",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"count_unique".to_string(),
vec![UntaggedValue::int(4).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -36,10 +45,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
let res = df
.as_series(&df_tag.span)?
.n_unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
@ -48,8 +57,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
tag: tag.clone(),
};
let mut data = TaggedDictBuilder::new(tag);
data.insert_value("count-unique", value);
let df = NuDataFrame::try_from_columns(
vec![Column::new("count_unique".to_string(), vec![value])],
&tag.span,
)?;
Ok(OutputStream::one(data.into_value()))
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
use std::ops::Not;
@ -27,8 +30,20 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Inverts boolean mask",
example: "[$true $false $true] | dataframe to-series | dataframe not",
result: None,
example: "[$true $false $true] | dataframe to-df | dataframe not",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::boolean(false).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -36,9 +51,10 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
let bool = series.as_ref().bool().map_err(|e| {
let bool = series.bool().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
@ -48,8 +64,19 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let res = bool.not();
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
pub struct DataFrame;
@ -30,8 +33,21 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Renames a series",
example: "[5 6 7 8] | dataframe to-series | dataframe rename-series new_name",
result: None,
example: "[5 6 7 8] | dataframe to-df | dataframe rename new_name",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"new_name".to_string(),
vec![
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
UntaggedValue::int(7).into(),
UntaggedValue::int(8).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -40,9 +56,25 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let name: Tagged<String> = args.req(0)?;
let mut series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
series.as_mut().rename(name.item.as_ref());
let mut series = df.as_series(&df_tag.span)?;
Ok(OutputStream::one(series.into_value(tag)))
series.rename(name.item.as_ref());
let df = NuDataFrame::try_from_series(vec![series], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,8 +1,11 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_source::Tagged;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::{Span, Tagged};
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -39,8 +42,20 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Replaces string",
example: "[abc abc abc] | dataframe to-series | dataframe replace -p ab -r AB",
result: None,
example: "[abc abc abc] | dataframe to-df | dataframe replace -p ab -r AB",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::string("ABc").into(),
UntaggedValue::string("ABc").into(),
UntaggedValue::string("ABc").into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -50,23 +65,36 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let pattern: Tagged<String> = args.req_named("pattern")?;
let replace: Tagged<String> = args.req_named("replace")?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let chunked = series.as_ref().utf8().map_err(|e| {
let series = df.as_series(&df_tag.span)?;
let chunked = series.utf8().map_err(|e| {
parse_polars_error::<&str>(
&e,
&tag.span,
Some("The replace command can only be used with string columns"),
&df_tag.span,
Some("The replace-all command can only be used with string columns"),
)
})?;
let res = chunked
.as_ref()
let mut res = chunked
.replace(pattern.as_str(), replace.as_str())
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
res.rename(series.name());
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -0,0 +1,100 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::prelude::IntoSeries;
/// Command type for `dataframe replace-all`.
///
/// Stateless unit struct; all behaviour lives in its `WholeStreamCommand`
/// implementation and the module-private `command` function.
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "dataframe replace-all"
    }

    /// One-line usage text.
    fn usage(&self) -> &str {
        "[Series] Replace all (sub)strings by a regex pattern"
    }

    /// Declares the two required named arguments, `--pattern/-p` and
    /// `--replace/-r`.
    ///
    /// The signature name must agree with `name()`; it previously read
    /// "dataframe replace", a leftover from the sibling `replace` command.
    fn signature(&self) -> Signature {
        Signature::build("dataframe replace-all")
            .required_named(
                "pattern",
                SyntaxShape::String,
                "Regex pattern to be matched",
                Some('p'),
            )
            .required_named(
                "replace",
                SyntaxShape::String,
                "replacing string",
                Some('r'),
            )
    }

    /// Delegates to the module-level `command` function.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Single example: every `a` in each string is replaced with `A`, and the
    /// expected result is a one-column dataframe named "0".
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Replaces string",
            example: "[abac abac abac] | dataframe to-df | dataframe replace-all -p a -r A",
            result: Some(vec![NuDataFrame::try_from_columns(
                vec![Column::new(
                    "0".to_string(),
                    vec![
                        UntaggedValue::string("AbAc").into(),
                        UntaggedValue::string("AbAc").into(),
                        UntaggedValue::string("AbAc").into(),
                    ],
                )],
                &Span::default(),
            )
            .expect("simple df for test should not fail")
            .into_value(Tag::default())]),
        }]
    }
}
/// Runs `dataframe replace-all` on the dataframe read from the input stream.
///
/// Reads the required `pattern` and `replace` named arguments, views the
/// input dataframe as a single series, and applies `replace_all` to its
/// string values. The result keeps the original series name and is returned
/// as a one-column dataframe carrying the input dataframe's tag.
///
/// # Errors
///
/// Returns a `ShellError` when a named argument is missing, the input is not
/// a dataframe convertible to a series, the series is not a string column, or
/// polars rejects the replacement.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let pattern: Tagged<String> = args.req_named("pattern")?;
    let replace: Tagged<String> = args.req_named("replace")?;

    let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    let series = df.as_series(&df_tag.span)?;
    let chunked = series.utf8().map_err(|e| {
        parse_polars_error::<&str>(
            &e,
            &df_tag.span,
            // Name this command in the message, not the sibling `replace`.
            Some("The replace-all command can only be used with string columns"),
        )
    })?;

    let mut res = chunked
        .replace_all(pattern.as_str(), replace.as_str())
        .map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;

    // Carry the input column name over to the result.
    res.rename(series.name());

    let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
    Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs every example declared by the command through the shared
    /// dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
pub struct DataFrame;
@ -33,10 +36,24 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: r#"let s = ([1 2 2 3 3] | dataframe to-series | dataframe shift 2);
example: r#"let s = ([1 2 2 3 3] | dataframe to-df | dataframe shift 2);
let mask = ($s | dataframe is-null);
$s | dataframe set 0 --mask $mask"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::int(0).into(),
UntaggedValue::int(0).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(2).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -46,20 +63,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let value: Value = args.req(0)?;
let mask: Value = args.req_named("mask")?;
let bool_mask = match &mask.value {
UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => {
match series.as_ref().dtype() {
DataType::Boolean => series
.as_ref()
.bool()
.map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use bool series as mask",
value.tag.span,
)),
}
}
let mask_df = match &mask.value {
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use a series as mask",
value.tag.span,
)),
}?;
let mask_series = mask_df.as_series(&mask.tag.span)?;
let bool_mask = match mask_series.dtype() {
DataType::Boolean => mask_series
.bool()
.map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use bool series as mask",
@ -67,11 +85,12 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
match &value.value {
UntaggedValue::Primitive(Primitive::Int(val)) => {
let chunked = series.as_ref().i64().map_err(|e| {
let chunked = series.i64().map_err(|e| {
parse_polars_error::<&str>(
&e,
&value.tag.span,
@ -83,10 +102,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
.set(bool_mask, Some(*val))
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
UntaggedValue::Primitive(Primitive::Decimal(val)) => {
let chunked = series.as_ref().f64().map_err(|e| {
@ -107,10 +124,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
UntaggedValue::Primitive(Primitive::String(val)) => {
let chunked = series.as_ref().utf8().map_err(|e| {
@ -128,18 +143,29 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let mut res = res.into_series();
res.rename("string");
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
format!(
"this value cannot be set in a series of type '{}'",
series.as_ref().dtype()
series.dtype()
),
value.tag.span,
)),
}
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,7 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::{ChunkSet, DataType, IntoSeries};
pub struct DataFrame;
@ -33,10 +36,25 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Set value in selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
let indices = ([0 2] | dataframe to-series);
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df);
let indices = ([0 2] | dataframe to-df);
$series | dataframe set-with-idx 6 -i $indices"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::int(6).into(),
UntaggedValue::int(1).into(),
UntaggedValue::int(6).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(4).into(),
UntaggedValue::int(3).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -47,7 +65,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let indices: Value = args.req_named("indices")?;
let indices = match &indices.value {
UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => Ok(series),
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use a series for set command",
@ -55,7 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}?;
let casted = match indices.as_ref().dtype() {
let indices = indices.as_series(&value.tag.span)?;
let casted = match indices.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
.as_ref()
.cast_with_dtype(&DataType::UInt32)
@ -75,11 +95,12 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
.into_iter()
.filter_map(|val| val.map(|v| v as usize));
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
match &value.value {
UntaggedValue::Primitive(Primitive::Int(val)) => {
let chunked = series.as_ref().i64().map_err(|e| {
let chunked = series.i64().map_err(|e| {
parse_polars_error::<&str>(
&e,
&value.tag.span,
@ -91,10 +112,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
.set_at_idx(indices, Some(*val))
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
UntaggedValue::Primitive(Primitive::Decimal(val)) => {
let chunked = series.as_ref().f64().map_err(|e| {
@ -115,10 +134,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
UntaggedValue::Primitive(Primitive::String(val)) => {
let chunked = series.as_ref().utf8().map_err(|e| {
@ -136,10 +153,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let mut res = res.into_series();
res.rename("string");
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
@ -151,3 +166,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}
}
#[cfg(test)]
mod tests {
use super::DataFrame;
use super::ShellError;
#[test]
fn examples_work_as_expected() -> Result<(), ShellError> {
use crate::examples::test_dataframe as test_examples;
test_examples(DataFrame {})
}
}

View File

@ -1,9 +1,8 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
use polars::prelude::IntoSeries;
pub struct DataFrame;
@ -27,7 +26,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Shifts the values by a given period",
example: "[1 2 2 3 3] | dataframe to-series | dataframe shift 2",
example: "[1 2 2 3 3] | dataframe to-df | dataframe shift 2",
result: None,
}]
}
@ -37,12 +36,10 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let period: Tagged<i64> = args.req(0)?;
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series.as_ref().shift(period.item);
let res = df.as_series(&df_tag.span)?.shift(period.item);
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}

View File

@ -0,0 +1,81 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
/// Command type backing `dataframe str-lengths`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe str-lengths"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[Series] Get lengths of all strings"
    }

    /// The command takes no positional arguments and no flags.
    fn signature(&self) -> Signature {
        Signature::build("dataframe str-lengths")
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column dataframe
    /// named "0" holding the lengths 1, 2 and 3.
    fn examples(&self) -> Vec<Example> {
        let lengths = Column::new(
            "0".to_string(),
            vec![
                UntaggedValue::int(1).into(),
                UntaggedValue::int(2).into(),
                UntaggedValue::int(3).into(),
            ],
        );

        let expected = NuDataFrame::try_from_columns(vec![lengths], &Span::default())
            .expect("simple df for test should not fail")
            .into_value(Tag::default());

        vec![Example {
            description: "Returns string lengths",
            example: "[a ab abc] | dataframe to-df | dataframe str-lengths",
            result: Some(vec![expected]),
        }]
    }
}
/// Reads a dataframe from the input stream, views it as a series, and
/// returns a new dataframe holding the length of each string.
///
/// Fails with a labeled error when the series cannot be viewed as a utf8
/// (string) column.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let name_tag = args.call_info.name_tag.clone();

    let (input_df, input_tag) = NuDataFrame::try_from_stream(&mut args.input, &name_tag.span)?;
    let column = input_df.as_series(&input_tag.span)?;

    let strings = column.utf8().map_err(|err| {
        parse_polars_error::<&str>(
            &err,
            &input_tag.span,
            Some("The str-lengths command can only be used with string columns"),
        )
    })?;

    let lengths = strings.as_ref().str_lengths().into_series();
    let output = NuDataFrame::try_from_series(vec![lengths], &name_tag.span)?;

    // The result carries the tag of the incoming dataframe, not the command's.
    Ok(OutputStream::one(output.into_value(input_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands the `str-lengths` command to the shared dataframe example
    /// harness; any error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -0,0 +1,92 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
use polars::prelude::IntoSeries;
/// Command type backing `dataframe str-slice`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe str-slice"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[Series] Slices the string from the start position until the selected length"
    }

    /// Declares the required `--start`/`-s` flag and the optional
    /// `--length`/`-l` flag, both integers.
    fn signature(&self) -> Signature {
        // The signature name has to agree with `name()`.
        Signature::build("dataframe str-slice")
            .required_named("start", SyntaxShape::Int, "start of slice", Some('s'))
            .named("length", SyntaxShape::Int, "optional length", Some('l'))
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column dataframe
    /// named "0" in which every row is "bc".
    fn examples(&self) -> Vec<Example> {
        let sliced = Column::new(
            "0".to_string(),
            vec![
                UntaggedValue::string("bc").into(),
                UntaggedValue::string("bc").into(),
                UntaggedValue::string("bc").into(),
            ],
        );

        let expected = NuDataFrame::try_from_columns(vec![sliced], &Span::default())
            .expect("simple df for test should not fail")
            .into_value(Tag::default());

        vec![Example {
            description: "Creates slices from the strings",
            example: "[abcded abc321 abc123] | dataframe to-df | dataframe str-slice -s 1 -l 2",
            result: Some(vec![expected]),
        }]
    }
}
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let start: Tagged<i64> = args.req_named("start")?;
let length: Option<Tagged<i64>> = args.get_flag("length")?;
let length = length.map(|v| v.item as u64);
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = df.as_series(&df_tag.span)?;
let chunked = series.utf8().map_err(|e| {
parse_polars_error::<&str>(
&e,
&df_tag.span,
Some("The str-slice command can only be used with string columns"),
)
})?;
let mut res = chunked
.str_slice(start.item, length)
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
res.rename(series.name());
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands the `str-slice` command to the shared dataframe example
    /// harness; any error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -0,0 +1,82 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
/// Command type backing `dataframe to-lowercase`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe to-lowercase"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[Series] Lowercase the strings in the column"
    }

    /// The command takes no positional arguments and no flags.
    fn signature(&self) -> Signature {
        Signature::build("dataframe to-lowercase")
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column dataframe
    /// named "0" in which every row is "abc".
    fn examples(&self) -> Vec<Example> {
        let lowered = Column::new(
            "0".to_string(),
            vec![
                UntaggedValue::string("abc").into(),
                UntaggedValue::string("abc").into(),
                UntaggedValue::string("abc").into(),
            ],
        );

        let expected = NuDataFrame::try_from_columns(vec![lowered], &Span::default())
            .expect("simple df for test should not fail")
            .into_value(Tag::default());

        vec![Example {
            description: "Modifies strings to lowercase",
            example: "[Abc aBc abC] | dataframe to-df | dataframe to-lowercase",
            result: Some(vec![expected]),
        }]
    }
}
/// Reads a dataframe from the input stream, views it as a series, and
/// returns a new dataframe with every string lowercased.
///
/// Fails with a labeled error when the series cannot be viewed as a utf8
/// (string) column.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let name_tag = args.call_info.name_tag.clone();

    let (input_df, input_tag) = NuDataFrame::try_from_stream(&mut args.input, &name_tag.span)?;
    let column = input_df.as_series(&input_tag.span)?;

    let strings = column.utf8().map_err(|err| {
        parse_polars_error::<&str>(
            &err,
            &input_tag.span,
            Some("The to-lowercase command can only be used with string columns"),
        )
    })?;

    // Keep the original column name on the transformed result.
    let mut lowered = strings.to_lowercase();
    lowered.rename(column.name());

    let output = NuDataFrame::try_from_series(vec![lowered.into_series()], &name_tag.span)?;
    Ok(OutputStream::one(output.into_value(input_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands the `to-lowercase` command to the shared dataframe example
    /// harness; any error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -0,0 +1,82 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use polars::prelude::IntoSeries;
/// Command type backing `dataframe to-uppercase`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe to-uppercase"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "[Series] Uppercase the strings in the column"
    }

    /// The command takes no positional arguments and no flags.
    fn signature(&self) -> Signature {
        Signature::build("dataframe to-uppercase")
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// A single example whose expected result is a one-column dataframe
    /// named "0" in which every row is "ABC".
    fn examples(&self) -> Vec<Example> {
        let uppered = Column::new(
            "0".to_string(),
            vec![
                UntaggedValue::string("ABC").into(),
                UntaggedValue::string("ABC").into(),
                UntaggedValue::string("ABC").into(),
            ],
        );

        let expected = NuDataFrame::try_from_columns(vec![uppered], &Span::default())
            .expect("simple df for test should not fail")
            .into_value(Tag::default());

        vec![Example {
            description: "Modifies strings to uppercase",
            example: "[Abc aBc abC] | dataframe to-df | dataframe to-uppercase",
            result: Some(vec![expected]),
        }]
    }
}
/// Reads a dataframe from the input stream, views it as a series, and
/// returns a new dataframe with every string uppercased.
///
/// Fails with a labeled error when the series cannot be viewed as a utf8
/// (string) column.
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
    let name_tag = args.call_info.name_tag.clone();

    let (input_df, input_tag) = NuDataFrame::try_from_stream(&mut args.input, &name_tag.span)?;
    let column = input_df.as_series(&input_tag.span)?;

    let strings = column.utf8().map_err(|err| {
        parse_polars_error::<&str>(
            &err,
            &input_tag.span,
            Some("The to-uppercase command can only be used with string columns"),
        )
    })?;

    // Keep the original column name on the transformed result.
    let mut uppered = strings.to_uppercase();
    uppered.rename(column.name());

    let output = NuDataFrame::try_from_series(vec![uppered.into_series()], &name_tag.span)?;
    Ok(OutputStream::one(output.into_value(input_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands the `to-uppercase` command to the shared dataframe example
    /// harness; any error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,8 +1,10 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature};
use polars::prelude::IntoSeries;
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
pub struct DataFrame;
@ -26,8 +28,16 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Returns unique values from a series",
example: "[1 2 2 3 3] | dataframe to-series | dataframe unique",
result: None,
example: "[2 2 2 2 2] | dataframe to-df | dataframe unique",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![UntaggedValue::int(2).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -35,15 +45,26 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = series
.as_ref()
let res = df
.as_series(&df_tag.span)?
.unique()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuSeries::series_to_value(
res.into_series(),
tag,
)))
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
Ok(OutputStream::one(df.into_value(df_tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -2,8 +2,8 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries},
Signature,
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
use crate::commands::dataframe::utils::parse_polars_error;
@ -30,8 +30,22 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Calculates value counts",
example: "[5 5 6 6] | dataframe to-series | dataframe value-counts",
result: None,
example: "[5 5 5 5 6 6] | dataframe to-df | dataframe value-counts",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![UntaggedValue::int(5).into(), UntaggedValue::int(6).into()],
),
Column::new(
"counts".to_string(),
vec![UntaggedValue::int(4).into(), UntaggedValue::int(2).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -39,12 +53,27 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let df = series
.as_ref()
let df_new = df
.as_series(&df_tag.span)?
.value_counts()
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(df, tag)))
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
df_new, tag,
)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue, Value,
};
pub struct DataFrame;
@ -26,7 +29,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Shows row and column shape",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe shape",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]),
Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -34,14 +45,34 @@ impl WholeStreamCommand for DataFrame {
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let rows = df.as_ref().height();
let cols = df.as_ref().width();
let rows = Value {
value: (df.as_ref().height() as i64).into(),
tag: Tag::default(),
};
let mut data = TaggedDictBuilder::new(&tag);
data.insert_value("rows", format!("{}", rows));
data.insert_value("columns", format!("{}", cols));
let cols = Value {
value: (df.as_ref().width() as i64).into(),
tag: Tag::default(),
};
Ok(OutputStream::one(data.into_value()))
let rows_col = Column::new("rows".to_string(), vec![rows]);
let cols_col = Column::new("columns".to_string(), vec![cols]);
let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], &tag.span)?;
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let rows: Option<Tagged<usize>> = args.get_flag("n_rows")?;
let tail: bool = args.has_flag("tail");
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let rows = rows.map(|v| v.item);
let values = if tail { df.tail(rows)? } else { df.head(rows)? };

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue,
};
use nu_source::Tagged;
pub struct DataFrame;
@ -29,7 +32,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe slice 0 1",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -40,8 +51,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let offset: Tagged<usize> = args.req(0)?;
let size: Tagged<usize> = args.req(1)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().slice(offset.item as i64, size.item);
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
@ -33,12 +33,39 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Create new sorted dataframe",
example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort a",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
),
Column::new(
"b".to_string(),
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Create new sorted series",
example: "[3 4 1 2] | dataframe to-series | dataframe sort",
result: None,
example: "[3 4 1 2] | dataframe to-df | dataframe sort",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(2).into(),
UntaggedValue::int(3).into(),
UntaggedValue::int(4).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -53,31 +80,38 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let reverse = args.has_flag("reverse");
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let columns: Vec<Value> = args.rest(0)?;
if !columns.is_empty() {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
match &value.value {
UntaggedValue::DataFrame(df) => {
if df.is_series() {
let columns = df.as_ref().get_column_names();
let res = df
.as_ref()
.sort(&col_string, reverse)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
.sort(columns, reverse)
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
} else {
Err(ShellError::labeled_error(
"Missing columns",
"missing column name to perform sort",
&tag.span,
))
let columns: Vec<Value> = args.rest(0)?;
if !columns.is_empty() {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
let res = df
.as_ref()
.sort(&col_string, reverse)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
} else {
Err(ShellError::labeled_error(
"Missing columns",
"missing column name to perform sort",
&tag.span,
))
}
}
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().sort(reverse);
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"Incorrect type",
"sort cannot be done with this value",
@ -85,3 +119,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use polars::prelude::DataType;
@ -37,16 +37,38 @@ impl WholeStreamCommand for DataFrame {
Example {
description: "Takes selected rows from dataframe",
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dataframe to-df);
let indices = ([0 2] | dataframe to-series);
let indices = ([0 2] | dataframe to-df);
$df | dataframe take $indices"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![UntaggedValue::int(4).into(), UntaggedValue::int(4).into()],
),
Column::new(
"b".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Takes selected rows from series",
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
let indices = ([0 2] | dataframe to-series);
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df);
let indices = ([0 2] | dataframe to-df);
$series | dataframe take $indices"#,
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![UntaggedValue::int(4).into(), UntaggedValue::int(5).into()],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
@ -56,8 +78,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let value: Value = args.req(0)?;
let series = match &value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
let df = match &value.value {
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only use a series for take command",
@ -65,7 +87,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}?;
let casted = match series.as_ref().dtype() {
let series = df.as_series(&value.tag.span)?;
let casted = match series.dtype() {
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
.as_ref()
.cast_with_dtype(&DataType::UInt32)
@ -88,16 +112,11 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
})?;
match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
UntaggedValue::DataFrame(df) => {
let res = df.as_ref().take(indices);
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
let res = series.as_ref().take(indices);
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
}
_ => Err(ShellError::labeled_error(
"No dataframe or series in stream",
"no dataframe or series found in input stream",
@ -105,3 +124,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -64,7 +64,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let delimiter: Option<Tagged<String>> = args.get_flag("delimiter")?;
let no_header: bool = args.has_flag("no_header");
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let mut file = File::create(&file_name.item).map_err(|e| {
ShellError::labeled_error(

View File

@ -1,7 +1,10 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature};
use nu_protocol::{
dataframe::{Column, NuDataFrame},
Signature, UntaggedValue,
};
pub struct DataFrame;
@ -11,7 +14,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Converts a pipelined Table or List into a polars dataframe"
"Converts a List, Table or Dictionary into a polars dataframe"
}
fn signature(&self) -> Signature {
@ -27,10 +30,108 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes an input stream and converts it to a polars dataframe",
example: "[[a b];[1 2] [3 4]] | dataframe to-df",
result: None,
}]
vec![
Example {
description: "Takes a dictionary and creates a dataframe",
example: "[[a b];[1 2] [3 4]] | dataframe to-df",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
),
Column::new(
"b".to_string(),
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Takes a list of tables and creates a dataframe",
example: "[[1 2 a] [3 4 b] [5 6 c]] | dataframe to-df",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"0".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(3).into(),
UntaggedValue::int(5).into(),
],
),
Column::new(
"1".to_string(),
vec![
UntaggedValue::int(2).into(),
UntaggedValue::int(4).into(),
UntaggedValue::int(6).into(),
],
),
Column::new(
"2".to_string(),
vec![
UntaggedValue::string("a").into(),
UntaggedValue::string("b").into(),
UntaggedValue::string("c").into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Takes a list and creates a dataframe",
example: "[a b c] | dataframe to-df",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::string("a").into(),
UntaggedValue::string("b").into(),
UntaggedValue::string("c").into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
Example {
description: "Takes a list of booleans and creates a dataframe",
example: "[$true $true $false] | dataframe to-df",
result: Some(vec![NuDataFrame::try_from_columns(
vec![Column::new(
"0".to_string(),
vec![
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(true).into(),
UntaggedValue::boolean(false).into(),
],
)],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
},
]
}
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -48,7 +48,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let file_name: Tagged<PathBuf> = args.req(0)?;
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let file = File::create(&file_name.item).map_err(|e| {
ShellError::labeled_error(

View File

@ -1,44 +0,0 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_source::Tagged;
/// Command type backing `dataframe to-series`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name the command is invoked by.
    fn name(&self) -> &str {
        "dataframe to-series"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Converts a pipelined List into a polars series"
    }

    /// Declares a single optional positional argument: the series name.
    fn signature(&self) -> Signature {
        let signature = Signature::build("dataframe to-series");
        signature.optional("name", SyntaxShape::String, "Optional series name")
    }

    /// Builds a series from the whole input stream, passing along the
    /// optional name, and emits it as a single value tagged with the
    /// command's name tag.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        let name_tag = args.call_info.name_tag.clone();

        let series_name: Option<Tagged<String>> = args.opt(0)?;
        let series_name = series_name.map(|tagged| tagged.item);

        let series = NuSeries::try_from_iter(args.input, series_name)?;
        let value = series.into_value(name_tag);

        Ok(InputStream::one(value))
    }

    /// A single usage example with no recorded expected result.
    fn examples(&self) -> Vec<Example> {
        let example = Example {
            description: "Takes an input stream and converts it to a polars series",
            example: "[1 2 3 4] | dataframe to-series my-col",
            result: None,
        };

        vec![example]
    }
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::{evaluate_baseline_expr, WholeStreamCommand};
use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuDataFrame,
dataframe::{Column, NuDataFrame},
hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression},
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value,
};
@ -37,7 +37,15 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Filter dataframe based on column a",
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe where a == 1",
result: None,
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -143,7 +151,7 @@ fn filter_dataframe(
}?;
let span = args.call_info.name_tag.span;
let df = NuDataFrame::try_from_stream(&mut args.input, &span)?;
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &span)?;
let col = df
.as_ref()
@ -214,3 +222,16 @@ fn filter_dataframe(
args.call_info.name_tag,
)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -2,7 +2,7 @@ use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
dataframe::{Column, NuDataFrame},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_source::Tagged;
@ -33,8 +33,35 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Adds a series to the dataframe",
example:
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-series) --name c",
result: None,
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-df) --name c",
result: Some(vec![NuDataFrame::try_from_columns(
vec![
Column::new(
"a".to_string(),
vec![
UntaggedValue::int(1).into(),
UntaggedValue::int(3).into(),
],
),
Column::new(
"b".to_string(),
vec![
UntaggedValue::int(2).into(),
UntaggedValue::int(4).into(),
],
),
Column::new(
"c".to_string(),
vec![
UntaggedValue::int(5).into(),
UntaggedValue::int(6).into(),
],
),
],
&Span::default(),
)
.expect("simple df for test should not fail")
.into_value(Tag::default())]),
}]
}
}
@ -44,8 +71,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
let value: Value = args.req(0)?;
let name: Tagged<String> = args.req_named("name")?;
let mut series = match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
let df = match value.value {
UntaggedValue::DataFrame(df) => Ok(df),
_ => Err(ShellError::labeled_error(
"Incorrect type",
"can only add a series to a dataframe",
@ -53,9 +80,11 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
)),
}?;
let series = series.as_mut().rename(name.item.as_ref()).clone();
let mut series = df.as_series(&value.tag.span)?;
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let series = series.rename(name.item.as_ref()).clone();
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
df.as_mut()
.with_column(series)
@ -63,3 +92,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
Ok(OutputStream::one(df.into_value(tag)))
}
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Hands this command to the shared dataframe example harness; any
    /// error the harness returns fails the test.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}

View File

@ -126,11 +126,13 @@ fn first(args: CommandArgs) -> Result<OutputStream, ShellError> {
tag,
)),
#[cfg(all(not(target_arch = "wasm32"), feature = "dataframe"))]
UntaggedValue::DataFrame(_) => Err(ShellError::labeled_error(
"unsure how to handled UntaggedValue::DataFrame",
"found dataframe",
tag,
)),
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
Err(ShellError::labeled_error(
"unsure how to handled dataframe struct",
"found dataframe",
tag,
))
}
},
None => Ok(input_peek.take(rows_desired).into_output_stream()),
}

View File

@ -166,7 +166,7 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
))
}
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => {
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
return Err(ShellError::labeled_error(
"uniq -c cannot operate on data structs",
"source",

View File

@ -115,7 +115,7 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
serde_json::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => serde_json::Value::Null,
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_json::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
b.iter()
.map(|x| {

View File

@ -74,7 +74,9 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => toml::Value::String("<Data>".to_string()),
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
toml::Value::String("<Data>".to_string())
}
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -96,7 +96,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
serde_yaml::Value::Null
}
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => serde_yaml::Value::Null,
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_yaml::Value::Null,
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
b.iter()
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))

View File

@ -27,15 +27,16 @@ pub use core_commands::*;
pub use dataframe::{
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax,
DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn,
DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies,
DataFrameFilter, DataFrameFirst, DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated,
DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin,
DataFrameLast, DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFrameNot,
DataFrameOpen, DataFramePivot, DataFrameReplace, DataFrameSample, DataFrameSelect,
DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift,
DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTake, DataFrameToCsv, DataFrameToDF,
DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere,
DataFrameWithColumn,
DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop,
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst,
DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull,
DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt,
DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen, DataFramePivot,
DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect, DataFrameSeriesRename,
DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, DataFrameShow,
DataFrameSlice, DataFrameSort, DataFrameStringLengths, DataFrameStringSlice, DataFrameTake,
DataFrameToCsv, DataFrameToDF, DataFrameToLowercase, DataFrameToParquet, DataFrameToUppercase,
DataFrameUnique, DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn,
};
pub use env::*;
pub use filesystem::*;

View File

@ -9,7 +9,7 @@ use nu_protocol::{Primitive, Signature, UntaggedValue, Value};
use nu_table::TextStyle;
#[cfg(feature = "dataframe")]
use nu_protocol::dataframe::PolarsData;
use nu_protocol::dataframe::FrameStruct;
pub struct Command;
@ -239,7 +239,7 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)),
value: UntaggedValue::DataFrame(df),
tag,
} => {
if let Some(table) = table {
@ -253,7 +253,7 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::DataFrame(PolarsData::GroupBy(groupby)),
value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(groupby)),
tag,
} => {
if let Some(table) = table {
@ -265,20 +265,6 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
let _ = result.collect::<Vec<_>>();
}
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(series)),
tag,
} => {
if let Some(table) = table {
// TODO. Configure the parameter rows from file. It can be
// adjusted to see a certain amount of values in the head
let command_args =
create_default_command_args(&context, series.print()?.into(), tag);
let result = table.run(command_args)?;
let _ = result.collect::<Vec<_>>();
}
}
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..

View File

@ -287,7 +287,6 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(DataFramePivot),
whole_stream_command(DataFrameWhere),
whole_stream_command(DataFrameToDF),
whole_stream_command(DataFrameToSeries),
whole_stream_command(DataFrameToParquet),
whole_stream_command(DataFrameToCsv),
whole_stream_command(DataFrameSort),
@ -321,6 +320,13 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(DataFrameSetWithIdx),
whole_stream_command(DataFrameShape),
whole_stream_command(DataFrameReplace),
whole_stream_command(DataFrameReplaceAll),
whole_stream_command(DataFrameStringLengths),
whole_stream_command(DataFrameContains),
whole_stream_command(DataFrameToLowercase),
whole_stream_command(DataFrameToUppercase),
whole_stream_command(DataFrameStringSlice),
whole_stream_command(DataFrameConcatenate),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -14,6 +14,11 @@ use nu_protocol::hir::{ClassifiedBlock, ExternalRedirection};
use nu_protocol::{ShellTypeName, Value};
use nu_source::AnchorLocation;
#[cfg(feature = "dataframe")]
use crate::commands::{
DataFrameGroupBy, DataFrameIsNull, DataFrameShift, DataFrameToDF, DataFrameWithColumn,
};
use crate::commands::{
Append, BuildString, Each, Echo, First, Get, Keep, Last, Let, Math, MathMode, Nth, Select,
StrCollect, Wrap,
@ -149,6 +154,85 @@ pub fn test(cmd: impl WholeStreamCommand + 'static) -> Result<(), ShellError> {
Ok(())
}
/// Runs the examples of a dataframe command and checks their dataframe output.
///
/// Every example pipeline is parsed in a context that holds `cmd`, a few
/// dataframe commands and a set of base commands. Examples that declare a
/// `result` are additionally evaluated: the first value produced and the first
/// expected value must both be dataframes, and they are compared with
/// `assert_eq!`.
///
/// # Errors
///
/// Propagates any `ShellError` raised while parsing or evaluating an example.
///
/// # Panics
///
/// Panics when an evaluated example yields no value, when the expected result
/// vector is empty, when either side is not a dataframe, or when the two
/// dataframes differ.
#[cfg(feature = "dataframe")]
pub fn test_dataframe(cmd: impl WholeStreamCommand + 'static) -> Result<(), ShellError> {
    use nu_protocol::UntaggedValue;

    // The examples must be read before `cmd` is moved into the context.
    let examples = cmd.examples();

    let base_context = EvaluationContext::basic();
    base_context.add_commands(vec![
        whole_stream_command(cmd),
        // Commands used with dataframe
        whole_stream_command(DataFrameToDF),
        whole_stream_command(DataFrameShift),
        whole_stream_command(DataFrameIsNull),
        whole_stream_command(DataFrameGroupBy),
        whole_stream_command(DataFrameWithColumn),
        // Base commands for context
        whole_stream_command(Math),
        whole_stream_command(MathMode {}),
        whole_stream_command(Echo {}),
        whole_stream_command(BuildString {}),
        whole_stream_command(Get {}),
        whole_stream_command(Keep {}),
        whole_stream_command(Each {}),
        whole_stream_command(Let {}),
        whole_stream_command(Select),
        whole_stream_command(StrCollect),
        whole_stream_command(Wrap),
    ]);

    for sample_pipeline in examples {
        let mut ctx = base_context.clone();

        println!("{:?}", &sample_pipeline.example);
        let block = parse_line(sample_pipeline.example, &ctx)?;

        // Examples without an expected result are only checked for parsing.
        let expected_values = match &sample_pipeline.result {
            Some(values) => values,
            None => continue,
        };

        let start = std::time::Instant::now();
        let result = evaluate_block(block, &mut ctx)?;

        println!("input: {}", sample_pipeline.example);
        println!("result: {:?}", result);
        println!("done: {:?}", start.elapsed());

        let produced = result.get(0).unwrap_or_else(|| {
            panic!(
                "Unable to extract a value after parsing example: {}",
                sample_pipeline.example
            )
        });

        let df = if let UntaggedValue::DataFrame(df) = &produced.value {
            df
        } else {
            panic!(
                "Unable to extract dataframe from parsed example: {}",
                sample_pipeline.example
            )
        };

        let first_expected = expected_values
            .get(0)
            .unwrap_or_else(|| panic!("Empty vector in result example"));

        let df_expected = if let UntaggedValue::DataFrame(df) = &first_expected.value {
            df
        } else {
            panic!("Unable to extract dataframe from example result")
        };

        println!("expected: {:?}", df_expected);

        assert_eq!(df, df_expected);
    }

    Ok(())
}
pub fn test_anchors(cmd: Command) -> Result<(), ShellError> {
let examples = cmd.examples();

View File

@ -131,7 +131,7 @@ impl InlineShape {
UntaggedValue::Error(_) => InlineShape::Error,
UntaggedValue::Block(_) => InlineShape::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => InlineShape::DataFrame,
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => InlineShape::DataFrame,
}
}

View File

@ -117,7 +117,9 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => toml::Value::String("<DataFrame>".to_string()),
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
toml::Value::String("<DataFrame>".to_string())
}
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())

View File

@ -1,218 +1,291 @@
use bigdecimal::BigDecimal;
use nu_errors::ShellError;
use nu_protocol::dataframe::NuDataFrame;
use nu_protocol::hir::Operator;
use nu_protocol::{
dataframe::{NuSeries, PolarsData},
Primitive, ShellTypeName, UntaggedValue, Value,
};
use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value};
use nu_source::Span;
use num_traits::ToPrimitive;
use polars::prelude::{
BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
NumOpsDispatchChecked, PolarsError, Series,
BooleanType, ChunkCompare, ChunkedArray, DataFrame, DataType, Float64Type, Int64Type,
IntoSeries, NumOpsDispatchChecked, PolarsError, Series,
};
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
pub fn compute_between_series(
pub fn compute_between_dataframes(
operator: Operator,
left: &Value,
right: &Value,
) -> Result<UntaggedValue, (&'static str, &'static str)> {
if let (
UntaggedValue::DataFrame(PolarsData::Series(lhs)),
UntaggedValue::DataFrame(PolarsData::Series(rhs)),
) = (&left.value, &right.value)
if let (UntaggedValue::DataFrame(lhs), UntaggedValue::DataFrame(rhs)) =
(&left.value, &right.value)
{
if lhs.as_ref().dtype() != rhs.as_ref().dtype() {
return Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Mixed datatypes",
"this datatype does not match the right hand side datatype",
&left.tag.span,
format!(
"Perhaps you want to change this datatype to '{}'",
lhs.as_ref().dtype()
),
&right.tag.span,
),
));
}
let operation_span = left.tag.span.until(right.tag.span);
match (lhs.is_series(), rhs.is_series()) {
(true, true) => {
let lhs = &lhs
.as_series(&left.tag.span)
.expect("Already checked that is a series");
let rhs = &rhs
.as_series(&right.tag.span)
.expect("Already checked that is a series");
if lhs.as_ref().len() != rhs.as_ref().len() {
return Ok(UntaggedValue::Error(ShellError::labeled_error(
"Different length",
"this column length does not match the right hand column length",
&left.tag.span,
)));
}
if lhs.dtype() != rhs.dtype() {
return Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Mixed datatypes",
"this datatype does not match the right hand side datatype",
&left.tag.span,
format!(
"Perhaps you want to change this datatype to '{}'",
lhs.as_ref().dtype()
),
&right.tag.span,
),
));
}
match operator {
Operator::Plus => {
let mut res = lhs.as_ref() + rhs.as_ref();
let name = format!("sum_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::Minus => {
let mut res = lhs.as_ref() - rhs.as_ref();
let name = format!("sub_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::Multiply => {
let mut res = lhs.as_ref() * rhs.as_ref();
let name = format!("mul_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::Divide => {
let res = lhs.as_ref().checked_div(rhs.as_ref());
match res {
Ok(mut res) => {
let name = format!("div_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Division error",
format!("{}", e),
if lhs.len() != rhs.len() {
return Ok(UntaggedValue::Error(ShellError::labeled_error(
"Different length",
"this column length does not match the right hand column length",
&left.tag.span,
))),
)));
}
}
Operator::Equal => {
let mut res = Series::eq(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("eq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::NotEqual => {
let mut res = Series::neq(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("neq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::LessThan => {
let mut res = Series::lt(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("lt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::LessThanOrEqual => {
let mut res = Series::lt_eq(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("lte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::GreaterThan => {
let mut res = Series::gt(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("gt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::GreaterThanOrEqual => {
let mut res = Series::gt_eq(lhs.as_ref(), rhs.as_ref()).into_series();
let name = format!("gte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
Operator::And => match lhs.as_ref().dtype() {
DataType::Boolean => {
let lhs_cast = lhs.as_ref().bool();
let rhs_cast = rhs.as_ref().bool();
match (lhs_cast, rhs_cast) {
(Ok(l), Ok(r)) => {
let mut res = l.bitand(r).into_series();
let name =
format!("and_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Casting error",
"unable to cast to boolean",
&left.tag.span,
"unable to cast to boolean",
&right.tag.span,
),
)),
}
compute_between_series(operator, lhs, rhs, &operation_span)
}
_ => {
if lhs.as_ref().height() != rhs.as_ref().height() {
return Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Mixed datatypes",
"this datatype size does not match the right hand side datatype",
&left.tag.span,
"Perhaps you want to select another dataframe with same number of rows",
&right.tag.span,
),
));
}
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Incorrect datatype",
"And operation can only be done with boolean values",
&left.tag.span,
))),
},
Operator::Or => match lhs.as_ref().dtype() {
DataType::Boolean => {
let lhs_cast = lhs.as_ref().bool();
let rhs_cast = rhs.as_ref().bool();
match (lhs_cast, rhs_cast) {
(Ok(l), Ok(r)) => {
let mut res = l.bitor(r).into_series();
let name =
format!("or_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
res.rename(name.as_ref());
Ok(NuSeries::series_to_untagged(res))
}
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Casting error",
"unable to cast to boolean",
&left.tag.span,
"unable to cast to boolean",
&right.tag.span,
),
)),
}
}
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Incorrect datatype",
"And operation can only be done with boolean values",
&left.tag.span,
))),
},
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Incorrect datatype",
"unable to use this datatype for this operation",
&left.tag.span,
))),
between_dataframes(operator, lhs, rhs, &operation_span)
}
}
} else {
Err((left.type_name(), right.type_name()))
}
}
pub fn between_dataframes(
operator: Operator,
lhs: &NuDataFrame,
rhs: &NuDataFrame,
operation_span: &Span,
) -> Result<UntaggedValue, (&'static str, &'static str)> {
match operator {
Operator::Plus => {
let mut columns: Vec<&str> = Vec::new();
let new = lhs
.as_ref()
.get_columns()
.iter()
.chain(rhs.as_ref().get_columns().iter())
.map(|s| {
let name = if columns.contains(&s.name()) {
format!("{}_{}", s.name(), "x")
} else {
columns.push(s.name());
s.name().to_string()
};
let mut series = s.clone();
series.rename(name.as_str());
series
})
.collect::<Vec<Series>>();
match DataFrame::new(new) {
Ok(df) => Ok(NuDataFrame::dataframe_to_untagged(df)),
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Appending error",
format!("{}", e),
operation_span,
))),
}
}
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
"Incorrect datatype",
"unable to use this datatype for this operation",
operation_span,
))),
}
}
/// Applies a binary operator between two series.
///
/// Supported operators are the arithmetic ones (`Plus`, `Minus`, `Multiply`,
/// `Divide`), the comparisons (`Equal`, `NotEqual`, `LessThan`,
/// `LessThanOrEqual`, `GreaterThan`, `GreaterThanOrEqual`) and the boolean
/// `And` / `Or`. The resulting series is renamed to
/// `<op>_<lhs name>_<rhs name>` (for example `sum_a_b`) before being wrapped
/// into a value.
///
/// `And` and `Or` require the left-hand series to have the boolean datatype.
///
/// Failures are never returned through `Err`. A division error, a failed
/// boolean cast, a non-boolean input to `And`/`Or` or an unsupported operator
/// is reported as an `UntaggedValue::Error` labelled with `operation_span`.
pub fn compute_between_series(
    operator: Operator,
    lhs: &Series,
    rhs: &Series,
    operation_span: &Span,
) -> Result<UntaggedValue, (&'static str, &'static str)> {
    match operator {
        Operator::Plus => Ok(name_series_result(
            lhs + rhs,
            "sum",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::Minus => Ok(name_series_result(
            lhs - rhs,
            "sub",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::Multiply => Ok(name_series_result(
            lhs * rhs,
            "mul",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::Divide => match lhs.checked_div(rhs) {
            Ok(res) => Ok(name_series_result(res, "div", lhs, rhs, operation_span)),
            Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
                "Division error",
                format!("{}", e),
                operation_span,
            ))),
        },
        Operator::Equal => Ok(name_series_result(
            Series::eq(lhs, rhs).into_series(),
            "eq",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::NotEqual => Ok(name_series_result(
            Series::neq(lhs, rhs).into_series(),
            "neq",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::LessThan => Ok(name_series_result(
            Series::lt(lhs, rhs).into_series(),
            "lt",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::LessThanOrEqual => Ok(name_series_result(
            Series::lt_eq(lhs, rhs).into_series(),
            "lte",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::GreaterThan => Ok(name_series_result(
            Series::gt(lhs, rhs).into_series(),
            "gt",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::GreaterThanOrEqual => Ok(name_series_result(
            Series::gt_eq(lhs, rhs).into_series(),
            "gte",
            lhs,
            rhs,
            operation_span,
        )),
        Operator::And => match lhs.dtype() {
            DataType::Boolean => match (lhs.bool(), rhs.bool()) {
                (Ok(l), Ok(r)) => Ok(name_series_result(
                    l.bitand(r).into_series(),
                    "and",
                    lhs,
                    rhs,
                    operation_span,
                )),
                _ => Ok(UntaggedValue::Error(ShellError::labeled_error(
                    "Casting error",
                    "unable to cast to boolean",
                    operation_span,
                ))),
            },
            _ => Ok(UntaggedValue::Error(ShellError::labeled_error(
                "Incorrect datatype",
                "And operation can only be done with boolean values",
                operation_span,
            ))),
        },
        Operator::Or => match lhs.dtype() {
            DataType::Boolean => match (lhs.bool(), rhs.bool()) {
                (Ok(l), Ok(r)) => Ok(name_series_result(
                    l.bitor(r).into_series(),
                    "or",
                    lhs,
                    rhs,
                    operation_span,
                )),
                _ => Ok(UntaggedValue::Error(ShellError::labeled_error(
                    "Casting error",
                    "unable to cast to boolean",
                    operation_span,
                ))),
            },
            // The message names the operator actually used; it previously said "And".
            _ => Ok(UntaggedValue::Error(ShellError::labeled_error(
                "Incorrect datatype",
                "Or operation can only be done with boolean values",
                operation_span,
            ))),
        },
        _ => Ok(UntaggedValue::Error(ShellError::labeled_error(
            "Incorrect datatype",
            "unable to use this datatype for this operation",
            operation_span,
        ))),
    }
}

/// Renames `res` to `<prefix>_<lhs name>_<rhs name>` and wraps it into a value.
fn name_series_result(
    mut res: Series,
    prefix: &str,
    lhs: &Series,
    rhs: &Series,
    span: &Span,
) -> UntaggedValue {
    let name = format!("{}_{}_{}", prefix, lhs.name(), rhs.name());
    res.rename(name.as_str());
    NuDataFrame::series_to_untagged(res, span)
}
pub fn compute_series_single_value(
operator: Operator,
left: &Value,
right: &Value,
) -> Result<UntaggedValue, (&'static str, &'static str)> {
if let (UntaggedValue::DataFrame(PolarsData::Series(lhs)), UntaggedValue::Primitive(_)) =
if let (UntaggedValue::DataFrame(lhs), UntaggedValue::Primitive(_)) =
(&left.value, &right.value)
{
let lhs = match lhs.as_series(&left.tag.span) {
Ok(series) => series,
Err(e) => return Ok(UntaggedValue::Error(e)),
};
match operator {
Operator::Plus => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Int64Type>>::add,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
<ChunkedArray<Int64Type>>::add,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Float64Type>>::add,
&left.tag.span,
@ -229,20 +302,20 @@ pub fn compute_series_single_value(
},
Operator::Minus => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Int64Type>>::sub,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
<ChunkedArray<Int64Type>>::sub,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Float64Type>>::sub,
&left.tag.span,
@ -259,20 +332,20 @@ pub fn compute_series_single_value(
},
Operator::Multiply => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Int64Type>>::mul,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
<ChunkedArray<Int64Type>>::mul,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Float64Type>>::mul,
&left.tag.span,
@ -297,7 +370,7 @@ pub fn compute_series_single_value(
)))
} else {
Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Int64Type>>::div,
&left.tag.span,
@ -313,7 +386,7 @@ pub fn compute_series_single_value(
)))
} else {
Ok(compute_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
<ChunkedArray<Int64Type>>::div,
@ -330,7 +403,7 @@ pub fn compute_series_single_value(
)))
} else {
Ok(compute_series_decimal(
lhs.as_ref(),
&lhs,
val,
<ChunkedArray<Float64Type>>::div,
&left.tag.span,
@ -350,20 +423,20 @@ pub fn compute_series_single_value(
Operator::Equal => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
val,
ChunkedArray::eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::eq, &left.tag.span),
compare_series_decimal(&lhs, val, ChunkedArray::eq, &left.tag.span),
),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
@ -376,53 +449,52 @@ pub fn compute_series_single_value(
)),
}
}
Operator::NotEqual => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
val,
ChunkedArray::neq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::neq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
lhs.as_ref(),
val,
ChunkedArray::neq,
&left.tag.span,
)),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
Operator::NotEqual => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
&lhs,
val,
ChunkedArray::neq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::neq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(&lhs, val, ChunkedArray::neq, &left.tag.span),
),
)),
},
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
),
)),
}
}
Operator::LessThan => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
val,
ChunkedArray::lt,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::lt,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::lt, &left.tag.span),
compare_series_decimal(&lhs, val, ChunkedArray::lt, &left.tag.span),
),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
@ -435,53 +507,52 @@ pub fn compute_series_single_value(
)),
}
}
Operator::LessThanOrEqual => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
val,
ChunkedArray::lt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::lt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
lhs.as_ref(),
val,
ChunkedArray::lt_eq,
&left.tag.span,
)),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
Operator::LessThanOrEqual => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
&lhs,
val,
ChunkedArray::lt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::lt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(&lhs, val, ChunkedArray::lt_eq, &left.tag.span),
),
)),
},
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
),
)),
}
}
Operator::GreaterThan => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
val,
ChunkedArray::gt,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::gt,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::gt, &left.tag.span),
compare_series_decimal(&lhs, val, ChunkedArray::gt, &left.tag.span),
),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
@ -494,39 +565,38 @@ pub fn compute_series_single_value(
)),
}
}
Operator::GreaterThanOrEqual => match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
lhs.as_ref(),
val,
ChunkedArray::gt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
lhs.as_ref(),
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::gt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
lhs.as_ref(),
val,
ChunkedArray::gt_eq,
&left.tag.span,
)),
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
Operator::GreaterThanOrEqual => {
match &right.value {
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
&lhs,
val,
ChunkedArray::gt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
&lhs,
&val.to_i64()
.expect("Internal error: protocol did not use compatible decimal"),
ChunkedArray::gt_eq,
&left.tag.span,
)),
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
compare_series_decimal(&lhs, val, ChunkedArray::gt_eq, &left.tag.span),
),
)),
},
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
"Operation unavailable",
"unable to compare this value to the series",
&right.tag.span,
"Only primary values are allowed",
&right.tag.span,
),
)),
}
}
Operator::Contains => match &right.value {
UntaggedValue::Primitive(Primitive::String(val)) => {
Ok(contains_series_pat(lhs.as_ref(), val, &left.tag.span))
Ok(contains_series_pat(&lhs, val, &left.tag.span))
}
_ => Ok(UntaggedValue::Error(
ShellError::labeled_error_with_secondary(
@ -597,7 +667,7 @@ where
Ok(casted) => {
let res = f(casted.clone(), val);
let res = res.into_series();
NuSeries::series_to_untagged(res)
NuDataFrame::series_to_untagged(res, span)
}
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"Casting error",
@ -667,7 +737,7 @@ where
Ok(casted) => {
let res = f(casted.clone(), val);
let res = res.into_series();
NuSeries::series_to_untagged(res)
NuDataFrame::series_to_untagged(res, span)
}
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"Casting error",
@ -725,7 +795,7 @@ where
Ok(casted) => {
let res = f(casted, val);
let res = res.into_series();
NuSeries::series_to_untagged(res)
NuDataFrame::series_to_untagged(res, span)
}
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"Casting error",
@ -795,7 +865,7 @@ where
Ok(casted) => {
let res = f(casted, val);
let res = res.into_series();
NuSeries::series_to_untagged(res)
NuDataFrame::series_to_untagged(res, span)
}
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"Casting error",
@ -814,7 +884,7 @@ fn contains_series_pat(series: &Series, pat: &str, span: &Span) -> UntaggedValue
match res {
Ok(res) => {
let res = res.into_series();
NuSeries::series_to_untagged(res)
NuDataFrame::series_to_untagged(res, span)
}
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"Search error",

View File

@ -5,9 +5,7 @@ use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value};
use std::ops::Not;
#[cfg(feature = "dataframe")]
use nu_data::dataframe::{compute_between_series, compute_series_single_value};
#[cfg(feature = "dataframe")]
use nu_protocol::dataframe::PolarsData;
use nu_data::dataframe::{compute_between_dataframes, compute_series_single_value};
pub fn apply_operator(
op: Operator,
@ -15,13 +13,10 @@ pub fn apply_operator(
right: &Value,
) -> Result<UntaggedValue, (&'static str, &'static str)> {
#[cfg(feature = "dataframe")]
if let (
UntaggedValue::DataFrame(PolarsData::Series(_)),
UntaggedValue::DataFrame(PolarsData::Series(_)),
) = (&left.value, &right.value)
if let (UntaggedValue::DataFrame(_), UntaggedValue::DataFrame(_)) = (&left.value, &right.value)
{
return compute_between_series(op, left, right);
} else if let (UntaggedValue::DataFrame(PolarsData::Series(_)), UntaggedValue::Primitive(_)) =
return compute_between_dataframes(op, left, right);
} else if let (UntaggedValue::DataFrame(_), UntaggedValue::Primitive(_)) =
(&left.value, &right.value)
{
return compute_series_single_value(op, left, right);

View File

@ -1,15 +1,11 @@
pub mod nu_dataframe;
pub mod nu_groupby;
pub mod nu_series;
pub use nu_dataframe::NuDataFrame;
pub use nu_dataframe::{Column, NuDataFrame};
pub use nu_groupby::NuGroupBy;
pub use nu_series::NuSeries;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum PolarsData {
EagerDataFrame(NuDataFrame),
pub enum FrameStruct {
GroupBy(NuGroupBy),
Series(NuSeries),
}

View File

@ -1,54 +1,147 @@
use indexmap::{map::Entry, IndexMap};
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use std::ops::{Deref, DerefMut};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::{Span, Tag};
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use polars::prelude::{AnyValue, DataFrame, DataType, NamedFrom, Series, TimeUnit};
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
enum InputValue {
Integer,
Decimal,
String,
pub struct Column {
name: String,
values: Vec<Value>,
}
impl Column {
pub fn new(name: String, values: Vec<Value>) -> Self {
Self { name, values }
}
pub fn new_empty(name: String) -> Self {
Self {
name,
values: Vec::new(),
}
}
pub fn push(&mut self, value: Value) {
self.values.push(value)
}
}
#[derive(Debug)]
struct ColumnValues {
pub value_type: InputValue,
pub values: Vec<Value>,
enum InputType {
Integer,
Decimal,
String,
Boolean,
}
impl Default for ColumnValues {
fn default() -> Self {
#[derive(Debug)]
struct TypedColumn {
pub column: Column,
pub column_type: Option<InputType>,
}
impl TypedColumn {
fn new_empty(name: String) -> Self {
Self {
value_type: InputValue::Integer,
values: Vec::new(),
column: Column::new_empty(name),
column_type: None,
}
}
}
type ColumnMap = HashMap<String, ColumnValues>;
// Read-only access to the wrapped `Column`: dereferencing a `TypedColumn`
// yields a reference to its `column` field.
impl Deref for TypedColumn {
type Target = Column;
fn deref(&self) -> &Self::Target {
&self.column
}
}
// Mutable access to the wrapped `Column`: dereferencing a `TypedColumn`
// mutably yields a mutable reference to its `column` field.
impl DerefMut for TypedColumn {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.column
}
}
type ColumnMap = IndexMap<String, TypedColumn>;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuDataFrame {
dataframe: DataFrame,
}
// TODO. Better definition of equality and comparison for a dataframe.
// Probably it make sense to have a name field and use it for comparisons
// Dataframes are considered equal if they have the same shape, column name
// and values
impl PartialEq for NuDataFrame {
fn eq(&self, _: &Self) -> bool {
false
fn eq(&self, other: &Self) -> bool {
if self.as_ref().width() == 0 {
// checking for empty dataframe
return false;
}
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
// checking both dataframes share the same names
return false;
}
if self.as_ref().height() != other.as_ref().height() {
// checking both dataframes have the same row size
return false;
}
// sorting dataframe by the first column
let column_names = self.as_ref().get_column_names();
let first_col = column_names
.get(0)
.expect("already checked that dataframe is different than 0");
// if unable to sort, then unable to compare
let lhs = match self.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return false,
};
let rhs = match other.as_ref().sort(*first_col, false) {
Ok(df) => df,
Err(_) => return false,
};
for name in self.as_ref().get_column_names() {
let self_series = lhs.column(name).expect("name from dataframe names");
let other_series = rhs
.column(name)
.expect("already checked that name in other");
let self_series = match self_series.dtype() {
// Casting needed to compare other numeric types with nushell numeric type.
// In nushell we only have i64 integer numeric types and any array created
// with nushell untagged primitives will be of type i64
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
Ok(series) => series,
Err(_) => return false,
},
_ => self_series.clone(),
};
if !self_series.series_equal(&other_series) {
return false;
}
}
true
}
}
@ -87,14 +180,14 @@ impl NuDataFrame {
NuDataFrame { dataframe }
}
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<(Self, Tag), ShellError>
where
T: Iterator<Item = Value>,
{
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
UntaggedValue::DataFrame(df) => Some((df, value.tag)),
_ => None,
})
.ok_or_else(|| {
@ -113,41 +206,127 @@ impl NuDataFrame {
// Dictionary to store the columnar data extracted from
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = HashMap::new();
let mut column_values: ColumnMap = IndexMap::new();
for value in iter {
match value.value {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
UntaggedValue::Primitive(Primitive::Int(_))
| UntaggedValue::Primitive(Primitive::Decimal(_))
| UntaggedValue::Primitive(Primitive::String(_))
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
let key = format!("{}", 0);
insert_value(value, key, &mut column_values)?
}
_ => {
return Err(ShellError::labeled_error_with_secondary(
"Format not supported",
"Value not supported for conversion",
&value.tag,
"Perhaps you want to use a List of Tables or a Dictionary",
"Perhaps you want to use a List, a List of Tables or a Dictionary",
&value.tag,
));
}
}
}
from_parsed_columns(column_values, tag)
from_parsed_columns(column_values, &tag.span)
}
/// Builds a `NuDataFrame` out of already constructed polars series.
///
/// # Errors
///
/// Returns a "DataFrame Creation" `ShellError` labeled at `span` when
/// polars refuses to build a dataframe from `columns`.
pub fn try_from_series(columns: Vec<Series>, span: &Span) -> Result<Self, ShellError> {
    match DataFrame::new(columns) {
        Ok(dataframe) => Ok(Self { dataframe }),
        Err(err) => Err(ShellError::labeled_error(
            "DataFrame Creation",
            format!("Unable to create DataFrame: {}", err),
            span,
        )),
    }
}
pub fn try_from_columns(columns: Vec<Column>, span: &Span) -> Result<Self, ShellError> {
let mut column_values: ColumnMap = IndexMap::new();
for column in columns {
for value in column.values {
insert_value(value, column.name.clone(), &mut column_values)?;
}
}
from_parsed_columns(column_values, span)
}
pub fn into_value(self, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
value: Self::into_untagged(self),
tag,
}
}
/// Consumes the dataframe and wraps it in `UntaggedValue::DataFrame`.
pub fn into_untagged(self) -> UntaggedValue {
UntaggedValue::DataFrame(self)
}
pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
value: Self::dataframe_to_untagged(df),
tag,
}
}
/// Wraps a polars `DataFrame` in a `NuDataFrame` (via `Self::new`) and
/// returns it as `UntaggedValue::DataFrame`.
pub fn dataframe_to_untagged(df: DataFrame) -> UntaggedValue {
UntaggedValue::DataFrame(Self::new(df))
}
pub fn series_to_untagged(series: Series, span: &Span) -> UntaggedValue {
match DataFrame::new(vec![series]) {
Ok(dataframe) => UntaggedValue::DataFrame(Self { dataframe }),
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
"DataFrame Creation",
format!("Unable to create DataFrame: {}", e),
span,
)),
}
}
/// Extracts the column called `column` as a new one-column dataframe.
///
/// # Errors
///
/// * "Column not found" when the lookup in the underlying dataframe fails.
/// * "DataFrame error" when the cloned series cannot be wrapped in a
///   dataframe.
///
/// Both errors are labeled with `tag.span`.
pub fn column(&self, column: &str, tag: &Tag) -> Result<Self, ShellError> {
    let series = match self.as_ref().column(column) {
        Ok(found) => found.clone(),
        Err(err) => {
            return Err(ShellError::labeled_error(
                "Column not found",
                err.to_string(),
                tag.span,
            ))
        }
    };

    match DataFrame::new(vec![series]) {
        Ok(dataframe) => Ok(Self { dataframe }),
        Err(err) => Err(ShellError::labeled_error(
            "DataFrame error",
            err.to_string(),
            tag.span,
        )),
    }
}
/// Returns `true` when the dataframe has exactly one column (width of 1),
/// which is what this module treats as a series.
pub fn is_series(&self) -> bool {
self.as_ref().width() == 1
}
/// Returns a clone of the only column of a one-column dataframe.
///
/// # Errors
///
/// Returns a "Not a Series" `ShellError` labeled at `span` when
/// `is_series` reports that the dataframe is not a single column.
pub fn as_series(&self, span: &Span) -> Result<Series, ShellError> {
    if self.is_series() {
        let only_column = self
            .as_ref()
            .get_columns()
            .first()
            .expect("We have already checked that the width is 1");

        Ok(only_column.clone())
    } else {
        Err(ShellError::labeled_error_with_secondary(
            "Not a Series",
            "DataFrame cannot be used as Series",
            span,
            "Note that a Series is a DataFrame with one column",
            span,
        ))
    }
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
let df = &self.as_ref();
@ -188,24 +367,17 @@ impl NuDataFrame {
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
let df = self.as_ref();
let column_names = df.get_column_names();
let upper_row = to_row.min(df.height());
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
for col in df.get_columns() {
let dict_val = Value {
value: untagged_val,
value: anyvalue_to_untagged(&col.get(i))?,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
dictionary_row.insert(col.name().into(), dict_val);
}
let value = Value {
@ -213,7 +385,7 @@ impl NuDataFrame {
tag: Tag::unknown(),
};
values.push(value);
values.push(value)
}
Ok(values)
@ -336,8 +508,8 @@ fn insert_value(
key: String,
column_values: &mut ColumnMap,
) -> Result<(), ShellError> {
let col_val = match column_values.entry(key) {
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
let col_val = match column_values.entry(key.clone()) {
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
Entry::Occupied(entry) => entry.into_mut(),
};
@ -346,13 +518,16 @@ fn insert_value(
if col_val.values.is_empty() {
match &value.value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
col_val.value_type = InputValue::Integer;
col_val.column_type = Some(InputType::Integer);
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
col_val.value_type = InputValue::Decimal;
col_val.column_type = Some(InputType::Decimal);
}
UntaggedValue::Primitive(Primitive::String(_)) => {
col_val.value_type = InputValue::String;
col_val.column_type = Some(InputType::String);
}
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
col_val.column_type = Some(InputType::Boolean);
}
_ => {
return Err(ShellError::labeled_error(
@ -378,6 +553,10 @@ fn insert_value(
| (
UntaggedValue::Primitive(Primitive::String(_)),
UntaggedValue::Primitive(Primitive::String(_)),
)
| (
UntaggedValue::Primitive(Primitive::Boolean(_)),
UntaggedValue::Primitive(Primitive::Boolean(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error_with_secondary(
@ -397,27 +576,35 @@ fn insert_value(
// The ColumnMap has the parsed data from the StreamInput
// This data can be used to create a Series object that can initialize
// the dataframe based on the type of data that is found
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
fn from_parsed_columns(column_values: ColumnMap, span: &Span) -> Result<NuDataFrame, ShellError> {
let mut df_series: Vec<Series> = Vec::new();
for (name, column) in column_values {
match column.value_type {
InputValue::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputValue::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
if let Some(column_type) = &column.column_type {
match column_type {
InputType::Decimal => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::String => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_string()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
InputType::Boolean => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_bool()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
}
}
}
@ -430,7 +617,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
return Err(ShellError::labeled_error(
"Error while creating dataframe",
format!("{}", e),
tag,
span,
))
}
}

View File

@ -2,7 +2,7 @@ use nu_source::{Span, Tag};
use polars::frame::groupby::{GroupBy, GroupTuples};
use serde::{Deserialize, Serialize};
use super::{NuDataFrame, PolarsData};
use super::{FrameStruct, NuDataFrame};
use nu_errors::ShellError;
use crate::{TaggedDictBuilder, UntaggedValue, Value};
@ -30,7 +30,7 @@ impl NuGroupBy {
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
UntaggedValue::FrameStruct(FrameStruct::GroupBy(group)) => Some(group),
_ => None,
})
.ok_or_else(|| {

View File

@ -1,345 +0,0 @@
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::vec;
use nu_errors::ShellError;
use nu_source::{Span, Tag};
use polars::prelude::{DataType, NamedFrom, Series};
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
/// Nushell wrapper around a polars `Series`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuSeries {
// The wrapped polars series.
series: Series,
// Textual form of the series dtype, captured once in `NuSeries::new`.
dtype: String,
}
// TODO: Provide a better definition of equality and comparison for a series.
// It probably makes sense to have a name field and use it for comparisons.
//
// Placeholder: `eq` ignores both operands and always returns `false`, so no
// series ever compares equal, not even to itself.
// NOTE(review): `Eq` is also implemented for this type, and `Eq` requires
// `a == a` to hold; this placeholder does not satisfy that.
impl PartialEq for NuSeries {
fn eq(&self, _: &Self) -> bool {
false
}
}
// Marker impl; adds no behavior beyond `PartialEq`.
impl Eq for NuSeries {}
// Placeholder ordering: every pair of series is reported as
// `Some(Ordering::Equal)`, regardless of contents.
impl PartialOrd for NuSeries {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
// Placeholder total ordering: every pair of series compares as
// `Ordering::Equal`, regardless of contents.
impl Ord for NuSeries {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
// Placeholder hash: nothing is written to the hasher, so the series
// contents never influence the resulting hash.
impl Hash for NuSeries {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl NuSeries {
pub fn new(series: Series) -> Self {
let dtype = series.dtype().to_string();
NuSeries { series, dtype }
}
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
where
T: Iterator<Item = Value>,
{
input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
_ => None,
})
.ok_or_else(|| {
ShellError::labeled_error(
"No series in stream",
"no series found in input stream",
span,
)
})
}
pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
let mut vec_values: Vec<Value> = Vec::new();
for value in iter {
match value.value {
UntaggedValue::Primitive(Primitive::Int(_))
| UntaggedValue::Primitive(Primitive::Decimal(_))
| UntaggedValue::Primitive(Primitive::String(_))
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
insert_value(value, &mut vec_values)?
}
_ => {
return Err(ShellError::labeled_error_with_secondary(
"Format not supported",
"Value not supported for conversion",
&value.tag.span,
"Perhaps you want to use a list of primitive values (int, decimal, string, or bool)",
&value.tag.span,
));
}
}
}
from_parsed_vector(vec_values, name)
}
pub fn into_value(self, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(self)),
tag,
}
}
pub fn series_to_value(series: Series, tag: Tag) -> Value {
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))),
tag,
}
}
pub fn series_to_untagged(series: Series) -> UntaggedValue {
UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
}
pub fn dtype(&self) -> &str {
&self.dtype
}
pub fn series(self) -> Series {
self.series
}
}
// Borrows the wrapped polars series.
impl AsRef<Series> for NuSeries {
fn as_ref(&self) -> &Series {
&self.series
}
}
// Mutably borrows the wrapped polars series.
// NOTE(review): the cached `dtype` string is not refreshed here, so it can
// drift if a caller changes the series type through this reference.
impl AsMut<Series> for NuSeries {
fn as_mut(&mut self) -> &mut Series {
&mut self.series
}
}
// Converts a series into printable rows, one single-column row per value.
//
// `$converter` is the fallible conversion of the series into a typed chunked
// array (e.g. `self.as_ref().i64()`); `$self` is the `NuSeries` being
// printed. The expansion evaluates to `Result<Vec<Value>, ShellError>` and
// uses `?`, so it must be expanded inside a function returning that `Result`.
//
// Series with at most `size` values are printed in full. Longer series are
// printed as the first `size / 2` values, a "..." marker row, and the last
// `size / 2` values. Every row, including rows for null values, is a
// dictionary keyed by "<name> (<dtype>)".
macro_rules! series_to_chunked {
    ($converter: expr, $self: expr) => {{
        let chunked_array = $converter.map_err(|e| {
            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
        })?;

        let size = 20;
        let half = size / 2;
        let len = $self.as_ref().len();
        let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());

        // Wraps a single cell into a one-column row under the shared header.
        let to_row = |value: UntaggedValue| {
            let mut dictionary_row = Dictionary::default();
            dictionary_row.insert(
                header.clone(),
                Value {
                    value,
                    tag: Tag::unknown(),
                },
            );

            Value {
                value: UntaggedValue::Row(dictionary_row),
                tag: Tag::unknown(),
            }
        };

        // Only truncate when the series does not fit completely; a series of
        // exactly `size` values is shown in full without a "..." marker.
        let (head_size, tail_size) = if len > size { (half, half) } else { (len, 0) };

        let head = chunked_array
            .into_iter()
            .take(head_size)
            .map(|value| match value {
                Some(v) => to_row(UntaggedValue::Primitive(v.into())),
                None => to_row(UntaggedValue::Primitive(Primitive::Nothing)),
            });

        let res = if tail_size == 0 {
            head.collect::<Vec<Value>>()
        } else {
            let middle = std::iter::once(to_row(UntaggedValue::Primitive("...".into())));

            // Skip up to the last `tail_size` values so the tail really is
            // the end of the series rather than the rows following the head.
            let tail = chunked_array
                .into_iter()
                .skip(len - tail_size)
                .take(tail_size)
                .map(|value| match value {
                    Some(v) => to_row(UntaggedValue::Primitive(v.into())),
                    None => to_row(UntaggedValue::Primitive(Primitive::Nothing)),
                });

            head.chain(middle).chain(tail).collect::<Vec<Value>>()
        };

        Ok(res)
    }};
}
impl NuSeries {
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
match self.as_ref().dtype() {
DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
DataType::Null => Ok(vec![Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
tag: Tag::unknown(),
}]),
//DataType::List(_) => None,
//DataType::Time64(TimeUnit) => None,
//DataType::Duration(TimeUnit) => None,
// DataType::Categorical => None,
_ => unimplemented!(),
}
}
}
/// Appends `value` to `vec_values` if it has the same primitive kind as the
/// most recently inserted value.
///
/// The first value is always accepted. Later values must match the previous
/// one as int/int, decimal/decimal, string/string or boolean/boolean.
///
/// # Errors
///
/// Returns a "Different values in column" error pointing at both the new and
/// the previous value when the kinds differ.
fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
    // Tag of the previous value when the new value does not match its kind.
    let mismatched_with = match vec_values.last() {
        None => None,
        Some(previous) => {
            let same_kind = matches!(
                (&previous.value, &value.value),
                (
                    UntaggedValue::Primitive(Primitive::Int(_)),
                    UntaggedValue::Primitive(Primitive::Int(_)),
                ) | (
                    UntaggedValue::Primitive(Primitive::Decimal(_)),
                    UntaggedValue::Primitive(Primitive::Decimal(_)),
                ) | (
                    UntaggedValue::Primitive(Primitive::String(_)),
                    UntaggedValue::Primitive(Primitive::String(_)),
                ) | (
                    UntaggedValue::Primitive(Primitive::Boolean(_)),
                    UntaggedValue::Primitive(Primitive::Boolean(_)),
                )
            );

            if same_kind {
                None
            } else {
                Some(&previous.tag)
            }
        }
    };

    if let Some(previous_tag) = mismatched_with {
        return Err(ShellError::labeled_error_with_secondary(
            "Different values in column",
            "Value with different type",
            &value.tag,
            "Perhaps you want to change it to this value type",
            previous_tag,
        ));
    }

    vec_values.push(value);
    Ok(())
}
fn from_parsed_vector(
vec_values: Vec<Value>,
name: Option<String>,
) -> Result<NuSeries, ShellError> {
let series = match &vec_values[0].value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "int",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "decimal",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::String(_)) => {
let series_values: Result<Vec<_>, _> =
vec_values.iter().map(|v| v.as_string()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "string",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_bool()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "string",
};
Series::new(series_name, series_values?)
}
_ => unreachable!("The untagged type is checked while creating vec_values"),
};
Ok(NuSeries::new(series))
}

View File

@ -75,6 +75,10 @@ pub enum Type {
/// Dataframe
#[cfg(feature = "dataframe")]
DataFrame,
/// Intermediate dataframe structure (e.g. a groupby)
#[cfg(feature = "dataframe")]
FrameStruct,
}
/// A shape representation of the type of a row
@ -192,6 +196,8 @@ impl Type {
UntaggedValue::Block(_) => Type::Block,
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => Type::DataFrame,
#[cfg(feature = "dataframe")]
UntaggedValue::FrameStruct(_) => Type::DataFrame,
}
}
}
@ -298,7 +304,7 @@ impl PrettyDebug for Type {
}
Type::Block => ty("block"),
#[cfg(feature = "dataframe")]
Type::DataFrame => ty("data_type_formatter"),
Type::DataFrame | Type::FrameStruct => ty("data_type_formatter"),
}
}
}

View File

@ -31,7 +31,7 @@ use std::path::PathBuf;
use std::time::SystemTime;
#[cfg(feature = "dataframe")]
use crate::dataframe::PolarsData;
use crate::dataframe::{FrameStruct, NuDataFrame};
/// The core structured values that flow through a pipeline
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
@ -51,10 +51,15 @@ pub enum UntaggedValue {
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
Block(Box<hir::CapturedBlock>),
/// Data option that holds the polars structs required to to data
/// manipulation and operations using polars dataframes
/// Main nushell dataframe
#[cfg(feature = "dataframe")]
DataFrame(PolarsData),
DataFrame(NuDataFrame),
/// Data option that holds intermediate struct required to do data
/// manipulation and operations for dataframes such as groupby, lazy frames
/// and lazy groupby
#[cfg(feature = "dataframe")]
FrameStruct(FrameStruct),
}
impl UntaggedValue {
@ -685,11 +690,9 @@ impl ShellTypeName for UntaggedValue {
UntaggedValue::Error(_) => "error",
UntaggedValue::Block(_) => "block",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
UntaggedValue::DataFrame(_) => "dataframe",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
UntaggedValue::FrameStruct(FrameStruct::GroupBy(_)) => "groupby",
}
}
}

View File

@ -25,7 +25,9 @@ impl PrettyDebug for Value {
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"),
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
DbgDocBldr::opaque("dataframe")
}
}
}
}

View File

@ -12,7 +12,7 @@ use nu_source::{
use num_traits::cast::ToPrimitive;
#[cfg(feature = "dataframe")]
use nu_protocol::dataframe::{NuSeries, PolarsData};
use nu_protocol::dataframe::NuDataFrame;
pub trait ValueExt {
fn into_parts(self) -> (UntaggedValue, Tag);
@ -203,14 +203,14 @@ pub fn get_data_by_member(value: &Value, name: &PathMember) -> Result<Value, She
}
}
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => match &name.unspanned {
UntaggedValue::DataFrame(df) => match &name.unspanned {
UnspannedPathMember::String(string) => {
let column = df.as_ref().column(string.as_ref()).map_err(|e| {
let column = df.as_ref().select(string.as_str()).map_err(|e| {
ShellError::labeled_error("Dataframe error", format!("{}", e), &name.span)
})?;
Ok(NuSeries::series_to_value(
column.clone(),
Ok(NuDataFrame::dataframe_to_value(
column,
Tag::new(value.anchor(), name.span),
))
}
@ -746,7 +746,7 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => {
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
}
}

View File

@ -406,7 +406,7 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
UntaggedValue::Table(l) => serde_json::Value::Array(json_list(l)?),
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => {
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
return Err(ShellError::labeled_error(
"Cannot convert data struct",
"Cannot convert data struct",

View File

@ -64,7 +64,7 @@ pub fn value_to_bson_value(v: &Value) -> Result<Bson, ShellError> {
),
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null,
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => Bson::Null,
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => Bson::Null,
UntaggedValue::Error(e) => return Err(e.clone()),
UntaggedValue::Primitive(Primitive::Binary(b)) => {
Bson::Binary(BinarySubtype::Generic, b.clone())