mirror of
https://github.com/nushell/nushell.git
synced 2024-12-23 15:39:06 +01:00
All is a DataFrame (#3812)
* nuframe in its own type in UntaggedValue
* Removed eager dataframe from enum
* Dataframe created from list of values
* Corrected order in dataframe columns
* Returned tag from stream collection
* Removed series from dataframe commands
* Arithmetic operators
* forced push
* forced push
* Replace all command
* String commands
* appending operations with dfs
* Testing suite for dataframes
* Unit test for dataframe commands
* improved equality for dataframes
This commit is contained in:
parent
9120a64cfb
commit
f1ee9113ac
@ -2,14 +2,11 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
|
||||
dataframe::{Column, FrameStruct, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
use polars::{
|
||||
frame::groupby::GroupBy,
|
||||
prelude::{DataType, PolarsError, Series},
|
||||
};
|
||||
use polars::{frame::groupby::GroupBy, prelude::PolarsError};
|
||||
|
||||
enum Operation {
|
||||
Mean,
|
||||
@ -111,17 +108,40 @@ impl WholeStreamCommand for DataFrame {
|
||||
description: "Aggregate sum by grouping by column a and summing on col b",
|
||||
example:
|
||||
"[[a b]; [one 1] [one 2]] | dataframe to-df | dataframe group-by a | dataframe aggregate sum",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::string("one").into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Aggregate sum in dataframe columns",
|
||||
example: "[[a b]; [4 1] [5 2]] | dataframe to-df | dataframe aggregate sum",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(9).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(3).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Aggregate sum in series",
|
||||
example: "[4 1 5 6] | dataframe to-series | dataframe aggregate sum",
|
||||
result: None,
|
||||
example: "[4 1 5 6] | dataframe to-df | dataframe aggregate sum",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("0".to_string(), vec![UntaggedValue::int(16).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -139,7 +159,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
})?;
|
||||
|
||||
match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => {
|
||||
UntaggedValue::FrameStruct(FrameStruct::GroupBy(nu_groupby)) => {
|
||||
let groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let res = perform_groupby_aggregation(
|
||||
@ -152,18 +172,13 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
UntaggedValue::DataFrame(df) => {
|
||||
let df = df.as_ref();
|
||||
|
||||
let res = perform_dataframe_aggregation(&df, op, &operation.tag)?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
|
||||
let value = perform_series_aggregation(series.as_ref(), op, &operation.tag)?;
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"No groupby, dataframe or series in stream",
|
||||
"no groupby, dataframe or series found in input stream",
|
||||
@ -264,162 +279,15 @@ fn perform_dataframe_aggregation(
|
||||
}
|
||||
}
|
||||
|
||||
fn perform_series_aggregation(
|
||||
series: &Series,
|
||||
operation: Operation,
|
||||
operation_tag: &Tag,
|
||||
) -> Result<Value, ShellError> {
|
||||
match operation {
|
||||
Operation::Mean => {
|
||||
let res = match series.mean() {
|
||||
Some(val) => UntaggedValue::Primitive(val.into()),
|
||||
None => UntaggedValue::Primitive(0.into()),
|
||||
};
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
let value = Value {
|
||||
value: res,
|
||||
tag: operation_tag.clone(),
|
||||
};
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
let mut data = TaggedDictBuilder::new(operation_tag.clone());
|
||||
data.insert_value(series.name(), value);
|
||||
|
||||
Ok(data.into_value())
|
||||
}
|
||||
Operation::Median => {
|
||||
let res = match series.median() {
|
||||
Some(val) => UntaggedValue::Primitive(val.into()),
|
||||
None => UntaggedValue::Primitive(0.into()),
|
||||
};
|
||||
|
||||
let value = Value {
|
||||
value: res,
|
||||
tag: operation_tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(operation_tag.clone());
|
||||
data.insert_value(series.name(), value);
|
||||
|
||||
Ok(data.into_value())
|
||||
}
|
||||
Operation::Sum => {
|
||||
let untagged = match series.dtype() {
|
||||
DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64 => {
|
||||
let res: i64 = series.sum().unwrap_or(0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
DataType::Float32 | DataType::Float64 => {
|
||||
let res: f64 = series.sum().unwrap_or(0.0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Not valid type",
|
||||
format!(
|
||||
"this operation can not be performed with series of type {}",
|
||||
series.dtype()
|
||||
),
|
||||
&operation_tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let value = Value {
|
||||
value: untagged,
|
||||
tag: operation_tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(operation_tag.clone());
|
||||
data.insert_value(series.name(), value);
|
||||
|
||||
Ok(data.into_value())
|
||||
}
|
||||
Operation::Max => {
|
||||
let untagged = match series.dtype() {
|
||||
DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64 => {
|
||||
let res: i64 = series.max().unwrap_or(0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
DataType::Float32 | DataType::Float64 => {
|
||||
let res: f64 = series.max().unwrap_or(0.0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Not valid type",
|
||||
format!(
|
||||
"this operation can not be performed with series of type {}",
|
||||
series.dtype()
|
||||
),
|
||||
&operation_tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let value = Value {
|
||||
value: untagged,
|
||||
tag: operation_tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(operation_tag.clone());
|
||||
data.insert_value(series.name(), value);
|
||||
|
||||
Ok(data.into_value())
|
||||
}
|
||||
Operation::Min => {
|
||||
let untagged = match series.dtype() {
|
||||
DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::Int64
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64 => {
|
||||
let res: i64 = series.min().unwrap_or(0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
DataType::Float32 | DataType::Float64 => {
|
||||
let res: f64 = series.min().unwrap_or(0.0);
|
||||
Ok(UntaggedValue::Primitive(res.into()))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Not valid type",
|
||||
format!(
|
||||
"this operation can not be performed with series of type {}",
|
||||
series.dtype()
|
||||
),
|
||||
&operation_tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let value = Value {
|
||||
value: untagged,
|
||||
tag: operation_tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(operation_tag.clone());
|
||||
data.insert_value(series.name(), value);
|
||||
|
||||
Ok(data.into_value())
|
||||
}
|
||||
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Not valid operation",
|
||||
"operation not valid for series",
|
||||
&operation_tag.span,
|
||||
"Perhaps you want: mean, median, sum, max, min",
|
||||
&operation_tag.span,
|
||||
)),
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,8 +2,8 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries},
|
||||
Signature, SyntaxShape,
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
@ -32,7 +32,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Returns the selected column as series",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe column a",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -41,15 +49,26 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let column: Tagged<String> = args.req(0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.column(column.item.as_ref())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &column.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.clone(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.clone()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe drop a",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -39,7 +50,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let columns: Vec<Value> = args.rest(0)?;
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let new_df = match col_string.get(0) {
|
||||
Some(col) => df
|
||||
@ -63,3 +74,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
@ -34,7 +37,21 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | dataframe to-df | dataframe drop-duplicates",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -52,7 +69,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => (None, Span::unknown()),
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
@ -63,3 +80,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
@ -38,15 +38,45 @@ impl WholeStreamCommand for DataFrame {
|
||||
example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dataframe to-df);
|
||||
let res = ($df.b / $df.b);
|
||||
let df = ($df | dataframe with-column $res --name res);
|
||||
$df | dataframe drop-nulls
|
||||
"#,
|
||||
result: None,
|
||||
$df | dataframe drop-nulls"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(2).into()],
|
||||
),
|
||||
Column::new(
|
||||
"res".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(1).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "drop null values in dataframe",
|
||||
example: r#"let s = ([1 2 0 0 3 4] | dataframe to-series);
|
||||
example: r#"let s = ([1 2 0 0 3 4] | dataframe to-df);
|
||||
($s / $s) | dataframe drop-nulls"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"div_0_0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -60,7 +90,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
})?;
|
||||
|
||||
match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
UntaggedValue::DataFrame(df) => {
|
||||
// Extracting the selection columns of the columns to perform the aggregation
|
||||
let columns: Option<Vec<Value>> = args.opt(0)?;
|
||||
let (subset, col_span) = match columns {
|
||||
@ -80,10 +110,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
|
||||
let res = series.as_ref().drop_nulls();
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"drop nulls cannot be done with this value",
|
||||
@ -91,3 +117,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -26,7 +29,27 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe dtypes",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"column".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("b").into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"dtype".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("i64").into(),
|
||||
UntaggedValue::string("i64").into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,26 +58,49 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let col_names = df
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let mut dtypes: Vec<Value> = Vec::new();
|
||||
let names: Vec<Value> = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let values = df
|
||||
.map(|v| {
|
||||
let dtype = df
|
||||
.as_ref()
|
||||
.dtypes()
|
||||
.into_iter()
|
||||
.zip(col_names.into_iter())
|
||||
.map(move |(dtype, name)| {
|
||||
let mut data = TaggedDictBuilder::new(tag.clone());
|
||||
data.insert_value("column", name.as_ref());
|
||||
data.insert_value("dtype", format!("{}", dtype));
|
||||
.column(v)
|
||||
.expect("using name from list of names from dataframe")
|
||||
.dtype();
|
||||
|
||||
data.into_value()
|
||||
let dtype_str = format!("{}", dtype);
|
||||
dtypes.push(Value {
|
||||
value: dtype_str.into(),
|
||||
tag: Tag::default(),
|
||||
});
|
||||
|
||||
Ok(OutputStream::from_stream(values))
|
||||
Value {
|
||||
value: v.to_string().into(),
|
||||
tag: Tag::default(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let names_col = Column::new("column".to_string(), names);
|
||||
let dtypes_col = Column::new("dtype".to_string(), dtypes);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![names_col, dtypes_col], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
@ -32,12 +32,70 @@ impl WholeStreamCommand for DataFrame {
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe to-dummies",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a_1".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()],
|
||||
),
|
||||
Column::new(
|
||||
"a_3".to_string(),
|
||||
vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b_2".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(0).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b_4".to_string(),
|
||||
vec![UntaggedValue::int(0).into(), UntaggedValue::int(1).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Create new dataframe with dummy variables from a series",
|
||||
example: "[1 2 2 3 3] | dataframe to-series | dataframe to-dummies",
|
||||
result: None,
|
||||
example: "[1 2 2 3 3] | dataframe to-df | dataframe to-dummies",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0_1".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"0_2".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"0_3".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -51,7 +109,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
})?;
|
||||
|
||||
match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
UntaggedValue::DataFrame(df) => {
|
||||
let res = df.as_ref().to_dummies().map_err(|e| {
|
||||
parse_polars_error(
|
||||
&e,
|
||||
@ -62,17 +120,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
|
||||
let res = series.as_ref().to_dummies().map_err(|e| {
|
||||
parse_polars_error(
|
||||
&e,
|
||||
&tag.span,
|
||||
Some("The only allowed column types for dummies are String or Int"),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"dummies cannot be done with this value",
|
||||
@ -80,3 +127,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
@ -34,13 +34,21 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Filter dataframe using a bool mask",
|
||||
example: r#"let mask = ([$true $false] | dataframe to-series);
|
||||
example: r#"let mask = ([$true $false] | dataframe to-df);
|
||||
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Filter dataframe by creating a mask from operation",
|
||||
example: r#"let mask = (([5 6] | dataframe to-series) > 5);
|
||||
example: r#"let mask = (([5 6] | dataframe to-df) > 5);
|
||||
[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe filter-with $mask"#,
|
||||
result: None,
|
||||
},
|
||||
@ -53,16 +61,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let value: Value = args.req(0)?;
|
||||
|
||||
let series_span = value.tag.span;
|
||||
let series = match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
|
||||
let df = match value.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only add a series to a dataframe",
|
||||
value.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let casted = series.as_ref().bool().map_err(|e| {
|
||||
let series = df.as_series(&series_span)?;
|
||||
let casted = series.bool().map_err(|e| {
|
||||
parse_polars_error(
|
||||
&e,
|
||||
&&series_span,
|
||||
@ -70,12 +78,25 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)
|
||||
})?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.filter(&casted)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
|
||||
@ -31,8 +34,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with head rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first",
|
||||
result: None,
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe first 1",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -46,8 +57,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => 5,
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
pub struct DataFrame;
|
||||
@ -27,7 +30,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Creates dataframe with selected columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe get a",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -38,7 +49,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
@ -47,3 +58,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe get`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuGroupBy, PolarsData},
|
||||
dataframe::{FrameStruct, NuDataFrame, NuGroupBy},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
@ -43,7 +43,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let by_columns: Vec<Value> = args.rest(0)?;
|
||||
let (columns_string, col_span) = convert_columns(&by_columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
// This is the expensive part of the groupby; to create the
|
||||
// groups that will be used for grouping the data in the
|
||||
@ -57,7 +57,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let groups = groupby.get_groups().to_vec();
|
||||
let groupby = Value {
|
||||
tag,
|
||||
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
|
||||
value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(NuGroupBy::new(
|
||||
NuDataFrame::new(df.as_ref().clone()),
|
||||
columns_string,
|
||||
groups,
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
@ -51,20 +51,50 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![
|
||||
Example {
|
||||
vec![Example {
|
||||
description: "inner join dataframe",
|
||||
example: r#"let right = ([[a b c]; [1 2 5] [3 4 5] [5 6 6]] | dataframe to-df);
|
||||
$right | dataframe join $right -l [a b] -r [a b]"#,
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "right join dataframe",
|
||||
example: r#"let right = ([[a b c]; [1 2 3] [3 4 5] [5 6 7]] | dataframe to-df);
|
||||
$right | dataframe join $right -l [a c] -r [a c] -t inner"#,
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c_right".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,10 +127,10 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
|
||||
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = match r_df.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
|
||||
UntaggedValue::DataFrame(r_df) => {
|
||||
// Checking the column types before performing the join
|
||||
check_column_datatypes(
|
||||
df.as_ref(),
|
||||
@ -173,3 +203,16 @@ fn check_column_datatypes<T: AsRef<str>>(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe join`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
pub struct DataFrame;
|
||||
@ -30,8 +33,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with last rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last",
|
||||
result: None,
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe last 1",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(3).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(4).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -45,9 +56,22 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => 5,
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe last`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::PolarsData, Signature, TaggedDictBuilder, UntaggedValue};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -19,46 +22,94 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let values = args
|
||||
let data = args
|
||||
.context
|
||||
.scope
|
||||
.get_vars()
|
||||
.into_iter()
|
||||
.filter_map(|(name, value)| {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = &value.value {
|
||||
let mut data = TaggedDictBuilder::new(value.tag.clone());
|
||||
if let UntaggedValue::DataFrame(df) = &value.value {
|
||||
let rows = Value {
|
||||
value: (df.as_ref().height() as i64).into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
let rows = df.as_ref().height();
|
||||
let cols = df.as_ref().width();
|
||||
let cols = Value {
|
||||
value: (df.as_ref().width() as i64).into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
data.insert_value("name", name.as_ref());
|
||||
data.insert_value("rows", format!("{}", rows));
|
||||
data.insert_value("columns", format!("{}", cols));
|
||||
let location = match value.tag.anchor {
|
||||
Some(AnchorLocation::File(name)) => name,
|
||||
Some(AnchorLocation::Url(name)) => name,
|
||||
Some(AnchorLocation::Source(text)) => text.slice(0..text.end).text,
|
||||
None => "stream".to_string(),
|
||||
};
|
||||
|
||||
match value.tag.anchor {
|
||||
Some(AnchorLocation::File(name)) => data.insert_value("location", name),
|
||||
Some(AnchorLocation::Url(name)) => data.insert_value("location", name),
|
||||
Some(AnchorLocation::Source(text)) => {
|
||||
let loc_name = text.slice(0..text.end);
|
||||
data.insert_value("location", loc_name.text)
|
||||
}
|
||||
None => data.insert_value("location", "stream"),
|
||||
}
|
||||
let location = Value {
|
||||
value: location.into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
Some(data.into_value())
|
||||
let name = Value {
|
||||
value: name.into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
Some((name, rows, cols, location))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
Ok(OutputStream::from_stream(values))
|
||||
let mut name = Column::new_empty("name".to_string());
|
||||
let mut rows = Column::new_empty("rows".to_string());
|
||||
let mut cols = Column::new_empty("columns".to_string());
|
||||
let mut location = Column::new_empty("location".to_string());
|
||||
|
||||
for tuple in data {
|
||||
name.push(tuple.0);
|
||||
rows.push(tuple.1);
|
||||
cols.push(tuple.2);
|
||||
location.push(tuple.3);
|
||||
}
|
||||
|
||||
let tag = args.call_info.name_tag;
|
||||
let df = NuDataFrame::try_from_columns(vec![name, rows, cols, location], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Lists loaded dataframes in current scope",
|
||||
example: "dataframe list",
|
||||
result: None,
|
||||
example: "let a = ([[a b];[1 2] [3 4]] | dataframe to-df); dataframe list",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("name".to_string(), vec![UntaggedValue::string("$a").into()]),
|
||||
Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
Column::new(
|
||||
"location".to_string(),
|
||||
vec![UntaggedValue::string("stream").into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe list`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::convert_columns;
|
||||
|
||||
@ -18,8 +21,18 @@ impl WholeStreamCommand for DataFrame {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe melt")
|
||||
.required("id_columns", SyntaxShape::Table, "Id columns for melting")
|
||||
.rest(SyntaxShape::Any, "columns used as value columns")
|
||||
.required_named(
|
||||
"columns",
|
||||
SyntaxShape::Table,
|
||||
"column names for melting",
|
||||
Some('c'),
|
||||
)
|
||||
.required_named(
|
||||
"values",
|
||||
SyntaxShape::Table,
|
||||
"column names used as value columns",
|
||||
Some('v'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
@ -29,8 +42,59 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example: "[[a b]; [a 2] [b 4] [a 6]] | dataframe to-df | dataframe melt a b",
|
||||
result: None,
|
||||
example:
|
||||
"[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dataframe to-df | dataframe melt -c [b c] -v [a d]",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"variable".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("d").into(),
|
||||
UntaggedValue::string("d").into(),
|
||||
UntaggedValue::string("d").into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"value".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("x").into(),
|
||||
UntaggedValue::string("y").into(),
|
||||
UntaggedValue::string("z").into(),
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("b").into(),
|
||||
UntaggedValue::string("c").into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -38,13 +102,13 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let id_col: Vec<Value> = args.req(0)?;
|
||||
let val_col: Vec<Value> = args.rest(1)?;
|
||||
let id_col: Vec<Value> = args.req_named("columns")?;
|
||||
let val_col: Vec<Value> = args.req_named("values")?;
|
||||
|
||||
let (id_col_string, id_col_span) = convert_columns(&id_col, &tag)?;
|
||||
let (val_col_string, val_col_span) = convert_columns(&val_col, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
|
||||
@ -99,3 +163,16 @@ fn check_column_datatypes<T: AsRef<str>>(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its signature is built as
// `dataframe melt`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -26,7 +26,6 @@ pub mod take;
|
||||
pub mod to_csv;
|
||||
pub mod to_df;
|
||||
pub mod to_parquet;
|
||||
pub mod to_series;
|
||||
pub(crate) mod utils;
|
||||
pub mod where_;
|
||||
pub mod with_column;
|
||||
@ -59,7 +58,6 @@ pub use take::DataFrame as DataFrameTake;
|
||||
pub use to_csv::DataFrame as DataFrameToCsv;
|
||||
pub use to_df::DataFrame as DataFrameToDF;
|
||||
pub use to_parquet::DataFrame as DataFrameToParquet;
|
||||
pub use to_series::DataFrame as DataFrameToSeries;
|
||||
pub use where_::DataFrame as DataFrameWhere;
|
||||
pub use with_column::DataFrame as DataFrameWithColumn;
|
||||
|
||||
@ -71,6 +69,8 @@ pub use series::DataFrameArgMin;
|
||||
pub use series::DataFrameArgSort;
|
||||
pub use series::DataFrameArgTrue;
|
||||
pub use series::DataFrameArgUnique;
|
||||
pub use series::DataFrameConcatenate;
|
||||
pub use series::DataFrameContains;
|
||||
pub use series::DataFrameIsDuplicated;
|
||||
pub use series::DataFrameIsIn;
|
||||
pub use series::DataFrameIsNotNull;
|
||||
@ -80,9 +80,14 @@ pub use series::DataFrameNNull;
|
||||
pub use series::DataFrameNUnique;
|
||||
pub use series::DataFrameNot;
|
||||
pub use series::DataFrameReplace;
|
||||
pub use series::DataFrameReplaceAll;
|
||||
pub use series::DataFrameSeriesRename;
|
||||
pub use series::DataFrameSet;
|
||||
pub use series::DataFrameSetWithIdx;
|
||||
pub use series::DataFrameShift;
|
||||
pub use series::DataFrameStringLengths;
|
||||
pub use series::DataFrameStringSlice;
|
||||
pub use series::DataFrameToLowercase;
|
||||
pub use series::DataFrameToUppercase;
|
||||
pub use series::DataFrameUnique;
|
||||
pub use series::DataFrameValueCounts;
|
||||
|
@ -73,7 +73,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
description: "Pivot a dataframe on b and aggregation on col c",
|
||||
example:
|
||||
"[[a b c]; [one x 1] [two y 2]] | dataframe to-df | dataframe group-by a | dataframe pivot b c sum",
|
||||
result: None,
|
||||
result: None, // No sample because there are nulls in the result dataframe
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
@ -42,13 +42,13 @@ impl WholeStreamCommand for DataFrame {
|
||||
Example {
|
||||
description: "Sample rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe sample -r 1",
|
||||
result: None,
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
Example {
|
||||
description: "Shows sample row using fraction and replace",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4] [5 6]] | dataframe to-df | dataframe sample -f 0.5 -e",
|
||||
result: None,
|
||||
result: None, // No expected value because sampling is random
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -61,7 +61,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let fraction: Option<Tagged<f64>> = args.get_flag("fraction")?;
|
||||
let replace: bool = args.has_flag("replace");
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
@ -28,7 +31,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe select a",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -40,7 +51,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
@ -49,3 +60,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe select`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Returns true if all values are false",
|
||||
example: "[$false $false $false] | dataframe to-series | dataframe all-false",
|
||||
result: None,
|
||||
example: "[$false $false $false] | dataframe to-df | dataframe all-false",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![UntaggedValue::boolean(true).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Checks the result from a comparison",
|
||||
example: r#"let s = ([5 6 2 8] | dataframe to-series);
|
||||
example: r#"let s = ([5 6 2 10] | dataframe to-df);
|
||||
let res = ($s > 9);
|
||||
$res | dataframe all-false"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_false".to_string(),
|
||||
vec![UntaggedValue::boolean(false).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let bool = series.as_ref().bool().map_err(|e| {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let bool = series.bool().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&tag.span,
|
||||
@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(tag);
|
||||
data.insert_value("all_false", value);
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("all_false".to_string(), vec![value])],
|
||||
&tag.span,
|
||||
)?;
|
||||
|
||||
Ok(OutputStream::one(data.into_value()))
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its examples invoke
// `dataframe all-false`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -26,15 +29,31 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Returns true if all values are true",
|
||||
example: "[$true $true $true] | dataframe to-series | dataframe all-true",
|
||||
result: None,
|
||||
example: "[$true $true $true] | dataframe to-df | dataframe all-true",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![UntaggedValue::boolean(true).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Checks the result from a comparison",
|
||||
example: r#"let s = ([5 6 2 8] | dataframe to-series);
|
||||
example: r#"let s = ([5 6 2 8] | dataframe to-df);
|
||||
let res = ($s > 9);
|
||||
$res | dataframe all-true"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"all_true".to_string(),
|
||||
vec![UntaggedValue::boolean(false).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -43,9 +62,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let bool = series.as_ref().bool().map_err(|e| {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let bool = series.bool().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&tag.span,
|
||||
@ -60,8 +80,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(tag);
|
||||
data.insert_value("all_true", value);
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("all_true".to_string(), vec![value])],
|
||||
&tag.span,
|
||||
)?;
|
||||
|
||||
Ok(OutputStream::one(data.into_value()))
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its examples invoke
// `dataframe all-true`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
|
||||
|
||||
@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns index for max value",
|
||||
example: "[1 3 2] | dataframe to-series | dataframe arg-max",
|
||||
result: None,
|
||||
example: "[1 3 2] | dataframe to-df | dataframe arg-max",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_max".to_string(),
|
||||
vec![UntaggedValue::int(1).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -36,9 +47,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let res = series.as_ref().arg_max();
|
||||
let res = series.arg_max();
|
||||
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::new_from_slice("arg_max", &[index as u32]),
|
||||
@ -46,6 +58,20 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe arg-max`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
use polars::prelude::{IntoSeries, NewChunkedArray, UInt32Chunked};
|
||||
|
||||
@ -27,8 +30,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns index for min value",
|
||||
example: "[1 3 2] | dataframe to-series | dataframe arg-min",
|
||||
result: None,
|
||||
example: "[1 3 2] | dataframe to-df | dataframe arg-min",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_min".to_string(),
|
||||
vec![UntaggedValue::int(0).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -36,9 +47,9 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().arg_min();
|
||||
let res = df.as_series(&df_tag.span)?.arg_min();
|
||||
|
||||
let chunked = match res {
|
||||
Some(index) => UInt32Chunked::new_from_slice("arg_min", &[index as u32]),
|
||||
@ -46,6 +57,20 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
};
|
||||
|
||||
let res = chunked.into_series();
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
// Unit tests for this file's `DataFrame` command (its example invokes
// `dataframe arg-min`). Compiled only for test builds (`cfg(test)`).
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
// Crate-level dataframe test helper, aliased locally as `test_examples`.
// NOTE(review): presumably it evaluates every `Example` of the command and
// checks it against the declared `result` -- confirm in `crate::examples`.
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
// Hand the command to the helper; its `Result` is the test outcome.
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,8 +29,22 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns indexes for a sorted series",
|
||||
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-sort",
|
||||
result: None,
|
||||
example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-sort",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_sort".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -36,12 +53,24 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let reverse = args.has_flag("reverse");
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().argsort(reverse);
|
||||
let mut res = df.as_series(&df_tag.span)?.argsort(reverse).into_series();
|
||||
res.rename("arg_sort");
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,8 +29,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns indexes where values are true",
|
||||
example: "[$false $true $false] | dataframe to-series | dataframe arg-true",
|
||||
result: None,
|
||||
example: "[$false $true $false] | dataframe to-df | dataframe arg-true",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_true".to_string(),
|
||||
vec![UntaggedValue::int(1).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,9 +46,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let bool = series.as_ref().bool().map_err(|e| {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let bool = series.bool().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&tag.span,
|
||||
@ -46,7 +58,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
})?;
|
||||
|
||||
let mut res = bool.arg_true().into_series();
|
||||
res.rename("int");
|
||||
res.rename("arg_true");
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,8 +29,20 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns indexes for unique values",
|
||||
example: "[1 2 2 3 3] | dataframe to-series | dataframe arg-unique",
|
||||
result: None,
|
||||
example: "[1 2 2 3 3] | dataframe to-df | dataframe arg-unique",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"arg_unique".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,15 +50,29 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
let mut res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.arg_unique()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
|
||||
.into_series();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
res.rename("arg_unique");
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
107
crates/nu-command/src/commands/dataframe/series/concatenate.rs
Normal file
107
crates/nu-command/src/commands/dataframe/series/concatenate.rs
Normal file
@ -0,0 +1,107 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
/// Unit struct that implements the `dataframe concatenate` command.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe concatenate"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Concatenates strings with other array"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe concatenate").required(
|
||||
"other",
|
||||
SyntaxShape::Any,
|
||||
"Other array with string to be concatenated",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Concatenate string",
|
||||
example: r#"let other = ([za xs cd] | dataframe to-df);
|
||||
[abc abc abc] | dataframe to-df | dataframe concatenate $other"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("abcza").into(),
|
||||
UntaggedValue::string("abcxs").into(),
|
||||
UntaggedValue::string("abccd").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let other: Value = args.req(0)?;
|
||||
|
||||
let other_df = match &other.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only concatenate another series",
|
||||
other.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let other_series = other_df.as_series(&other.tag.span)?;
|
||||
let other_chunked = other_series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&other.tag.span,
|
||||
Some("The concatenate command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The concatenate command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked.concat(&other_chunked);
|
||||
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Runs the command's documented examples through the dataframe example
    /// test helper.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
90
crates/nu-command/src/commands/dataframe/series/contains.rs
Normal file
90
crates/nu-command/src/commands/dataframe/series/contains.rs
Normal file
@ -0,0 +1,90 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
/// Unit struct that implements the `dataframe contains` command.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe contains"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Checks if a patter is contained in a string"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe contains").required_named(
|
||||
"pattern",
|
||||
SyntaxShape::String,
|
||||
"Regex pattern to be searched",
|
||||
Some('p'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns boolean indicating if patter was found",
|
||||
example: "[abc acb acb] | dataframe to-df | dataframe contains -p ab",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let pattern: Tagged<String> = args.req_named("pattern")?;
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The contains command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let res = chunked
|
||||
.contains(pattern.as_str())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Runs the command's documented examples through the dataframe example
    /// test helper.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask indicating duplicated values",
|
||||
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-duplicated",
|
||||
result: None,
|
||||
example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-duplicated",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_duplicated".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
let mut res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.is_duplicated()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
|
||||
.into_series();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
res.rename("is_duplicated");
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuSeries, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
@ -29,9 +29,25 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Checks if elements from a series are contained in right series",
|
||||
example: r#"let other = ([1 3 6] | dataframe to-series);
|
||||
[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-in $other"#,
|
||||
result: None,
|
||||
example: r#"let other = ([1 3 6] | dataframe to-df);
|
||||
[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-in $other"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_in".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -40,8 +56,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let value: Value = args.req(0)?;
|
||||
|
||||
let other = match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
|
||||
let other_df = match value.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only search in a series",
|
||||
@ -49,15 +65,31 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}?;
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let other = other_df.as_series(&value.tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
.is_in(other.as_ref())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let mut res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.is_in(&other)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
|
||||
.into_series();
|
||||
|
||||
res.rename("is_in");
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask where values are not null",
|
||||
example: r#"let s = ([5 6 0 8] | dataframe to-series);
|
||||
example: r#"let s = ([5 6 0 8] | dataframe to-df);
|
||||
let res = ($s / $s);
|
||||
$res | dataframe is-not-null"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_not_null".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().is_not_null();
|
||||
let res = df.as_series(&df_tag.span)?.is_not_null();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,10 +29,23 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask where values are null",
|
||||
example: r#"let s = ([5 6 0 8] | dataframe to-series);
|
||||
example: r#"let s = ([5 6 0 8] | dataframe to-df);
|
||||
let res = ($s / $s);
|
||||
$res | dataframe is-null"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_null".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -37,12 +53,23 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().is_null();
|
||||
let res = df.as_series(&df_tag.span)?.is_null();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -26,8 +29,24 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create mask indicating unique values",
|
||||
example: "[5 6 6 6 8 8 8] | dataframe to-series | dataframe is-unique",
|
||||
result: None,
|
||||
example: "[5 6 6 6 8 8 8] | dataframe to-df | dataframe is-unique",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"is_unique".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,15 +54,29 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
let mut res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.is_unique()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?
|
||||
.into_series();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
res.rename("is_unique");
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,8 @@ pub mod arg_min;
|
||||
pub mod arg_sort;
|
||||
pub mod arg_true;
|
||||
pub mod arg_unique;
|
||||
pub mod concatenate;
|
||||
pub mod contains;
|
||||
pub mod is_duplicated;
|
||||
pub mod is_in;
|
||||
pub mod is_not_null;
|
||||
@ -15,9 +17,14 @@ pub mod n_unique;
|
||||
pub mod not;
|
||||
pub mod rename;
|
||||
pub mod replace;
|
||||
pub mod replace_all;
|
||||
pub mod set;
|
||||
pub mod set_with_idx;
|
||||
pub mod shift;
|
||||
pub mod str_lengths;
|
||||
pub mod str_slice;
|
||||
pub mod to_lowercase;
|
||||
pub mod to_uppercase;
|
||||
pub mod unique;
|
||||
pub mod value_counts;
|
||||
|
||||
@ -28,6 +35,8 @@ pub use arg_min::DataFrame as DataFrameArgMin;
|
||||
pub use arg_sort::DataFrame as DataFrameArgSort;
|
||||
pub use arg_true::DataFrame as DataFrameArgTrue;
|
||||
pub use arg_unique::DataFrame as DataFrameArgUnique;
|
||||
pub use concatenate::DataFrame as DataFrameConcatenate;
|
||||
pub use contains::DataFrame as DataFrameContains;
|
||||
pub use is_duplicated::DataFrame as DataFrameIsDuplicated;
|
||||
pub use is_in::DataFrame as DataFrameIsIn;
|
||||
pub use is_not_null::DataFrame as DataFrameIsNotNull;
|
||||
@ -38,8 +47,13 @@ pub use n_unique::DataFrame as DataFrameNUnique;
|
||||
pub use not::DataFrame as DataFrameNot;
|
||||
pub use rename::DataFrame as DataFrameSeriesRename;
|
||||
pub use replace::DataFrame as DataFrameReplace;
|
||||
pub use replace_all::DataFrame as DataFrameReplaceAll;
|
||||
pub use set::DataFrame as DataFrameSet;
|
||||
pub use set_with_idx::DataFrame as DataFrameSetWithIdx;
|
||||
pub use shift::DataFrame as DataFrameShift;
|
||||
pub use str_lengths::DataFrame as DataFrameStringLengths;
|
||||
pub use str_slice::DataFrame as DataFrameStringSlice;
|
||||
pub use to_lowercase::DataFrame as DataFrameToLowercase;
|
||||
pub use to_uppercase::DataFrame as DataFrameToUppercase;
|
||||
pub use unique::DataFrame as DataFrameUnique;
|
||||
pub use value_counts::DataFrame as DataFrameValueCounts;
|
||||
|
@ -2,7 +2,8 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Primitive, Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -27,9 +28,17 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Counts null values",
|
||||
example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-series);
|
||||
($s / ss) | dataframe count-null"#,
|
||||
result: None,
|
||||
example: r#"let s = ([1 1 0 0 3 3 4] | dataframe to-df);
|
||||
($s / $s) | dataframe count-null"#,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"count_null".to_string(),
|
||||
vec![UntaggedValue::int(2).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -37,17 +46,32 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().null_count();
|
||||
let res = df.as_series(&df_tag.span)?.null_count();
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Int(res as i64)),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(tag);
|
||||
data.insert_value("count-null", value);
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("count_null".to_string(), vec![value])],
|
||||
&tag.span,
|
||||
)?;
|
||||
|
||||
Ok(OutputStream::one(data.into_value()))
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,8 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::NuSeries, Primitive, Signature, TaggedDictBuilder, UntaggedValue, Value,
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Primitive, Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -27,8 +28,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Counts unique values",
|
||||
example: "[1 1 2 2 3 3 4] | dataframe to-series | dataframe count-unique",
|
||||
result: None,
|
||||
example: "[1 1 2 2 3 3 4] | dataframe to-df | dataframe count-unique",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"count_unique".to_string(),
|
||||
vec![UntaggedValue::int(4).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -36,10 +45,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
let res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.n_unique()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
@ -48,8 +57,23 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(tag);
|
||||
data.insert_value("count-unique", value);
|
||||
let df = NuDataFrame::try_from_columns(
|
||||
vec![Column::new("count_unique".to_string(), vec![value])],
|
||||
&tag.span,
|
||||
)?;
|
||||
|
||||
Ok(OutputStream::one(data.into_value()))
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
use std::ops::Not;
|
||||
|
||||
@ -27,8 +30,20 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Inverts boolean mask",
|
||||
example: "[$true $false $true] | dataframe to-series | dataframe not",
|
||||
result: None,
|
||||
example: "[$true $false $true] | dataframe to-df | dataframe not",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(false).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -36,9 +51,10 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
let bool = series.as_ref().bool().map_err(|e| {
|
||||
let bool = series.bool().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&tag.span,
|
||||
@ -48,8 +64,19 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
let res = bool.not();
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -30,8 +33,21 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Renames a series",
|
||||
example: "[5 6 7 8] | dataframe to-series | dataframe rename-series new_name",
|
||||
result: None,
|
||||
example: "[5 6 7 8] | dataframe to-df | dataframe rename new_name",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"new_name".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
UntaggedValue::int(7).into(),
|
||||
UntaggedValue::int(8).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -40,9 +56,25 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let name: Tagged<String> = args.req(0)?;
|
||||
|
||||
let mut series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
series.as_mut().rename(name.item.as_ref());
|
||||
let mut series = df.as_series(&df_tag.span)?;
|
||||
|
||||
Ok(OutputStream::one(series.into_value(tag)))
|
||||
series.rename(name.item.as_ref());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![series], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,11 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
|
||||
use nu_source::Tagged;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::{Span, Tagged};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -39,8 +42,20 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Replaces string",
|
||||
example: "[abc abc abc] | dataframe to-series | dataframe replace -p ab -r AB",
|
||||
result: None,
|
||||
example: "[abc abc abc] | dataframe to-df | dataframe replace -p ab -r AB",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("ABc").into(),
|
||||
UntaggedValue::string("ABc").into(),
|
||||
UntaggedValue::string("ABc").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -50,23 +65,36 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let pattern: Tagged<String> = args.req_named("pattern")?;
|
||||
let replace: Tagged<String> = args.req_named("replace")?;
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let chunked = series.as_ref().utf8().map_err(|e| {
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&tag.span,
|
||||
Some("The replace command can only be used with string columns"),
|
||||
&df_tag.span,
|
||||
Some("The replace-all command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let res = chunked
|
||||
.as_ref()
|
||||
let mut res = chunked
|
||||
.replace(pattern.as_str(), replace.as_str())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
100
crates/nu-command/src/commands/dataframe/series/replace_all.rs
Normal file
100
crates/nu-command/src/commands/dataframe/series/replace_all.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe replace-all"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Replace all (sub)strings by a regex pattern"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe replace")
|
||||
.required_named(
|
||||
"pattern",
|
||||
SyntaxShape::String,
|
||||
"Regex pattern to be matched",
|
||||
Some('p'),
|
||||
)
|
||||
.required_named(
|
||||
"replace",
|
||||
SyntaxShape::String,
|
||||
"replacing string",
|
||||
Some('r'),
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Replaces string",
|
||||
example: "[abac abac abac] | dataframe to-df | dataframe replace-all -p a -r A",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("AbAc").into(),
|
||||
UntaggedValue::string("AbAc").into(),
|
||||
UntaggedValue::string("AbAc").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let pattern: Tagged<String> = args.req_named("pattern")?;
|
||||
let replace: Tagged<String> = args.req_named("replace")?;
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The replace command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked
|
||||
.replace_all(pattern.as_str(), replace.as_str())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Hands the command to the dataframe example harness, which is expected
    /// to run the entries returned by `examples()`.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -33,10 +36,24 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Shifts the values by a given period",
|
||||
example: r#"let s = ([1 2 2 3 3] | dataframe to-series | dataframe shift 2);
|
||||
example: r#"let s = ([1 2 2 3 3] | dataframe to-df | dataframe shift 2);
|
||||
let mask = ($s | dataframe is-null);
|
||||
$s | dataframe set 0 --mask $mask"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(0).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -46,11 +63,19 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let value: Value = args.req(0)?;
|
||||
let mask: Value = args.req_named("mask")?;
|
||||
|
||||
let bool_mask = match &mask.value {
|
||||
UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => {
|
||||
match series.as_ref().dtype() {
|
||||
DataType::Boolean => series
|
||||
.as_ref()
|
||||
let mask_df = match &mask.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only use a series as mask",
|
||||
value.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let mask_series = mask_df.as_series(&mask.tag.span)?;
|
||||
|
||||
let bool_mask = match mask_series.dtype() {
|
||||
DataType::Boolean => mask_series
|
||||
.bool()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &mask.tag.span, None)),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
@ -58,20 +83,14 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
"can only use bool series as mask",
|
||||
value.tag.span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only use bool series as mask",
|
||||
value.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
match &value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => {
|
||||
let chunked = series.as_ref().i64().map_err(|e| {
|
||||
let chunked = series.i64().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&value.tag.span,
|
||||
@ -83,10 +102,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
.set(bool_mask, Some(*val))
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => {
|
||||
let chunked = series.as_ref().f64().map_err(|e| {
|
||||
@ -107,10 +124,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::String(val)) => {
|
||||
let chunked = series.as_ref().utf8().map_err(|e| {
|
||||
@ -128,18 +143,29 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let mut res = res.into_series();
|
||||
res.rename("string");
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
format!(
|
||||
"this value cannot be set in a series of type '{}'",
|
||||
series.as_ref().dtype()
|
||||
series.dtype()
|
||||
),
|
||||
value.tag.span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use polars::prelude::{ChunkSet, DataType, IntoSeries};
|
||||
|
||||
pub struct DataFrame;
|
||||
@ -33,10 +36,25 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Set value in selected rows from series",
|
||||
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
|
||||
let indices = ([0 2] | dataframe to-series);
|
||||
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df);
|
||||
let indices = ([0 2] | dataframe to-df);
|
||||
$series | dataframe set-with-idx 6 -i $indices"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(6).into(),
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -47,7 +65,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let indices: Value = args.req_named("indices")?;
|
||||
|
||||
let indices = match &indices.value {
|
||||
UntaggedValue::DataFrame(nu_protocol::dataframe::PolarsData::Series(series)) => Ok(series),
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only use a series for set command",
|
||||
@ -55,7 +73,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}?;
|
||||
|
||||
let casted = match indices.as_ref().dtype() {
|
||||
let indices = indices.as_series(&value.tag.span)?;
|
||||
|
||||
let casted = match indices.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices
|
||||
.as_ref()
|
||||
.cast_with_dtype(&DataType::UInt32)
|
||||
@ -75,11 +95,12 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
.into_iter()
|
||||
.filter_map(|val| val.map(|v| v as usize));
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
|
||||
match &value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => {
|
||||
let chunked = series.as_ref().i64().map_err(|e| {
|
||||
let chunked = series.i64().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&value.tag.span,
|
||||
@ -91,10 +112,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
.set_at_idx(indices, Some(*val))
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => {
|
||||
let chunked = series.as_ref().f64().map_err(|e| {
|
||||
@ -115,10 +134,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::String(val)) => {
|
||||
let chunked = series.as_ref().utf8().map_err(|e| {
|
||||
@ -136,10 +153,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let mut res = res.into_series();
|
||||
res.rename("string");
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
@ -151,3 +166,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DataFrame;
|
||||
use super::ShellError;
|
||||
|
||||
#[test]
|
||||
fn examples_work_as_expected() -> Result<(), ShellError> {
|
||||
use crate::examples::test_dataframe as test_examples;
|
||||
|
||||
test_examples(DataFrame {})
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -27,7 +26,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Shifts the values by a given period",
|
||||
example: "[1 2 2 3 3] | dataframe to-series | dataframe shift 2",
|
||||
example: "[1 2 2 3 3] | dataframe to-df | dataframe shift 2",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -37,12 +36,10 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let period: Tagged<i64> = args.req(0)?;
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series.as_ref().shift(period.item);
|
||||
let res = df.as_series(&df_tag.span)?.shift(period.item);
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
@ -0,0 +1,81 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe str-lengths"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Get lengths of all strings"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe str-lengths")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns string lengths",
|
||||
example: "[a ab abc] | dataframe to-df | dataframe str-lengths",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The str-lengths command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let res = chunked.as_ref().str_lengths();
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Hands the command to the dataframe example harness, which is expected
    /// to run the entries returned by `examples()`.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
92
crates/nu-command/src/commands/dataframe/series/str_slice.rs
Normal file
92
crates/nu-command/src/commands/dataframe/series/str_slice.rs
Normal file
@ -0,0 +1,92 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe str-slice"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Slices the string from the start position until the selected length"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe replace")
|
||||
.required_named("start", SyntaxShape::Int, "start of slice", Some('s'))
|
||||
.named("length", SyntaxShape::Int, "optional length", Some('l'))
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates slices from the strings",
|
||||
example: "[abcded abc321 abc123] | dataframe to-df | dataframe str-slice -s 1 -l 2",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("bc").into(),
|
||||
UntaggedValue::string("bc").into(),
|
||||
UntaggedValue::string("bc").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let start: Tagged<i64> = args.req_named("start")?;
|
||||
|
||||
let length: Option<Tagged<i64>> = args.get_flag("length")?;
|
||||
let length = length.map(|v| v.item as u64);
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The str-slice command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked
|
||||
.str_slice(start.item, length)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Hands the command to the dataframe example harness, which is expected
    /// to run the entries returned by `examples()`.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
@ -0,0 +1,82 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe to-lowercase"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Lowercase the strings in the column"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe to-lowercase")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Modifies strings to lowercase",
|
||||
example: "[Abc aBc abC] | dataframe to-df | dataframe to-lowercase",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("abc").into(),
|
||||
UntaggedValue::string("abc").into(),
|
||||
UntaggedValue::string("abc").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The to-lowercase command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked.to_lowercase();
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe as test_examples;

    /// Hands the command to the dataframe example harness, which is expected
    /// to run the entries returned by `examples()`.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        let command = DataFrame {};
        test_examples(command)
    }
}
|
@ -0,0 +1,82 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use polars::prelude::IntoSeries;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe to-uppercase"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"[Series] Uppercase the strings in the column"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe to-uppercase")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Modifies strings to uppercase",
|
||||
example: "[Abc aBc abC] | dataframe to-df | dataframe to-uppercase",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("ABC").into(),
|
||||
UntaggedValue::string("ABC").into(),
|
||||
UntaggedValue::string("ABC").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let series = df.as_series(&df_tag.span)?;
|
||||
let chunked = series.utf8().map_err(|e| {
|
||||
parse_polars_error::<&str>(
|
||||
&e,
|
||||
&df_tag.span,
|
||||
Some("The to-uppercase command can only be used with string columns"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut res = chunked.to_uppercase();
|
||||
res.rename(series.name());
|
||||
|
||||
let df = NuDataFrame::try_from_series(vec![res.into_series()], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
@ -1,8 +1,10 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature};
|
||||
use polars::prelude::IntoSeries;
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -26,8 +28,16 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns unique values from a series",
|
||||
example: "[1 2 2 3 3] | dataframe to-series | dataframe unique",
|
||||
result: None,
|
||||
example: "[2 2 2 2 2] | dataframe to-df | dataframe unique",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![UntaggedValue::int(2).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -35,15 +45,26 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = series
|
||||
.as_ref()
|
||||
let res = df
|
||||
.as_series(&df_tag.span)?
|
||||
.unique()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.into_series(),
|
||||
tag,
|
||||
)))
|
||||
let df = NuDataFrame::try_from_series(vec![res], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(df_tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -2,8 +2,8 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries},
|
||||
Signature,
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
use crate::commands::dataframe::utils::parse_polars_error;
|
||||
@ -30,8 +30,22 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Calculates value counts",
|
||||
example: "[5 5 6 6] | dataframe to-series | dataframe value-counts",
|
||||
result: None,
|
||||
example: "[5 5 5 5 6 6] | dataframe to-df | dataframe value-counts",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![UntaggedValue::int(5).into(), UntaggedValue::int(6).into()],
|
||||
),
|
||||
Column::new(
|
||||
"counts".to_string(),
|
||||
vec![UntaggedValue::int(4).into(), UntaggedValue::int(2).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -39,12 +53,27 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let series = NuSeries::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let df = series
|
||||
.as_ref()
|
||||
let df_new = df
|
||||
.as_series(&df_tag.span)?
|
||||
.value_counts()
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(df, tag)))
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
|
||||
df_new, tag,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -26,7 +29,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Shows row and column shape",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe shape",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("rows".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
Column::new("columns".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -34,14 +45,34 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let rows = df.as_ref().height();
|
||||
let cols = df.as_ref().width();
|
||||
let rows = Value {
|
||||
value: (df.as_ref().height() as i64).into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
let mut data = TaggedDictBuilder::new(&tag);
|
||||
data.insert_value("rows", format!("{}", rows));
|
||||
data.insert_value("columns", format!("{}", cols));
|
||||
let cols = Value {
|
||||
value: (df.as_ref().width() as i64).into(),
|
||||
tag: Tag::default(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(data.into_value()))
|
||||
let rows_col = Column::new("rows".to_string(), vec![rows]);
|
||||
let cols_col = Column::new("columns".to_string(), vec![cols]);
|
||||
|
||||
let df = NuDataFrame::try_from_columns(vec![rows_col, cols_col], &tag.span)?;
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let rows: Option<Tagged<usize>> = args.get_flag("n_rows")?;
|
||||
let tail: bool = args.has_flag("tail");
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let rows = rows.map(|v| v.item);
|
||||
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
|
||||
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
pub struct DataFrame;
|
||||
@ -29,7 +32,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Create new dataframe from a slice of the rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe slice 0 1",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -40,8 +51,21 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let offset: Tagged<usize> = args.req(0)?;
|
||||
let size: Tagged<usize> = args.req(1)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let res = df.as_ref().slice(offset.item as i64, size.item);
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
@ -33,12 +33,39 @@ impl WholeStreamCommand for DataFrame {
|
||||
Example {
|
||||
description: "Create new sorted dataframe",
|
||||
example: "[[a b]; [3 4] [1 2]] | dataframe to-df | dataframe sort a",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Create new sorted series",
|
||||
example: "[3 4 1 2] | dataframe to-series | dataframe sort",
|
||||
result: None,
|
||||
example: "[3 4 1 2] | dataframe to-df | dataframe sort",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -53,8 +80,18 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
let reverse = args.has_flag("reverse");
|
||||
|
||||
match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
match &value.value {
|
||||
UntaggedValue::DataFrame(df) => {
|
||||
if df.is_series() {
|
||||
let columns = df.as_ref().get_column_names();
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.sort(columns, reverse)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
} else {
|
||||
let columns: Vec<Value> = args.rest(0)?;
|
||||
|
||||
if !columns.is_empty() {
|
||||
@ -74,9 +111,6 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
))
|
||||
}
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
|
||||
let res = series.as_ref().sort(reverse);
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
@ -85,3 +119,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use polars::prelude::DataType;
|
||||
@ -37,16 +37,38 @@ impl WholeStreamCommand for DataFrame {
|
||||
Example {
|
||||
description: "Takes selected rows from dataframe",
|
||||
example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dataframe to-df);
|
||||
let indices = ([0 2] | dataframe to-series);
|
||||
let indices = ([0 2] | dataframe to-df);
|
||||
$df | dataframe take $indices"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(4).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Takes selected rows from series",
|
||||
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-series);
|
||||
let indices = ([0 2] | dataframe to-series);
|
||||
example: r#"let series = ([4 1 5 2 4 3] | dataframe to-df);
|
||||
let indices = ([0 2] | dataframe to-df);
|
||||
$series | dataframe take $indices"#,
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![UntaggedValue::int(4).into(), UntaggedValue::int(5).into()],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
@ -56,8 +78,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let value: Value = args.req(0)?;
|
||||
|
||||
let series = match &value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
|
||||
let df = match &value.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only use a series for take command",
|
||||
@ -65,7 +87,9 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}?;
|
||||
|
||||
let casted = match series.as_ref().dtype() {
|
||||
let series = df.as_series(&value.tag.span)?;
|
||||
|
||||
let casted = match series.dtype() {
|
||||
DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series
|
||||
.as_ref()
|
||||
.cast_with_dtype(&DataType::UInt32)
|
||||
@ -88,16 +112,11 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
})?;
|
||||
|
||||
match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
UntaggedValue::DataFrame(df) => {
|
||||
let res = df.as_ref().take(indices);
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => {
|
||||
let res = series.as_ref().take(indices);
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(res, tag)))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"No dataframe or series in stream",
|
||||
"no dataframe or series found in input stream",
|
||||
@ -105,3 +124,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -64,7 +64,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let delimiter: Option<Tagged<String>> = args.get_flag("delimiter")?;
|
||||
let no_header: bool = args.has_flag("no_header");
|
||||
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
|
@ -1,7 +1,10 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature};
|
||||
use nu_protocol::{
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -11,7 +14,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a pipelined Table or List into a polars dataframe"
|
||||
"Converts a List, Table or Dictionary into a polars dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -27,10 +30,108 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes an input stream and converts it to a polars dataframe",
|
||||
vec![
|
||||
Example {
|
||||
description: "Takes a dictionary and creates a dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | dataframe to-df",
|
||||
result: None,
|
||||
}]
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![UntaggedValue::int(1).into(), UntaggedValue::int(3).into()],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![UntaggedValue::int(2).into(), UntaggedValue::int(4).into()],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of tables and creates a dataframe",
|
||||
example: "[[1 2 a] [3 4 b] [5 6 c]] | dataframe to-df",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
UntaggedValue::int(5).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"1".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"2".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("b").into(),
|
||||
UntaggedValue::string("c").into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list and creates a dataframe",
|
||||
example: "[a b c] | dataframe to-df",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::string("a").into(),
|
||||
UntaggedValue::string("b").into(),
|
||||
UntaggedValue::string("c").into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
Example {
|
||||
description: "Takes a list of booleans and creates a dataframe",
|
||||
example: "[$true $true $false] | dataframe to-df",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![Column::new(
|
||||
"0".to_string(),
|
||||
vec![
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(true).into(),
|
||||
UntaggedValue::boolean(false).into(),
|
||||
],
|
||||
)],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -48,7 +48,7 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let file_name: Tagged<PathBuf> = args.req(0)?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
|
@ -1,44 +0,0 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
|
||||
use nu_source::Tagged;
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"dataframe to-series"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a pipelined List into a polars series"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("dataframe to-series").optional(
|
||||
"name",
|
||||
SyntaxShape::String,
|
||||
"Optional series name",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
|
||||
let name: Option<Tagged<String>> = args.opt(0)?;
|
||||
let name = name.map(|v| v.item);
|
||||
|
||||
let series = NuSeries::try_from_iter(args.input, name)?;
|
||||
|
||||
Ok(InputStream::one(series.into_value(tag)))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes an input stream and converts it to a polars series",
|
||||
example: "[1 2 3 4] | dataframe to-series my-col",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::{evaluate_baseline_expr, WholeStreamCommand};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::NuDataFrame,
|
||||
dataframe::{Column, NuDataFrame},
|
||||
hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression},
|
||||
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value,
|
||||
};
|
||||
@ -37,7 +37,15 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Filter dataframe based on column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe where a == 1",
|
||||
result: None,
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new("a".to_string(), vec![UntaggedValue::int(1).into()]),
|
||||
Column::new("b".to_string(), vec![UntaggedValue::int(2).into()]),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -143,7 +151,7 @@ fn filter_dataframe(
|
||||
}?;
|
||||
|
||||
let span = args.call_info.name_tag.span;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &span)?;
|
||||
let (df, _) = NuDataFrame::try_from_stream(&mut args.input, &span)?;
|
||||
|
||||
let col = df
|
||||
.as_ref()
|
||||
@ -214,3 +222,16 @@ fn filter_dataframe(
|
||||
args.call_info.name_tag,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
dataframe::{Column, NuDataFrame},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
@ -33,8 +33,35 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-series) --name c",
|
||||
result: None,
|
||||
"[[a b]; [1 2] [3 4]] | dataframe to-df | dataframe with-column ([5 6] | dataframe to-df) --name c",
|
||||
result: Some(vec![NuDataFrame::try_from_columns(
|
||||
vec![
|
||||
Column::new(
|
||||
"a".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(1).into(),
|
||||
UntaggedValue::int(3).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"b".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(2).into(),
|
||||
UntaggedValue::int(4).into(),
|
||||
],
|
||||
),
|
||||
Column::new(
|
||||
"c".to_string(),
|
||||
vec![
|
||||
UntaggedValue::int(5).into(),
|
||||
UntaggedValue::int(6).into(),
|
||||
],
|
||||
),
|
||||
],
|
||||
&Span::default(),
|
||||
)
|
||||
.expect("simple df for test should not fail")
|
||||
.into_value(Tag::default())]),
|
||||
}]
|
||||
}
|
||||
}
|
||||
@ -44,8 +71,8 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let value: Value = args.req(0)?;
|
||||
let name: Tagged<String> = args.req_named("name")?;
|
||||
|
||||
let mut series = match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
|
||||
let df = match value.value {
|
||||
UntaggedValue::DataFrame(df) => Ok(df),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only add a series to a dataframe",
|
||||
@ -53,9 +80,11 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
)),
|
||||
}?;
|
||||
|
||||
let series = series.as_mut().rename(name.item.as_ref()).clone();
|
||||
let mut series = df.as_series(&value.tag.span)?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let series = series.rename(name.item.as_ref()).clone();
|
||||
|
||||
let (mut df, _) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
df.as_mut()
|
||||
.with_column(series)
|
||||
@ -63,3 +92,16 @@ fn command(mut args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
Ok(OutputStream::one(df.into_value(tag)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{DataFrame, ShellError};
    use crate::examples::test_dataframe;

    /// Runs the command's declared examples through the dataframe example harness.
    #[test]
    fn examples_work_as_expected() -> Result<(), ShellError> {
        test_dataframe(DataFrame {})
    }
}
|
||||
|
@ -126,11 +126,13 @@ fn first(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
tag,
|
||||
)),
|
||||
#[cfg(all(not(target_arch = "wasm32"), feature = "dataframe"))]
|
||||
UntaggedValue::DataFrame(_) => Err(ShellError::labeled_error(
|
||||
"unsure how to handled UntaggedValue::DataFrame",
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
Err(ShellError::labeled_error(
|
||||
"unsure how to handled dataframe struct",
|
||||
"found dataframe",
|
||||
tag,
|
||||
)),
|
||||
))
|
||||
}
|
||||
},
|
||||
None => Ok(input_peek.take(rows_desired).into_output_stream()),
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
|
||||
))
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => {
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"uniq -c cannot operate on data structs",
|
||||
"source",
|
||||
|
@ -115,7 +115,7 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
|
||||
serde_json::Value::Null
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => serde_json::Value::Null,
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_json::Value::Null,
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
|
||||
b.iter()
|
||||
.map(|x| {
|
||||
|
@ -74,7 +74,9 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => toml::Value::String("<Data>".to_string()),
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
toml::Value::String("<Data>".to_string())
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||
|
@ -96,7 +96,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
|
||||
serde_yaml::Value::Null
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => serde_yaml::Value::Null,
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => serde_yaml::Value::Null,
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
|
||||
b.iter()
|
||||
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))
|
||||
|
@ -27,15 +27,16 @@ pub use core_commands::*;
|
||||
pub use dataframe::{
|
||||
DataFrame, DataFrameAggregate, DataFrameAllFalse, DataFrameAllTrue, DataFrameArgMax,
|
||||
DataFrameArgMin, DataFrameArgSort, DataFrameArgTrue, DataFrameArgUnique, DataFrameColumn,
|
||||
DataFrameDTypes, DataFrameDrop, DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies,
|
||||
DataFrameFilter, DataFrameFirst, DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated,
|
||||
DataFrameIsIn, DataFrameIsNotNull, DataFrameIsNull, DataFrameIsUnique, DataFrameJoin,
|
||||
DataFrameLast, DataFrameList, DataFrameMelt, DataFrameNNull, DataFrameNUnique, DataFrameNot,
|
||||
DataFrameOpen, DataFramePivot, DataFrameReplace, DataFrameSample, DataFrameSelect,
|
||||
DataFrameSeriesRename, DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift,
|
||||
DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTake, DataFrameToCsv, DataFrameToDF,
|
||||
DataFrameToParquet, DataFrameToSeries, DataFrameUnique, DataFrameValueCounts, DataFrameWhere,
|
||||
DataFrameWithColumn,
|
||||
DataFrameConcatenate, DataFrameContains, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameFilter, DataFrameFirst,
|
||||
DataFrameGet, DataFrameGroupBy, DataFrameIsDuplicated, DataFrameIsIn, DataFrameIsNotNull,
|
||||
DataFrameIsNull, DataFrameIsUnique, DataFrameJoin, DataFrameLast, DataFrameList, DataFrameMelt,
|
||||
DataFrameNNull, DataFrameNUnique, DataFrameNot, DataFrameOpen, DataFramePivot,
|
||||
DataFrameReplace, DataFrameReplaceAll, DataFrameSample, DataFrameSelect, DataFrameSeriesRename,
|
||||
DataFrameSet, DataFrameSetWithIdx, DataFrameShape, DataFrameShift, DataFrameShow,
|
||||
DataFrameSlice, DataFrameSort, DataFrameStringLengths, DataFrameStringSlice, DataFrameTake,
|
||||
DataFrameToCsv, DataFrameToDF, DataFrameToLowercase, DataFrameToParquet, DataFrameToUppercase,
|
||||
DataFrameUnique, DataFrameValueCounts, DataFrameWhere, DataFrameWithColumn,
|
||||
};
|
||||
pub use env::*;
|
||||
pub use filesystem::*;
|
||||
|
@ -9,7 +9,7 @@ use nu_protocol::{Primitive, Signature, UntaggedValue, Value};
|
||||
use nu_table::TextStyle;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use nu_protocol::dataframe::PolarsData;
|
||||
use nu_protocol::dataframe::FrameStruct;
|
||||
|
||||
pub struct Command;
|
||||
|
||||
@ -239,7 +239,7 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)),
|
||||
value: UntaggedValue::DataFrame(df),
|
||||
tag,
|
||||
} => {
|
||||
if let Some(table) = table {
|
||||
@ -253,7 +253,7 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::GroupBy(groupby)),
|
||||
value: UntaggedValue::FrameStruct(FrameStruct::GroupBy(groupby)),
|
||||
tag,
|
||||
} => {
|
||||
if let Some(table) = table {
|
||||
@ -265,20 +265,6 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let _ = result.collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(series)),
|
||||
tag,
|
||||
} => {
|
||||
if let Some(table) = table {
|
||||
// TODO. Configure the parameter rows from file. It can be
|
||||
// adjusted to see a certain amount of values in the head
|
||||
let command_args =
|
||||
create_default_command_args(&context, series.print()?.into(), tag);
|
||||
let result = table.run(command_args)?;
|
||||
let _ = result.collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
..
|
||||
|
@ -287,7 +287,6 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||
whole_stream_command(DataFramePivot),
|
||||
whole_stream_command(DataFrameWhere),
|
||||
whole_stream_command(DataFrameToDF),
|
||||
whole_stream_command(DataFrameToSeries),
|
||||
whole_stream_command(DataFrameToParquet),
|
||||
whole_stream_command(DataFrameToCsv),
|
||||
whole_stream_command(DataFrameSort),
|
||||
@ -321,6 +320,13 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||
whole_stream_command(DataFrameSetWithIdx),
|
||||
whole_stream_command(DataFrameShape),
|
||||
whole_stream_command(DataFrameReplace),
|
||||
whole_stream_command(DataFrameReplaceAll),
|
||||
whole_stream_command(DataFrameStringLengths),
|
||||
whole_stream_command(DataFrameContains),
|
||||
whole_stream_command(DataFrameToLowercase),
|
||||
whole_stream_command(DataFrameToUppercase),
|
||||
whole_stream_command(DataFrameStringSlice),
|
||||
whole_stream_command(DataFrameConcatenate),
|
||||
]);
|
||||
|
||||
#[cfg(feature = "clipboard-cli")]
|
||||
|
@ -14,6 +14,11 @@ use nu_protocol::hir::{ClassifiedBlock, ExternalRedirection};
|
||||
use nu_protocol::{ShellTypeName, Value};
|
||||
use nu_source::AnchorLocation;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use crate::commands::{
|
||||
DataFrameGroupBy, DataFrameIsNull, DataFrameShift, DataFrameToDF, DataFrameWithColumn,
|
||||
};
|
||||
|
||||
use crate::commands::{
|
||||
Append, BuildString, Each, Echo, First, Get, Keep, Last, Let, Math, MathMode, Nth, Select,
|
||||
StrCollect, Wrap,
|
||||
@ -149,6 +154,85 @@ pub fn test(cmd: impl WholeStreamCommand + 'static) -> Result<(), ShellError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs the examples of a dataframe command and checks their results.
///
/// Each example pipeline is parsed in a fresh clone of a context that holds
/// `cmd`, a handful of dataframe commands and some basic commands. Examples
/// without an expected `result` are only parsed. For the others the pipeline
/// is evaluated and the first produced value is compared with the first
/// expected value; both must be `UntaggedValue::DataFrame`.
///
/// # Errors
/// Propagates the `ShellError` raised while parsing or evaluating an example.
///
/// # Panics
/// Panics when an example yields no value, when either side is not a
/// dataframe, when the expected vector is empty, or when the dataframes differ.
#[cfg(feature = "dataframe")]
pub fn test_dataframe(cmd: impl WholeStreamCommand + 'static) -> Result<(), ShellError> {
    use nu_protocol::UntaggedValue;

    // Collected first: `cmd` is moved into the context just below.
    let examples = cmd.examples();

    let base_context = EvaluationContext::basic();
    base_context.add_commands(vec![
        whole_stream_command(cmd),
        // Commands used with dataframe
        whole_stream_command(DataFrameToDF),
        whole_stream_command(DataFrameShift),
        whole_stream_command(DataFrameIsNull),
        whole_stream_command(DataFrameGroupBy),
        whole_stream_command(DataFrameWithColumn),
        // Base commands for context
        whole_stream_command(Math),
        whole_stream_command(MathMode {}),
        whole_stream_command(Echo {}),
        whole_stream_command(BuildString {}),
        whole_stream_command(Get {}),
        whole_stream_command(Keep {}),
        whole_stream_command(Each {}),
        whole_stream_command(Let {}),
        whole_stream_command(Select),
        whole_stream_command(StrCollect),
        whole_stream_command(Wrap),
    ]);

    for sample_pipeline in examples {
        let mut ctx = base_context.clone();

        println!("{:?}", &sample_pipeline.example);
        let block = parse_line(sample_pipeline.example, &ctx)?;

        // Examples without an expected result are only checked for parsing.
        let expected_values = match &sample_pipeline.result {
            Some(values) => values,
            None => continue,
        };

        let timer = std::time::Instant::now();
        let result = evaluate_block(block, &mut ctx)?;

        println!("input: {}", sample_pipeline.example);
        println!("result: {:?}", result);
        println!("done: {:?}", timer.elapsed());

        let value = result.get(0).unwrap_or_else(|| {
            panic!(
                "Unable to extract a value after parsing example: {}",
                sample_pipeline.example
            )
        });

        let df = if let UntaggedValue::DataFrame(frame) = &value.value {
            frame
        } else {
            panic!(
                "Unable to extract dataframe from parsed example: {}",
                sample_pipeline.example
            )
        };

        let expected = expected_values
            .get(0)
            .unwrap_or_else(|| panic!("Empty vector in result example"));

        let df_expected = if let UntaggedValue::DataFrame(frame) = &expected.value {
            frame
        } else {
            panic!("Unable to extract dataframe from example result")
        };

        println!("expected: {:?}", df_expected);

        assert_eq!(df, df_expected)
    }

    Ok(())
}
|
||||
|
||||
pub fn test_anchors(cmd: Command) -> Result<(), ShellError> {
|
||||
let examples = cmd.examples();
|
||||
|
||||
|
@ -131,7 +131,7 @@ impl InlineShape {
|
||||
UntaggedValue::Error(_) => InlineShape::Error,
|
||||
UntaggedValue::Block(_) => InlineShape::Block,
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => InlineShape::DataFrame,
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => InlineShape::DataFrame,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,9 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => toml::Value::String("<DataFrame>".to_string()),
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
toml::Value::String("<DataFrame>".to_string())
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||
|
@ -1,30 +1,36 @@
|
||||
use bigdecimal::BigDecimal;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::dataframe::NuDataFrame;
|
||||
use nu_protocol::hir::Operator;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuSeries, PolarsData},
|
||||
Primitive, ShellTypeName, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value};
|
||||
use nu_source::Span;
|
||||
use num_traits::ToPrimitive;
|
||||
|
||||
use polars::prelude::{
|
||||
BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries,
|
||||
NumOpsDispatchChecked, PolarsError, Series,
|
||||
BooleanType, ChunkCompare, ChunkedArray, DataFrame, DataType, Float64Type, Int64Type,
|
||||
IntoSeries, NumOpsDispatchChecked, PolarsError, Series,
|
||||
};
|
||||
use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub};
|
||||
|
||||
pub fn compute_between_series(
|
||||
pub fn compute_between_dataframes(
|
||||
operator: Operator,
|
||||
left: &Value,
|
||||
right: &Value,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
if let (
|
||||
UntaggedValue::DataFrame(PolarsData::Series(lhs)),
|
||||
UntaggedValue::DataFrame(PolarsData::Series(rhs)),
|
||||
) = (&left.value, &right.value)
|
||||
if let (UntaggedValue::DataFrame(lhs), UntaggedValue::DataFrame(rhs)) =
|
||||
(&left.value, &right.value)
|
||||
{
|
||||
if lhs.as_ref().dtype() != rhs.as_ref().dtype() {
|
||||
let operation_span = left.tag.span.until(right.tag.span);
|
||||
match (lhs.is_series(), rhs.is_series()) {
|
||||
(true, true) => {
|
||||
let lhs = &lhs
|
||||
.as_series(&left.tag.span)
|
||||
.expect("Already checked that is a series");
|
||||
let rhs = &rhs
|
||||
.as_series(&right.tag.span)
|
||||
.expect("Already checked that is a series");
|
||||
|
||||
if lhs.dtype() != rhs.dtype() {
|
||||
return Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
"Mixed datatypes",
|
||||
@ -39,7 +45,7 @@ pub fn compute_between_series(
|
||||
));
|
||||
}
|
||||
|
||||
if lhs.as_ref().len() != rhs.as_ref().len() {
|
||||
if lhs.len() != rhs.len() {
|
||||
return Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Different length",
|
||||
"this column length does not match the right hand column length",
|
||||
@ -47,145 +53,207 @@ pub fn compute_between_series(
|
||||
)));
|
||||
}
|
||||
|
||||
compute_between_series(operator, lhs, rhs, &operation_span)
|
||||
}
|
||||
_ => {
|
||||
if lhs.as_ref().height() != rhs.as_ref().height() {
|
||||
return Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
"Mixed datatypes",
|
||||
"this datatype size does not match the right hand side datatype",
|
||||
&left.tag.span,
|
||||
"Perhaps you want to select another dataframe with same number of rows",
|
||||
&right.tag.span,
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
between_dataframes(operator, lhs, rhs, &operation_span)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Err((left.type_name(), right.type_name()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn between_dataframes(
|
||||
operator: Operator,
|
||||
lhs: &NuDataFrame,
|
||||
rhs: &NuDataFrame,
|
||||
operation_span: &Span,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
match operator {
|
||||
Operator::Plus => {
|
||||
let mut res = lhs.as_ref() + rhs.as_ref();
|
||||
let name = format!("sum_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut columns: Vec<&str> = Vec::new();
|
||||
|
||||
let new = lhs
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.iter()
|
||||
.chain(rhs.as_ref().get_columns().iter())
|
||||
.map(|s| {
|
||||
let name = if columns.contains(&s.name()) {
|
||||
format!("{}_{}", s.name(), "x")
|
||||
} else {
|
||||
columns.push(s.name());
|
||||
s.name().to_string()
|
||||
};
|
||||
|
||||
let mut series = s.clone();
|
||||
series.rename(name.as_str());
|
||||
series
|
||||
})
|
||||
.collect::<Vec<Series>>();
|
||||
|
||||
match DataFrame::new(new) {
|
||||
Ok(df) => Ok(NuDataFrame::dataframe_to_untagged(df)),
|
||||
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Appending error",
|
||||
format!("{}", e),
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Incorrect datatype",
|
||||
"unable to use this datatype for this operation",
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_between_series(
|
||||
operator: Operator,
|
||||
lhs: &Series,
|
||||
rhs: &Series,
|
||||
operation_span: &Span,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
match operator {
|
||||
Operator::Plus => {
|
||||
let mut res = lhs + rhs;
|
||||
let name = format!("sum_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::Minus => {
|
||||
let mut res = lhs.as_ref() - rhs.as_ref();
|
||||
let name = format!("sub_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = lhs - rhs;
|
||||
let name = format!("sub_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::Multiply => {
|
||||
let mut res = lhs.as_ref() * rhs.as_ref();
|
||||
let name = format!("mul_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = lhs * rhs;
|
||||
let name = format!("mul_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::Divide => {
|
||||
let res = lhs.as_ref().checked_div(rhs.as_ref());
|
||||
let res = lhs.checked_div(rhs);
|
||||
match res {
|
||||
Ok(mut res) => {
|
||||
let name = format!("div_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let name = format!("div_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Err(e) => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Division error",
|
||||
format!("{}", e),
|
||||
&left.tag.span,
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
Operator::Equal => {
|
||||
let mut res = Series::eq(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("eq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::eq(lhs, rhs).into_series();
|
||||
let name = format!("eq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::NotEqual => {
|
||||
let mut res = Series::neq(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("neq_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::neq(lhs, rhs).into_series();
|
||||
let name = format!("neq_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::LessThan => {
|
||||
let mut res = Series::lt(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("lt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::lt(lhs, rhs).into_series();
|
||||
let name = format!("lt_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::LessThanOrEqual => {
|
||||
let mut res = Series::lt_eq(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("lte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::lt_eq(lhs, rhs).into_series();
|
||||
let name = format!("lte_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::GreaterThan => {
|
||||
let mut res = Series::gt(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("gt_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::gt(lhs, rhs).into_series();
|
||||
let name = format!("gt_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::GreaterThanOrEqual => {
|
||||
let mut res = Series::gt_eq(lhs.as_ref(), rhs.as_ref()).into_series();
|
||||
let name = format!("gte_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let mut res = Series::gt_eq(lhs, rhs).into_series();
|
||||
let name = format!("gte_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, operation_span))
|
||||
}
|
||||
Operator::And => match lhs.as_ref().dtype() {
|
||||
Operator::And => match lhs.dtype() {
|
||||
DataType::Boolean => {
|
||||
let lhs_cast = lhs.as_ref().bool();
|
||||
let rhs_cast = rhs.as_ref().bool();
|
||||
let lhs_cast = lhs.bool();
|
||||
let rhs_cast = rhs.bool();
|
||||
|
||||
match (lhs_cast, rhs_cast) {
|
||||
(Ok(l), Ok(r)) => {
|
||||
let mut res = l.bitand(r).into_series();
|
||||
let name =
|
||||
format!("and_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let name = format!("and_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, &operation_span))
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
"unable to cast to boolean",
|
||||
&left.tag.span,
|
||||
"unable to cast to boolean",
|
||||
&right.tag.span,
|
||||
),
|
||||
)),
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Incorrect datatype",
|
||||
"And operation can only be done with boolean values",
|
||||
&left.tag.span,
|
||||
operation_span,
|
||||
))),
|
||||
},
|
||||
Operator::Or => match lhs.as_ref().dtype() {
|
||||
Operator::Or => match lhs.dtype() {
|
||||
DataType::Boolean => {
|
||||
let lhs_cast = lhs.as_ref().bool();
|
||||
let rhs_cast = rhs.as_ref().bool();
|
||||
let lhs_cast = lhs.bool();
|
||||
let rhs_cast = rhs.bool();
|
||||
|
||||
match (lhs_cast, rhs_cast) {
|
||||
(Ok(l), Ok(r)) => {
|
||||
let mut res = l.bitor(r).into_series();
|
||||
let name =
|
||||
format!("or_{}_{}", lhs.as_ref().name(), rhs.as_ref().name());
|
||||
let name = format!("or_{}_{}", lhs.name(), rhs.name());
|
||||
res.rename(name.as_ref());
|
||||
Ok(NuSeries::series_to_untagged(res))
|
||||
Ok(NuDataFrame::series_to_untagged(res, &operation_span))
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
"unable to cast to boolean",
|
||||
&left.tag.span,
|
||||
"unable to cast to boolean",
|
||||
&right.tag.span,
|
||||
),
|
||||
)),
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Incorrect datatype",
|
||||
"And operation can only be done with boolean values",
|
||||
&left.tag.span,
|
||||
operation_span,
|
||||
))),
|
||||
},
|
||||
_ => Ok(UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Incorrect datatype",
|
||||
"unable to use this datatype for this operation",
|
||||
&left.tag.span,
|
||||
operation_span,
|
||||
))),
|
||||
}
|
||||
} else {
|
||||
Err((left.type_name(), right.type_name()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_series_single_value(
|
||||
@ -193,26 +261,31 @@ pub fn compute_series_single_value(
|
||||
left: &Value,
|
||||
right: &Value,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
if let (UntaggedValue::DataFrame(PolarsData::Series(lhs)), UntaggedValue::Primitive(_)) =
|
||||
if let (UntaggedValue::DataFrame(lhs), UntaggedValue::Primitive(_)) =
|
||||
(&left.value, &right.value)
|
||||
{
|
||||
let lhs = match lhs.as_series(&left.tag.span) {
|
||||
Ok(series) => series,
|
||||
Err(e) => return Ok(UntaggedValue::Error(e)),
|
||||
};
|
||||
|
||||
match operator {
|
||||
Operator::Plus => match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Int64Type>>::add,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
<ChunkedArray<Int64Type>>::add,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Float64Type>>::add,
|
||||
&left.tag.span,
|
||||
@ -229,20 +302,20 @@ pub fn compute_series_single_value(
|
||||
},
|
||||
Operator::Minus => match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Int64Type>>::sub,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
<ChunkedArray<Int64Type>>::sub,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Float64Type>>::sub,
|
||||
&left.tag.span,
|
||||
@ -259,20 +332,20 @@ pub fn compute_series_single_value(
|
||||
},
|
||||
Operator::Multiply => match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Int64Type>>::mul,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
<ChunkedArray<Int64Type>>::mul,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compute_series_decimal(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Float64Type>>::mul,
|
||||
&left.tag.span,
|
||||
@ -297,7 +370,7 @@ pub fn compute_series_single_value(
|
||||
)))
|
||||
} else {
|
||||
Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Int64Type>>::div,
|
||||
&left.tag.span,
|
||||
@ -313,7 +386,7 @@ pub fn compute_series_single_value(
|
||||
)))
|
||||
} else {
|
||||
Ok(compute_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
<ChunkedArray<Int64Type>>::div,
|
||||
@ -330,7 +403,7 @@ pub fn compute_series_single_value(
|
||||
)))
|
||||
} else {
|
||||
Ok(compute_series_decimal(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
<ChunkedArray<Float64Type>>::div,
|
||||
&left.tag.span,
|
||||
@ -350,20 +423,20 @@ pub fn compute_series_single_value(
|
||||
Operator::Equal => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::eq, &left.tag.span),
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::eq, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
@ -376,26 +449,24 @@ pub fn compute_series_single_value(
|
||||
)),
|
||||
}
|
||||
}
|
||||
Operator::NotEqual => match &right.value {
|
||||
Operator::NotEqual => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::neq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::neq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
|
||||
lhs.as_ref(),
|
||||
val,
|
||||
ChunkedArray::neq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::neq, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
"Operation unavailable",
|
||||
@ -405,24 +476,25 @@ pub fn compute_series_single_value(
|
||||
&right.tag.span,
|
||||
),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
Operator::LessThan => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::lt,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::lt,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::lt, &left.tag.span),
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::lt, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
@ -435,26 +507,24 @@ pub fn compute_series_single_value(
|
||||
)),
|
||||
}
|
||||
}
|
||||
Operator::LessThanOrEqual => match &right.value {
|
||||
Operator::LessThanOrEqual => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::lt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::lt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
|
||||
lhs.as_ref(),
|
||||
val,
|
||||
ChunkedArray::lt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::lt_eq, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
"Operation unavailable",
|
||||
@ -464,24 +534,25 @@ pub fn compute_series_single_value(
|
||||
&right.tag.span,
|
||||
),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
Operator::GreaterThan => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::gt,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::gt,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(lhs.as_ref(), val, ChunkedArray::gt, &left.tag.span),
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::gt, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
@ -494,26 +565,24 @@ pub fn compute_series_single_value(
|
||||
)),
|
||||
}
|
||||
}
|
||||
Operator::GreaterThanOrEqual => match &right.value {
|
||||
Operator::GreaterThanOrEqual => {
|
||||
match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
val,
|
||||
ChunkedArray::gt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::BigInt(val)) => Ok(compare_series_i64(
|
||||
lhs.as_ref(),
|
||||
&lhs,
|
||||
&val.to_i64()
|
||||
.expect("Internal error: protocol did not use compatible decimal"),
|
||||
ChunkedArray::gt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(compare_series_decimal(
|
||||
lhs.as_ref(),
|
||||
val,
|
||||
ChunkedArray::gt_eq,
|
||||
&left.tag.span,
|
||||
)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(val)) => Ok(
|
||||
compare_series_decimal(&lhs, val, ChunkedArray::gt_eq, &left.tag.span),
|
||||
),
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
"Operation unavailable",
|
||||
@ -523,10 +592,11 @@ pub fn compute_series_single_value(
|
||||
&right.tag.span,
|
||||
),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
Operator::Contains => match &right.value {
|
||||
UntaggedValue::Primitive(Primitive::String(val)) => {
|
||||
Ok(contains_series_pat(lhs.as_ref(), val, &left.tag.span))
|
||||
Ok(contains_series_pat(&lhs, val, &left.tag.span))
|
||||
}
|
||||
_ => Ok(UntaggedValue::Error(
|
||||
ShellError::labeled_error_with_secondary(
|
||||
@ -597,7 +667,7 @@ where
|
||||
Ok(casted) => {
|
||||
let res = f(casted.clone(), val);
|
||||
let res = res.into_series();
|
||||
NuSeries::series_to_untagged(res)
|
||||
NuDataFrame::series_to_untagged(res, span)
|
||||
}
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
@ -667,7 +737,7 @@ where
|
||||
Ok(casted) => {
|
||||
let res = f(casted.clone(), val);
|
||||
let res = res.into_series();
|
||||
NuSeries::series_to_untagged(res)
|
||||
NuDataFrame::series_to_untagged(res, span)
|
||||
}
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
@ -725,7 +795,7 @@ where
|
||||
Ok(casted) => {
|
||||
let res = f(casted, val);
|
||||
let res = res.into_series();
|
||||
NuSeries::series_to_untagged(res)
|
||||
NuDataFrame::series_to_untagged(res, span)
|
||||
}
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
@ -795,7 +865,7 @@ where
|
||||
Ok(casted) => {
|
||||
let res = f(casted, val);
|
||||
let res = res.into_series();
|
||||
NuSeries::series_to_untagged(res)
|
||||
NuDataFrame::series_to_untagged(res, span)
|
||||
}
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Casting error",
|
||||
@ -814,7 +884,7 @@ fn contains_series_pat(series: &Series, pat: &str, span: &Span) -> UntaggedValue
|
||||
match res {
|
||||
Ok(res) => {
|
||||
let res = res.into_series();
|
||||
NuSeries::series_to_untagged(res)
|
||||
NuDataFrame::series_to_untagged(res, span)
|
||||
}
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"Search error",
|
||||
|
@ -5,9 +5,7 @@ use nu_protocol::{Primitive, ShellTypeName, UntaggedValue, Value};
|
||||
use std::ops::Not;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use nu_data::dataframe::{compute_between_series, compute_series_single_value};
|
||||
#[cfg(feature = "dataframe")]
|
||||
use nu_protocol::dataframe::PolarsData;
|
||||
use nu_data::dataframe::{compute_between_dataframes, compute_series_single_value};
|
||||
|
||||
pub fn apply_operator(
|
||||
op: Operator,
|
||||
@ -15,13 +13,10 @@ pub fn apply_operator(
|
||||
right: &Value,
|
||||
) -> Result<UntaggedValue, (&'static str, &'static str)> {
|
||||
#[cfg(feature = "dataframe")]
|
||||
if let (
|
||||
UntaggedValue::DataFrame(PolarsData::Series(_)),
|
||||
UntaggedValue::DataFrame(PolarsData::Series(_)),
|
||||
) = (&left.value, &right.value)
|
||||
if let (UntaggedValue::DataFrame(_), UntaggedValue::DataFrame(_)) = (&left.value, &right.value)
|
||||
{
|
||||
return compute_between_series(op, left, right);
|
||||
} else if let (UntaggedValue::DataFrame(PolarsData::Series(_)), UntaggedValue::Primitive(_)) =
|
||||
return compute_between_dataframes(op, left, right);
|
||||
} else if let (UntaggedValue::DataFrame(_), UntaggedValue::Primitive(_)) =
|
||||
(&left.value, &right.value)
|
||||
{
|
||||
return compute_series_single_value(op, left, right);
|
||||
|
@ -1,15 +1,11 @@
|
||||
pub mod nu_dataframe;
|
||||
pub mod nu_groupby;
|
||||
pub mod nu_series;
|
||||
|
||||
pub use nu_dataframe::NuDataFrame;
|
||||
pub use nu_dataframe::{Column, NuDataFrame};
|
||||
pub use nu_groupby::NuGroupBy;
|
||||
pub use nu_series::NuSeries;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
pub enum PolarsData {
|
||||
EagerDataFrame(NuDataFrame),
|
||||
pub enum FrameStruct {
|
||||
GroupBy(NuGroupBy),
|
||||
Series(NuSeries),
|
||||
}
|
||||
|
@ -1,54 +1,147 @@
|
||||
use indexmap::{map::Entry, IndexMap};
|
||||
use std::cmp::Ordering;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
|
||||
use bigdecimal::FromPrimitive;
|
||||
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
||||
use nu_errors::ShellError;
|
||||
use nu_source::{Span, Tag};
|
||||
use num_bigint::BigInt;
|
||||
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
|
||||
use polars::prelude::{AnyValue, DataFrame, DataType, NamedFrom, Series, TimeUnit};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Dictionary, Primitive, UntaggedValue, Value};
|
||||
|
||||
use super::PolarsData;
|
||||
|
||||
const SECS_PER_DAY: i64 = 86_400;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum InputValue {
|
||||
Integer,
|
||||
Decimal,
|
||||
String,
|
||||
pub struct Column {
|
||||
name: String,
|
||||
values: Vec<Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ColumnValues {
|
||||
pub value_type: InputValue,
|
||||
pub values: Vec<Value>,
|
||||
}
|
||||
impl Column {
|
||||
pub fn new(name: String, values: Vec<Value>) -> Self {
|
||||
Self { name, values }
|
||||
}
|
||||
|
||||
impl Default for ColumnValues {
|
||||
fn default() -> Self {
|
||||
pub fn new_empty(name: String) -> Self {
|
||||
Self {
|
||||
value_type: InputValue::Integer,
|
||||
name,
|
||||
values: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, value: Value) {
|
||||
self.values.push(value)
|
||||
}
|
||||
}
|
||||
|
||||
type ColumnMap = HashMap<String, ColumnValues>;
|
||||
#[derive(Debug)]
|
||||
enum InputType {
|
||||
Integer,
|
||||
Decimal,
|
||||
String,
|
||||
Boolean,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TypedColumn {
|
||||
pub column: Column,
|
||||
pub column_type: Option<InputType>,
|
||||
}
|
||||
|
||||
impl TypedColumn {
|
||||
fn new_empty(name: String) -> Self {
|
||||
Self {
|
||||
column: Column::new_empty(name),
|
||||
column_type: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for TypedColumn {
|
||||
type Target = Column;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.column
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for TypedColumn {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.column
|
||||
}
|
||||
}
|
||||
|
||||
type ColumnMap = IndexMap<String, TypedColumn>;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NuDataFrame {
|
||||
dataframe: DataFrame,
|
||||
}
|
||||
|
||||
// TODO. Better definition of equality and comparison for a dataframe.
|
||||
// Probably it make sense to have a name field and use it for comparisons
|
||||
// Dataframes are considered equal if they have the same shape, column name
|
||||
// and values
|
||||
impl PartialEq for NuDataFrame {
|
||||
fn eq(&self, _: &Self) -> bool {
|
||||
false
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
if self.as_ref().width() == 0 {
|
||||
// checking for empty dataframe
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.as_ref().get_column_names() != other.as_ref().get_column_names() {
|
||||
// checking both dataframes share the same names
|
||||
return false;
|
||||
}
|
||||
|
||||
if self.as_ref().height() != other.as_ref().height() {
|
||||
// checking both dataframes have the same row size
|
||||
return false;
|
||||
}
|
||||
|
||||
// sorting dataframe by the first column
|
||||
let column_names = self.as_ref().get_column_names();
|
||||
let first_col = column_names
|
||||
.get(0)
|
||||
.expect("already checked that dataframe is different than 0");
|
||||
|
||||
// if unable to sort, then unable to compare
|
||||
let lhs = match self.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return false,
|
||||
};
|
||||
|
||||
let rhs = match other.as_ref().sort(*first_col, false) {
|
||||
Ok(df) => df,
|
||||
Err(_) => return false,
|
||||
};
|
||||
|
||||
for name in self.as_ref().get_column_names() {
|
||||
let self_series = lhs.column(name).expect("name from dataframe names");
|
||||
|
||||
let other_series = rhs
|
||||
.column(name)
|
||||
.expect("already checked that name in other");
|
||||
|
||||
let self_series = match self_series.dtype() {
|
||||
// Casting needed to compare other numeric types with nushell numeric type.
|
||||
// In nushell we only have i64 integer numeric types and any array created
|
||||
// with nushell untagged primitives will be of type i64
|
||||
DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) {
|
||||
Ok(series) => series,
|
||||
Err(_) => return false,
|
||||
},
|
||||
_ => self_series.clone(),
|
||||
};
|
||||
|
||||
if !self_series.series_equal(&other_series) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,14 +180,14 @@ impl NuDataFrame {
|
||||
NuDataFrame { dataframe }
|
||||
}
|
||||
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<(Self, Tag), ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
|
||||
UntaggedValue::DataFrame(df) => Some((df, value.tag)),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or_else(|| {
|
||||
@ -113,41 +206,127 @@ impl NuDataFrame {
|
||||
// Dictionary to store the columnar data extracted from
|
||||
// the input. During the iteration we check if the values
|
||||
// have different type
|
||||
let mut column_values: ColumnMap = HashMap::new();
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for value in iter {
|
||||
match value.value {
|
||||
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
|
||||
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
|
||||
UntaggedValue::Primitive(Primitive::Int(_))
|
||||
| UntaggedValue::Primitive(Primitive::Decimal(_))
|
||||
| UntaggedValue::Primitive(Primitive::String(_))
|
||||
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
|
||||
let key = format!("{}", 0);
|
||||
insert_value(value, key, &mut column_values)?
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Format not supported",
|
||||
"Value not supported for conversion",
|
||||
&value.tag,
|
||||
"Perhaps you want to use a List of Tables or a Dictionary",
|
||||
"Perhaps you want to use a List, a List of Tables or a Dictionary",
|
||||
&value.tag,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
from_parsed_columns(column_values, tag)
|
||||
from_parsed_columns(column_values, &tag.span)
|
||||
}
|
||||
|
||||
pub fn try_from_series(columns: Vec<Series>, span: &Span) -> Result<Self, ShellError> {
|
||||
let dataframe = DataFrame::new(columns).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
"DataFrame Creation",
|
||||
format!("Unable to create DataFrame: {}", e),
|
||||
span,
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(Self { dataframe })
|
||||
}
|
||||
|
||||
pub fn try_from_columns(columns: Vec<Column>, span: &Span) -> Result<Self, ShellError> {
|
||||
let mut column_values: ColumnMap = IndexMap::new();
|
||||
|
||||
for column in columns {
|
||||
for value in column.values {
|
||||
insert_value(value, column.name.clone(), &mut column_values)?;
|
||||
}
|
||||
}
|
||||
|
||||
from_parsed_columns(column_values, span)
|
||||
}
|
||||
|
||||
pub fn into_value(self, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
|
||||
value: Self::into_untagged(self),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_untagged(self) -> UntaggedValue {
|
||||
UntaggedValue::DataFrame(self)
|
||||
}
|
||||
|
||||
pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
|
||||
value: Self::dataframe_to_untagged(df),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dataframe_to_untagged(df: DataFrame) -> UntaggedValue {
|
||||
UntaggedValue::DataFrame(Self::new(df))
|
||||
}
|
||||
|
||||
pub fn series_to_untagged(series: Series, span: &Span) -> UntaggedValue {
|
||||
match DataFrame::new(vec![series]) {
|
||||
Ok(dataframe) => UntaggedValue::DataFrame(Self { dataframe }),
|
||||
Err(e) => UntaggedValue::Error(ShellError::labeled_error(
|
||||
"DataFrame Creation",
|
||||
format!("Unable to create DataFrame: {}", e),
|
||||
span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn column(&self, column: &str, tag: &Tag) -> Result<Self, ShellError> {
|
||||
let s = self.as_ref().column(column).map_err(|e| {
|
||||
ShellError::labeled_error("Column not found", format!("{}", e), tag.span)
|
||||
})?;
|
||||
|
||||
let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| {
|
||||
ShellError::labeled_error("DataFrame error", format!("{}", e), tag.span)
|
||||
})?;
|
||||
|
||||
Ok(Self { dataframe })
|
||||
}
|
||||
|
||||
pub fn is_series(&self) -> bool {
|
||||
self.as_ref().width() == 1
|
||||
}
|
||||
|
||||
pub fn as_series(&self, span: &Span) -> Result<Series, ShellError> {
|
||||
if !self.is_series() {
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Not a Series",
|
||||
"DataFrame cannot be used as Series",
|
||||
span,
|
||||
"Note that a Series is a DataFrame with one column",
|
||||
span,
|
||||
));
|
||||
}
|
||||
|
||||
let series = self
|
||||
.as_ref()
|
||||
.get_columns()
|
||||
.get(0)
|
||||
.expect("We have already checked that the width is 1");
|
||||
|
||||
Ok(series.clone())
|
||||
}
|
||||
|
||||
// Print is made out a head and if the dataframe is too large, then a tail
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
let df = &self.as_ref();
|
||||
@ -188,24 +367,17 @@ impl NuDataFrame {
|
||||
|
||||
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
|
||||
let df = self.as_ref();
|
||||
let column_names = df.get_column_names();
|
||||
let upper_row = to_row.min(df.height());
|
||||
|
||||
let mut values: Vec<Value> = Vec::new();
|
||||
|
||||
let upper_row = to_row.min(df.height());
|
||||
for i in from_row..upper_row {
|
||||
let row = df.get_row(i);
|
||||
let mut dictionary_row = Dictionary::default();
|
||||
|
||||
for (val, name) in row.0.iter().zip(column_names.iter()) {
|
||||
let untagged_val = anyvalue_to_untagged(val)?;
|
||||
|
||||
for col in df.get_columns() {
|
||||
let dict_val = Value {
|
||||
value: untagged_val,
|
||||
value: anyvalue_to_untagged(&col.get(i))?,
|
||||
tag: Tag::unknown(),
|
||||
};
|
||||
|
||||
dictionary_row.insert(name.to_string(), dict_val);
|
||||
dictionary_row.insert(col.name().into(), dict_val);
|
||||
}
|
||||
|
||||
let value = Value {
|
||||
@ -213,7 +385,7 @@ impl NuDataFrame {
|
||||
tag: Tag::unknown(),
|
||||
};
|
||||
|
||||
values.push(value);
|
||||
values.push(value)
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
@ -336,8 +508,8 @@ fn insert_value(
|
||||
key: String,
|
||||
column_values: &mut ColumnMap,
|
||||
) -> Result<(), ShellError> {
|
||||
let col_val = match column_values.entry(key) {
|
||||
Entry::Vacant(entry) => entry.insert(ColumnValues::default()),
|
||||
let col_val = match column_values.entry(key.clone()) {
|
||||
Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key)),
|
||||
Entry::Occupied(entry) => entry.into_mut(),
|
||||
};
|
||||
|
||||
@ -346,13 +518,16 @@ fn insert_value(
|
||||
if col_val.values.is_empty() {
|
||||
match &value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(_)) => {
|
||||
col_val.value_type = InputValue::Integer;
|
||||
col_val.column_type = Some(InputType::Integer);
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
|
||||
col_val.value_type = InputValue::Decimal;
|
||||
col_val.column_type = Some(InputType::Decimal);
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||
col_val.value_type = InputValue::String;
|
||||
col_val.column_type = Some(InputType::String);
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
|
||||
col_val.column_type = Some(InputType::Boolean);
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error(
|
||||
@ -378,6 +553,10 @@ fn insert_value(
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)),
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)),
|
||||
) => col_val.values.push(value),
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
@ -397,28 +576,36 @@ fn insert_value(
|
||||
// The ColumnMap has the parsed data from the StreamInput
|
||||
// This data can be used to create a Series object that can initialize
|
||||
// the dataframe based on the type of data that is found
|
||||
fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFrame, ShellError> {
|
||||
fn from_parsed_columns(column_values: ColumnMap, span: &Span) -> Result<NuDataFrame, ShellError> {
|
||||
let mut df_series: Vec<Series> = Vec::new();
|
||||
for (name, column) in column_values {
|
||||
match column.value_type {
|
||||
InputValue::Decimal => {
|
||||
if let Some(column_type) = &column.column_type {
|
||||
match column_type {
|
||||
InputType::Decimal => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_f64()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputValue::Integer => {
|
||||
InputType::Integer => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_i64()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputValue::String => {
|
||||
InputType::String => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_string()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
InputType::Boolean => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
column.values.iter().map(|v| v.as_bool()).collect();
|
||||
let series = Series::new(&name, series_values?);
|
||||
df_series.push(series)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -430,7 +617,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
|
||||
return Err(ShellError::labeled_error(
|
||||
"Error while creating dataframe",
|
||||
format!("{}", e),
|
||||
tag,
|
||||
span,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use nu_source::{Span, Tag};
|
||||
use polars::frame::groupby::{GroupBy, GroupTuples};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::{NuDataFrame, PolarsData};
|
||||
use super::{FrameStruct, NuDataFrame};
|
||||
use nu_errors::ShellError;
|
||||
|
||||
use crate::{TaggedDictBuilder, UntaggedValue, Value};
|
||||
@ -30,7 +30,7 @@ impl NuGroupBy {
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
|
||||
UntaggedValue::FrameStruct(FrameStruct::GroupBy(group)) => Some(group),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or_else(|| {
|
||||
|
@ -1,345 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::vec;
|
||||
|
||||
use nu_errors::ShellError;
|
||||
use nu_source::{Span, Tag};
|
||||
use polars::prelude::{DataType, NamedFrom, Series};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Dictionary, Primitive, UntaggedValue, Value};
|
||||
|
||||
use super::PolarsData;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NuSeries {
|
||||
series: Series,
|
||||
dtype: String,
|
||||
}
|
||||
|
||||
// TODO. Better definition of equality and comparison for a dataframe.
|
||||
// Probably it make sense to have a name field and use it for comparisons
|
||||
impl PartialEq for NuSeries {
|
||||
fn eq(&self, _: &Self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for NuSeries {}
|
||||
|
||||
impl PartialOrd for NuSeries {
|
||||
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
|
||||
Some(Ordering::Equal)
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for NuSeries {
|
||||
fn cmp(&self, _: &Self) -> Ordering {
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for NuSeries {
|
||||
fn hash<H: Hasher>(&self, _: &mut H) {}
|
||||
}
|
||||
|
||||
impl NuSeries {
|
||||
pub fn new(series: Series) -> Self {
|
||||
let dtype = series.dtype().to_string();
|
||||
|
||||
NuSeries { series, dtype }
|
||||
}
|
||||
|
||||
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or_else(|| {
|
||||
ShellError::labeled_error(
|
||||
"No series in stream",
|
||||
"no series found in input stream",
|
||||
span,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
|
||||
where
|
||||
T: Iterator<Item = Value>,
|
||||
{
|
||||
let mut vec_values: Vec<Value> = Vec::new();
|
||||
|
||||
for value in iter {
|
||||
match value.value {
|
||||
UntaggedValue::Primitive(Primitive::Int(_))
|
||||
| UntaggedValue::Primitive(Primitive::Decimal(_))
|
||||
| UntaggedValue::Primitive(Primitive::String(_))
|
||||
| UntaggedValue::Primitive(Primitive::Boolean(_)) => {
|
||||
insert_value(value, &mut vec_values)?
|
||||
}
|
||||
_ => {
|
||||
return Err(ShellError::labeled_error_with_secondary(
|
||||
"Format not supported",
|
||||
"Value not supported for conversion",
|
||||
&value.tag.span,
|
||||
"Perhaps you want to use a list of primitive values (int, decimal, string, or bool)",
|
||||
&value.tag.span,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
from_parsed_vector(vec_values, name)
|
||||
}
|
||||
|
||||
pub fn into_value(self, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(self)),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_value(series: Series, tag: Tag) -> Value {
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series))),
|
||||
tag,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn series_to_untagged(series: Series) -> UntaggedValue {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
|
||||
}
|
||||
|
||||
pub fn dtype(&self) -> &str {
|
||||
&self.dtype
|
||||
}
|
||||
|
||||
pub fn series(self) -> Series {
|
||||
self.series
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Series> for NuSeries {
|
||||
fn as_ref(&self) -> &Series {
|
||||
&self.series
|
||||
}
|
||||
}
|
||||
|
||||
impl AsMut<Series> for NuSeries {
|
||||
fn as_mut(&mut self) -> &mut Series {
|
||||
&mut self.series
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a typed chunked array into the rows shown when a series is
// printed.
//
// `$converter` is the fallible conversion of the series into its typed
// chunked array, and `$self` is the `NuSeries` being printed. The macro
// uses `?` and ends in `Ok(..)`, so it has to be expanded inside a function
// returning `Result<Vec<Value>, ShellError>`.
//
// Every row is a dictionary with a single entry whose header is
// "<series name> (<dtype>)". Missing values are shown as `Nothing`. Series
// with at most `size` elements are shown completely. Longer series show
// their first `size / 2` elements, a "..." row and their last `size / 2`
// elements.
macro_rules! series_to_chunked {
    ($converter: expr, $self: expr) => {{
        let chunked_array = $converter.map_err(|e| {
            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
        })?;

        // Maximum number of elements shown before the output is truncated
        let size = 20;
        let len = $self.as_ref().len();
        let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());

        // Every printed row has the same shape, including rows for missing
        // values and the "..." separator
        let to_row = |primitive: Primitive| {
            let mut dictionary_row = Dictionary::default();

            let value = Value {
                value: UntaggedValue::Primitive(primitive),
                tag: Tag::unknown(),
            };

            dictionary_row.insert(header.clone(), value);

            Value {
                value: UntaggedValue::Row(dictionary_row),
                tag: Tag::unknown(),
            }
        };

        let res = if len <= size {
            chunked_array
                .into_iter()
                .map(|value| to_row(value.map_or(Primitive::Nothing, |v| v.into())))
                .collect::<Vec<Value>>()
        } else {
            let half = size / 2;

            let head = chunked_array
                .into_iter()
                .take(half)
                .map(|value| to_row(value.map_or(Primitive::Nothing, |v| v.into())));

            let middle = std::iter::once(to_row("...".into()));

            // The tail has to start `half` elements before the end of the
            // series. Skipping only `half` elements would repeat the rows
            // that follow the head instead of showing the last ones.
            let tail = chunked_array
                .into_iter()
                .skip(len - half)
                .map(|value| to_row(value.map_or(Primitive::Nothing, |v| v.into())));

            head.chain(middle).chain(tail).collect::<Vec<Value>>()
        };

        Ok(res)
    }};
}
|
||||
|
||||
impl NuSeries {
|
||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||
match self.as_ref().dtype() {
|
||||
DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
|
||||
DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
|
||||
DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
|
||||
DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
|
||||
DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
|
||||
DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
|
||||
DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
|
||||
DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
|
||||
DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
|
||||
DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
|
||||
DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
|
||||
DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
|
||||
DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
|
||||
DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
|
||||
DataType::Null => Ok(vec![Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
tag: Tag::unknown(),
|
||||
}]),
|
||||
//DataType::List(_) => None,
|
||||
//DataType::Time64(TimeUnit) => None,
|
||||
//DataType::Duration(TimeUnit) => None,
|
||||
// DataType::Categorical => None,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
|
||||
// Checking that the type for the value is the same
|
||||
// for the previous value in the column
|
||||
if vec_values.is_empty() {
|
||||
vec_values.push(value);
|
||||
Ok(())
|
||||
} else {
|
||||
let prev_value = &vec_values[vec_values.len() - 1];
|
||||
|
||||
match (&prev_value.value, &value.value) {
|
||||
(
|
||||
UntaggedValue::Primitive(Primitive::Int(_)),
|
||||
UntaggedValue::Primitive(Primitive::Int(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)),
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
UntaggedValue::Primitive(Primitive::String(_)),
|
||||
)
|
||||
| (
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)),
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)),
|
||||
) => {
|
||||
vec_values.push(value);
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Different values in column",
|
||||
"Value with different type",
|
||||
&value.tag,
|
||||
"Perhaps you want to change it to this value type",
|
||||
&prev_value.tag,
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_parsed_vector(
|
||||
vec_values: Vec<Value>,
|
||||
name: Option<String>,
|
||||
) -> Result<NuSeries, ShellError> {
|
||||
let series = match &vec_values[0].value {
|
||||
UntaggedValue::Primitive(Primitive::Int(_)) => {
|
||||
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "int",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
|
||||
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "decimal",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::String(_)) => {
|
||||
let series_values: Result<Vec<_>, _> =
|
||||
vec_values.iter().map(|v| v.as_string()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "string",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
UntaggedValue::Primitive(Primitive::Boolean(_)) => {
|
||||
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_bool()).collect();
|
||||
let series_name = match &name {
|
||||
Some(n) => n.as_ref(),
|
||||
None => "string",
|
||||
};
|
||||
Series::new(series_name, series_values?)
|
||||
}
|
||||
_ => unreachable!("The untagged type is checked while creating vec_values"),
|
||||
};
|
||||
|
||||
Ok(NuSeries::new(series))
|
||||
}
|
@ -75,6 +75,10 @@ pub enum Type {
|
||||
/// Dataframe
|
||||
#[cfg(feature = "dataframe")]
|
||||
DataFrame,
|
||||
|
||||
/// Dataframe
|
||||
#[cfg(feature = "dataframe")]
|
||||
FrameStruct,
|
||||
}
|
||||
|
||||
/// A shape representation of the type of a row
|
||||
@ -192,6 +196,8 @@ impl Type {
|
||||
UntaggedValue::Block(_) => Type::Block,
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => Type::DataFrame,
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::FrameStruct(_) => Type::DataFrame,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -298,7 +304,7 @@ impl PrettyDebug for Type {
|
||||
}
|
||||
Type::Block => ty("block"),
|
||||
#[cfg(feature = "dataframe")]
|
||||
Type::DataFrame => ty("data_type_formatter"),
|
||||
Type::DataFrame | Type::FrameStruct => ty("data_type_formatter"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ use std::path::PathBuf;
|
||||
use std::time::SystemTime;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use crate::dataframe::PolarsData;
|
||||
use crate::dataframe::{FrameStruct, NuDataFrame};
|
||||
|
||||
/// The core structured values that flow through a pipeline
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||
@ -51,10 +51,15 @@ pub enum UntaggedValue {
|
||||
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
|
||||
Block(Box<hir::CapturedBlock>),
|
||||
|
||||
/// Data option that holds the polars structs required to to data
|
||||
/// manipulation and operations using polars dataframes
|
||||
/// Main nushell dataframe
|
||||
#[cfg(feature = "dataframe")]
|
||||
DataFrame(PolarsData),
|
||||
DataFrame(NuDataFrame),
|
||||
|
||||
/// Data option that holds intermediate struct required to do data
|
||||
/// manipulation and operations for dataframes such as groupby, lazy frames
|
||||
/// and lazy groupby
|
||||
#[cfg(feature = "dataframe")]
|
||||
FrameStruct(FrameStruct),
|
||||
}
|
||||
|
||||
impl UntaggedValue {
|
||||
@ -685,11 +690,9 @@ impl ShellTypeName for UntaggedValue {
|
||||
UntaggedValue::Error(_) => "error",
|
||||
UntaggedValue::Block(_) => "block",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
|
||||
UntaggedValue::DataFrame(_) => "dataframe",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
|
||||
UntaggedValue::FrameStruct(FrameStruct::GroupBy(_)) => "groupby",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,9 @@ impl PrettyDebug for Value {
|
||||
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
|
||||
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"),
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
DbgDocBldr::opaque("dataframe")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ use nu_source::{
|
||||
use num_traits::cast::ToPrimitive;
|
||||
|
||||
#[cfg(feature = "dataframe")]
|
||||
use nu_protocol::dataframe::{NuSeries, PolarsData};
|
||||
use nu_protocol::dataframe::NuDataFrame;
|
||||
|
||||
pub trait ValueExt {
|
||||
fn into_parts(self) -> (UntaggedValue, Tag);
|
||||
@ -203,14 +203,14 @@ pub fn get_data_by_member(value: &Value, name: &PathMember) -> Result<Value, She
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => match &name.unspanned {
|
||||
UntaggedValue::DataFrame(df) => match &name.unspanned {
|
||||
UnspannedPathMember::String(string) => {
|
||||
let column = df.as_ref().column(string.as_ref()).map_err(|e| {
|
||||
let column = df.as_ref().select(string.as_str()).map_err(|e| {
|
||||
ShellError::labeled_error("Dataframe error", format!("{}", e), &name.span)
|
||||
})?;
|
||||
|
||||
Ok(NuSeries::series_to_value(
|
||||
column.clone(),
|
||||
Ok(NuDataFrame::dataframe_to_value(
|
||||
column,
|
||||
Tag::new(value.anchor(), name.span),
|
||||
))
|
||||
}
|
||||
@ -746,7 +746,7 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
|
||||
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => {
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
||||
}
|
||||
}
|
||||
|
@ -406,7 +406,7 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
|
||||
|
||||
UntaggedValue::Table(l) => serde_json::Value::Array(json_list(l)?),
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => {
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => {
|
||||
return Err(ShellError::labeled_error(
|
||||
"Cannot convert data struct",
|
||||
"Cannot convert data struct",
|
||||
|
@ -64,7 +64,7 @@ pub fn value_to_bson_value(v: &Value) -> Result<Bson, ShellError> {
|
||||
),
|
||||
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null,
|
||||
#[cfg(feature = "dataframe")]
|
||||
UntaggedValue::DataFrame(_) => Bson::Null,
|
||||
UntaggedValue::DataFrame(_) | UntaggedValue::FrameStruct(_) => Bson::Null,
|
||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||
Bson::Binary(BinarySubtype::Generic, b.clone())
|
||||
|
Loading…
Reference in New Issue
Block a user