Series Operation (#3563)

* Sample command

* Join command with checks

* More dataframes commands

* Groupby and aggregate commands

* Missing feature dataframe flag

* Renamed file

* New commands for dataframes

* error parser and df reference

* filter command for dataframes

* removed name from nu_dataframe

* commands to save to parquet and csv

* polars new version

* new dataframe commands

* series type and print

* Series basic arithmetics

* Add new column to dataframe

* Command names changed to nushell standard
This commit is contained in:
Fernando Herrera 2021-06-07 18:27:46 +01:00 committed by GitHub
parent 16faafb7a8
commit aa1cd7eba6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 1290 additions and 787 deletions

70
Cargo.lock generated
View File

@ -206,11 +206,9 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
[[package]]
name = "arrow"
version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93811be1c0f60f4b29d80b34dad4e59fdc397a9e580f849df9e2635701498663"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=f26ffb3091ae355d246edc4a6fcc2c8e5b9bc570#f26ffb3091ae355d246edc4a6fcc2c8e5b9bc570"
dependencies = [
"cfg_aliases",
"chrono",
"csv",
"flatbuffers",
@ -399,9 +397,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "backtrace"
version = "0.3.59"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4717cfcbfaa661a0fd48f8453951837ae7e8f81e481fbb136e3202d72805a744"
checksum = "b7815ea54e4d821e791162e078acbebfd6d8c8939cd559c9335dceb1c8ca7282"
dependencies = [
"addr2line",
"cc",
@ -753,12 +751,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "chrono"
version = "0.4.19"
@ -1926,9 +1918,9 @@ checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1"
[[package]]
name = "futures-lite"
version = "1.11.3"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4481d0cd0de1d204a4fa55e7d45f07b1d958abcb06714b3446438e2eff695fb"
checksum = "7694489acd39452c77daa48516b894c153f192c3578d5a839b62c58099fcbf48"
dependencies = [
"fastrand",
"futures-core",
@ -2269,9 +2261,9 @@ dependencies = [
[[package]]
name = "heck"
version = "0.3.2"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
@ -4209,9 +4201,12 @@ dependencies = [
[[package]]
name = "object"
version = "0.24.0"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a5b3dd1c072ee7963717671d1ca129f1048fda25edea6b752bfc71ac8854170"
checksum = "9023c1c0973b327f073c7f2fceb9bcc049862f93a7d14c6feb46c8a56460a0d5"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
@ -4358,9 +4353,8 @@ dependencies = [
[[package]]
name = "parquet"
version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9275a7f8eab04e6ab6918b4fdd50e00aeba3c288e0f91bdc5da87a2c8ff288a6"
version = "5.0.0-SNAPSHOT"
source = "git+https://github.com/apache/arrow-rs?rev=f26ffb3091ae355d246edc4a6fcc2c8e5b9bc570#f26ffb3091ae355d246edc4a6fcc2c8e5b9bc570"
dependencies = [
"arrow",
"base64 0.13.0",
@ -4598,9 +4592,8 @@ dependencies = [
[[package]]
name = "polars"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c406ce46726b7d33b05a343d9c1317c0803a419d50bb45275de3f366410e9a80"
version = "0.14.0"
source = "git+https://github.com/pola-rs/polars?rev=a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd#a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
dependencies = [
"polars-core",
"polars-io",
@ -4609,9 +4602,8 @@ dependencies = [
[[package]]
name = "polars-arrow"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53b2d5fb400345c7977e4e728a10be382476f2f9d2caf6b57cd60e97ea17d364"
version = "0.14.0"
source = "git+https://github.com/pola-rs/polars?rev=a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd#a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
dependencies = [
"arrow",
"num 0.4.0",
@ -4620,9 +4612,8 @@ dependencies = [
[[package]]
name = "polars-core"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88561e850748c507f0fc7835b35e795e770597ceecb14e0a8f7d8abf8346645d"
version = "0.14.0"
source = "git+https://github.com/pola-rs/polars?rev=a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd#a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
dependencies = [
"ahash",
"anyhow",
@ -4640,15 +4631,15 @@ dependencies = [
"rand_distr",
"rayon",
"regex 1.5.4",
"serde 1.0.126",
"thiserror",
"unsafe_unwrap",
]
[[package]]
name = "polars-io"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27388810ec5f3346838725aa0aa49343802c1344b96fe82229ae781c62c98bc7"
version = "0.14.0"
source = "git+https://github.com/pola-rs/polars?rev=a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd#a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
dependencies = [
"ahash",
"anyhow",
@ -4670,9 +4661,8 @@ dependencies = [
[[package]]
name = "polars-lazy"
version = "0.13.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7f83284970a9db7d0b6a56d6f944c3988587429c124c1d087188e9d2c7ad7c"
version = "0.14.0"
source = "git+https://github.com/pola-rs/polars?rev=a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd#a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
dependencies = [
"ahash",
"itertools",
@ -6856,9 +6846,9 @@ dependencies = [
[[package]]
name = "unicode-normalization"
version = "0.1.18"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33717dca7ac877f497014e10d73f3acf948c342bee31b5ca7892faf94ccc6b49"
checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
dependencies = [
"tinyvec",
]
@ -7327,9 +7317,9 @@ dependencies = [
[[package]]
name = "zstd"
version = "0.8.1+zstd.1.5.0"
version = "0.8.3+zstd.1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357d6bb1bd9c6f6a55a5a15c74d01260b272f724dc60cc829b86ebd2172ac5ef"
checksum = "5ea7094c7b4a58fbd738eb0d4a2fc7684a0e6949a31597e074ffe20a07cbc2bf"
dependencies = [
"zstd-safe",
]

View File

@ -100,7 +100,9 @@ which = { version = "4.1.0", optional = true }
zip = { version = "0.5.9", optional = true }
[dependencies.polars]
version = "0.13.4"
git = "https://github.com/pola-rs/polars"
rev = "a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
version = "0.14.0"
optional = true
features = ["parquet", "json", "random", "pivot"]

View File

@ -191,10 +191,12 @@ pub(crate) use all::Command as All;
pub(crate) use any::Command as Any;
#[cfg(feature = "dataframe")]
pub(crate) use dataframe::{
DataFrame, DataFrameAggregate, DataFrameConvert, DataFrameDTypes, DataFrameDrop,
DataFrameDummies, DataFrameGroupBy, DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad,
DataFrameMelt, DataFramePivot, DataFrameSample, DataFrameSelect, DataFrameShow, DataFrameSlice,
DataFrameTail, DataFrameToCsv, DataFrameToParquet, DataFrameWhere,
DataFrame, DataFrameAggregate, DataFrameColumn, DataFrameDTypes, DataFrameDrop,
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameGet, DataFrameGroupBy,
DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad, DataFrameMelt, DataFramePivot,
DataFrameSample, DataFrameSelect, DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTail,
DataFrameToCsv, DataFrameToDF, DataFrameToParquet, DataFrameToSeries, DataFrameWhere,
DataFrameWithColumn,
};
pub(crate) use enter::Enter;
pub(crate) use every::Every;

View File

@ -265,6 +265,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
let _ = result.collect::<Vec<_>>();
}
}
#[cfg(feature = "dataframe")]
Value {
value: UntaggedValue::DataFrame(PolarsData::Series(series)),
tag,
} => {
if let Some(table) = table {
// TODO. Configure the parameter rows from file. It can be
// adjusted to see a certain amount of values in the head
let command_args =
create_default_command_args(&context, series.print()?.into(), tag);
let result = table.run(command_args)?;
let _ = result.collect::<Vec<_>>();
}
}
Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
..

View File

@ -66,7 +66,7 @@ impl Operation {
"Operation not fount",
"Operation does not exist",
&name.tag,
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, count",
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count",
&name.tag,
)),
}
@ -81,7 +81,7 @@ impl WholeStreamCommand for DataFrame {
}
fn usage(&self) -> &str {
"Performs an aggregation operation on a groupby object"
"Performs an aggregation operation on a dataframe or groupby object"
}
fn signature(&self) -> Signature {
@ -105,11 +105,19 @@ impl WholeStreamCommand for DataFrame {
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Aggregate sum by grouping by column a and summing on col b",
example: "[[a b]; [one 1] [one 2]] | pls convert | pls groupby [a] | pls aggregate sum",
result: None,
}]
vec![
Example {
description: "Aggregate sum by grouping by column a and summing on col b",
example:
"[[a b]; [one 1] [one 2]] | pls to-df | pls groupby [a] | pls aggregate sum",
result: None,
},
Example {
description: "Aggregate sum in dataframe columns",
example: "[[a b]; [4 1] [5 2]] | pls to-df | pls aggregate sum",
result: None,
},
]
}
}
@ -131,45 +139,48 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
None => (None, Span::unknown()),
};
// The operation is only done in one groupby. Only one input is
// expected from the InputStream
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing groupby input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) = value.value {
let groupby = nu_groupby.to_groupby()?;
let value = args.input.next().ok_or(ShellError::labeled_error(
"Empty stream",
"No value found in the stream",
&tag,
))?;
let groupby = match &selection {
Some(cols) => groupby.select(cols),
None => groupby,
};
let res = match value.value {
UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => {
let groupby = nu_groupby.to_groupby()?;
let res = perform_aggregation(groupby, op, &operation.tag, &agg_span)?;
let groupby = match &selection {
Some(cols) => groupby.select(cols),
None => groupby,
};
let final_df = Value {
tag,
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
};
perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)
}
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
let df = df.as_ref();
Ok(OutputStream::one(final_df))
} else {
Err(ShellError::labeled_error(
"No groupby in stream",
"no groupby found in input stream",
&tag,
))
match &selection {
Some(cols) => {
let df = df
.select(cols)
.map_err(|e| parse_polars_error::<&str>(&e, &agg_span, None))?;
perform_dataframe_aggregation(&df, op, &operation.tag)
}
None => perform_dataframe_aggregation(&df, op, &operation.tag),
}
}
}
_ => Err(ShellError::labeled_error(
"No groupby or dataframe",
"no groupby or found in input stream",
&value.tag.span,
)),
}?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
fn perform_aggregation(
fn perform_groupby_aggregation(
groupby: GroupBy,
operation: Operation,
operation_tag: &Tag,
@ -198,3 +209,29 @@ fn perform_aggregation(
parse_polars_error::<&str>(&e, span, None)
})
}
/// Applies `operation` to every column of `dataframe` (no grouping involved)
/// and returns the resulting dataframe.
///
/// Only mean, sum, min, max, quantile, median, var and std are handled here;
/// any other `Operation` variant produces a labeled error pointing at
/// `operation_tag`.
fn perform_dataframe_aggregation(
    dataframe: &polars::prelude::DataFrame,
    operation: Operation,
    operation_tag: &Tag,
) -> Result<polars::prelude::DataFrame, ShellError> {
    // Both error paths below are reported at the span of the operation name.
    let span = &operation_tag.span;

    let aggregated = match operation {
        Operation::Mean => dataframe.mean(),
        Operation::Sum => dataframe.sum(),
        Operation::Min => dataframe.min(),
        Operation::Max => dataframe.max(),
        Operation::Median => dataframe.median(),
        Operation::Var => dataframe.var(),
        Operation::Std => dataframe.std(),
        // Quantile is the only supported aggregation that returns a Result.
        Operation::Quantile(quantile) => dataframe
            .quantile(quantile)
            .map_err(|e| parse_polars_error::<&str>(&e, span, None))?,
        _ => {
            return Err(ShellError::labeled_error_with_secondary(
                "Not valid operation",
                "operation not valid for dataframe",
                span,
                "Perhaps you want: mean, sum, min, max, quantile, median, var, or std",
                span,
            ))
        }
    };

    Ok(aggregated)
}

View File

@ -0,0 +1,56 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, NuSeries},
Signature, SyntaxShape,
};
use nu_source::Tagged;
use super::utils::parse_polars_error;
/// Command type registered under the name `pls column`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name used to invoke the command.
    fn name(&self) -> &str {
        "pls column"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Returns the selected column as Series"
    }

    /// A single required positional argument: the name of the column.
    fn signature(&self) -> Signature {
        let signature = Signature::build("pls column");
        signature.required("column", SyntaxShape::String, "column name")
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        let column_as_series = Example {
            description: "Returns the selected column as series",
            example: "[[a b]; [1 2] [3 4]] | pls to-df | pls column a",
            result: None,
        };

        vec![column_as_series]
    }
}
/// Implementation of `pls column`: takes the dataframe from the input stream,
/// looks up the requested column and emits a copy of it as a series value.
///
/// A polars lookup failure (e.g. unknown column name) is reported at the span
/// of the column argument.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;
    let column: Tagged<String> = args.req(0)?;

    let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    // The lookup yields a borrowed series, so it is cloned to produce an
    // owned value for the output stream.
    let series = match df.as_ref().column(column.item.as_ref()) {
        Ok(found) => found.clone(),
        Err(e) => return Err(parse_polars_error::<&str>(&e, &column.tag.span, None)),
    };

    let value = NuSeries::series_to_value(series, tag);
    Ok(OutputStream::one(value))
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls drop [a]",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls drop [a]",
result: None,
}]
}
@ -45,53 +42,29 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let mut args = args.evaluate_once()?;
let columns: Vec<Value> = args.req(0)?;
let (col_string, col_span) = convert_columns(&columns, &tag)?;
match args.input.next() {
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let new_df = match col_string.iter().next() {
Some(col) => df
.as_ref()
.drop(col)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None)),
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
"Empty names list",
"No column names where found",
&col_span,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
// Dataframe with the first selected column
let new_df = match col_string.iter().next() {
Some(col) => df
.as_ref()
.drop(col)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None)),
None => Err(ShellError::labeled_error(
"Empty names list",
"No column names where found",
&col_span,
)),
}?;
}?;
// If there are more columns in the drop selection list, these
// are added from the resulting dataframe
let res = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
new_df
.drop(col)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))
})?;
// If there are more columns in the drop selection list, these
// are added from the resulting dataframe
let res = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
new_df
.drop(col)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))
})?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -0,0 +1,66 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
/// Command type registered under the name `pls drop-duplicates`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name used to invoke the command.
    fn name(&self) -> &str {
        "pls drop-duplicates"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Drops duplicate values in dataframe"
    }

    /// An optional `subset` table of column names plus the `--maintain` (`-m`)
    /// switch.
    fn signature(&self) -> Signature {
        let with_subset = Signature::build("pls drop-duplicates").optional(
            "subset",
            SyntaxShape::Table,
            "subset of columns to drop duplicates",
        );

        with_subset.switch("maintain", "maintain order", Some('m'))
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        let drop_duplicates = Example {
            description: "drop duplicates",
            example: "[[a b]; [1 2] [3 4] [1 2]] | pls to-df | pls drop-duplicates",
            result: None,
        };

        vec![drop_duplicates]
    }
}
/// Implementation of `pls drop-duplicates`: takes the dataframe from the input
/// stream and returns a new dataframe without duplicated rows.
///
/// The optional first positional argument restricts the duplicate check to a
/// subset of columns; the `maintain` switch is forwarded to polars as the
/// "maintain order" flag.
///
/// Errors raised by polars are reported at the span of the subset argument
/// when one was given, and at the span of the command itself otherwise.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;

    // Optional subset of columns used to detect duplicates
    let columns: Option<Vec<Value>> = args.opt(0)?;
    let (subset, col_span) = match columns {
        Some(cols) => {
            let (subset, col_span) = convert_columns(&cols, &tag)?;
            (Some(subset), col_span)
        }
        // Without a subset there is no argument to point at, so fall back to
        // the command span rather than an unknown span; this keeps any polars
        // error anchored to a real source location.
        None => (None, tag.span),
    };

    let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    let subset_slice = subset.as_ref().map(|cols| &cols[..]);

    let res = df
        .as_ref()
        .drop_duplicates(args.has_flag("maintain"), subset_slice)
        .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;

    Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -0,0 +1,64 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
/// Command type registered under the name `pls drop-nulls`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name used to invoke the command.
    fn name(&self) -> &str {
        "pls drop-nulls"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Drops null values in dataframe"
    }

    /// An optional `subset` table of column names in which nulls are looked
    /// for.
    fn signature(&self) -> Signature {
        // The help text previously read "drop duplicates", copied over from
        // the drop-duplicates command.
        Signature::build("pls drop-nulls").optional(
            "subset",
            SyntaxShape::Table,
            "subset of columns to drop nulls",
        )
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "drop null values",
            example: "[[a b]; [1 2] [3 4] [1 2]] | pls to-df | pls drop-nulls",
            result: None,
        }]
    }
}
/// Implementation of `pls drop-nulls`: takes the dataframe from the input
/// stream and returns a new dataframe with the null-containing rows removed.
///
/// The optional first positional argument restricts the null check to a
/// subset of columns.
///
/// Errors raised by polars are reported at the span of the subset argument
/// when one was given, and at the span of the command itself otherwise.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;

    // Optional subset of columns checked for null values
    let columns: Option<Vec<Value>> = args.opt(0)?;
    let (subset, col_span) = match columns {
        Some(cols) => {
            let (subset, col_span) = convert_columns(&cols, &tag)?;
            (Some(subset), col_span)
        }
        // Without a subset there is no argument to point at, so fall back to
        // the command span rather than an unknown span; this keeps any polars
        // error anchored to a real source location.
        None => (None, tag.span),
    };

    let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    let subset_slice = subset.as_ref().map(|cols| &cols[..]);

    let res = df
        .as_ref()
        .drop_nulls(subset_slice)
        .map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;

    Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -1,7 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::PolarsData, Signature, TaggedDictBuilder, UntaggedValue};
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
pub struct DataFrame;
@ -25,7 +25,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "drop column a",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls dtypes",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls dtypes",
result: None,
}]
}
@ -35,42 +35,26 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let mut args = args.evaluate_once()?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let col_names = df
.as_ref()
.get_column_names()
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>();
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let col_names = df
.as_ref()
.get_column_names()
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>();
let values = df
.as_ref()
.dtypes()
.into_iter()
.zip(col_names.into_iter())
.map(move |(dtype, name)| {
let mut data = TaggedDictBuilder::new(tag.clone());
data.insert_value("column", name.as_ref());
data.insert_value("dtype", format!("{}", dtype));
let values = df
.as_ref()
.dtypes()
.into_iter()
.zip(col_names.into_iter())
.map(move |(dtype, name)| {
let mut data = TaggedDictBuilder::new(tag.clone());
data.insert_value("column", name.as_ref());
data.insert_value("dtype", format!("{}", dtype));
data.into_value()
});
data.into_value()
});
Ok(OutputStream::from_stream(values))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::from_stream(values))
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature};
use super::utils::parse_polars_error;
@ -12,7 +9,7 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"pls to_dummies"
"pls to-dummies"
}
fn usage(&self) -> &str {
@ -20,7 +17,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls select")
Signature::build("pls to-dummies")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -30,7 +27,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with dummy variables",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_dummies",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-dummies",
result: None,
}]
}
@ -40,37 +37,14 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let tag = args.call_info.name_tag.clone();
let mut args = args.evaluate_once()?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = df.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
})?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().to_dummies().map_err(|e| {
parse_polars_error(
&e,
&tag.span,
Some("The only allowed column types for dummies are String or Int"),
)
})?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -0,0 +1,54 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
/// Command type registered under the name `pls get`.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name used to invoke the command.
    fn name(&self) -> &str {
        "pls get"
    }

    /// One-line help text.
    fn usage(&self) -> &str {
        "Creates dataframe with the selected columns"
    }

    /// A single required `columns` table holding the names to keep.
    fn signature(&self) -> Signature {
        // The help text previously said "to sort dataframe"; this command
        // selects columns, it does not sort.
        Signature::build("pls get").required(
            "columns",
            SyntaxShape::Table,
            "column names to select from the dataframe",
        )
    }

    /// Delegates to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Usage examples shown in the command help.
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Creates dataframe with selected columns",
            example: "[[a b]; [1 2] [3 4]] | pls to-df | pls get [a]",
            result: None,
        }]
    }
}
/// Implementation of `pls get`: takes the dataframe from the input stream and
/// returns a new dataframe made of the requested columns only.
///
/// A polars selection failure is reported at the span of the columns
/// argument.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;

    // Turn the table argument into plain column names plus the span that
    // covers them.
    let requested: Vec<Value> = args.req(0)?;
    let (names, names_span) = convert_columns(&requested, &tag)?;

    let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    let selected = match df.as_ref().select(&names) {
        Ok(selected) => selected,
        Err(e) => return Err(parse_polars_error::<&str>(&e, &names_span, None)),
    };

    let value = NuDataFrame::dataframe_to_value(selected, tag);
    Ok(OutputStream::one(value))
}

View File

@ -34,7 +34,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Grouping by column a",
example: "[[a b]; [one 1] [one 2]] | pls convert | pls groupby [a]",
example: "[[a b]; [one 1] [one 2]] | pls to-df | pls groupby [a]",
result: None,
}]
}
@ -48,43 +48,26 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let by_columns: Vec<Value> = args.req(0)?;
let (columns_string, col_span) = convert_columns(&by_columns, &tag)?;
// The operation is only done in one dataframe. Only one input is
// expected from the InputStream
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(nu_df)) = value.value {
// This is the expensive part of the groupby; to create the
// groups that will be used for grouping the data in the
// dataframe. Once it has been done these values can be stored
// in a NuGroupBy
let groupby = nu_df
.as_ref()
.groupby(&columns_string)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let groups = groupby.get_groups().to_vec();
let groupby = Value {
tag: value.tag,
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
NuDataFrame::new(nu_df.as_ref().clone()),
columns_string,
groups,
))),
};
// This is the expensive part of the groupby; to create the
// groups that will be used for grouping the data in the
// dataframe. Once it has been done these values can be stored
// in a NuGroupBy
let groupby = df
.as_ref()
.groupby(&columns_string)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(groupby))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
let groups = groupby.get_groups().to_vec();
let groupby = Value {
tag,
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
NuDataFrame::new(df.as_ref().clone()),
columns_string,
groups,
))),
};
Ok(OutputStream::one(groupby))
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
@ -21,7 +18,7 @@ impl WholeStreamCommand for DataFrame {
fn signature(&self) -> Signature {
Signature::build("pls select").optional(
"n_rows",
"rows",
SyntaxShape::Number,
"Number of rows for head",
)
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with head rows",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls head",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls head",
result: None,
}]
}
@ -50,31 +47,8 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
None => 5,
};
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = df.as_ref().head(Some(rows));
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().head(Some(rows));
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -52,13 +52,13 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "inner join dataframe",
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls join $right [a] [a]",
example: "echo [[a b]; [1 2] [3 4]] | pls to-df | pls join $right [a] [a]",
result: None,
},
Example {
description: "right join dataframe",
example:
"[[a b]; [1 2] [3 4] [5 6]] | pls convert | pls join $right [b] [b] -t right",
"[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls join $right [b] [b] -t right",
result: None,
},
]
@ -95,53 +95,31 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = match r_df.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
// Checking the column types before performing the join
check_column_datatypes(
df.as_ref(),
&l_col_string,
&l_col_span,
&r_col_string,
&r_col_span,
)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
df.as_ref()
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
}
_ => Err(ShellError::labeled_error(
"Not a dataframe",
"not a dataframe type value",
&r_df.tag,
)),
}?;
let res = match r_df.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
// Checking the column types before performing the join
check_column_datatypes(
df.as_ref(),
&l_col_string,
&l_col_span,
&r_col_string,
&r_col_span,
)?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
df.as_ref()
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
}
}
_ => Err(ShellError::labeled_error(
"Not a dataframe",
"not a dataframe type value",
&r_df.tag,
)),
}?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
fn check_column_datatypes<T: AsRef<str>>(

View File

@ -4,8 +4,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::{EvaluatedCommandArgs, WholeStreamCommand};
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_source::Tagged;
@ -113,12 +112,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
span: tag.span,
};
let tagged_value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
tag: df_tag,
};
Ok(InputStream::one(tagged_value).to_output_stream())
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
df, df_tag,
)))
}
fn from_parquet(args: EvaluatedCommandArgs) -> Result<polars::prelude::DataFrame, ShellError> {

View File

@ -1,10 +1,7 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::convert_columns;
@ -20,7 +17,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls join")
Signature::build("pls melt")
.required("id_columns", SyntaxShape::Table, "Id columns for melting")
.required(
"value_columns",
@ -36,7 +33,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "melt dataframe",
example: "[[a b]; [a 2] [b 4] [a 6]] | pls convert | pls melt [a] [b]",
example: "[[a b]; [a 2] [b 4] [a 6]] | pls to-df | pls melt [a] [b]",
result: None,
}]
}
@ -52,39 +49,17 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let (id_col_string, id_col_span) = convert_columns(&id_col, &tag)?;
let (val_col_string, val_col_span) = convert_columns(&val_col, &tag)?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df
.as_ref()
.melt(&id_col_string, &val_col_string)
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
let res = df
.as_ref()
.melt(&id_col_string, &val_col_string)
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
fn check_column_datatypes<T: AsRef<str>>(

View File

@ -1,9 +1,12 @@
pub mod aggregate;
pub mod column;
pub mod command;
pub mod convert;
pub mod drop;
pub mod drop_duplicates;
pub mod drop_nulls;
pub mod dtypes;
pub mod dummies;
pub mod get;
pub mod groupby;
pub mod head;
pub mod join;
@ -15,18 +18,25 @@ pub mod sample;
pub mod select;
pub mod show;
pub mod slice;
pub mod sort;
pub mod tail;
pub mod to_csv;
pub mod to_df;
pub mod to_parquet;
pub mod to_series;
pub(crate) mod utils;
pub mod where_;
pub mod with_column;
pub use aggregate::DataFrame as DataFrameAggregate;
pub use column::DataFrame as DataFrameColumn;
pub use command::Command as DataFrame;
pub use convert::DataFrame as DataFrameConvert;
pub use drop::DataFrame as DataFrameDrop;
pub use drop_duplicates::DataFrame as DataFrameDropDuplicates;
pub use drop_nulls::DataFrame as DataFrameDropNulls;
pub use dtypes::DataFrame as DataFrameDTypes;
pub use dummies::DataFrame as DataFrameDummies;
pub use get::DataFrame as DataFrameGet;
pub use groupby::DataFrame as DataFrameGroupBy;
pub use head::DataFrame as DataFrameHead;
pub use join::DataFrame as DataFrameJoin;
@ -38,7 +48,11 @@ pub use sample::DataFrame as DataFrameSample;
pub use select::DataFrame as DataFrameSelect;
pub use show::DataFrame as DataFrameShow;
pub use slice::DataFrame as DataFrameSlice;
pub use sort::DataFrame as DataFrameSort;
pub use tail::DataFrame as DataFrameTail;
pub use to_csv::DataFrame as DataFrameToCsv;
pub use to_df::DataFrame as DataFrameToDF;
pub use to_parquet::DataFrame as DataFrameToParquet;
pub use to_series::DataFrame as DataFrameToSeries;
pub use where_::DataFrame as DataFrameWhere;
pub use with_column::DataFrame as DataFrameWithColumn;

View File

@ -2,8 +2,8 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
dataframe::{NuDataFrame, NuGroupBy},
Signature, SyntaxShape,
};
use nu_source::Tagged;
@ -72,7 +72,7 @@ impl WholeStreamCommand for DataFrame {
vec![Example {
description: "Pivot a dataframe on b and aggregation on col c",
example:
"[[a b c]; [one x 1] [two y 2]] | pls convert | pls groupby [a] | pls pivot b c sum",
"[[a b c]; [one x 1] [two y 2]] | pls to-df | pls groupby [a] | pls pivot b c sum",
result: None,
}]
}
@ -93,50 +93,27 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
// The operation is only done in one groupby. Only one input is
// expected from the InputStream
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing groupby input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) = value.value {
let df_ref = nu_groupby.as_ref();
let nu_groupby = NuGroupBy::try_from_stream(&mut args.input, &tag.span)?;
let df_ref = nu_groupby.as_ref();
check_pivot_column(df_ref, &pivot_col)?;
check_value_column(df_ref, &value_col)?;
check_pivot_column(df_ref, &pivot_col)?;
check_value_column(df_ref, &value_col)?;
let mut groupby = nu_groupby.to_groupby()?;
let mut groupby = nu_groupby.to_groupby()?;
let pivot = groupby.pivot(pivot_col.item.as_ref(), value_col.item.as_ref());
let pivot = groupby.pivot(pivot_col.item.as_ref(), value_col.item.as_ref());
let res = match op {
Operation::Mean => pivot.mean(),
Operation::Sum => pivot.sum(),
Operation::Min => pivot.min(),
Operation::Max => pivot.max(),
Operation::First => pivot.first(),
Operation::Median => pivot.median(),
}
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
let final_df = Value {
tag,
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
};
Ok(OutputStream::one(final_df))
} else {
Err(ShellError::labeled_error(
"No groupby in stream",
"no groupby found in input stream",
&tag,
))
}
}
let res = match op {
Operation::Mean => pivot.mean(),
Operation::Sum => pivot.sum(),
Operation::Min => pivot.min(),
Operation::Max => pivot.max(),
Operation::First => pivot.first(),
Operation::Median => pivot.median(),
}
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}
fn check_pivot_column(

View File

@ -1,10 +1,7 @@
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
@ -44,12 +41,12 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Sample rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | pls load | pls sample -r 1",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls sample -r 1",
result: None,
},
Example {
description: "Shows sample row using fraction and replace",
example: "[[a b]; [1 2] [3 4] [5 6]] | pls load | pls sample -f 0.5 -e",
example: "[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls sample -f 0.5 -e",
result: None,
},
]
@ -64,52 +61,30 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let fraction: Option<Tagged<f64>> = args.get_flag("fraction")?;
let replace: bool = args.has_flag("replace");
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = match (rows, fraction) {
(Some(rows), None) => df
.as_ref()
.sample_n(rows.item, replace)
.map_err(|e| parse_polars_error::<&str>(&e, &rows.tag.span, None)),
(None, Some(frac)) => df
.as_ref()
.sample_frac(frac.item, replace)
.map_err(|e| parse_polars_error::<&str>(&e, &frac.tag.span, None)),
(Some(_), Some(_)) => Err(ShellError::labeled_error(
"Incompatible flags",
"Only one selection criterion allowed",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = match (rows, fraction) {
(Some(rows), None) => df
.as_ref()
.sample_n(rows.item, replace)
.map_err(|e| parse_polars_error::<&str>(&e, &rows.tag.span, None)),
(None, Some(frac)) => df
.as_ref()
.sample_frac(frac.item, replace)
.map_err(|e| parse_polars_error::<&str>(&e, &frac.tag.span, None)),
(Some(_), Some(_)) => Err(ShellError::labeled_error(
"Incompatible flags",
"Only one selection criterion allowed",
&tag,
)),
(None, None) => Err(ShellError::labeled_error_with_secondary(
"No selection",
"No selection criterion was found",
&tag,
"Perhaps you want to use the flag -n or -f",
&tag,
)),
}?;
(None, None) => Err(ShellError::labeled_error_with_secondary(
"No selection",
"No selection criterion was found",
&tag,
"Perhaps you want to use the flag -n or -f",
&tag,
)),
}?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with column a",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls select [a]",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls select [a]",
result: None,
}]
}
@ -48,34 +45,12 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let (col_string, col_span) = convert_columns(&columns, &tag)?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = df
.as_ref()
.select(&col_string)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
let res = df
.as_ref()
.select(&col_string)
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -1,7 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::PolarsData, Signature, SyntaxShape, UntaggedValue};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
@ -35,12 +35,12 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Shows head rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls show",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls show",
result: None,
},
Example {
description: "Shows tail rows from dataframe",
example: "[[a b]; [1 2] [3 4] [5 6]] | pls convert | pls show -t -n 1",
example: "[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls show -t -n 1",
result: None,
},
]
@ -54,25 +54,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let rows: Option<Tagged<usize>> = args.get_flag("n_rows")?;
let tail: bool = args.has_flag("tail");
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let rows = rows.map(|v| v.item);
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let rows = rows.map(|v| v.item);
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
Ok(OutputStream::from_stream(values.into_iter()))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::from_stream(values.into_iter()))
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
pub struct DataFrame;
@ -19,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls select")
Signature::build("pls slice")
.required("offset", SyntaxShape::Number, "start of slice")
.required("size", SyntaxShape::Number, "size of slice")
}
@ -31,7 +28,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe from a slice of the rows",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls slice 0 1",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls slice 0 1",
result: None,
}]
}
@ -44,31 +41,8 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let offset: Tagged<usize> = args.req(0)?;
let size: Tagged<usize> = args.req(1)?;
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = df.as_ref().slice(offset.item as i64, size.item);
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let res = df.as_ref().slice(offset.item as i64, size.item);
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -0,0 +1,57 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
use super::utils::{convert_columns, parse_polars_error};
/// Unit struct implementing the `pls sort` command.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "pls sort"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Creates new sorted dataframe"
    }

    /// Declares one required positional (`columns`, table-shaped) and the
    /// `--reverse` / `-r` switch.
    fn signature(&self) -> Signature {
        let with_columns = Signature::build("pls sort").required(
            "columns",
            SyntaxShape::Table,
            "column names to sort dataframe",
        );

        with_columns.switch("reverse", "invert sort", Some('r'))
    }

    /// Hands the call over to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Single usage example; no expected result is attached to it.
    fn examples(&self) -> Vec<Example> {
        let sort_by_a = Example {
            description: "Create new sorted dataframe",
            example: "[[a b]; [3 4] [1 2]] | pls to-df | pls sort [a]",
            result: None,
        };

        vec![sort_by_a]
    }
}
/// Sorts the dataframe taken from the input stream by the requested columns.
///
/// Reads the required `columns` argument and the `reverse` flag, pulls the
/// dataframe out of the input stream, and emits the sorted frame as a single
/// value carrying the command's name tag. A polars failure is converted with
/// `parse_polars_error`, labelled with the span returned by `convert_columns`.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;

    let requested: Vec<Value> = args.req(0)?;
    let reversed = args.has_flag("reverse");
    let (names, names_span) = convert_columns(&requested, &tag)?;

    let frame = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    match frame.as_ref().sort(&names, reversed) {
        Ok(sorted) => Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
            sorted, tag,
        ))),
        Err(e) => Err(parse_polars_error::<&str>(&e, &names_span, None)),
    }
}

View File

@ -1,10 +1,7 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
use nu_source::Tagged;
pub struct DataFrame;
@ -19,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls select").optional(
Signature::build("pls tail").optional(
"n_rows",
SyntaxShape::Number,
"Number of rows for tail",
@ -33,7 +30,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Create new dataframe with tail rows",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls tail",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls tail",
result: None,
}]
}
@ -49,31 +46,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
None => 5,
};
match args.input.next() {
None => Err(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag,
)),
Some(value) => {
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
let res = df.as_ref().tail(Some(rows));
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
res,
))),
tag: tag.clone(),
};
let res = df.as_ref().tail(Some(rows));
Ok(OutputStream::one(value))
} else {
Err(ShellError::labeled_error(
"No dataframe in stream",
"no dataframe found in input stream",
&tag,
))
}
}
}
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
}

View File

@ -4,9 +4,10 @@ use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::dataframe::NuDataFrame;
use nu_protocol::Primitive;
use nu_protocol::Value;
use nu_protocol::{dataframe::PolarsData, Signature, SyntaxShape, UntaggedValue};
use nu_protocol::{Signature, SyntaxShape, UntaggedValue};
use polars::prelude::{CsvWriter, SerWriter};
@ -17,7 +18,7 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"pls to_csv"
"pls to-csv"
}
fn usage(&self) -> &str {
@ -25,7 +26,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls to_csv")
Signature::build("pls to-csv")
.required("file", SyntaxShape::FilePath, "file path to save dataframe")
.named(
"delimiter",
@ -44,12 +45,12 @@ impl WholeStreamCommand for DataFrame {
vec![
Example {
description: "Saves dataframe to csv file",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_csv test.csv",
// Must use the same command name that `name()` and `signature()` register: `pls to-csv`.
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-csv test.csv",
result: None,
},
Example {
description: "Saves dataframe to csv file using other delimiter",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_csv test.csv -d '|'",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-csv test.csv -d '|'",
result: None,
},
]
@ -63,18 +64,7 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let delimiter: Option<Tagged<String>> = args.get_flag("delimiter")?;
let no_header: bool = args.has_flag("no_header");
let mut df = args
.input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
_ => None,
})
.ok_or(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag.span,
))?;
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let mut file = File::create(&file_name.item).map_err(|e| {
ShellError::labeled_error(

View File

@ -1,16 +1,13 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, UntaggedValue,
};
use nu_protocol::{dataframe::NuDataFrame, Signature};
pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"pls convert"
"pls to-df"
}
fn usage(&self) -> &str {
@ -18,7 +15,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls convert")
Signature::build("pls to-df")
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
@ -26,17 +23,14 @@ impl WholeStreamCommand for DataFrame {
let args = args.evaluate_once()?;
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
let init = InputStream::one(
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)).into_value(&tag),
);
Ok(init.to_output_stream())
Ok(InputStream::one(df.to_value(tag)))
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Takes an input stream and converts it to a polars dataframe",
example: "[[a b];[1 2] [3 4]] | pls convert",
example: "[[a b];[1 2] [3 4]] | pls to-df",
result: None,
}]
}

View File

@ -4,7 +4,8 @@ use std::path::PathBuf;
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::PolarsData, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use nu_protocol::dataframe::NuDataFrame;
use nu_protocol::{Primitive, Signature, SyntaxShape, UntaggedValue, Value};
use polars::prelude::ParquetWriter;
@ -15,7 +16,7 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"pls to_parquet"
"pls to-parquet"
}
fn usage(&self) -> &str {
@ -23,7 +24,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls to_parquet").required(
Signature::build("pls to-parquet").required(
"file",
SyntaxShape::FilePath,
"file path to save dataframe",
@ -37,7 +38,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Saves dataframe to parquet file",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_parquet test.parquet",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-parquet test.parquet",
result: None,
}]
}
@ -48,18 +49,7 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
let mut args = args.evaluate_once()?;
let file_name: Tagged<PathBuf> = args.req(0)?;
let mut df = args
.input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
_ => None,
})
.ok_or(ShellError::labeled_error(
"No input received",
"missing dataframe input from stream",
&tag.span,
))?;
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
let file = File::create(&file_name.item).map_err(|e| {
ShellError::labeled_error(

View File

@ -0,0 +1,45 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
use nu_source::Tagged;
/// Unit struct implementing the `pls to-series` command.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "pls to-series"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Converts a pipelined List into a polars series"
    }

    /// Declares a single optional positional: the name to give the series.
    fn signature(&self) -> Signature {
        let base = Signature::build("pls to-series");

        base.optional("name", SyntaxShape::String, "Optional series name")
    }

    /// Builds a series from the whole input stream and emits it as one value
    /// carrying the command's name tag.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        let tag = args.call_info.name_tag.clone();
        let args = args.evaluate_once()?;

        // Only the string itself is forwarded; the tag around it is dropped.
        let tagged_name: Option<Tagged<String>> = args.opt(0)?;
        let series = NuSeries::try_from_iter(args.input, tagged_name.map(|t| t.item))?;

        Ok(InputStream::one(series.to_value(tag)))
    }

    /// Single usage example; no expected result is attached to it.
    fn examples(&self) -> Vec<Example> {
        let named_series = Example {
            description: "Takes an input stream and converts it to a polars series",
            example: "[1 2 3 4] | pls to-series my-col",
            result: None,
        };

        vec![named_series]
    }
}

View File

@ -2,9 +2,9 @@ use crate::prelude::*;
use nu_engine::{evaluate_baseline_expr, EvaluatedCommandArgs, WholeStreamCommand};
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
dataframe::NuDataFrame,
hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression},
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value,
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue,
};
use super::utils::parse_polars_error;
@ -36,7 +36,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Filter dataframe based on column a",
example: "[[a b]; [1 2] [3 4]] | pls convert | pls where a == 1",
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls where a == 1",
result: None,
}]
}
@ -148,18 +148,8 @@ fn filter_dataframe(
right_condition: &Primitive,
operator: &SpannedExpression,
) -> Result<OutputStream, ShellError> {
let df = args
.input
.next()
.and_then(|value| match value.value {
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(nu)) => Some(nu),
_ => None,
})
.ok_or(ShellError::labeled_error(
"Incorrect stream input",
"Expected dataframe in stream",
&args.call_info.name_tag.span,
))?;
let span = args.call_info.name_tag.span;
let df = NuDataFrame::try_from_stream(&mut args.input, &span)?;
let col = df
.as_ref()
@ -198,10 +188,8 @@ fn filter_dataframe(
.filter(&mask)
.map_err(|e| parse_polars_error::<&str>(&e, &args.call_info.name_tag.span, None))?;
let value = Value {
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(res))),
tag: args.call_info.name_tag.clone(),
};
Ok(OutputStream::one(value))
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
res,
args.call_info.name_tag.clone(),
)))
}

View File

@ -0,0 +1,67 @@
use crate::prelude::*;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
dataframe::{NuDataFrame, PolarsData},
Signature, SyntaxShape, UntaggedValue, Value,
};
use super::utils::parse_polars_error;
/// Unit struct implementing the `pls with-column` command.
pub struct DataFrame;

impl WholeStreamCommand for DataFrame {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "pls with-column"
    }

    /// One-line help text for the command.
    fn usage(&self) -> &str {
        "Adds a series to the dataframe"
    }

    /// Declares a single required positional: the series to append. The shape
    /// is `Any`; the actual type check happens in `command`.
    fn signature(&self) -> Signature {
        let base = Signature::build("pls with-column");

        base.required("series", SyntaxShape::Any, "series to be added")
    }

    /// Hands the call over to the free function `command`.
    fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
        command(args)
    }

    /// Single usage example; no expected result is attached to it.
    fn examples(&self) -> Vec<Example> {
        let add_series = Example {
            description: "Adds a series to the dataframe",
            example: "[[a b]; [1 2] [3 4]] | pls to-df | pls with-column ([5 6] | pls to-series)",
            result: None,
        };

        vec![add_series]
    }
}
/// Appends the series given as first argument to the dataframe in the input
/// stream and emits the resulting dataframe as a single value.
///
/// Fails with an "Incorrect type" error, labelled at the argument's span, when
/// the argument is not a series. The argument is validated before the
/// dataframe is pulled from the stream. A polars failure is converted with
/// `parse_polars_error` using the command's own span.
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
    let tag = args.call_info.name_tag.clone();
    let mut args = args.evaluate_once()?;

    let argument: Value = args.req(0)?;
    let argument_span = argument.tag.span;

    let series = match argument.value {
        UntaggedValue::DataFrame(PolarsData::Series(series)) => series,
        _ => {
            return Err(ShellError::labeled_error(
                "Incorrect type",
                "can only add a series to a dataframe",
                argument_span,
            ))
        }
    };

    let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;

    // `with_column` yields a reference into `df`, so the frame is cloned to
    // obtain an owned value for the output.
    let extended = match df.as_mut().with_column(series.series()) {
        Ok(frame) => frame.clone(),
        Err(e) => return Err(parse_polars_error::<&str>(&e, &tag.span, None)),
    };

    Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
        extended, tag,
    )))
}

View File

@ -253,49 +253,39 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(Seq),
whole_stream_command(SeqDates),
whole_stream_command(TermSize),
//Dataframe commands
#[cfg(feature = "dataframe")]
]);
//Dataframe commands
#[cfg(feature = "dataframe")]
context.add_commands(vec![
whole_stream_command(DataFrame),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameConvert),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameLoad),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameList),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameGroupBy),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameAggregate),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameShow),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameSample),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameJoin),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameDrop),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameSelect),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameDTypes),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameDummies),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameHead),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameTail),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameSlice),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameMelt),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFramePivot),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameWhere),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameToDF),
whole_stream_command(DataFrameToSeries),
whole_stream_command(DataFrameToParquet),
#[cfg(feature = "dataframe")]
whole_stream_command(DataFrameToCsv),
whole_stream_command(DataFrameSort),
whole_stream_command(DataFrameGet),
whole_stream_command(DataFrameDropDuplicates),
whole_stream_command(DataFrameDropNulls),
whole_stream_command(DataFrameColumn),
whole_stream_command(DataFrameWithColumn),
]);
#[cfg(feature = "clipboard-cli")]

View File

@ -12,6 +12,9 @@ use num_bigint::BigInt;
use num_traits::{ToPrimitive, Zero};
use std::collections::HashMap;
#[cfg(feature = "dataframe")]
use nu_protocol::dataframe::{NuSeries, PolarsData};
pub struct Date;
impl Date {
@ -491,6 +494,51 @@ pub fn compute_values(
}
_ => Err((left.type_name(), right.type_name())),
},
// Element-wise arithmetic between two series. Only compiled with the
// `dataframe` feature.
#[cfg(feature = "dataframe")]
(
    UntaggedValue::DataFrame(PolarsData::Series(lhs)),
    UntaggedValue::DataFrame(PolarsData::Series(rhs)),
) => {
    // Both operands must share the same dtype; otherwise the pair is
    // reported through the usual (left, right) type-name error.
    if lhs.as_ref().dtype() == rhs.as_ref().dtype() {
        // Select the name prefix together with the computed series, so the
        // renaming below is written only once.
        let (prefix, mut series) = match operator {
            Operator::Plus => Ok(("sum", lhs.as_ref() + rhs.as_ref())),
            Operator::Minus => Ok(("sub", lhs.as_ref() - rhs.as_ref())),
            Operator::Multiply => Ok(("mul", lhs.as_ref() * rhs.as_ref())),
            Operator::Divide => Ok(("div", lhs.as_ref() / rhs.as_ref())),
            Operator::Modulo => Ok(("mod", lhs.as_ref() % rhs.as_ref())),
            _ => Err((left.type_name(), right.type_name())),
        }?;

        // Result is named `<prefix>_<lhs name>_<rhs name>`.
        let name = format!(
            "{}_{}_{}",
            prefix,
            lhs.as_ref().name(),
            rhs.as_ref().name()
        );
        series.rename(name.as_str());

        Ok(NuSeries::series_to_untagged(series))
    } else {
        Err((left.type_name(), right.type_name()))
    }
}
_ => Err((left.type_name(), right.type_name())),
}
}

View File

@ -31,8 +31,11 @@ serde_yaml = "0.8.16"
toml = "0.5.8"
[dependencies.polars]
version = "0.13.4"
git = "https://github.com/pola-rs/polars"
rev = "a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
version = "0.14.0"
optional = true
features = ["serde"]
[features]
dataframe = ["polars"]

View File

@ -1,12 +1,15 @@
pub mod nu_dataframe;
pub mod nu_groupby;
pub mod nu_series;
pub use nu_dataframe::NuDataFrame;
pub use nu_groupby::NuGroupBy;
pub use nu_series::NuSeries;
use serde::{Deserialize, Serialize};
/// Polars-backed payloads carried inside `UntaggedValue::DataFrame`.
///
/// Each variant wraps one of the `Nu*` wrapper types re-exported by this module.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub enum PolarsData {
/// A dataframe, wrapped in `NuDataFrame`.
EagerDataFrame(NuDataFrame),
/// A group-by, wrapped in `NuGroupBy`.
GroupBy(NuGroupBy),
/// A single series, wrapped in `NuSeries`.
Series(NuSeries),
}

View File

@ -4,16 +4,15 @@ use std::{cmp::Ordering, collections::hash_map::Entry, collections::HashMap};
use bigdecimal::FromPrimitive;
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use nu_errors::ShellError;
use nu_source::Tag;
use nu_source::{Span, Tag};
use num_bigint::BigInt;
use polars::prelude::{AnyValue, DataFrame, NamedFrom, Series, TimeUnit};
use serde::de::{Deserialize, Deserializer, Visitor};
use serde::Serialize;
use std::fmt;
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
const SECS_PER_DAY: i64 = 86_400;
#[derive(Debug)]
@ -40,26 +39,9 @@ impl Default for ColumnValues {
type ColumnMap = HashMap<String, ColumnValues>;
// TODO. Using Option to help with deserialization. It will be better to find
// a way to use serde with dataframes
#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuDataFrame {
#[serde(skip_serializing)]
pub dataframe: Option<DataFrame>,
}
impl Default for NuDataFrame {
fn default() -> Self {
NuDataFrame { dataframe: None }
}
}
impl NuDataFrame {
pub fn new(df: polars::prelude::DataFrame) -> Self {
NuDataFrame {
dataframe: Some(df),
}
}
dataframe: DataFrame,
}
// TODO. Better definition of equality and comparison for a dataframe.
@ -88,30 +70,46 @@ impl Hash for NuDataFrame {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl<'de> Visitor<'de> for NuDataFrame {
type Value = Self;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an integer between -2^31 and 2^31")
impl AsRef<DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
&self.dataframe
}
}
impl<'de> Deserialize<'de> for NuDataFrame {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_i32(NuDataFrame::default())
impl AsMut<DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
&mut self.dataframe
}
}
impl NuDataFrame {
pub fn new(dataframe: polars::prelude::DataFrame) -> Self {
NuDataFrame { dataframe }
}
/// Pulls the first value out of `input` and returns it as a `NuDataFrame`.
///
/// Only the first item of the iterator is consumed.
///
/// # Errors
///
/// Returns a labeled `ShellError` pointing at `span` when the stream is
/// empty or when its first value is not an eager dataframe.
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuDataFrame, ShellError>
where
    T: Iterator<Item = Value>,
{
    input
        .next()
        .and_then(|value| match value.value {
            UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
            _ => None,
        })
        // The error is built lazily so the success path never constructs it.
        .ok_or_else(|| {
            ShellError::labeled_error(
                "No dataframe in stream",
                "no dataframe found in input stream",
                span,
            )
        })
}
pub fn try_from_iter<T>(iter: T, tag: &Tag) -> Result<Self, ShellError>
where
T: Iterator<Item = Value>,
{
// Dictionary to store the columnar data extracted from
// the input. During the iteration we will sort if the values
// the input. During the iteration we check if the values
// have different type
let mut column_values: ColumnMap = HashMap::new();
@ -120,10 +118,12 @@ impl NuDataFrame {
UntaggedValue::Row(dictionary) => insert_row(&mut column_values, dictionary)?,
UntaggedValue::Table(table) => insert_table(&mut column_values, table)?,
_ => {
return Err(ShellError::labeled_error(
return Err(ShellError::labeled_error_with_secondary(
"Format not supported",
"Value not supported for conversion",
&value.tag,
"Perhaps you want to use a List of Tables or a Dictionary",
&value.tag,
));
}
}
@ -132,26 +132,37 @@ impl NuDataFrame {
from_parsed_columns(column_values, tag)
}
/// Consumes this dataframe and returns it as a `Value` carrying `tag`.
pub fn to_value(self, tag: Tag) -> Value {
Value {
// The dataframe is stored as the eager variant of `PolarsData`.
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(self)),
tag,
}
}
/// Wraps a raw polars `DataFrame` in a `NuDataFrame` and returns it as a
/// `Value` carrying `tag`.
pub fn dataframe_to_value(df: DataFrame, tag: Tag) -> Value {
    let wrapped = PolarsData::EagerDataFrame(NuDataFrame::new(df));
    let value = UntaggedValue::DataFrame(wrapped);

    Value { value, tag }
}
// Print is made out a head and if the dataframe is too large, then a tail
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let size: usize = 20;
let df = &self.as_ref();
let size: usize = 20;
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
if df.height() > size {
let sample_size = size / 2;
let mut values = self.head(Some(sample_size))?;
add_separator(&mut values, df);
let remaining = df.height() - sample_size;
let tail_size = remaining.min(sample_size);
let mut tail_values = self.tail(Some(tail_size))?;
values.append(&mut tail_values);
Ok(values)
} else {
Ok(self.head(Some(size))?)
}
Ok(values)
} else {
unreachable!("No dataframe found in print command")
Ok(self.head(Some(size))?)
}
}
@ -163,71 +174,47 @@ impl NuDataFrame {
}
pub fn tail(&self, rows: Option<usize>) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let df = &self.as_ref();
let to_row = df.height();
let size = rows.unwrap_or(5);
let from_row = to_row.saturating_sub(size);
let values = self.to_rows(from_row, to_row)?;
let values = self.to_rows(from_row, to_row)?;
Ok(values)
} else {
unreachable!()
}
Ok(values)
}
pub fn to_rows(&self, from_row: usize, to_row: usize) -> Result<Vec<Value>, ShellError> {
if let Some(df) = &self.dataframe {
let column_names = df.get_column_names();
let df = &self.as_ref();
let column_names = df.get_column_names();
let mut values: Vec<Value> = Vec::new();
let mut values: Vec<Value> = Vec::new();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
let upper_row = to_row.min(df.height());
for i in from_row..upper_row {
let row = df.get_row(i);
let mut dictionary_row = Dictionary::default();
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
for (val, name) in row.0.iter().zip(column_names.iter()) {
let untagged_val = anyvalue_to_untagged(val)?;
let dict_val = Value {
value: untagged_val,
tag: Tag::unknown(),
};
dictionary_row.insert(name.to_string(), dict_val);
}
let value = Value {
value: UntaggedValue::Row(dictionary_row),
let dict_val = Value {
value: untagged_val,
tag: Tag::unknown(),
};
values.push(value);
dictionary_row.insert(name.to_string(), dict_val);
}
Ok(values)
} else {
unreachable!()
}
}
}
let value = Value {
value: UntaggedValue::Row(dictionary_row),
tag: Tag::unknown(),
};
impl AsRef<polars::prelude::DataFrame> for NuDataFrame {
fn as_ref(&self) -> &polars::prelude::DataFrame {
match &self.dataframe {
Some(df) => df,
None => unreachable!("Accessing ref to dataframe from nu_dataframe"),
values.push(value);
}
}
}
impl AsMut<polars::prelude::DataFrame> for NuDataFrame {
fn as_mut(&mut self) -> &mut polars::prelude::DataFrame {
match &mut self.dataframe {
Some(df) => df,
None => unreachable!("Accessing mut ref to dataframe from nu_dataframe"),
}
Ok(values)
}
}
@ -391,10 +378,12 @@ fn insert_value(
UntaggedValue::Primitive(Primitive::String(_)),
) => col_val.values.push(value),
_ => {
return Err(ShellError::labeled_error(
return Err(ShellError::labeled_error_with_secondary(
"Different values in column",
"Value with different type",
&value.tag,
"Perhaps you want to change it to this value type",
&prev_value.tag,
));
}
}
@ -418,7 +407,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
}
InputValue::Integer => {
let series_values: Result<Vec<_>, _> =
column.values.iter().map(|v| v.as_f32()).collect();
column.values.iter().map(|v| v.as_i64()).collect();
let series = Series::new(&name, series_values?);
df_series.push(series)
}
@ -434,9 +423,7 @@ fn from_parsed_columns(column_values: ColumnMap, tag: &Tag) -> Result<NuDataFram
let df = DataFrame::new(df_series);
match df {
Ok(df) => Ok(NuDataFrame {
dataframe: Some(df),
}),
Ok(df) => Ok(NuDataFrame::new(df)),
Err(e) => {
return Err(ShellError::labeled_error(
"Error while creating dataframe",

View File

@ -1,11 +1,11 @@
use nu_source::Tag;
use nu_source::{Span, Tag};
use polars::frame::groupby::{GroupBy, GroupTuples};
use serde::{Deserialize, Serialize};
use super::NuDataFrame;
use super::{NuDataFrame, PolarsData};
use nu_errors::ShellError;
use crate::{TaggedDictBuilder, Value};
use crate::{TaggedDictBuilder, UntaggedValue, Value};
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
pub struct NuGroupBy {
@ -23,11 +23,25 @@ impl NuGroupBy {
}
}
/// Pulls the first value out of `input` and returns it as a `NuGroupBy`.
///
/// Only the first item of the iterator is consumed.
///
/// # Errors
///
/// Returns a labeled `ShellError` pointing at `span` when the stream is
/// empty or when its first value is not a groupby object.
pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuGroupBy, ShellError>
where
    T: Iterator<Item = Value>,
{
    input
        .next()
        .and_then(|value| match value.value {
            UntaggedValue::DataFrame(PolarsData::GroupBy(group)) => Some(group),
            _ => None,
        })
        // The error is built lazily so the success path never constructs it.
        .ok_or_else(|| {
            ShellError::labeled_error(
                "No groupby object in stream",
                "no groupby object found in input stream",
                span,
            )
        })
}
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
let df = match &self.dataframe.dataframe {
Some(df) => df,
None => unreachable!("No dataframe in nu_dataframe"),
};
let df = self.dataframe.as_ref();
let by = df.select_series(&self.by).map_err(|e| {
ShellError::labeled_error("Error creating groupby", format!("{}", e), Tag::unknown())
@ -50,9 +64,6 @@ impl NuGroupBy {
impl AsRef<polars::prelude::DataFrame> for NuGroupBy {
fn as_ref(&self) -> &polars::prelude::DataFrame {
match &self.dataframe.dataframe {
Some(df) => df,
None => unreachable!("Accessing reference to dataframe from nu_groupby"),
}
self.dataframe.as_ref()
}
}

View File

@ -0,0 +1,330 @@
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::vec;
use nu_errors::ShellError;
use nu_source::{Span, Tag};
use polars::prelude::{DataType, NamedFrom, Series};
use serde::{Deserialize, Serialize};
use crate::{Dictionary, Primitive, UntaggedValue, Value};
use super::PolarsData;
/// Wrapper around a polars `Series` that also keeps the string form of the
/// series data type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NuSeries {
// The wrapped polars series.
series: Series,
// String form of the series data type, as computed in `NuSeries::new`.
dtype: String,
}
// TODO. Better definition of equality and comparison for a series.
// It probably makes sense to use the series name for comparisons
// Placeholder equality: two series never compare equal, whatever they contain.
impl PartialEq for NuSeries {
fn eq(&self, _: &Self) -> bool {
false
}
}
// NOTE(review): `eq` always returns false, so a series is not even equal to
// itself, which `Eq` normally expects — confirm this is acceptable.
impl Eq for NuSeries {}
// Placeholder ordering: every pair of series is reported as `Ordering::Equal`.
impl PartialOrd for NuSeries {
fn partial_cmp(&self, _: &Self) -> Option<Ordering> {
Some(Ordering::Equal)
}
}
// Placeholder total ordering: the series contents are ignored and the result
// is always `Ordering::Equal`.
impl Ord for NuSeries {
fn cmp(&self, _: &Self) -> Ordering {
Ordering::Equal
}
}
// Placeholder hash: nothing is written to the hasher, so the series contents
// do not influence the resulting hash.
impl Hash for NuSeries {
fn hash<H: Hasher>(&self, _: &mut H) {}
}
impl NuSeries {
    /// Wraps a polars `Series`, storing the string form of its data type
    /// alongside it.
    pub fn new(series: Series) -> Self {
        let dtype = series.dtype().to_string();

        NuSeries { series, dtype }
    }

    /// Pulls the first value out of `input` and returns it as a `NuSeries`.
    ///
    /// Only the first item of the iterator is consumed.
    ///
    /// # Errors
    ///
    /// Returns a labeled `ShellError` pointing at `span` when the stream is
    /// empty or when its first value is not a series.
    pub fn try_from_stream<T>(input: &mut T, span: &Span) -> Result<NuSeries, ShellError>
    where
        T: Iterator<Item = Value>,
    {
        input
            .next()
            .and_then(|value| match value.value {
                UntaggedValue::DataFrame(PolarsData::Series(series)) => Some(series),
                _ => None,
            })
            // The error is built lazily so the success path never constructs it.
            .ok_or_else(|| {
                ShellError::labeled_error(
                    "No series in stream",
                    "no series found in input stream",
                    span,
                )
            })
    }

    /// Builds a series from an iterator of primitive values.
    ///
    /// Only int, decimal and string primitives are accepted; each value is
    /// passed through `insert_value` and the collected values are handed to
    /// `from_parsed_vector` together with the optional series `name`.
    ///
    /// # Errors
    ///
    /// Returns a `ShellError` for any value that is not one of the accepted
    /// primitives, and propagates errors from `insert_value` and
    /// `from_parsed_vector`.
    pub fn try_from_iter<T>(iter: T, name: Option<String>) -> Result<Self, ShellError>
    where
        T: Iterator<Item = Value>,
    {
        let mut vec_values: Vec<Value> = Vec::new();

        for value in iter {
            match value.value {
                UntaggedValue::Primitive(Primitive::Int(_))
                | UntaggedValue::Primitive(Primitive::Decimal(_))
                | UntaggedValue::Primitive(Primitive::String(_)) => {
                    insert_value(value, &mut vec_values)?
                }
                _ => {
                    return Err(ShellError::labeled_error_with_secondary(
                        "Format not supported",
                        "Value not supported for conversion",
                        &value.tag.span,
                        "Perhaps you want to use a list of primitive values (int, decimal, string)",
                        &value.tag.span,
                    ));
                }
            }
        }

        from_parsed_vector(vec_values, name)
    }

    /// Consumes this series and returns it as a `Value` carrying `tag`.
    pub fn to_value(self, tag: Tag) -> Value {
        Value {
            value: UntaggedValue::DataFrame(PolarsData::Series(self)),
            tag,
        }
    }

    /// Wraps a raw polars `Series` and returns it as a `Value` carrying `tag`.
    pub fn series_to_value(series: Series, tag: Tag) -> Value {
        // Reuse `to_value` so the wrapping logic lives in one place.
        NuSeries::new(series).to_value(tag)
    }

    /// Wraps a raw polars `Series` and returns it as an `UntaggedValue`.
    pub fn series_to_untagged(series: Series) -> UntaggedValue {
        UntaggedValue::DataFrame(PolarsData::Series(NuSeries::new(series)))
    }

    /// Returns the stored string form of the series data type.
    pub fn dtype(&self) -> &str {
        &self.dtype
    }

    /// Consumes the wrapper and returns the inner polars `Series`.
    pub fn series(self) -> Series {
        self.series
    }
}
// Gives shared access to the wrapped polars series.
impl AsRef<Series> for NuSeries {
fn as_ref(&self) -> &Series {
&self.series
}
}
// Gives mutable access to the wrapped polars series.
// NOTE(review): the `dtype` string stored in `NuSeries` is not refreshed here;
// confirm callers never change the series type through this reference.
impl AsMut<Series> for NuSeries {
fn as_mut(&mut self) -> &mut Series {
&mut self.series
}
}
// Turns the typed chunked array produced by `$converter` into the rows shown
// when printing `$self`.
//
// `$converter` is the result of a typed accessor on the series (for example
// `self.as_ref().i64()`); a failure is mapped to a "Parsing Error"
// `ShellError` and returned from the enclosing function through `?`.
//
// Series with at most `size` (20) elements are listed in full. Longer series
// are listed as the first `size / 2` elements, a "..." separator row and the
// last `size / 2` elements. Each present element becomes a one-column row
// whose header is "<name> (<dtype>)"; a missing element becomes a bare
// `Primitive::Nothing` value.
macro_rules! series_to_chunked {
    ($converter: expr, $self: expr) => {{
        let chunked_array = $converter.map_err(|e| {
            ShellError::labeled_error("Parsing Error", format!("{}", e), Span::unknown())
        })?;

        let size = 20;
        let len = $self.as_ref().len();
        let half = size / 2;

        // For long series the tail has to start `half` elements before the
        // end, so that the last elements are shown rather than the ones that
        // directly follow the head.
        let (head_size, skip, tail_size) = if len > size {
            (half, len - half, half)
        } else {
            (size, 0, 0)
        };

        // The column header is the same for every row, so build it once.
        let header = format!("{} ({})", $self.as_ref().name(), $self.as_ref().dtype());

        let head = chunked_array
            .into_iter()
            .take(head_size)
            .map(|value| match value {
                Some(v) => {
                    let mut dictionary_row = Dictionary::default();

                    let value = Value {
                        value: UntaggedValue::Primitive(v.into()),
                        tag: Tag::unknown(),
                    };

                    dictionary_row.insert(header.clone(), value);

                    Value {
                        value: UntaggedValue::Row(dictionary_row),
                        tag: Tag::unknown(),
                    }
                }
                None => Value {
                    value: UntaggedValue::Primitive(Primitive::Nothing),
                    tag: Tag::unknown(),
                },
            });

        // `<=` keeps a series of exactly `size` elements from getting a
        // separator row followed by an empty tail.
        let res = if len <= size {
            head.collect::<Vec<Value>>()
        } else {
            let middle = std::iter::once({
                let mut dictionary_row = Dictionary::default();

                let value = Value {
                    value: UntaggedValue::Primitive("...".into()),
                    tag: Tag::unknown(),
                };

                dictionary_row.insert(header.clone(), value);

                Value {
                    value: UntaggedValue::Row(dictionary_row),
                    tag: Tag::unknown(),
                }
            });

            let tail = chunked_array
                .into_iter()
                .skip(skip)
                .take(tail_size)
                .map(|value| match value {
                    Some(v) => {
                        let mut dictionary_row = Dictionary::default();

                        let value = Value {
                            value: UntaggedValue::Primitive(v.into()),
                            tag: Tag::unknown(),
                        };

                        dictionary_row.insert(header.clone(), value);

                        Value {
                            value: UntaggedValue::Row(dictionary_row),
                            tag: Tag::unknown(),
                        }
                    }
                    None => Value {
                        value: UntaggedValue::Primitive(Primitive::Nothing),
                        tag: Tag::unknown(),
                    },
                });

            head.chain(middle).chain(tail).collect::<Vec<Value>>()
        };

        Ok(res)
    }};
}
impl NuSeries {
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
match self.as_ref().dtype() {
DataType::Boolean => series_to_chunked!(self.as_ref().bool(), self),
DataType::UInt8 => series_to_chunked!(self.as_ref().u8(), self),
DataType::UInt16 => series_to_chunked!(self.as_ref().u16(), self),
DataType::UInt32 => series_to_chunked!(self.as_ref().u32(), self),
DataType::UInt64 => series_to_chunked!(self.as_ref().u64(), self),
DataType::Int8 => series_to_chunked!(self.as_ref().i8(), self),
DataType::Int16 => series_to_chunked!(self.as_ref().i16(), self),
DataType::Int32 => series_to_chunked!(self.as_ref().i32(), self),
DataType::Int64 => series_to_chunked!(self.as_ref().i64(), self),
DataType::Float32 => series_to_chunked!(self.as_ref().f32(), self),
DataType::Float64 => series_to_chunked!(self.as_ref().f64(), self),
DataType::Utf8 => series_to_chunked!(self.as_ref().utf8(), self),
DataType::Date32 => series_to_chunked!(self.as_ref().date32(), self),
DataType::Date64 => series_to_chunked!(self.as_ref().date64(), self),
DataType::Null => Ok(vec![Value {
value: UntaggedValue::Primitive(Primitive::Nothing),
tag: Tag::unknown(),
}]),
//DataType::List(_) => None,
//DataType::Time64(TimeUnit) => None,
//DataType::Duration(TimeUnit) => None,
// DataType::Categorical => None,
_ => unimplemented!(),
}
}
}
/// Appends `value` to `vec_values` after checking that it has the same
/// primitive kind (int, decimal or string) as the most recently stored value.
///
/// The first value is accepted without any check. A mismatch yields a
/// `ShellError` that points at both the new value and the previous one.
fn insert_value(value: Value, vec_values: &mut Vec<Value>) -> Result<(), ShellError> {
    if let Some(previous) = vec_values.last() {
        let same_kind = match (&previous.value, &value.value) {
            (
                UntaggedValue::Primitive(Primitive::Int(_)),
                UntaggedValue::Primitive(Primitive::Int(_)),
            ) => true,
            (
                UntaggedValue::Primitive(Primitive::Decimal(_)),
                UntaggedValue::Primitive(Primitive::Decimal(_)),
            ) => true,
            (
                UntaggedValue::Primitive(Primitive::String(_)),
                UntaggedValue::Primitive(Primitive::String(_)),
            ) => true,
            _ => false,
        };

        if !same_kind {
            return Err(ShellError::labeled_error_with_secondary(
                "Different values in column",
                "Value with different type",
                &value.tag,
                "Perhaps you want to change it to this value type",
                &previous.tag,
            ));
        }
    }

    vec_values.push(value);
    Ok(())
}
fn from_parsed_vector(
vec_values: Vec<Value>,
name: Option<String>,
) -> Result<NuSeries, ShellError> {
let series = match &vec_values[0].value {
UntaggedValue::Primitive(Primitive::Int(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_i64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "int",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::Decimal(_)) => {
let series_values: Result<Vec<_>, _> = vec_values.iter().map(|v| v.as_f64()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "decimal",
};
Series::new(series_name, series_values?)
}
UntaggedValue::Primitive(Primitive::String(_)) => {
let series_values: Result<Vec<_>, _> =
vec_values.iter().map(|v| v.as_string()).collect();
let series_name = match &name {
Some(n) => n.as_ref(),
None => "string",
};
Series::new(series_name, series_values?)
}
_ => unreachable!("The untagged type is checked while creating vec_values"),
};
Ok(NuSeries::new(series))
}

View File

@ -672,7 +672,11 @@ impl ShellTypeName for UntaggedValue {
UntaggedValue::Error(_) => "error",
UntaggedValue::Block(_) => "block",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(_) => "dataframe",
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(_)) => "dataframe",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::Series(_)) => "series",
#[cfg(feature = "dataframe")]
UntaggedValue::DataFrame(PolarsData::GroupBy(_)) => "groupby",
}
}
}