mirror of
https://github.com/nushell/nushell.git
synced 2024-12-22 23:23:12 +01:00
Dataframe commands (#3498)
* Sample command * Join command with checks * More dataframes commands * Groupby and aggregate commands * Missing feature dataframe flag * Renamed file
This commit is contained in:
parent
d8c4b9c4fb
commit
3a5ee1aed0
42
Cargo.lock
generated
42
Cargo.lock
generated
@ -2843,6 +2843,12 @@ dependencies = [
|
|||||||
"pkg-config",
|
"pkg-config",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libm"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libnghttp2-sys"
|
name = "libnghttp2-sys"
|
||||||
version = "0.1.6+1.43.0"
|
version = "0.1.6+1.43.0"
|
||||||
@ -3760,9 +3766,6 @@ dependencies = [
|
|||||||
"polars",
|
"polars",
|
||||||
"serde 1.0.125",
|
"serde 1.0.125",
|
||||||
"serde_bytes",
|
"serde_bytes",
|
||||||
"serde_json",
|
|
||||||
"serde_yaml",
|
|
||||||
"toml",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -4253,6 +4256,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
|
"libm",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -4685,9 +4689,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars"
|
name = "polars"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bc4e2e025126632e8e19d53cd9b655da344bd4942ba603ad246c7776b6401844"
|
checksum = "c406ce46726b7d33b05a343d9c1317c0803a419d50bb45275de3f366410e9a80"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"polars-core",
|
"polars-core",
|
||||||
"polars-io",
|
"polars-io",
|
||||||
@ -4696,9 +4700,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-arrow"
|
name = "polars-arrow"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c3534c76a7bafaca9c783506a1f331ad746621d3808ab2407c02ffadd9e99326"
|
checksum = "53b2d5fb400345c7977e4e728a10be382476f2f9d2caf6b57cd60e97ea17d364"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"num 0.4.0",
|
"num 0.4.0",
|
||||||
@ -4707,9 +4711,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-core"
|
name = "polars-core"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ad76c4d55017da2d0f8930b0caa327d12286c1e4407469f361e84fad176f9601"
|
checksum = "88561e850748c507f0fc7835b35e795e770597ceecb14e0a8f7d8abf8346645d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
@ -4723,6 +4727,8 @@ dependencies = [
|
|||||||
"parquet",
|
"parquet",
|
||||||
"polars-arrow",
|
"polars-arrow",
|
||||||
"prettytable-rs",
|
"prettytable-rs",
|
||||||
|
"rand 0.7.3",
|
||||||
|
"rand_distr",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex 1.5.3",
|
"regex 1.5.3",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
@ -4731,9 +4737,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-io"
|
name = "polars-io"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "07f20f27363d85f847a2b7e9d1bfd426bff18680691dd42ff17ca91893f12f89"
|
checksum = "27388810ec5f3346838725aa0aa49343802c1344b96fe82229ae781c62c98bc7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
@ -4755,9 +4761,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-lazy"
|
name = "polars-lazy"
|
||||||
version = "0.13.3"
|
version = "0.13.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "595906f951bacf223625ed6b0e4e73153eb9e251850bb2f9c36d78828334f32b"
|
checksum = "0e7f83284970a9db7d0b6a56d6f944c3988587429c124c1d087188e9d2c7ad7c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"itertools",
|
"itertools",
|
||||||
@ -5089,6 +5095,16 @@ dependencies = [
|
|||||||
"getrandom 0.2.2",
|
"getrandom 0.2.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_distr"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c9e9532ada3929fb8b2e9dbe28d1e06c9b2cc65813f074fcb6bd5fbefeff9d56"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits 0.2.14",
|
||||||
|
"rand 0.7.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rand_hc"
|
name = "rand_hc"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
@ -99,7 +99,10 @@ uuid_crate = { package = "uuid", version = "0.8.2", features = ["v4"], optional
|
|||||||
which = { version = "4.1.0", optional = true }
|
which = { version = "4.1.0", optional = true }
|
||||||
zip = { version = "0.5.9", optional = true }
|
zip = { version = "0.5.9", optional = true }
|
||||||
|
|
||||||
polars = { version = "0.13.3",optional = true, features = ["parquet", "json"] }
|
[dependencies.polars]
|
||||||
|
version = "0.13.4"
|
||||||
|
optional = true
|
||||||
|
features = ["parquet", "json", "random"]
|
||||||
|
|
||||||
[target.'cfg(unix)'.dependencies]
|
[target.'cfg(unix)'.dependencies]
|
||||||
umask = "1.0.0"
|
umask = "1.0.0"
|
||||||
|
@ -188,7 +188,11 @@ pub(crate) mod touch;
|
|||||||
pub(crate) use all::Command as All;
|
pub(crate) use all::Command as All;
|
||||||
pub(crate) use any::Command as Any;
|
pub(crate) use any::Command as Any;
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
pub(crate) use dataframe::{DataFrame, DataFrameGroupBy, DataFrameList, DataFrameLoad};
|
pub(crate) use dataframe::{
|
||||||
|
DataFrame, DataFrameAggregate, DataFrameConvert, DataFrameDTypes, DataFrameDrop,
|
||||||
|
DataFrameGroupBy, DataFrameJoin, DataFrameList, DataFrameLoad, DataFrameSample,
|
||||||
|
DataFrameSelect, DataFrameShow,
|
||||||
|
};
|
||||||
pub(crate) use enter::Enter;
|
pub(crate) use enter::Enter;
|
||||||
pub(crate) use every::Every;
|
pub(crate) use every::Every;
|
||||||
pub(crate) use exec::Exec;
|
pub(crate) use exec::Exec;
|
||||||
|
@ -8,6 +8,9 @@ use nu_protocol::hir::{self, Expression, ExternalRedirection, Literal, SpannedEx
|
|||||||
use nu_protocol::{Primitive, Signature, UntaggedValue, Value};
|
use nu_protocol::{Primitive, Signature, UntaggedValue, Value};
|
||||||
use nu_table::TextStyle;
|
use nu_table::TextStyle;
|
||||||
|
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
use nu_protocol::dataframe::PolarsData;
|
||||||
|
|
||||||
pub struct Command;
|
pub struct Command;
|
||||||
|
|
||||||
impl WholeStreamCommand for Command {
|
impl WholeStreamCommand for Command {
|
||||||
@ -236,8 +239,8 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Value {
|
Value {
|
||||||
value: UntaggedValue::DataFrame(df),
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)),
|
||||||
..
|
tag,
|
||||||
} => {
|
} => {
|
||||||
if let Some(table) = table {
|
if let Some(table) = table {
|
||||||
// TODO. Configure the parameter rows from file. It can be
|
// TODO. Configure the parameter rows from file. It can be
|
||||||
@ -248,6 +251,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
let _ = result.collect::<Vec<_>>();
|
let _ = result.collect::<Vec<_>>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
Value {
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::GroupBy(groupby)),
|
||||||
|
tag,
|
||||||
|
} => {
|
||||||
|
if let Some(table) = table {
|
||||||
|
// TODO. Configure the parameter rows from file. It can be
|
||||||
|
// adjusted to see a certain amount of values in the head
|
||||||
|
let command_args =
|
||||||
|
create_default_command_args(&context, groupby.print()?.into(), tag);
|
||||||
|
let result = table.run(command_args)?;
|
||||||
|
let _ = result.collect::<Vec<_>>();
|
||||||
|
}
|
||||||
|
}
|
||||||
Value {
|
Value {
|
||||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||||
..
|
..
|
||||||
|
202
crates/nu-command/src/commands/dataframe/aggregate.rs
Normal file
202
crates/nu-command/src/commands/dataframe/aggregate.rs
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
use nu_source::Tagged;
|
||||||
|
use polars::frame::groupby::GroupBy;
|
||||||
|
|
||||||
|
use super::utils::convert_columns;
|
||||||
|
|
||||||
|
enum Operation {
|
||||||
|
Mean,
|
||||||
|
Sum,
|
||||||
|
Min,
|
||||||
|
Max,
|
||||||
|
First,
|
||||||
|
Last,
|
||||||
|
Nunique,
|
||||||
|
Quantile(f64),
|
||||||
|
Median,
|
||||||
|
Var,
|
||||||
|
Std,
|
||||||
|
Count,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Operation {
|
||||||
|
fn from_tagged(
|
||||||
|
name: &Tagged<String>,
|
||||||
|
quantile: Option<Tagged<f64>>,
|
||||||
|
) -> Result<Operation, ShellError> {
|
||||||
|
match name.item.as_ref() {
|
||||||
|
"mean" => Ok(Operation::Mean),
|
||||||
|
"sum" => Ok(Operation::Sum),
|
||||||
|
"min" => Ok(Operation::Min),
|
||||||
|
"max" => Ok(Operation::Max),
|
||||||
|
"first" => Ok(Operation::First),
|
||||||
|
"last" => Ok(Operation::Last),
|
||||||
|
"nunique" => Ok(Operation::Nunique),
|
||||||
|
"quantile" => {
|
||||||
|
match quantile {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"Quantile value not fount",
|
||||||
|
"Quantile operation requires quantile value",
|
||||||
|
&name.tag,
|
||||||
|
)),
|
||||||
|
Some(value ) => {
|
||||||
|
if (value.item < 0.0) | (value.item > 1.0) {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"Inappropriate quantile",
|
||||||
|
"Quantile value should be between 0.0 and 1.0",
|
||||||
|
&value.tag,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(Operation::Quantile(value.item))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"median" => Ok(Operation::Median),
|
||||||
|
"var" => Ok(Operation::Var),
|
||||||
|
"std" => Ok(Operation::Std),
|
||||||
|
"count" => Ok(Operation::Count),
|
||||||
|
_ => Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"Operation not fount",
|
||||||
|
"Operation does not exist",
|
||||||
|
&name.tag,
|
||||||
|
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, count",
|
||||||
|
&name.tag,
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls aggregate"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Performs an aggregation operation on a groupby object"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls aggregate")
|
||||||
|
.required("operation", SyntaxShape::String, "aggregate operation")
|
||||||
|
.optional(
|
||||||
|
"selection",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"columns to perform aggregation",
|
||||||
|
)
|
||||||
|
.named(
|
||||||
|
"quantile",
|
||||||
|
SyntaxShape::Number,
|
||||||
|
"quantile value for quantile operation",
|
||||||
|
Some('q'),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
aggregate(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "Aggregate sum by grouping by column a and summing on col b",
|
||||||
|
example:
|
||||||
|
"echo [[a b]; [one 1] [one 2]] | pls convert | pls groupby [a] | pls aggregate sum",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn aggregate(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let quantile: Option<Tagged<f64>> = args.get_flag("quantile")?;
|
||||||
|
let operation: Tagged<String> = args.req(0)?;
|
||||||
|
let op = Operation::from_tagged(&operation, quantile)?;
|
||||||
|
|
||||||
|
// Extracting the selection columns of the columns to perform the aggregation
|
||||||
|
let agg_cols: Option<Vec<Value>> = args.opt(1)?;
|
||||||
|
let (selection, agg_span) = match agg_cols {
|
||||||
|
Some(cols) => {
|
||||||
|
let (agg_string, agg_span) = convert_columns(&cols, &tag)?;
|
||||||
|
(Some(agg_string), agg_span)
|
||||||
|
}
|
||||||
|
None => (None, Span::unknown()),
|
||||||
|
};
|
||||||
|
|
||||||
|
// The operation is only done in one dataframe. Only one input is
|
||||||
|
// expected from the InputStream
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) = value.value {
|
||||||
|
let groupby = nu_groupby.to_groupby()?;
|
||||||
|
|
||||||
|
let groupby = match &selection {
|
||||||
|
Some(cols) => groupby.select(cols),
|
||||||
|
None => groupby,
|
||||||
|
};
|
||||||
|
|
||||||
|
let res = perform_aggregation(groupby, op, &operation.tag, &agg_span)?;
|
||||||
|
|
||||||
|
let final_df = Value {
|
||||||
|
tag,
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||||
|
res,
|
||||||
|
))),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(final_df))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No groupby in stream",
|
||||||
|
"no groupby found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn perform_aggregation(
|
||||||
|
groupby: GroupBy,
|
||||||
|
operation: Operation,
|
||||||
|
operation_tag: &Tag,
|
||||||
|
agg_span: &Span,
|
||||||
|
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||||
|
match operation {
|
||||||
|
Operation::Mean => groupby.mean(),
|
||||||
|
Operation::Sum => groupby.sum(),
|
||||||
|
Operation::Min => groupby.min(),
|
||||||
|
Operation::Max => groupby.max(),
|
||||||
|
Operation::First => groupby.first(),
|
||||||
|
Operation::Last => groupby.last(),
|
||||||
|
Operation::Nunique => groupby.n_unique(),
|
||||||
|
Operation::Quantile(quantile) => groupby.quantile(quantile),
|
||||||
|
Operation::Median => groupby.median(),
|
||||||
|
Operation::Var => groupby.var(),
|
||||||
|
Operation::Std => groupby.std(),
|
||||||
|
Operation::Count => groupby.count(),
|
||||||
|
}
|
||||||
|
.map_err(|e| {
|
||||||
|
let span = if e.to_string().contains("Not found") {
|
||||||
|
agg_span
|
||||||
|
} else {
|
||||||
|
&operation_tag.span
|
||||||
|
};
|
||||||
|
|
||||||
|
ShellError::labeled_error("Aggregation error", format!("{}", e), span)
|
||||||
|
})
|
||||||
|
}
|
@ -1,38 +1,26 @@
|
|||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use nu_engine::WholeStreamCommand;
|
use nu_engine::WholeStreamCommand;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{dataframe::NuDataFrame, Signature, UntaggedValue};
|
use nu_protocol::{Signature, UntaggedValue};
|
||||||
|
|
||||||
pub struct Command;
|
pub struct Command;
|
||||||
|
|
||||||
impl WholeStreamCommand for Command {
|
impl WholeStreamCommand for Command {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe"
|
"pls"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
"Creates a dataframe from pipelined Table or List "
|
"Commands to work with polars dataframes"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("dataframe")
|
Signature::build("pls")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
let tag = args.call_info.name_tag.clone();
|
Ok(OutputStream::one(
|
||||||
let args = args.evaluate_once()?;
|
UntaggedValue::string(get_full_help(&Command, args.scope())).into_value(Tag::unknown()),
|
||||||
|
))
|
||||||
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
|
||||||
let init = InputStream::one(UntaggedValue::DataFrame(df).into_value(&tag));
|
|
||||||
|
|
||||||
Ok(init.to_output_stream())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn examples(&self) -> Vec<Example> {
|
|
||||||
vec![Example {
|
|
||||||
description: "Takes an input stream and converts it to a dataframe",
|
|
||||||
example: "echo [[a b];[1 2] [3 4]] | dataframe",
|
|
||||||
result: None,
|
|
||||||
}]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
43
crates/nu-command/src/commands/dataframe/convert.rs
Normal file
43
crates/nu-command/src/commands/dataframe/convert.rs
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, UntaggedValue,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls convert"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Converts a pipelined Table or List into a polars dataframe"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls convert")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
||||||
|
let init = InputStream::one(
|
||||||
|
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)).into_value(&tag),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(init.to_output_stream())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "Takes an input stream and converts it to a polars dataframe",
|
||||||
|
example: "echo [[a b];[1 2] [3 4]] | pls convert",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
97
crates/nu-command/src/commands/dataframe/drop.rs
Normal file
97
crates/nu-command/src/commands/dataframe/drop.rs
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::utils::convert_columns;
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls drop"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Creates a new dataframe by dropping the selected columns"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls drop").required(
|
||||||
|
"columns",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"column names to be dropped",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
drop(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "drop column a",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls drop [a]",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn drop(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let columns: Vec<Value> = args.req(0)?;
|
||||||
|
|
||||||
|
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(ref df),
|
||||||
|
..
|
||||||
|
})) = value.value
|
||||||
|
{
|
||||||
|
let new_df = match col_string.iter().next() {
|
||||||
|
Some(col) => df.drop(col).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Join error", format!("{}", e), &col_span)
|
||||||
|
}),
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"Empty names list",
|
||||||
|
"No column names where found",
|
||||||
|
&col_span,
|
||||||
|
)),
|
||||||
|
}?;
|
||||||
|
|
||||||
|
let res = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
|
||||||
|
new_df.drop(col).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Drop error", format!("{}", e), &col_span)
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let value = Value {
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||||
|
res,
|
||||||
|
))),
|
||||||
|
tag: tag.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(value))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
81
crates/nu-command/src/commands/dataframe/dtypes.rs
Normal file
81
crates/nu-command/src/commands/dataframe/dtypes.rs
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, TaggedDictBuilder, UntaggedValue,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls dtypes"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Show dataframe data types"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls dtypes")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
dtypes(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "drop column a",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls dtypes",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dtypes(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(df),
|
||||||
|
..
|
||||||
|
})) = value.value
|
||||||
|
{
|
||||||
|
let col_names = df
|
||||||
|
.get_column_names()
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.to_string())
|
||||||
|
.collect::<Vec<String>>();
|
||||||
|
|
||||||
|
let values =
|
||||||
|
df.dtypes()
|
||||||
|
.into_iter()
|
||||||
|
.zip(col_names.into_iter())
|
||||||
|
.map(move |(dtype, name)| {
|
||||||
|
let mut data = TaggedDictBuilder::new(tag.clone());
|
||||||
|
data.insert_value("column", name.as_ref());
|
||||||
|
data.insert_value("dtype", format!("{}", dtype));
|
||||||
|
|
||||||
|
data.into_value()
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(OutputStream::from_stream(values))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,100 +2,29 @@ use crate::prelude::*;
|
|||||||
use nu_engine::WholeStreamCommand;
|
use nu_engine::WholeStreamCommand;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
dataframe::{NuDataFrame, NuGroupBy, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
};
|
};
|
||||||
use nu_source::Tagged;
|
|
||||||
use polars::frame::groupby::GroupBy;
|
|
||||||
|
|
||||||
enum Operation {
|
use super::utils::convert_columns;
|
||||||
Mean,
|
|
||||||
Sum,
|
|
||||||
Min,
|
|
||||||
Max,
|
|
||||||
First,
|
|
||||||
Last,
|
|
||||||
Nunique,
|
|
||||||
Quantile(f64),
|
|
||||||
Median,
|
|
||||||
//Var,
|
|
||||||
//Std,
|
|
||||||
Count,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Operation {
|
|
||||||
fn from_tagged(
|
|
||||||
name: &Tagged<String>,
|
|
||||||
quantile: Option<Tagged<f64>>,
|
|
||||||
) -> Result<Operation, ShellError> {
|
|
||||||
match name.item.as_ref() {
|
|
||||||
"mean" => Ok(Operation::Mean),
|
|
||||||
"sum" => Ok(Operation::Sum),
|
|
||||||
"min" => Ok(Operation::Min),
|
|
||||||
"max" => Ok(Operation::Max),
|
|
||||||
"first" => Ok(Operation::First),
|
|
||||||
"last" => Ok(Operation::Last),
|
|
||||||
"nunique" => Ok(Operation::Nunique),
|
|
||||||
"quantile" => {
|
|
||||||
match quantile {
|
|
||||||
None => Err(ShellError::labeled_error(
|
|
||||||
"Quantile value not fount",
|
|
||||||
"Quantile operation requires quantile value",
|
|
||||||
&name.tag,
|
|
||||||
)),
|
|
||||||
Some(value ) => {
|
|
||||||
if (value.item < 0.0) | (value.item > 1.0) {
|
|
||||||
Err(ShellError::labeled_error(
|
|
||||||
"Inappropriate quantile",
|
|
||||||
"Quantile value should be between 0.0 and 1.0",
|
|
||||||
&value.tag,
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Ok(Operation::Quantile(value.item))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"median" => Ok(Operation::Median),
|
|
||||||
//"var" => Ok(Operation::Var),
|
|
||||||
//"std" => Ok(Operation::Std),
|
|
||||||
"count" => Ok(Operation::Count),
|
|
||||||
_ => Err(ShellError::labeled_error_with_secondary(
|
|
||||||
"Operation not fount",
|
|
||||||
"Operation does not exist",
|
|
||||||
&name.tag,
|
|
||||||
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, count",
|
|
||||||
&name.tag,
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DataFrame;
|
pub struct DataFrame;
|
||||||
|
|
||||||
impl WholeStreamCommand for DataFrame {
|
impl WholeStreamCommand for DataFrame {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe groupby"
|
"pls groupby"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
"Creates a groupby operation on a dataframe"
|
"Creates a groupby object that can be used for other aggregations"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("dataframe groupby")
|
Signature::build("pls groupby").required(
|
||||||
.required("columns", SyntaxShape::Table, "groupby columns")
|
"by columns",
|
||||||
.required(
|
SyntaxShape::Table,
|
||||||
"aggregation columns",
|
"groupby columns",
|
||||||
SyntaxShape::Table,
|
)
|
||||||
"columns to perform aggregation",
|
|
||||||
)
|
|
||||||
.required("operation", SyntaxShape::String, "aggregate operation")
|
|
||||||
.named(
|
|
||||||
"quantile",
|
|
||||||
SyntaxShape::Number,
|
|
||||||
"auantile value for quantile operation",
|
|
||||||
Some('q'),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
@ -104,8 +33,8 @@ impl WholeStreamCommand for DataFrame {
|
|||||||
|
|
||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![Example {
|
vec![Example {
|
||||||
description: "",
|
description: "Grouping by column a",
|
||||||
example: "",
|
example: "echo [[a b]; [one 1] [one 2]] | pls convert | pls groupby [a]",
|
||||||
result: None,
|
result: None,
|
||||||
}]
|
}]
|
||||||
}
|
}
|
||||||
@ -115,77 +44,9 @@ fn groupby(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
let tag = args.call_info.name_tag.clone();
|
let tag = args.call_info.name_tag.clone();
|
||||||
let mut args = args.evaluate_once()?;
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
let quantile: Option<Tagged<f64>> = args.get_flag("quantile")?;
|
|
||||||
let operation: Tagged<String> = args.req(2)?;
|
|
||||||
let op = Operation::from_tagged(&operation, quantile)?;
|
|
||||||
|
|
||||||
// Extracting the names of the columns to perform the groupby
|
// Extracting the names of the columns to perform the groupby
|
||||||
let columns: Vec<Value> = args.req(0)?;
|
let by_columns: Vec<Value> = args.req(0)?;
|
||||||
|
let (columns_string, col_span) = convert_columns(&by_columns, &tag)?;
|
||||||
// Extracting the first tag from the groupby column names
|
|
||||||
let mut col_span = match columns
|
|
||||||
.iter()
|
|
||||||
.nth(0)
|
|
||||||
.map(|v| Span::new(v.tag.span.start(), v.tag.span.end()))
|
|
||||||
{
|
|
||||||
Some(span) => span,
|
|
||||||
None => {
|
|
||||||
return Err(ShellError::labeled_error(
|
|
||||||
"Empty groupby names list",
|
|
||||||
"Empty list for groupby column names",
|
|
||||||
&tag,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let columns_string = columns
|
|
||||||
.into_iter()
|
|
||||||
.map(|value| match value.value {
|
|
||||||
UntaggedValue::Primitive(Primitive::String(s)) => {
|
|
||||||
col_span = col_span.until(value.tag.span);
|
|
||||||
Ok(s)
|
|
||||||
}
|
|
||||||
_ => Err(ShellError::labeled_error(
|
|
||||||
"Incorrect column format",
|
|
||||||
"Only string as column name",
|
|
||||||
&value.tag,
|
|
||||||
)),
|
|
||||||
})
|
|
||||||
.collect::<Result<Vec<String>, _>>()?;
|
|
||||||
|
|
||||||
// Extracting the names of the columns to perform the aggregation
|
|
||||||
let agg_cols: Vec<Value> = args.req(1)?;
|
|
||||||
|
|
||||||
// Extracting the first tag from the aggregation column names
|
|
||||||
let mut agg_span = match agg_cols
|
|
||||||
.iter()
|
|
||||||
.nth(0)
|
|
||||||
.map(|v| Span::new(v.tag.span.start(), v.tag.span.end()))
|
|
||||||
{
|
|
||||||
Some(span) => span,
|
|
||||||
None => {
|
|
||||||
return Err(ShellError::labeled_error(
|
|
||||||
"Empty aggregation names list",
|
|
||||||
"Empty list for aggregation column names",
|
|
||||||
&tag,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let aggregation_string = agg_cols
|
|
||||||
.into_iter()
|
|
||||||
.map(|value| match value.value {
|
|
||||||
UntaggedValue::Primitive(Primitive::String(s)) => {
|
|
||||||
agg_span = agg_span.until(value.tag.span);
|
|
||||||
Ok(s)
|
|
||||||
}
|
|
||||||
_ => Err(ShellError::labeled_error(
|
|
||||||
"Incorrect column format",
|
|
||||||
"Only string as column name",
|
|
||||||
value.tag,
|
|
||||||
)),
|
|
||||||
})
|
|
||||||
.collect::<Result<Vec<String>, _>>()?;
|
|
||||||
|
|
||||||
// The operation is only done in one dataframe. Only one input is
|
// The operation is only done in one dataframe. Only one input is
|
||||||
// expected from the InputStream
|
// expected from the InputStream
|
||||||
@ -196,29 +57,31 @@ fn groupby(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
&tag,
|
&tag,
|
||||||
)),
|
)),
|
||||||
Some(value) => {
|
Some(value) => {
|
||||||
if let UntaggedValue::DataFrame(NuDataFrame {
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(nu_df)) = value.value {
|
||||||
dataframe: Some(df),
|
let df = match nu_df.dataframe {
|
||||||
..
|
Some(df) => df,
|
||||||
}) = value.value
|
None => unreachable!("No dataframe in nu_dataframe"),
|
||||||
{
|
|
||||||
let groupby = df
|
|
||||||
.groupby(&columns_string)
|
|
||||||
.map_err(|e| {
|
|
||||||
ShellError::labeled_error("Groupby error", format!("{}", e), col_span)
|
|
||||||
})?
|
|
||||||
.select(&aggregation_string);
|
|
||||||
|
|
||||||
let res = perform_aggregation(groupby, op, &operation.tag, &agg_span)?;
|
|
||||||
|
|
||||||
let final_df = Value {
|
|
||||||
tag,
|
|
||||||
value: UntaggedValue::DataFrame(NuDataFrame {
|
|
||||||
dataframe: Some(res),
|
|
||||||
name: "agg result".to_string(),
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(OutputStream::one(final_df))
|
// This is the expensive part of the groupby; to create the
|
||||||
|
// groups that will be used for grouping the data in the
|
||||||
|
// dataframe. Once it has been done these values can be stored
|
||||||
|
// in the NuGroupBy
|
||||||
|
let groupby = df.groupby(&columns_string).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Groupby error", format!("{}", e), col_span)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let groups = groupby.get_groups().to_vec();
|
||||||
|
let groupby = Value {
|
||||||
|
tag: value.tag,
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
|
||||||
|
NuDataFrame::new_with_name(df, nu_df.name),
|
||||||
|
columns_string,
|
||||||
|
groups,
|
||||||
|
))),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(groupby))
|
||||||
} else {
|
} else {
|
||||||
Err(ShellError::labeled_error(
|
Err(ShellError::labeled_error(
|
||||||
"No dataframe in stream",
|
"No dataframe in stream",
|
||||||
@ -229,34 +92,3 @@ fn groupby(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn perform_aggregation(
|
|
||||||
groupby: GroupBy,
|
|
||||||
operation: Operation,
|
|
||||||
operation_tag: &Tag,
|
|
||||||
agg_span: &Span,
|
|
||||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
|
||||||
match operation {
|
|
||||||
Operation::Mean => groupby.mean(),
|
|
||||||
Operation::Sum => groupby.sum(),
|
|
||||||
Operation::Min => groupby.min(),
|
|
||||||
Operation::Max => groupby.max(),
|
|
||||||
Operation::First => groupby.first(),
|
|
||||||
Operation::Last => groupby.last(),
|
|
||||||
Operation::Nunique => groupby.n_unique(),
|
|
||||||
Operation::Quantile(quantile) => groupby.quantile(quantile),
|
|
||||||
Operation::Median => groupby.median(),
|
|
||||||
//Operation::Var => groupby.var(),
|
|
||||||
//Operation::Std => groupby.std(),
|
|
||||||
Operation::Count => groupby.count(),
|
|
||||||
}
|
|
||||||
.map_err(|e| {
|
|
||||||
let span = if e.to_string().contains("Not found") {
|
|
||||||
agg_span
|
|
||||||
} else {
|
|
||||||
&operation_tag.span
|
|
||||||
};
|
|
||||||
|
|
||||||
ShellError::labeled_error("Aggregation error", format!("{}", e), span)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
205
crates/nu-command/src/commands/dataframe/join.rs
Normal file
205
crates/nu-command/src/commands/dataframe/join.rs
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::utils::convert_columns;
|
||||||
|
|
||||||
|
use polars::prelude::JoinType;
|
||||||
|
|
||||||
|
use nu_source::Tagged;
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls join"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Joins a dataframe using columns as reference"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls join")
|
||||||
|
.required("dataframe", SyntaxShape::Any, "right dataframe to join")
|
||||||
|
.required(
|
||||||
|
"l_columns",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"left column names to perform join",
|
||||||
|
)
|
||||||
|
.required(
|
||||||
|
"r_columns",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"right column names to perform join",
|
||||||
|
)
|
||||||
|
.named(
|
||||||
|
"type",
|
||||||
|
SyntaxShape::String,
|
||||||
|
"type of join. Inner by default",
|
||||||
|
Some('t'),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
join(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![
|
||||||
|
Example {
|
||||||
|
description: "inner join dataframe",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls join $right [a] [a]",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "right join dataframe",
|
||||||
|
example:
|
||||||
|
"echo [[a b]; [1 2] [3 4] [5 6]] | pls convert | pls join $right [b] [b] -t right",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn join(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let r_df: Value = args.req(0)?;
|
||||||
|
let l_col: Vec<Value> = args.req(1)?;
|
||||||
|
let r_col: Vec<Value> = args.req(2)?;
|
||||||
|
let join_type_op: Option<Tagged<String>> = args.get_flag("type")?;
|
||||||
|
|
||||||
|
let join_type = match join_type_op {
|
||||||
|
None => JoinType::Inner,
|
||||||
|
Some(val) => match val.item.as_ref() {
|
||||||
|
"inner" => JoinType::Inner,
|
||||||
|
"outer" => JoinType::Outer,
|
||||||
|
"left" => JoinType::Left,
|
||||||
|
_ => {
|
||||||
|
return Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"Incorrect join type",
|
||||||
|
"Invalid join type",
|
||||||
|
&val.tag,
|
||||||
|
"Perhaps you mean: inner, outer or left",
|
||||||
|
&val.tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
|
||||||
|
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(ref df),
|
||||||
|
..
|
||||||
|
})) = value.value
|
||||||
|
{
|
||||||
|
let res = match r_df.value {
|
||||||
|
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(r_df),
|
||||||
|
..
|
||||||
|
})) => {
|
||||||
|
// Checking the column types before performing the join
|
||||||
|
check_column_datatypes(
|
||||||
|
df,
|
||||||
|
&l_col_string,
|
||||||
|
&l_col_span,
|
||||||
|
&r_col_string,
|
||||||
|
&r_col_span,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
df.join(&r_df, &l_col_string, &r_col_string, join_type)
|
||||||
|
.map_err(|e| {
|
||||||
|
ShellError::labeled_error(
|
||||||
|
"Join error",
|
||||||
|
format!("{}", e),
|
||||||
|
&l_col_span,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => Err(ShellError::labeled_error(
|
||||||
|
"Not a dataframe",
|
||||||
|
"not a dataframe type value",
|
||||||
|
&r_df.tag,
|
||||||
|
)),
|
||||||
|
}?;
|
||||||
|
|
||||||
|
let value = Value {
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||||
|
res,
|
||||||
|
))),
|
||||||
|
tag: tag.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(value))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_column_datatypes<T: AsRef<str>>(
|
||||||
|
df: &polars::prelude::DataFrame,
|
||||||
|
l_cols: &[T],
|
||||||
|
l_col_span: &Span,
|
||||||
|
r_cols: &[T],
|
||||||
|
r_col_span: &Span,
|
||||||
|
) -> Result<(), ShellError> {
|
||||||
|
if l_cols.len() != r_cols.len() {
|
||||||
|
return Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"Mismatched number of column names",
|
||||||
|
format!(
|
||||||
|
"found {} left names vs {} right names",
|
||||||
|
l_cols.len(),
|
||||||
|
r_cols.len()
|
||||||
|
),
|
||||||
|
l_col_span,
|
||||||
|
"perhaps you need to change the number of columns to join",
|
||||||
|
r_col_span,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (l, r) in l_cols.iter().zip(r_cols.iter()) {
|
||||||
|
let l_series = df
|
||||||
|
.column(l.as_ref())
|
||||||
|
.map_err(|e| ShellError::labeled_error("Join error", format!("{}", e), l_col_span))?;
|
||||||
|
|
||||||
|
let r_series = df
|
||||||
|
.column(r.as_ref())
|
||||||
|
.map_err(|e| ShellError::labeled_error("Join error", format!("{}", e), r_col_span))?;
|
||||||
|
|
||||||
|
if l_series.dtype() != r_series.dtype() {
|
||||||
|
return Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"Mismatched datatypes",
|
||||||
|
format!(
|
||||||
|
"left column type '{}' doesn't match '{}' right column match",
|
||||||
|
l_series.dtype(),
|
||||||
|
r_series.dtype()
|
||||||
|
),
|
||||||
|
l_col_span,
|
||||||
|
"perhaps you need to select other column to match",
|
||||||
|
r_col_span,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -1,13 +1,16 @@
|
|||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use nu_engine::WholeStreamCommand;
|
use nu_engine::WholeStreamCommand;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, TaggedDictBuilder, UntaggedValue,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct DataFrame;
|
pub struct DataFrame;
|
||||||
|
|
||||||
impl WholeStreamCommand for DataFrame {
|
impl WholeStreamCommand for DataFrame {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe list"
|
"pls list"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
@ -15,38 +18,46 @@ impl WholeStreamCommand for DataFrame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("dataframe list")
|
Signature::build("pls list")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
let args = args.evaluate_once()?;
|
let args = args.evaluate_once()?;
|
||||||
|
|
||||||
let mut dataframes: Vec<Value> = Vec::new();
|
let values = args
|
||||||
for (name, value) in args.context.scope.get_vars() {
|
.context
|
||||||
if let UntaggedValue::DataFrame(df) = value.value {
|
.scope
|
||||||
let mut data = TaggedDictBuilder::new(value.tag);
|
.get_vars()
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|(name, value)| {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(df),
|
||||||
|
name: file_name,
|
||||||
|
})) = &value.value
|
||||||
|
{
|
||||||
|
let mut data = TaggedDictBuilder::new(value.tag.clone());
|
||||||
|
|
||||||
let polars_df = df.dataframe.unwrap();
|
let rows = df.height();
|
||||||
|
let cols = df.width();
|
||||||
|
|
||||||
let rows = polars_df.height();
|
data.insert_value("name", name.as_ref());
|
||||||
let cols = polars_df.width();
|
data.insert_value("file", file_name.as_ref());
|
||||||
|
data.insert_value("rows", format!("{}", rows));
|
||||||
|
data.insert_value("columns", format!("{}", cols));
|
||||||
|
|
||||||
data.insert_value("name", name);
|
Some(data.into_value())
|
||||||
data.insert_value("file", df.name);
|
} else {
|
||||||
data.insert_value("rows", format!("{}", rows));
|
None
|
||||||
data.insert_value("columns", format!("{}", cols));
|
}
|
||||||
|
});
|
||||||
|
|
||||||
dataframes.push(data.into_value());
|
Ok(OutputStream::from_stream(values))
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(OutputStream::from_stream(dataframes.into_iter()))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![Example {
|
vec![Example {
|
||||||
description: "Lists loaded dataframes in current scope",
|
description: "Lists loaded dataframes in current scope",
|
||||||
example: "dataframe list",
|
example: "pls list",
|
||||||
result: None,
|
result: None,
|
||||||
}]
|
}]
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,8 @@ use crate::prelude::*;
|
|||||||
use nu_engine::{EvaluatedCommandArgs, WholeStreamCommand};
|
use nu_engine::{EvaluatedCommandArgs, WholeStreamCommand};
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
};
|
};
|
||||||
|
|
||||||
use nu_source::Tagged;
|
use nu_source::Tagged;
|
||||||
@ -15,7 +16,7 @@ pub struct DataFrame;
|
|||||||
|
|
||||||
impl WholeStreamCommand for DataFrame {
|
impl WholeStreamCommand for DataFrame {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe load"
|
"pls load"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
@ -23,7 +24,7 @@ impl WholeStreamCommand for DataFrame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::build("dataframe load")
|
Signature::build("pls load")
|
||||||
.required(
|
.required(
|
||||||
"file",
|
"file",
|
||||||
SyntaxShape::FilePath,
|
SyntaxShape::FilePath,
|
||||||
@ -67,7 +68,7 @@ impl WholeStreamCommand for DataFrame {
|
|||||||
fn examples(&self) -> Vec<Example> {
|
fn examples(&self) -> Vec<Example> {
|
||||||
vec![Example {
|
vec![Example {
|
||||||
description: "Takes a file name and creates a dataframe",
|
description: "Takes a file name and creates a dataframe",
|
||||||
example: "dataframe load test.csv",
|
example: "pls load test.csv",
|
||||||
result: None,
|
result: None,
|
||||||
}]
|
}]
|
||||||
}
|
}
|
||||||
@ -85,7 +86,7 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
Some("json") => from_json(args),
|
Some("json") => from_json(args),
|
||||||
_ => Err(ShellError::labeled_error(
|
_ => Err(ShellError::labeled_error(
|
||||||
"Error with file",
|
"Error with file",
|
||||||
"Not a csv or parquet file",
|
"Not a csv, parquet or json file",
|
||||||
&file.tag,
|
&file.tag,
|
||||||
)),
|
)),
|
||||||
},
|
},
|
||||||
@ -107,12 +108,12 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let nu_dataframe = NuDataFrame {
|
let init = InputStream::one(
|
||||||
dataframe: Some(df),
|
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new_with_name(
|
||||||
name: file_name,
|
df, file_name,
|
||||||
};
|
)))
|
||||||
|
.into_value(&tag),
|
||||||
let init = InputStream::one(UntaggedValue::DataFrame(nu_dataframe).into_value(&tag));
|
);
|
||||||
|
|
||||||
Ok(init.to_output_stream())
|
Ok(init.to_output_stream())
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,26 @@
|
|||||||
|
pub mod aggregate;
|
||||||
pub mod command;
|
pub mod command;
|
||||||
|
pub mod convert;
|
||||||
|
pub mod drop;
|
||||||
|
pub mod dtypes;
|
||||||
pub mod groupby;
|
pub mod groupby;
|
||||||
|
pub mod join;
|
||||||
pub mod list;
|
pub mod list;
|
||||||
pub mod load;
|
pub mod load;
|
||||||
|
pub mod sample;
|
||||||
|
pub mod select;
|
||||||
|
pub mod show;
|
||||||
|
pub(crate) mod utils;
|
||||||
|
|
||||||
|
pub use aggregate::DataFrame as DataFrameAggregate;
|
||||||
pub use command::Command as DataFrame;
|
pub use command::Command as DataFrame;
|
||||||
|
pub use convert::DataFrame as DataFrameConvert;
|
||||||
|
pub use drop::DataFrame as DataFrameDrop;
|
||||||
|
pub use dtypes::DataFrame as DataFrameDTypes;
|
||||||
pub use groupby::DataFrame as DataFrameGroupBy;
|
pub use groupby::DataFrame as DataFrameGroupBy;
|
||||||
|
pub use join::DataFrame as DataFrameJoin;
|
||||||
pub use list::DataFrame as DataFrameList;
|
pub use list::DataFrame as DataFrameList;
|
||||||
pub use load::DataFrame as DataFrameLoad;
|
pub use load::DataFrame as DataFrameLoad;
|
||||||
|
pub use sample::DataFrame as DataFrameSample;
|
||||||
|
pub use select::DataFrame as DataFrameSelect;
|
||||||
|
pub use show::DataFrame as DataFrameShow;
|
||||||
|
117
crates/nu-command/src/commands/dataframe/sample.rs
Normal file
117
crates/nu-command/src/commands/dataframe/sample.rs
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
use nu_source::Tagged;
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls sample"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Create sample dataframe"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls load")
|
||||||
|
.named(
|
||||||
|
"n_rows",
|
||||||
|
SyntaxShape::Number,
|
||||||
|
"number of rows to be taken from dataframe",
|
||||||
|
Some('n'),
|
||||||
|
)
|
||||||
|
.named(
|
||||||
|
"fraction",
|
||||||
|
SyntaxShape::Number,
|
||||||
|
"fraction of dataframe to be taken",
|
||||||
|
Some('f'),
|
||||||
|
)
|
||||||
|
.switch("replace", "sample with replace", Some('e'))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
sample(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![
|
||||||
|
Example {
|
||||||
|
description: "Sample rows from dataframe",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls load | pls sample -r 1",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Shows sample row using fraction and replace",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4] [5 6]] | pls load | pls sample -f 0.5 -e",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sample(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let rows: Option<Tagged<usize>> = args.get_flag("n_rows")?;
|
||||||
|
let fraction: Option<Tagged<f64>> = args.get_flag("fraction")?;
|
||||||
|
let replace: bool = args.has_flag("replace");
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(ref df),
|
||||||
|
..
|
||||||
|
})) = value.value
|
||||||
|
{
|
||||||
|
let res = match (rows, fraction) {
|
||||||
|
(Some(rows), None) => df.sample_n(rows.item, replace).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Polars error", format!("{}", e), &rows.tag)
|
||||||
|
}),
|
||||||
|
(None, Some(frac)) => df.sample_frac(frac.item, replace).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Polars error", format!("{}", e), &frac.tag)
|
||||||
|
}),
|
||||||
|
(Some(_), Some(_)) => Err(ShellError::labeled_error(
|
||||||
|
"Incompatible flags",
|
||||||
|
"Only one selection criterion allowed",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
(None, None) => Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"No selection",
|
||||||
|
"No selection criterion was found",
|
||||||
|
&tag,
|
||||||
|
"Perhaps you want to use the flag -n or -f",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
}?;
|
||||||
|
|
||||||
|
let value = Value {
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||||
|
res,
|
||||||
|
))),
|
||||||
|
tag: tag.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(value))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
84
crates/nu-command/src/commands/dataframe/select.rs
Normal file
84
crates/nu-command/src/commands/dataframe/select.rs
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::{NuDataFrame, PolarsData},
|
||||||
|
Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::utils::convert_columns;
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls select"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Creates a new dataframe with the selected columns"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls select").required(
|
||||||
|
"columns",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"selected column names",
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
select(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "Create new dataframe with column a",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls select [a]",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn select(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let columns: Vec<Value> = args.req(0)?;
|
||||||
|
|
||||||
|
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(ref df),
|
||||||
|
..
|
||||||
|
})) = value.value
|
||||||
|
{
|
||||||
|
let res = df.select(&col_string).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Drop error", format!("{}", e), &col_span)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let value = Value {
|
||||||
|
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||||
|
res,
|
||||||
|
))),
|
||||||
|
tag: tag.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(value))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
78
crates/nu-command/src/commands/dataframe/show.rs
Normal file
78
crates/nu-command/src/commands/dataframe/show.rs
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{dataframe::PolarsData, Signature, SyntaxShape, UntaggedValue};
|
||||||
|
|
||||||
|
use nu_source::Tagged;
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"pls show"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Show dataframe"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("pls show")
|
||||||
|
.named(
|
||||||
|
"n_rows",
|
||||||
|
SyntaxShape::Number,
|
||||||
|
"number of rows to be shown",
|
||||||
|
Some('n'),
|
||||||
|
)
|
||||||
|
.switch("tail", "shows tail rows", Some('t'))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
show(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![
|
||||||
|
Example {
|
||||||
|
description: "Shows head rows from dataframe",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls show",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Shows tail rows from dataframe",
|
||||||
|
example: "echo [[a b]; [1 2] [3 4] [5 6]] | pls convert | pls show -t -n 1",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn show(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let rows: Option<Tagged<usize>> = args.get_flag("rows")?;
|
||||||
|
let tail: bool = args.has_flag("tail");
|
||||||
|
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||||
|
let rows = rows.map(|v| v.item);
|
||||||
|
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
|
||||||
|
|
||||||
|
Ok(OutputStream::from_stream(values.into_iter()))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
42
crates/nu-command/src/commands/dataframe/utils.rs
Normal file
42
crates/nu-command/src/commands/dataframe/utils.rs
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
use crate::prelude::*;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{Primitive, UntaggedValue, Value};
|
||||||
|
|
||||||
|
// Converts a Vec<Value> to a Vec<String> with a Span marking the whole
|
||||||
|
// location of the columns for error referencing
|
||||||
|
pub(crate) fn convert_columns<'columns>(
|
||||||
|
columns: &'columns [Value],
|
||||||
|
tag: &Tag,
|
||||||
|
) -> Result<(Vec<String>, Span), ShellError> {
|
||||||
|
let mut col_span = match columns
|
||||||
|
.iter()
|
||||||
|
.nth(0)
|
||||||
|
.map(|v| Span::new(v.tag.span.start(), v.tag.span.end()))
|
||||||
|
{
|
||||||
|
Some(span) => span,
|
||||||
|
None => {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"Empty column list",
|
||||||
|
"Empty list found for command",
|
||||||
|
tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let res = columns
|
||||||
|
.iter()
|
||||||
|
.map(|value| match &value.value {
|
||||||
|
UntaggedValue::Primitive(Primitive::String(s)) => {
|
||||||
|
col_span = col_span.until(value.tag.span);
|
||||||
|
Ok(s.clone())
|
||||||
|
}
|
||||||
|
_ => Err(ShellError::labeled_error(
|
||||||
|
"Incorrect column format",
|
||||||
|
"Only string as column name",
|
||||||
|
&value.tag,
|
||||||
|
)),
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<String>, _>>()?;
|
||||||
|
|
||||||
|
Ok((res, col_span))
|
||||||
|
}
|
@ -253,14 +253,31 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
|||||||
whole_stream_command(Seq),
|
whole_stream_command(Seq),
|
||||||
whole_stream_command(SeqDates),
|
whole_stream_command(SeqDates),
|
||||||
whole_stream_command(TermSize),
|
whole_stream_command(TermSize),
|
||||||
|
//Dataframe commands
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataFrame),
|
whole_stream_command(DataFrame),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameConvert),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataFrameLoad),
|
whole_stream_command(DataFrameLoad),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataFrameList),
|
whole_stream_command(DataFrameList),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataFrameGroupBy),
|
whole_stream_command(DataFrameGroupBy),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameAggregate),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameShow),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameSample),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameJoin),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameDrop),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameSelect),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameDTypes),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
#[cfg(feature = "clipboard-cli")]
|
#[cfg(feature = "clipboard-cli")]
|
||||||
|
@ -117,7 +117,7 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
|||||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::DataFrame(_) => toml::Value::String("<Data>".to_string()),
|
UntaggedValue::DataFrame(_) => toml::Value::String("<DataFrame>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||||
|
@ -25,7 +25,9 @@ num-traits = "0.2.14"
|
|||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_bytes = "0.11.5"
|
serde_bytes = "0.11.5"
|
||||||
|
|
||||||
polars = {version="0.13.3", optional = true}
|
[dependencies.polars]
|
||||||
|
version = "0.13.4"
|
||||||
|
optional = true
|
||||||
|
|
||||||
# implement conversions
|
# implement conversions
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
@ -1,3 +1,12 @@
|
|||||||
pub mod nu_dataframe;
|
pub mod nu_dataframe;
|
||||||
|
pub mod nu_groupby;
|
||||||
|
|
||||||
pub use nu_dataframe::NuDataFrame;
|
pub use nu_dataframe::NuDataFrame;
|
||||||
|
pub use nu_groupby::NuGroupBy;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||||
|
pub enum PolarsData {
|
||||||
|
EagerDataFrame(NuDataFrame),
|
||||||
|
GroupBy(NuGroupBy),
|
||||||
|
}
|
||||||
|
@ -59,8 +59,18 @@ impl Default for NuDataFrame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl NuDataFrame {
|
impl NuDataFrame {
|
||||||
fn new() -> Self {
|
pub fn new(df: polars::prelude::DataFrame) -> Self {
|
||||||
Self::default()
|
NuDataFrame {
|
||||||
|
dataframe: Some(df),
|
||||||
|
name: String::from("dataframe"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_with_name(df: polars::prelude::DataFrame, name: String) -> Self {
|
||||||
|
NuDataFrame {
|
||||||
|
dataframe: Some(df),
|
||||||
|
name,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,7 +113,7 @@ impl<'de> Deserialize<'de> for NuDataFrame {
|
|||||||
where
|
where
|
||||||
D: Deserializer<'de>,
|
D: Deserializer<'de>,
|
||||||
{
|
{
|
||||||
deserializer.deserialize_i32(NuDataFrame::new())
|
deserializer.deserialize_i32(NuDataFrame::default())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,22 +147,23 @@ impl NuDataFrame {
|
|||||||
// Print is made out a head and if the dataframe is too large, then a tail
|
// Print is made out a head and if the dataframe is too large, then a tail
|
||||||
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||||
if let Some(df) = &self.dataframe {
|
if let Some(df) = &self.dataframe {
|
||||||
let size: usize = 5;
|
let size: usize = 20;
|
||||||
let mut values = self.head(Some(size))?;
|
|
||||||
|
|
||||||
if df.height() > size {
|
if df.height() > size {
|
||||||
|
let sample_size = size / 2;
|
||||||
|
let mut values = self.head(Some(sample_size))?;
|
||||||
add_separator(&mut values, df);
|
add_separator(&mut values, df);
|
||||||
|
let remaining = df.height() - sample_size;
|
||||||
let remaining = df.height() - size;
|
let tail_size = remaining.min(sample_size);
|
||||||
let tail_size = remaining.min(size);
|
|
||||||
let mut tail_values = self.tail(Some(tail_size))?;
|
let mut tail_values = self.tail(Some(tail_size))?;
|
||||||
|
|
||||||
values.append(&mut tail_values);
|
values.append(&mut tail_values);
|
||||||
}
|
|
||||||
|
|
||||||
Ok(values)
|
Ok(values)
|
||||||
|
} else {
|
||||||
|
Ok(self.head(Some(size))?)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
unreachable!()
|
unreachable!("No dataframe found in print command")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
54
crates/nu-protocol/src/dataframe/nu_groupby.rs
Normal file
54
crates/nu-protocol/src/dataframe/nu_groupby.rs
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
use nu_source::Tag;
|
||||||
|
use polars::frame::groupby::{GroupBy, GroupTuples};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use super::NuDataFrame;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
|
||||||
|
use crate::{TaggedDictBuilder, Value};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||||
|
pub struct NuGroupBy {
|
||||||
|
dataframe: NuDataFrame,
|
||||||
|
by: Vec<String>,
|
||||||
|
groups: GroupTuples,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NuGroupBy {
|
||||||
|
pub fn new(dataframe: NuDataFrame, by: Vec<String>, groups: GroupTuples) -> Self {
|
||||||
|
NuGroupBy {
|
||||||
|
dataframe,
|
||||||
|
by,
|
||||||
|
groups,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_groupby(&self) -> Result<GroupBy, ShellError> {
|
||||||
|
let df = match &self.dataframe.dataframe {
|
||||||
|
Some(df) => df,
|
||||||
|
None => unreachable!("No dataframe in nu_dataframe"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let by = df.select_series(&self.by).map_err(|e| {
|
||||||
|
ShellError::labeled_error("Error creating groupby", format!("{}", e), Tag::unknown())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(GroupBy::new(df, by, self.groups.clone(), None))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print(&self) -> Result<Vec<Value>, ShellError> {
|
||||||
|
let mut values: Vec<Value> = Vec::new();
|
||||||
|
|
||||||
|
let mut data = TaggedDictBuilder::new(Tag::unknown());
|
||||||
|
data.insert_value("property", "dataframe");
|
||||||
|
data.insert_value("value", self.dataframe.name.as_ref());
|
||||||
|
values.push(data.into_value());
|
||||||
|
|
||||||
|
let mut data = TaggedDictBuilder::new(Tag::unknown());
|
||||||
|
data.insert_value("property", "group by");
|
||||||
|
data.insert_value("value", self.by.join(", "));
|
||||||
|
values.push(data.into_value());
|
||||||
|
|
||||||
|
Ok(values)
|
||||||
|
}
|
||||||
|
}
|
@ -31,7 +31,7 @@ use std::path::PathBuf;
|
|||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
|
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
use crate::dataframe::NuDataFrame;
|
use crate::dataframe::PolarsData;
|
||||||
|
|
||||||
/// The core structured values that flow through a pipeline
|
/// The core structured values that flow through a pipeline
|
||||||
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
|
||||||
@ -54,7 +54,7 @@ pub enum UntaggedValue {
|
|||||||
/// Data option that holds the polars structs required to to data
|
/// Data option that holds the polars structs required to to data
|
||||||
/// manipulation and operations using polars dataframes
|
/// manipulation and operations using polars dataframes
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
DataFrame(NuDataFrame),
|
DataFrame(PolarsData),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UntaggedValue {
|
impl UntaggedValue {
|
||||||
|
@ -364,8 +364,8 @@ macro_rules! from_native_to_primitive {
|
|||||||
($native_type:ty, $primitive_type:expr, $converter: expr) => {
|
($native_type:ty, $primitive_type:expr, $converter: expr) => {
|
||||||
// e.g. from u32 -> Primitive
|
// e.g. from u32 -> Primitive
|
||||||
impl From<$native_type> for Primitive {
|
impl From<$native_type> for Primitive {
|
||||||
fn from(int: $native_type) -> Primitive {
|
fn from(value: $native_type) -> Primitive {
|
||||||
if let Some(i) = $converter(int) {
|
if let Some(i) = $converter(value) {
|
||||||
$primitive_type(i)
|
$primitive_type(i)
|
||||||
} else {
|
} else {
|
||||||
unreachable!("Internal error: protocol did not use compatible decimal")
|
unreachable!("Internal error: protocol did not use compatible decimal")
|
||||||
|
Loading…
Reference in New Issue
Block a user