mirror of
https://github.com/nushell/nushell.git
synced 2025-06-30 06:30:08 +02:00
Series Operation (#3563)
* Sample command * Join command with checks * More dataframes commands * Groupby and aggregate commands * Missing feature dataframe flag * Renamed file * New commands for dataframes * error parser and df reference * filter command for dataframes * removed name from nu_dataframe * commands to save to parquet and csv * polars new version * new dataframe commands * series type and print * Series basic arithmetics * Add new column to dataframe * Command names changed to nushell standard
This commit is contained in:
@ -100,7 +100,9 @@ which = { version = "4.1.0", optional = true }
|
||||
zip = { version = "0.5.9", optional = true }
|
||||
|
||||
[dependencies.polars]
|
||||
version = "0.13.4"
|
||||
git = "https://github.com/pola-rs/polars"
|
||||
rev = "a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
|
||||
version = "0.14.0"
|
||||
optional = true
|
||||
features = ["parquet", "json", "random", "pivot"]
|
||||
|
||||
|
@ -191,10 +191,12 @@ pub(crate) use all::Command as All;
|
||||
pub(crate) use any::Command as Any;
|
||||
#[cfg(feature = "dataframe")]
|
||||
pub(crate) use dataframe::{
|
||||
DataFrame, DataFrameAggregate, DataFrameConvert, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrameDummies, DataFrameGroupBy, DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad,
|
||||
DataFrameMelt, DataFramePivot, DataFrameSample, DataFrameSelect, DataFrameShow, DataFrameSlice,
|
||||
DataFrameTail, DataFrameToCsv, DataFrameToParquet, DataFrameWhere,
|
||||
DataFrame, DataFrameAggregate, DataFrameColumn, DataFrameDTypes, DataFrameDrop,
|
||||
DataFrameDropDuplicates, DataFrameDropNulls, DataFrameDummies, DataFrameGet, DataFrameGroupBy,
|
||||
DataFrameHead, DataFrameJoin, DataFrameList, DataFrameLoad, DataFrameMelt, DataFramePivot,
|
||||
DataFrameSample, DataFrameSelect, DataFrameShow, DataFrameSlice, DataFrameSort, DataFrameTail,
|
||||
DataFrameToCsv, DataFrameToDF, DataFrameToParquet, DataFrameToSeries, DataFrameWhere,
|
||||
DataFrameWithColumn,
|
||||
};
|
||||
pub(crate) use enter::Enter;
|
||||
pub(crate) use every::Every;
|
||||
|
@ -265,6 +265,20 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let _ = result.collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "dataframe")]
|
||||
Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::Series(series)),
|
||||
tag,
|
||||
} => {
|
||||
if let Some(table) = table {
|
||||
// TODO. Configure the parameter rows from file. It can be
|
||||
// adjusted to see a certain amount of values in the head
|
||||
let command_args =
|
||||
create_default_command_args(&context, series.print()?.into(), tag);
|
||||
let result = table.run(command_args)?;
|
||||
let _ = result.collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
Value {
|
||||
value: UntaggedValue::Primitive(Primitive::Nothing),
|
||||
..
|
||||
|
@ -66,7 +66,7 @@ impl Operation {
|
||||
"Operation not fount",
|
||||
"Operation does not exist",
|
||||
&name.tag,
|
||||
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, count",
|
||||
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, var, std, or count",
|
||||
&name.tag,
|
||||
)),
|
||||
}
|
||||
@ -81,7 +81,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Performs an aggregation operation on a groupby object"
|
||||
"Performs an aggregation operation on a dataframe or groupby object"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@ -105,11 +105,19 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Aggregate sum by grouping by column a and summing on col b",
|
||||
example: "[[a b]; [one 1] [one 2]] | pls convert | pls groupby [a] | pls aggregate sum",
|
||||
result: None,
|
||||
}]
|
||||
vec![
|
||||
Example {
|
||||
description: "Aggregate sum by grouping by column a and summing on col b",
|
||||
example:
|
||||
"[[a b]; [one 1] [one 2]] | pls to-df | pls groupby [a] | pls aggregate sum",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Aggregate sum in dataframe columns",
|
||||
example: "[[a b]; [4 1] [5 2]] | pls to-df | pls aggregate sum",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,45 +139,48 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => (None, Span::unknown()),
|
||||
};
|
||||
|
||||
// The operation is only done in one groupby. Only one input is
|
||||
// expected from the InputStream
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing groupby input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) = value.value {
|
||||
let groupby = nu_groupby.to_groupby()?;
|
||||
let value = args.input.next().ok_or(ShellError::labeled_error(
|
||||
"Empty stream",
|
||||
"No value found in the stream",
|
||||
&tag,
|
||||
))?;
|
||||
|
||||
let groupby = match &selection {
|
||||
Some(cols) => groupby.select(cols),
|
||||
None => groupby,
|
||||
};
|
||||
let res = match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) => {
|
||||
let groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let res = perform_aggregation(groupby, op, &operation.tag, &agg_span)?;
|
||||
let groupby = match &selection {
|
||||
Some(cols) => groupby.select(cols),
|
||||
None => groupby,
|
||||
};
|
||||
|
||||
let final_df = Value {
|
||||
tag,
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
};
|
||||
perform_groupby_aggregation(groupby, op, &operation.tag, &agg_span)
|
||||
}
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => {
|
||||
let df = df.as_ref();
|
||||
|
||||
Ok(OutputStream::one(final_df))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No groupby in stream",
|
||||
"no groupby found in input stream",
|
||||
&tag,
|
||||
))
|
||||
match &selection {
|
||||
Some(cols) => {
|
||||
let df = df
|
||||
.select(cols)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &agg_span, None))?;
|
||||
|
||||
perform_dataframe_aggregation(&df, op, &operation.tag)
|
||||
}
|
||||
None => perform_dataframe_aggregation(&df, op, &operation.tag),
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"No groupby or dataframe",
|
||||
"no groupby or found in input stream",
|
||||
&value.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
fn perform_aggregation(
|
||||
fn perform_groupby_aggregation(
|
||||
groupby: GroupBy,
|
||||
operation: Operation,
|
||||
operation_tag: &Tag,
|
||||
@ -198,3 +209,29 @@ fn perform_aggregation(
|
||||
parse_polars_error::<&str>(&e, span, None)
|
||||
})
|
||||
}
|
||||
|
||||
fn perform_dataframe_aggregation(
|
||||
dataframe: &polars::prelude::DataFrame,
|
||||
operation: Operation,
|
||||
operation_tag: &Tag,
|
||||
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
match operation {
|
||||
Operation::Mean => Ok(dataframe.mean()),
|
||||
Operation::Sum => Ok(dataframe.sum()),
|
||||
Operation::Min => Ok(dataframe.min()),
|
||||
Operation::Max => Ok(dataframe.max()),
|
||||
Operation::Quantile(quantile) => dataframe
|
||||
.quantile(quantile)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &operation_tag.span, None)),
|
||||
Operation::Median => Ok(dataframe.median()),
|
||||
Operation::Var => Ok(dataframe.var()),
|
||||
Operation::Std => Ok(dataframe.std()),
|
||||
_ => Err(ShellError::labeled_error_with_secondary(
|
||||
"Not valid operation",
|
||||
"operation not valid for dataframe",
|
||||
&operation_tag.span,
|
||||
"Perhaps you want: mean, sum, min, max, quantile, median, var, or std",
|
||||
&operation_tag.span,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
56
crates/nu-command/src/commands/dataframe/column.rs
Normal file
56
crates/nu-command/src/commands/dataframe/column.rs
Normal file
@ -0,0 +1,56 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, NuSeries},
|
||||
Signature, SyntaxShape,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
|
||||
use super::utils::parse_polars_error;
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls column"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Returns the selected column as Series"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls column").required("column", SyntaxShape::String, "column name")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Returns the selected column as series",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls column a",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
let column: Tagged<String> = args.req(0)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.column(column.item.as_ref())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &column.tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuSeries::series_to_value(
|
||||
res.clone(),
|
||||
tag,
|
||||
)))
|
||||
}
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls drop [a]",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls drop [a]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -45,53 +42,29 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let mut args = args.evaluate_once()?;
|
||||
|
||||
let columns: Vec<Value> = args.req(0)?;
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
match args.input.next() {
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let new_df = match col_string.iter().next() {
|
||||
Some(col) => df
|
||||
.as_ref()
|
||||
.drop(col)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None)),
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
"Empty names list",
|
||||
"No column names where found",
|
||||
&col_span,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
// Dataframe with the first selected column
|
||||
let new_df = match col_string.iter().next() {
|
||||
Some(col) => df
|
||||
.as_ref()
|
||||
.drop(col)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None)),
|
||||
None => Err(ShellError::labeled_error(
|
||||
"Empty names list",
|
||||
"No column names where found",
|
||||
&col_span,
|
||||
)),
|
||||
}?;
|
||||
}?;
|
||||
|
||||
// If there are more columns in the drop selection list, these
|
||||
// are added from the resulting dataframe
|
||||
let res = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
|
||||
new_df
|
||||
.drop(col)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))
|
||||
})?;
|
||||
// If there are more columns in the drop selection list, these
|
||||
// are added from the resulting dataframe
|
||||
let res = col_string.iter().skip(1).try_fold(new_df, |new_df, col| {
|
||||
new_df
|
||||
.drop(col)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))
|
||||
})?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
66
crates/nu-command/src/commands/dataframe/drop_duplicates.rs
Normal file
66
crates/nu-command/src/commands/dataframe/drop_duplicates.rs
Normal file
@ -0,0 +1,66 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls drop-duplicates"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops duplicate values in dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls drop-duplicates")
|
||||
.optional(
|
||||
"subset",
|
||||
SyntaxShape::Table,
|
||||
"subset of columns to drop duplicates",
|
||||
)
|
||||
.switch("maintain", "maintain order", Some('m'))
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | pls to-df | pls drop-duplicates",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
|
||||
// Extracting the selection columns of the columns to perform the aggregation
|
||||
let columns: Option<Vec<Value>> = args.opt(0)?;
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns(&cols, &tag)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, Span::unknown()),
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.drop_duplicates(args.has_flag("maintain"), subset_slice)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
64
crates/nu-command/src/commands/dataframe/drop_nulls.rs
Normal file
64
crates/nu-command/src/commands/dataframe/drop_nulls.rs
Normal file
@ -0,0 +1,64 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls drop-nulls"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Drops null values in dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls drop-nulls").optional(
|
||||
"subset",
|
||||
SyntaxShape::Table,
|
||||
"subset of columns to drop duplicates",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop null values duplicates",
|
||||
example: "[[a b]; [1 2] [3 4] [1 2]] | pls to-df | pls drop-nulls",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
|
||||
// Extracting the selection columns of the columns to perform the aggregation
|
||||
let columns: Option<Vec<Value>> = args.opt(0)?;
|
||||
let (subset, col_span) = match columns {
|
||||
Some(cols) => {
|
||||
let (agg_string, col_span) = convert_columns(&cols, &tag)?;
|
||||
(Some(agg_string), col_span)
|
||||
}
|
||||
None => (None, Span::unknown()),
|
||||
};
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let subset_slice = subset.as_ref().map(|cols| &cols[..]);
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.drop_nulls(subset_slice)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::PolarsData, Signature, TaggedDictBuilder, UntaggedValue};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, TaggedDictBuilder};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
@ -25,7 +25,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "drop column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls dtypes",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls dtypes",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -35,42 +35,26 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let col_names = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let col_names = df
|
||||
.as_ref()
|
||||
.get_column_names()
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let values = df
|
||||
.as_ref()
|
||||
.dtypes()
|
||||
.into_iter()
|
||||
.zip(col_names.into_iter())
|
||||
.map(move |(dtype, name)| {
|
||||
let mut data = TaggedDictBuilder::new(tag.clone());
|
||||
data.insert_value("column", name.as_ref());
|
||||
data.insert_value("dtype", format!("{}", dtype));
|
||||
let values = df
|
||||
.as_ref()
|
||||
.dtypes()
|
||||
.into_iter()
|
||||
.zip(col_names.into_iter())
|
||||
.map(move |(dtype, name)| {
|
||||
let mut data = TaggedDictBuilder::new(tag.clone());
|
||||
data.insert_value("column", name.as_ref());
|
||||
data.insert_value("dtype", format!("{}", dtype));
|
||||
|
||||
data.into_value()
|
||||
});
|
||||
data.into_value()
|
||||
});
|
||||
|
||||
Ok(OutputStream::from_stream(values))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::from_stream(values))
|
||||
}
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature};
|
||||
|
||||
use super::utils::parse_polars_error;
|
||||
|
||||
@ -12,7 +9,7 @@ pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls to_dummies"
|
||||
"pls to-dummies"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -20,7 +17,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls select")
|
||||
Signature::build("pls to-dummies")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
@ -30,7 +27,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with dummy variables",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_dummies",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-dummies",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -40,37 +37,14 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = df.as_ref().to_dummies().map_err(|e| {
|
||||
parse_polars_error(
|
||||
&e,
|
||||
&tag.span,
|
||||
Some("The only allowed column types for dummies are String or Int"),
|
||||
)
|
||||
})?;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let res = df.as_ref().to_dummies().map_err(|e| {
|
||||
parse_polars_error(
|
||||
&e,
|
||||
&tag.span,
|
||||
Some("The only allowed column types for dummies are String or Int"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
54
crates/nu-command/src/commands/dataframe/get.rs
Normal file
54
crates/nu-command/src/commands/dataframe/get.rs
Normal file
@ -0,0 +1,54 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls get"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates dataframe with the selected columns"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls get").required(
|
||||
"columns",
|
||||
SyntaxShape::Table,
|
||||
"column names to sort dataframe",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Creates dataframe with selected columns",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls get [a]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
let columns: Vec<Value> = args.req(0)?;
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.select(&col_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
@ -34,7 +34,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Grouping by column a",
|
||||
example: "[[a b]; [one 1] [one 2]] | pls convert | pls groupby [a]",
|
||||
example: "[[a b]; [one 1] [one 2]] | pls to-df | pls groupby [a]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -48,43 +48,26 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let by_columns: Vec<Value> = args.req(0)?;
|
||||
let (columns_string, col_span) = convert_columns(&by_columns, &tag)?;
|
||||
|
||||
// The operation is only done in one dataframe. Only one input is
|
||||
// expected from the InputStream
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(nu_df)) = value.value {
|
||||
// This is the expensive part of the groupby; to create the
|
||||
// groups that will be used for grouping the data in the
|
||||
// dataframe. Once it has been done these values can be stored
|
||||
// in a NuGroupBy
|
||||
let groupby = nu_df
|
||||
.as_ref()
|
||||
.groupby(&columns_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let groups = groupby.get_groups().to_vec();
|
||||
let groupby = Value {
|
||||
tag: value.tag,
|
||||
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
|
||||
NuDataFrame::new(nu_df.as_ref().clone()),
|
||||
columns_string,
|
||||
groups,
|
||||
))),
|
||||
};
|
||||
// This is the expensive part of the groupby; to create the
|
||||
// groups that will be used for grouping the data in the
|
||||
// dataframe. Once it has been done these values can be stored
|
||||
// in a NuGroupBy
|
||||
let groupby = df
|
||||
.as_ref()
|
||||
.groupby(&columns_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(groupby))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
let groups = groupby.get_groups().to_vec();
|
||||
let groupby = Value {
|
||||
tag,
|
||||
value: UntaggedValue::DataFrame(PolarsData::GroupBy(NuGroupBy::new(
|
||||
NuDataFrame::new(df.as_ref().clone()),
|
||||
columns_string,
|
||||
groups,
|
||||
))),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(groupby))
|
||||
}
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
|
||||
use nu_source::Tagged;
|
||||
|
||||
@ -21,7 +18,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls select").optional(
|
||||
"n_rows",
|
||||
"rows",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows for head",
|
||||
)
|
||||
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with head rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls head",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls head",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -50,31 +47,8 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => 5,
|
||||
};
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let res = df.as_ref().head(Some(rows));
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
@ -52,13 +52,13 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "inner join dataframe",
|
||||
example: "echo [[a b]; [1 2] [3 4]] | pls convert | pls join $right [a] [a]",
|
||||
example: "echo [[a b]; [1 2] [3 4]] | pls to-df | pls join $right [a] [a]",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "right join dataframe",
|
||||
example:
|
||||
"[[a b]; [1 2] [3 4] [5 6]] | pls convert | pls join $right [b] [b] -t right",
|
||||
"[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls join $right [b] [b] -t right",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
@ -95,53 +95,31 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?;
|
||||
let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = match r_df.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
|
||||
// Checking the column types before performing the join
|
||||
check_column_datatypes(
|
||||
df.as_ref(),
|
||||
&l_col_string,
|
||||
&l_col_span,
|
||||
&r_col_string,
|
||||
&r_col_span,
|
||||
)?;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
df.as_ref()
|
||||
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Not a dataframe",
|
||||
"not a dataframe type value",
|
||||
&r_df.tag,
|
||||
)),
|
||||
}?;
|
||||
let res = match r_df.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(r_df)) => {
|
||||
// Checking the column types before performing the join
|
||||
check_column_datatypes(
|
||||
df.as_ref(),
|
||||
&l_col_string,
|
||||
&l_col_span,
|
||||
&r_col_string,
|
||||
&r_col_span,
|
||||
)?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
df.as_ref()
|
||||
.join(r_df.as_ref(), &l_col_string, &r_col_string, join_type)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None))
|
||||
}
|
||||
}
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Not a dataframe",
|
||||
"not a dataframe type value",
|
||||
&r_df.tag,
|
||||
)),
|
||||
}?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
|
@ -4,8 +4,7 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::{EvaluatedCommandArgs, WholeStreamCommand};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||
dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use nu_source::Tagged;
|
||||
@ -113,12 +112,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
span: tag.span,
|
||||
};
|
||||
|
||||
let tagged_value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(df))),
|
||||
tag: df_tag,
|
||||
};
|
||||
|
||||
Ok(InputStream::one(tagged_value).to_output_stream())
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
|
||||
df, df_tag,
|
||||
)))
|
||||
}
|
||||
|
||||
fn from_parquet(args: EvaluatedCommandArgs) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::convert_columns;
|
||||
|
||||
@ -20,7 +17,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls join")
|
||||
Signature::build("pls melt")
|
||||
.required("id_columns", SyntaxShape::Table, "Id columns for melting")
|
||||
.required(
|
||||
"value_columns",
|
||||
@ -36,7 +33,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "melt dataframe",
|
||||
example: "[[a b]; [a 2] [b 4] [a 6]] | pls convert | pls melt [a] [b]",
|
||||
example: "[[a b]; [a 2] [b 4] [a 6]] | pls to-df | pls melt [a] [b]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -52,39 +49,17 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let (id_col_string, id_col_span) = convert_columns(&id_col, &tag)?;
|
||||
let (val_col_string, val_col_span) = convert_columns(&val_col, &tag)?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
check_column_datatypes(df.as_ref(), &id_col_string, &id_col_span)?;
|
||||
check_column_datatypes(df.as_ref(), &val_col_string, &val_col_span)?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
let res = df
|
||||
.as_ref()
|
||||
.melt(&id_col_string, &val_col_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
fn check_column_datatypes<T: AsRef<str>>(
|
||||
|
@ -1,9 +1,12 @@
|
||||
pub mod aggregate;
|
||||
pub mod column;
|
||||
pub mod command;
|
||||
pub mod convert;
|
||||
pub mod drop;
|
||||
pub mod drop_duplicates;
|
||||
pub mod drop_nulls;
|
||||
pub mod dtypes;
|
||||
pub mod dummies;
|
||||
pub mod get;
|
||||
pub mod groupby;
|
||||
pub mod head;
|
||||
pub mod join;
|
||||
@ -15,18 +18,25 @@ pub mod sample;
|
||||
pub mod select;
|
||||
pub mod show;
|
||||
pub mod slice;
|
||||
pub mod sort;
|
||||
pub mod tail;
|
||||
pub mod to_csv;
|
||||
pub mod to_df;
|
||||
pub mod to_parquet;
|
||||
pub mod to_series;
|
||||
pub(crate) mod utils;
|
||||
pub mod where_;
|
||||
pub mod with_column;
|
||||
|
||||
pub use aggregate::DataFrame as DataFrameAggregate;
|
||||
pub use column::DataFrame as DataFrameColumn;
|
||||
pub use command::Command as DataFrame;
|
||||
pub use convert::DataFrame as DataFrameConvert;
|
||||
pub use drop::DataFrame as DataFrameDrop;
|
||||
pub use drop_duplicates::DataFrame as DataFrameDropDuplicates;
|
||||
pub use drop_nulls::DataFrame as DataFrameDropNulls;
|
||||
pub use dtypes::DataFrame as DataFrameDTypes;
|
||||
pub use dummies::DataFrame as DataFrameDummies;
|
||||
pub use get::DataFrame as DataFrameGet;
|
||||
pub use groupby::DataFrame as DataFrameGroupBy;
|
||||
pub use head::DataFrame as DataFrameHead;
|
||||
pub use join::DataFrame as DataFrameJoin;
|
||||
@ -38,7 +48,11 @@ pub use sample::DataFrame as DataFrameSample;
|
||||
pub use select::DataFrame as DataFrameSelect;
|
||||
pub use show::DataFrame as DataFrameShow;
|
||||
pub use slice::DataFrame as DataFrameSlice;
|
||||
pub use sort::DataFrame as DataFrameSort;
|
||||
pub use tail::DataFrame as DataFrameTail;
|
||||
pub use to_csv::DataFrame as DataFrameToCsv;
|
||||
pub use to_df::DataFrame as DataFrameToDF;
|
||||
pub use to_parquet::DataFrame as DataFrameToParquet;
|
||||
pub use to_series::DataFrame as DataFrameToSeries;
|
||||
pub use where_::DataFrame as DataFrameWhere;
|
||||
pub use with_column::DataFrame as DataFrameWithColumn;
|
||||
|
@ -2,8 +2,8 @@ use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
dataframe::{NuDataFrame, NuGroupBy},
|
||||
Signature, SyntaxShape,
|
||||
};
|
||||
use nu_source::Tagged;
|
||||
|
||||
@ -72,7 +72,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![Example {
|
||||
description: "Pivot a dataframe on b and aggregation on col c",
|
||||
example:
|
||||
"[[a b c]; [one x 1] [two y 2]] | pls convert | pls groupby [a] | pls pivot b c sum",
|
||||
"[[a b c]; [one x 1] [two y 2]] | pls to-df | pls groupby [a] | pls pivot b c sum",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -93,50 +93,27 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
// The operation is only done in one groupby. Only one input is
|
||||
// expected from the InputStream
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing groupby input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::GroupBy(nu_groupby)) = value.value {
|
||||
let df_ref = nu_groupby.as_ref();
|
||||
let nu_groupby = NuGroupBy::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let df_ref = nu_groupby.as_ref();
|
||||
|
||||
check_pivot_column(df_ref, &pivot_col)?;
|
||||
check_value_column(df_ref, &value_col)?;
|
||||
check_pivot_column(df_ref, &pivot_col)?;
|
||||
check_value_column(df_ref, &value_col)?;
|
||||
|
||||
let mut groupby = nu_groupby.to_groupby()?;
|
||||
let mut groupby = nu_groupby.to_groupby()?;
|
||||
|
||||
let pivot = groupby.pivot(pivot_col.item.as_ref(), value_col.item.as_ref());
|
||||
let pivot = groupby.pivot(pivot_col.item.as_ref(), value_col.item.as_ref());
|
||||
|
||||
let res = match op {
|
||||
Operation::Mean => pivot.mean(),
|
||||
Operation::Sum => pivot.sum(),
|
||||
Operation::Min => pivot.min(),
|
||||
Operation::Max => pivot.max(),
|
||||
Operation::First => pivot.first(),
|
||||
Operation::Median => pivot.median(),
|
||||
}
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
let final_df = Value {
|
||||
tag,
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(final_df))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No groupby in stream",
|
||||
"no groupby found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
let res = match op {
|
||||
Operation::Mean => pivot.mean(),
|
||||
Operation::Sum => pivot.sum(),
|
||||
Operation::Min => pivot.min(),
|
||||
Operation::Max => pivot.max(),
|
||||
Operation::First => pivot.first(),
|
||||
Operation::Median => pivot.median(),
|
||||
}
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
||||
fn check_pivot_column(
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::{commands::dataframe::utils::parse_polars_error, prelude::*};
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
|
||||
use nu_source::Tagged;
|
||||
|
||||
@ -44,12 +41,12 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Sample rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls load | pls sample -r 1",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls sample -r 1",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Shows sample row using fraction and replace",
|
||||
example: "[[a b]; [1 2] [3 4] [5 6]] | pls load | pls sample -f 0.5 -e",
|
||||
example: "[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls sample -f 0.5 -e",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
@ -64,52 +61,30 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let fraction: Option<Tagged<f64>> = args.get_flag("fraction")?;
|
||||
let replace: bool = args.has_flag("replace");
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
.as_ref()
|
||||
.sample_n(rows.item, replace)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &rows.tag.span, None)),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(frac.item, replace)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &frac.tag.span, None)),
|
||||
(Some(_), Some(_)) => Err(ShellError::labeled_error(
|
||||
"Incompatible flags",
|
||||
"Only one selection criterion allowed",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = match (rows, fraction) {
|
||||
(Some(rows), None) => df
|
||||
.as_ref()
|
||||
.sample_n(rows.item, replace)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &rows.tag.span, None)),
|
||||
(None, Some(frac)) => df
|
||||
.as_ref()
|
||||
.sample_frac(frac.item, replace)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &frac.tag.span, None)),
|
||||
(Some(_), Some(_)) => Err(ShellError::labeled_error(
|
||||
"Incompatible flags",
|
||||
"Only one selection criterion allowed",
|
||||
&tag,
|
||||
)),
|
||||
(None, None) => Err(ShellError::labeled_error_with_secondary(
|
||||
"No selection",
|
||||
"No selection criterion was found",
|
||||
&tag,
|
||||
"Perhaps you want to use the flag -n or -f",
|
||||
&tag,
|
||||
)),
|
||||
}?;
|
||||
(None, None) => Err(ShellError::labeled_error_with_secondary(
|
||||
"No selection",
|
||||
"No selection criterion was found",
|
||||
&tag,
|
||||
"Perhaps you want to use the flag -n or -f",
|
||||
&tag,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
|
||||
@ -34,7 +31,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls select [a]",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls select [a]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -48,34 +45,12 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = df
|
||||
.as_ref()
|
||||
.select(&col_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
let res = df
|
||||
.as_ref()
|
||||
.select(&col_string)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::PolarsData, Signature, SyntaxShape, UntaggedValue};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
|
||||
use nu_source::Tagged;
|
||||
|
||||
@ -35,12 +35,12 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Shows head rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls show",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls show",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Shows tail rows from dataframe",
|
||||
example: "[[a b]; [1 2] [3 4] [5 6]] | pls convert | pls show -t -n 1",
|
||||
example: "[[a b]; [1 2] [3 4] [5 6]] | pls to-df | pls show -t -n 1",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
@ -54,25 +54,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let rows: Option<Tagged<usize>> = args.get_flag("n_rows")?;
|
||||
let tail: bool = args.has_flag("tail");
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let rows = rows.map(|v| v.item);
|
||||
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let rows = rows.map(|v| v.item);
|
||||
let values = if tail { df.tail(rows)? } else { df.head(rows)? };
|
||||
|
||||
Ok(OutputStream::from_stream(values.into_iter()))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::from_stream(values.into_iter()))
|
||||
}
|
||||
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
|
||||
use nu_source::Tagged;
|
||||
pub struct DataFrame;
|
||||
@ -19,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls select")
|
||||
Signature::build("pls slice")
|
||||
.required("offset", SyntaxShape::Number, "start of slice")
|
||||
.required("size", SyntaxShape::Number, "size of slice")
|
||||
}
|
||||
@ -31,7 +28,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe from a slice of the rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls slice 0 1",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls slice 0 1",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -44,31 +41,8 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let offset: Tagged<usize> = args.req(0)?;
|
||||
let size: Tagged<usize> = args.req(1)?;
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = df.as_ref().slice(offset.item as i64, size.item);
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
let res = df.as_ref().slice(offset.item as i64, size.item);
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
57
crates/nu-command/src/commands/dataframe/sort.rs
Normal file
57
crates/nu-command/src/commands/dataframe/sort.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape, Value};
|
||||
|
||||
use super::utils::{convert_columns, parse_polars_error};
|
||||
/// Unit struct that carries the `pls sort` command implementation.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls sort"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Creates new sorted dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls sort")
|
||||
.required(
|
||||
"columns",
|
||||
SyntaxShape::Table,
|
||||
"column names to sort dataframe",
|
||||
)
|
||||
.switch("reverse", "invert sort", Some('r'))
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new sorted dataframe",
|
||||
example: "[[a b]; [3 4] [1 2]] | pls to-df | pls sort [a]",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
let columns: Vec<Value> = args.req(0)?;
|
||||
let reverse = args.has_flag("reverse");
|
||||
|
||||
let (col_string, col_span) = convert_columns(&columns, &tag)?;
|
||||
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_ref()
|
||||
.sort(&col_string, reverse)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &col_span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
@ -1,10 +1,7 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature, SyntaxShape};
|
||||
|
||||
use nu_source::Tagged;
|
||||
pub struct DataFrame;
|
||||
@ -19,7 +16,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls select").optional(
|
||||
Signature::build("pls tail").optional(
|
||||
"n_rows",
|
||||
SyntaxShape::Number,
|
||||
"Number of rows for tail",
|
||||
@ -33,7 +30,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Create new dataframe with tail rows",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls tail",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls tail",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -49,31 +46,9 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
None => 5,
|
||||
};
|
||||
|
||||
match args.input.next() {
|
||||
None => Err(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag,
|
||||
)),
|
||||
Some(value) => {
|
||||
if let UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) = value.value {
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(
|
||||
res,
|
||||
))),
|
||||
tag: tag.clone(),
|
||||
};
|
||||
let res = df.as_ref().tail(Some(rows));
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
} else {
|
||||
Err(ShellError::labeled_error(
|
||||
"No dataframe in stream",
|
||||
"no dataframe found in input stream",
|
||||
&tag,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(res, tag)))
|
||||
}
|
||||
|
@ -4,9 +4,10 @@ use std::path::PathBuf;
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::dataframe::NuDataFrame;
|
||||
use nu_protocol::Primitive;
|
||||
use nu_protocol::Value;
|
||||
use nu_protocol::{dataframe::PolarsData, Signature, SyntaxShape, UntaggedValue};
|
||||
use nu_protocol::{Signature, SyntaxShape, UntaggedValue};
|
||||
|
||||
use polars::prelude::{CsvWriter, SerWriter};
|
||||
|
||||
@ -17,7 +18,7 @@ pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls to_csv"
|
||||
"pls to-csv"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -25,7 +26,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls to_csv")
|
||||
Signature::build("pls to-csv")
|
||||
.required("file", SyntaxShape::FilePath, "file path to save dataframe")
|
||||
.named(
|
||||
"delimiter",
|
||||
@ -44,12 +45,12 @@ impl WholeStreamCommand for DataFrame {
|
||||
vec![
|
||||
Example {
|
||||
description: "Saves dataframe to csv file",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_csv test.csv",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to_csv test.csv",
|
||||
result: None,
|
||||
},
|
||||
Example {
|
||||
description: "Saves dataframe to csv file using other delimiter",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_csv test.csv -d '|'",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-csv test.csv -d '|'",
|
||||
result: None,
|
||||
},
|
||||
]
|
||||
@ -63,18 +64,7 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let delimiter: Option<Tagged<String>> = args.get_flag("delimiter")?;
|
||||
let no_header: bool = args.has_flag("no_header");
|
||||
|
||||
let mut df = args
|
||||
.input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag.span,
|
||||
))?;
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let mut file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
|
@ -1,16 +1,13 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, UntaggedValue,
|
||||
};
|
||||
use nu_protocol::{dataframe::NuDataFrame, Signature};
|
||||
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls convert"
|
||||
"pls to-df"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -18,7 +15,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls convert")
|
||||
Signature::build("pls to-df")
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
@ -26,17 +23,14 @@ impl WholeStreamCommand for DataFrame {
|
||||
let args = args.evaluate_once()?;
|
||||
|
||||
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
||||
let init = InputStream::one(
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)).into_value(&tag),
|
||||
);
|
||||
|
||||
Ok(init.to_output_stream())
|
||||
Ok(InputStream::one(df.to_value(tag)))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes an input stream and converts it to a polars dataframe",
|
||||
example: "[[a b];[1 2] [3 4]] | pls convert",
|
||||
example: "[[a b];[1 2] [3 4]] | pls to-df",
|
||||
result: None,
|
||||
}]
|
||||
}
|
@ -4,7 +4,8 @@ use std::path::PathBuf;
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::PolarsData, Primitive, Signature, SyntaxShape, UntaggedValue, Value};
|
||||
use nu_protocol::dataframe::NuDataFrame;
|
||||
use nu_protocol::{Primitive, Signature, SyntaxShape, UntaggedValue, Value};
|
||||
|
||||
use polars::prelude::ParquetWriter;
|
||||
|
||||
@ -15,7 +16,7 @@ pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls to_parquet"
|
||||
"pls to-parquet"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
@ -23,7 +24,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls to_parquet").required(
|
||||
Signature::build("pls to-parquet").required(
|
||||
"file",
|
||||
SyntaxShape::FilePath,
|
||||
"file path to save dataframe",
|
||||
@ -37,7 +38,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Saves dataframe to parquet file",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls to_parquet test.parquet",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls to-parquet test.parquet",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -48,18 +49,7 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let mut args = args.evaluate_once()?;
|
||||
let file_name: Tagged<PathBuf> = args.req(0)?;
|
||||
|
||||
let mut df = args
|
||||
.input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(df)) => Some(df),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"No input received",
|
||||
"missing dataframe input from stream",
|
||||
&tag.span,
|
||||
))?;
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let file = File::create(&file_name.item).map_err(|e| {
|
||||
ShellError::labeled_error(
|
||||
|
45
crates/nu-command/src/commands/dataframe/to_series.rs
Normal file
45
crates/nu-command/src/commands/dataframe/to_series.rs
Normal file
@ -0,0 +1,45 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{dataframe::NuSeries, Signature, SyntaxShape};
|
||||
use nu_source::Tagged;
|
||||
|
||||
/// Unit struct that carries the `pls to-series` command implementation.
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls to-series"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Converts a pipelined List into a polars series"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls to-series").optional(
|
||||
"name",
|
||||
SyntaxShape::String,
|
||||
"Optional series name",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let args = args.evaluate_once()?;
|
||||
|
||||
let name: Option<Tagged<String>> = args.opt(0)?;
|
||||
let name = name.map(|v| v.item);
|
||||
|
||||
let series = NuSeries::try_from_iter(args.input, name)?;
|
||||
|
||||
Ok(InputStream::one(series.to_value(tag)))
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Takes an input stream and converts it to a polars series",
|
||||
example: "[1 2 3 4] | pls to-series my-col",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
@ -2,9 +2,9 @@ use crate::prelude::*;
|
||||
use nu_engine::{evaluate_baseline_expr, EvaluatedCommandArgs, WholeStreamCommand};
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
dataframe::NuDataFrame,
|
||||
hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression},
|
||||
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value,
|
||||
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue,
|
||||
};
|
||||
|
||||
use super::utils::parse_polars_error;
|
||||
@ -36,7 +36,7 @@ impl WholeStreamCommand for DataFrame {
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Filter dataframe based on column a",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls convert | pls where a == 1",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls where a == 1",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
@ -148,18 +148,8 @@ fn filter_dataframe(
|
||||
right_condition: &Primitive,
|
||||
operator: &SpannedExpression,
|
||||
) -> Result<OutputStream, ShellError> {
|
||||
let df = args
|
||||
.input
|
||||
.next()
|
||||
.and_then(|value| match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::EagerDataFrame(nu)) => Some(nu),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(ShellError::labeled_error(
|
||||
"Incorrect stream input",
|
||||
"Expected dataframe in stream",
|
||||
&args.call_info.name_tag.span,
|
||||
))?;
|
||||
let span = args.call_info.name_tag.span;
|
||||
let df = NuDataFrame::try_from_stream(&mut args.input, &span)?;
|
||||
|
||||
let col = df
|
||||
.as_ref()
|
||||
@ -198,10 +188,8 @@ fn filter_dataframe(
|
||||
.filter(&mask)
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &args.call_info.name_tag.span, None))?;
|
||||
|
||||
let value = Value {
|
||||
value: UntaggedValue::DataFrame(PolarsData::EagerDataFrame(NuDataFrame::new(res))),
|
||||
tag: args.call_info.name_tag.clone(),
|
||||
};
|
||||
|
||||
Ok(OutputStream::one(value))
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
|
||||
res,
|
||||
args.call_info.name_tag.clone(),
|
||||
)))
|
||||
}
|
||||
|
67
crates/nu-command/src/commands/dataframe/with_column.rs
Normal file
67
crates/nu-command/src/commands/dataframe/with_column.rs
Normal file
@ -0,0 +1,67 @@
|
||||
use crate::prelude::*;
|
||||
use nu_engine::WholeStreamCommand;
|
||||
use nu_errors::ShellError;
|
||||
use nu_protocol::{
|
||||
dataframe::{NuDataFrame, PolarsData},
|
||||
Signature, SyntaxShape, UntaggedValue, Value,
|
||||
};
|
||||
|
||||
use super::utils::parse_polars_error;
|
||||
pub struct DataFrame;
|
||||
|
||||
impl WholeStreamCommand for DataFrame {
|
||||
fn name(&self) -> &str {
|
||||
"pls with-column"
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"Adds a series to the dataframe"
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::build("pls with-column").required(
|
||||
"series",
|
||||
SyntaxShape::Any,
|
||||
"series to be added",
|
||||
)
|
||||
}
|
||||
|
||||
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
command(args)
|
||||
}
|
||||
|
||||
fn examples(&self) -> Vec<Example> {
|
||||
vec![Example {
|
||||
description: "Adds a series to the dataframe",
|
||||
example: "[[a b]; [1 2] [3 4]] | pls to-df | pls with-column ([5 6] | pls to-series)",
|
||||
result: None,
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||
let tag = args.call_info.name_tag.clone();
|
||||
let mut args = args.evaluate_once()?;
|
||||
let value: Value = args.req(0)?;
|
||||
|
||||
let series = match value.value {
|
||||
UntaggedValue::DataFrame(PolarsData::Series(series)) => Ok(series),
|
||||
_ => Err(ShellError::labeled_error(
|
||||
"Incorrect type",
|
||||
"can only add a series to a dataframe",
|
||||
value.tag.span,
|
||||
)),
|
||||
}?;
|
||||
|
||||
let mut df = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?;
|
||||
|
||||
let res = df
|
||||
.as_mut()
|
||||
.with_column(series.series())
|
||||
.map_err(|e| parse_polars_error::<&str>(&e, &tag.span, None))?;
|
||||
|
||||
Ok(OutputStream::one(NuDataFrame::dataframe_to_value(
|
||||
res.clone(),
|
||||
tag,
|
||||
)))
|
||||
}
|
@ -253,49 +253,39 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||
whole_stream_command(Seq),
|
||||
whole_stream_command(SeqDates),
|
||||
whole_stream_command(TermSize),
|
||||
//Dataframe commands
|
||||
#[cfg(feature = "dataframe")]
|
||||
]);
|
||||
|
||||
//Dataframe commands
|
||||
#[cfg(feature = "dataframe")]
|
||||
context.add_commands(vec![
|
||||
whole_stream_command(DataFrame),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameConvert),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameLoad),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameList),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameGroupBy),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameAggregate),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameShow),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameSample),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameJoin),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameDrop),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameSelect),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameDTypes),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameDummies),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameHead),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameTail),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameSlice),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameMelt),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFramePivot),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameWhere),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameToDF),
|
||||
whole_stream_command(DataFrameToSeries),
|
||||
whole_stream_command(DataFrameToParquet),
|
||||
#[cfg(feature = "dataframe")]
|
||||
whole_stream_command(DataFrameToCsv),
|
||||
whole_stream_command(DataFrameSort),
|
||||
whole_stream_command(DataFrameGet),
|
||||
whole_stream_command(DataFrameDropDuplicates),
|
||||
whole_stream_command(DataFrameDropNulls),
|
||||
whole_stream_command(DataFrameColumn),
|
||||
whole_stream_command(DataFrameWithColumn),
|
||||
]);
|
||||
|
||||
#[cfg(feature = "clipboard-cli")]
|
||||
|
Reference in New Issue
Block a user