Series arithmetic (#3602)

* operations with series

* `contains` operations with series

* Checked division and masked operations
This commit is contained in:
Fernando Herrera
2021-06-10 22:39:51 +01:00
committed by GitHub
parent 1d7c909080
commit c4163c3621
13 changed files with 812 additions and 90 deletions

View File

@ -101,10 +101,10 @@ zip = { version = "0.5.9", optional = true }
[dependencies.polars]
git = "https://github.com/pola-rs/polars"
rev = "a5f17b0a6e3e05ff6be789aa24a7cae54fd400dd"
version = "0.14.0"
rev = "9e1506cca9fb646fc55f949ab6345290c3d198a7"
version = "0.14.1"
optional = true
features = ["parquet", "json", "random", "pivot"]
features = ["parquet", "json", "random", "pivot", "strings"]
[target.'cfg(unix)'.dependencies]
umask = "1.0.0"

View File

@ -12,7 +12,7 @@ pub struct DataFrame;
impl WholeStreamCommand for DataFrame {
fn name(&self) -> &str {
"pls groupby"
"pls group-by"
}
fn usage(&self) -> &str {
@ -20,7 +20,7 @@ impl WholeStreamCommand for DataFrame {
}
fn signature(&self) -> Signature {
Signature::build("pls groupby").required(
Signature::build("pls group-by").required(
"by columns",
SyntaxShape::Table,
"groupby columns",
@ -34,7 +34,7 @@ impl WholeStreamCommand for DataFrame {
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Grouping by column a",
example: "[[a b]; [one 1] [one 2]] | pls to-df | pls groupby [a]",
example: "[[a b]; [one 1] [one 2]] | pls to-df | pls group-by [a]",
result: None,
}]
}

View File

@ -4,11 +4,11 @@ use nu_errors::ShellError;
use nu_protocol::{
dataframe::NuDataFrame,
hir::{CapturedBlock, ClassifiedCommand, Expression, Literal, Operator, SpannedExpression},
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue,
Primitive, Signature, SyntaxShape, UnspannedPathMember, UntaggedValue, Value,
};
use super::utils::parse_polars_error;
use polars::prelude::{ChunkCompare, Series};
use polars::prelude::{ChunkCompare, DataType, Series};
pub struct DataFrame;
@ -91,22 +91,8 @@ fn command(args: CommandArgs) -> Result<OutputStream, ShellError> {
}?;
let rhs = evaluate_baseline_expr(&expression.right, &args.args.context)?;
let right_condition = match &rhs.value {
UntaggedValue::Primitive(primitive) => Ok(primitive),
_ => Err(ShellError::labeled_error(
"Incorrect argument",
"Expected primitive values",
&rhs.tag.span,
)),
}?;
filter_dataframe(
args,
&col_name,
&col_name_span,
&right_condition,
&expression.op,
)
filter_dataframe(args, &col_name, &col_name_span, &rhs, &expression.op)
}
macro_rules! comparison_arm {
@ -145,16 +131,25 @@ fn filter_dataframe(
mut args: EvaluatedCommandArgs,
col_name: &str,
col_name_span: &Span,
right_condition: &Primitive,
rhs: &Value,
operator: &SpannedExpression,
) -> Result<OutputStream, ShellError> {
let right_condition = match &rhs.value {
UntaggedValue::Primitive(primitive) => Ok(primitive),
_ => Err(ShellError::labeled_error(
"Incorrect argument",
"Expected primitive values",
&rhs.tag.span,
)),
}?;
let span = args.call_info.name_tag.span;
let df = NuDataFrame::try_from_stream(&mut args.input, &span)?;
let col = df
.as_ref()
.column(col_name)
.map_err(|e| parse_polars_error::<&str>(&e, &col_name_span, None))?;
.map_err(|e| parse_polars_error::<&str>(&e, col_name_span, None))?;
let op = match &operator.expr {
Expression::Literal(Literal::Operator(op)) => Ok(op),
@ -176,6 +171,33 @@ fn filter_dataframe(
Operator::GreaterThanOrEqual => {
comparison_arm!(Series::gt_eq, col, right_condition, operator.span)
}
Operator::Contains => match col.dtype() {
DataType::Utf8 => match right_condition {
Primitive::String(pat) => {
let casted = col.utf8().map_err(|e| {
parse_polars_error::<&str>(&e, &args.call_info.name_tag.span, None)
})?;
casted.contains(pat).map_err(|e| {
parse_polars_error::<&str>(&e, &args.call_info.name_tag.span, None)
})
}
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect argument",
"Can't perform contains with this value",
&rhs.tag.span,
"Contains only works with strings",
&rhs.tag.span,
)),
},
_ => Err(ShellError::labeled_error_with_secondary(
"Incorrect datatype",
format!("The selected column is of type '{}'", col.dtype()),
col_name_span,
"Perhaps you want to select a column of 'str' type",
col_name_span,
)),
},
_ => Err(ShellError::labeled_error(
"Incorrect operator",
"Not implemented operator for dataframes filter",