From 9d0be7d96fa0e960bcc702a523ca62f0c53b765c Mon Sep 17 00:00:00 2001 From: Fernando Herrera Date: Sat, 16 Jul 2022 15:34:12 +0100 Subject: [PATCH] check column type during aggregation (#6058) * check column type during aggregation * check first if there is schema --- .../src/dataframe/lazy/aggregate.rs | 77 ++++++++++++++++++- .../nu-command/src/dataframe/lazy/groupby.rs | 8 +- .../values/nu_lazyframe/custom_value.rs | 1 + .../src/dataframe/values/nu_lazyframe/mod.rs | 8 +- .../values/nu_lazygroupby/custom_value.rs | 1 + .../dataframe/values/nu_lazygroupby/mod.rs | 5 +- 6 files changed, 89 insertions(+), 11 deletions(-) diff --git a/crates/nu-command/src/dataframe/lazy/aggregate.rs b/crates/nu-command/src/dataframe/lazy/aggregate.rs index 44435cd25..7e59b4e74 100644 --- a/crates/nu-command/src/dataframe/lazy/aggregate.rs +++ b/crates/nu-command/src/dataframe/lazy/aggregate.rs @@ -6,6 +6,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use polars::{datatypes::DataType, prelude::Expr}; #[derive(Clone)] pub struct LazyAggregate; @@ -118,12 +119,29 @@ impl Command for LazyAggregate { let expressions = NuExpression::extract_exprs(value)?; let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?; - let from_eager = group_by.from_eager; - let group_by = group_by.into_polars(); + if let Some(schema) = &group_by.schema { + for expr in &expressions { + if let Some(name) = get_col_name(expr) { + let dtype = schema.get(name.as_str()); + + if matches!(dtype, Some(DataType::Object(..))) { + return Err(ShellError::GenericError( + "Object type column not supported for aggregation".into(), + format!("Column '{}' is type Object", name), + Some(call.head), + Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()), + Vec::new(), + )); + } + } + } + } + let lazy = NuLazyFrame { - lazy: group_by.agg(&expressions).into(), - from_eager, + from_eager: group_by.from_eager, + lazy: Some(group_by.into_polars().agg(&expressions)), + schema: None, }; let res = lazy.into_value(call.head)?; @@ -131,6 +149,57 @@ impl Command for LazyAggregate { } } +fn get_col_name(expr: &Expr) -> Option { + match expr { + Expr::Column(column) => Some(column.to_string()), + Expr::Agg(agg) => match agg { + polars::prelude::AggExpr::Min(e) + | polars::prelude::AggExpr::Max(e) + | polars::prelude::AggExpr::Median(e) + | polars::prelude::AggExpr::NUnique(e) + | polars::prelude::AggExpr::First(e) + | polars::prelude::AggExpr::Last(e) + | polars::prelude::AggExpr::Mean(e) + | polars::prelude::AggExpr::List(e) + | polars::prelude::AggExpr::Count(e) + | polars::prelude::AggExpr::Sum(e) + | polars::prelude::AggExpr::AggGroups(e) + | polars::prelude::AggExpr::Std(e) + | polars::prelude::AggExpr::Var(e) => get_col_name(e.as_ref()), + polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()), + }, + Expr::Reverse(expr) + | Expr::Shift { input: expr, .. } + | Expr::Filter { input: expr, .. } + | Expr::Slice { input: expr, .. } + | Expr::Cast { expr, .. } + | Expr::Sort { expr, .. } + | Expr::Take { expr, .. } + | Expr::SortBy { expr, .. } + | Expr::Exclude(expr, _) + | Expr::Alias(expr, _) + | Expr::KeepName(expr) + | Expr::Not(expr) + | Expr::IsNotNull(expr) + | Expr::IsNull(expr) + | Expr::Duplicated(expr) + | Expr::IsUnique(expr) + | Expr::Explode(expr) => get_col_name(expr.as_ref()), + Expr::Ternary { .. } + | Expr::AnonymousFunction { .. } + | Expr::Function { .. } + | Expr::Columns(_) + | Expr::DtypeColumn(_) + | Expr::Literal(_) + | Expr::BinaryExpr { .. } + | Expr::Window { .. } + | Expr::Wildcard + | Expr::RenameAlias { .. } + | Expr::Count + | Expr::Nth(_) => None, + } +} + #[cfg(test)] mod test { use super::super::super::test_dataframe::test_dataframe; diff --git a/crates/nu-command/src/dataframe/lazy/groupby.rs b/crates/nu-command/src/dataframe/lazy/groupby.rs index 4d99e7c87..bf2f6a44f 100644 --- a/crates/nu-command/src/dataframe/lazy/groupby.rs +++ b/crates/nu-command/src/dataframe/lazy/groupby.rs @@ -128,13 +128,11 @@ impl Command for ToLazyGroupBy { )); } - let value = input.into_value(call.head); - let lazy = NuLazyFrame::try_from_value(value)?; - let from_eager = lazy.from_eager; - + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; let group_by = NuLazyGroupBy { + schema: lazy.schema.clone(), + from_eager: lazy.from_eager, group_by: Some(lazy.into_polars().groupby(&expressions)), - from_eager, }; Ok(PipelineData::Value(group_by.into_value(call.head), None)) diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs index 39d723e22..c33dcd06b 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs @@ -15,6 +15,7 @@ impl CustomValue for NuLazyFrame { let cloned = NuLazyFrame { lazy: self.lazy.clone(), from_eager: self.from_eager, + schema: self.schema.clone(), }; Value::CustomValue { diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs index 8d6b5d985..5d45543fc 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs @@ -3,7 +3,7 @@ mod custom_value; use super::{NuDataFrame, NuExpression}; use core::fmt; use nu_protocol::{PipelineData, ShellError, Span, Value}; -use polars::prelude::{Expr, IntoLazy, LazyFrame}; +use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; // Lazyframe wrapper for Nushell operations @@ -12,6 +12,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[derive(Default)] pub struct NuLazyFrame { pub lazy: Option, + pub schema: Option, pub from_eager: bool, } @@ -63,6 +64,7 @@ impl From for NuLazyFrame { Self { lazy: Some(lazy_frame), from_eager: false, + schema: None, } } } @@ -72,6 +74,7 @@ impl NuLazyFrame { Self { lazy: Some(lazy), from_eager, + schema: None, } } @@ -80,6 +83,7 @@ impl NuLazyFrame { Self { lazy: Some(lazy), from_eager: true, + schema: Some(df.as_ref().schema()), } } @@ -148,6 +152,7 @@ impl NuLazyFrame { Some(expr) => Ok(Self { lazy: expr.lazy.clone(), from_eager: false, + schema: None, }), None => Err(ShellError::CantConvert( "lazy frame".into(), @@ -184,6 +189,7 @@ impl NuLazyFrame { Self { from_eager: self.from_eager, lazy: Some(new_frame), + schema: None, } } } diff --git a/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs index 2ceefb232..9123cb8ee 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazygroupby/custom_value.rs @@ -14,6 +14,7 @@ impl CustomValue for NuLazyGroupBy { fn clone_value(&self, span: nu_protocol::Span) -> Value { let cloned = NuLazyGroupBy { group_by: self.group_by.clone(), + schema: self.schema.clone(), from_eager: self.from_eager, }; diff --git a/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs index 56afda567..4bfd4e5be 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs @@ -2,7 +2,7 @@ mod custom_value; use core::fmt; use nu_protocol::{PipelineData, ShellError, Span, Value}; -use polars::prelude::LazyGroupBy; +use polars::prelude::{LazyGroupBy, Schema}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; // Lazyframe wrapper for Nushell operations @@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[derive(Default)] pub struct NuLazyGroupBy { pub group_by: Option, + pub schema: Option, pub from_eager: bool, } @@ -66,6 +67,7 @@ impl From for NuLazyGroupBy { Self { group_by: Some(group_by), from_eager: false, + schema: None, } } } @@ -88,6 +90,7 @@ impl NuLazyGroupBy { match val.as_any().downcast_ref::() { Some(group) => Ok(Self { group_by: group.group_by.clone(), + schema: group.schema.clone(), from_eager: group.from_eager, }), None => Err(ShellError::CantConvert(