From e5cec8f4ebad5796a2ce9b8155e6a0bec53b45c9 Mon Sep 17 00:00:00 2001 From: Bahex Date: Mon, 18 Nov 2024 02:25:53 +0300 Subject: [PATCH] fix(group-by): re #14337 name collision prevention (#14360) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A more involved solution to the issue pointed out [here](https://github.com/nushell/nushell/pull/14337#issuecomment-2480392373) # Description With `--to-table` - cell-path groupers are used to create column names, similar to `select` - closure groupers result in columns named `closure_{i}` where `i` is the index of argument, with regards to other closures i.e. first closure grouper results in a column named `closure_0` Previously - `group-by foo {...} {...}` => `table` - `group-by {...} foo {...}` => `table` With this PR - `group-by foo {...} {...}` => `table` - `group-by {...} foo {...}` => `table` - no grouper argument results in a `table` as previously On naming conflicts caused by cell-path groupers named `items` or `closure_{i}`, an error is thrown, suggesting to use a closure in place of a cell-path. ```nushell ❯ ls | rename items | group-by items --to-table Error: × grouper arguments can't be named `items` ╭─[entry #3:1:29] 1 │ ls | rename items | group-by items --to-table · ────────┬──────── · ╰── contains `items` ╰──── help: instead of a cell-path, try using a closure ``` And following the suggestion: ```nushell ❯ ls | rename items | group-by { get items } --to-table ╭─#──┬──────closure_0──────┬───────────────────────────items────────────────────────────╮ │ 0 │ CITATION.cff │ ╭─#─┬────items─────┬─type─┬─size──┬───modified───╮ │ │ │ │ │ 0 │ CITATION.cff │ file │ 812 B │ 3 months ago │ │ │ │ │ ╰─#─┴────items─────┴─type─┴─size──┴───modified───╯ │ │ 1 │ CODE_OF_CONDUCT.md │ ╭─#─┬───────items────────┬─type─┬──size───┬───modified───╮ │ ... ``` --- crates/nu-command/src/filters/group_by.rs | 167 +++++++++++++++------- 1 file changed, 116 insertions(+), 51 deletions(-) diff --git a/crates/nu-command/src/filters/group_by.rs b/crates/nu-command/src/filters/group_by.rs index 9396de4781..0f56152834 100644 --- a/crates/nu-command/src/filters/group_by.rs +++ b/crates/nu-command/src/filters/group_by.rs @@ -1,6 +1,6 @@ use indexmap::IndexMap; use nu_engine::{command_prelude::*, ClosureEval}; -use nu_protocol::{engine::Closure, IntoValue}; +use nu_protocol::{engine::Closure, FromValue, IntoValue}; #[derive(Clone)] pub struct GroupBy; @@ -12,10 +12,6 @@ impl Command for GroupBy { fn signature(&self) -> Signature { Signature::build("group-by") - // TODO: It accepts Table also, but currently there is no Table - // example. Perhaps Table should be a subtype of List, in which case - // the current signature would suffice even when a Table example - // exists. .input_output_types(vec![(Type::List(Box::new(Type::Any)), Type::Any)]) .switch( "to-table", @@ -229,7 +225,7 @@ pub fn group_by( input: PipelineData, ) -> Result { let head = call.head; - let groupers: Vec = call.rest(engine_state, stack, 0)?; + let groupers: Vec> = call.rest(engine_state, stack, 0)?; let to_table = call.has_flag(engine_state, stack, "to-table")?; let config = engine_state.get_config(); @@ -238,20 +234,20 @@ pub fn group_by( return Ok(Value::record(Record::new(), head).into_pipeline_data()); } - let mut groupers = groupers.into_iter(); - - let grouped = if let Some(grouper) = groupers.next() { - let mut groups = Grouped::new(&grouper, values, config, engine_state, stack)?; - for grouper in groupers { - groups.subgroup(&grouper, config, engine_state, stack)?; + let grouped = match &groupers[..] { + [first, rest @ ..] => { + let mut grouped = Grouped::new(first.as_ref(), values, config, engine_state, stack)?; + for grouper in rest { + grouped.subgroup(grouper.as_ref(), config, engine_state, stack)?; + } + grouped } - groups - } else { - Grouped::empty(values, config) + [] => Grouped::empty(values, config), }; let value = if to_table { - grouped.into_table(head) + let column_names = groupers_to_column_names(&groupers)?; + grouped.into_table(&column_names, head) } else { grouped.into_record(head) }; @@ -259,8 +255,67 @@ pub fn group_by( Ok(value.into_pipeline_data()) } +fn groupers_to_column_names(groupers: &[Spanned]) -> Result, ShellError> { + if groupers.is_empty() { + return Ok(vec!["group".into(), "items".into()]); + } + + let mut closure_idx: usize = 0; + let grouper_names = groupers.iter().map(|grouper| { + grouper.as_ref().map(|item| match item { + Grouper::CellPath { val } => val.to_column_name(), + Grouper::Closure { .. } => { + closure_idx += 1; + format!("closure_{}", closure_idx - 1) + } + }) + }); + + let mut name_set: Vec> = Vec::with_capacity(grouper_names.len()); + + for name in grouper_names { + if name.item == "items" { + return Err(ShellError::GenericError { + error: "grouper arguments can't be named `items`".into(), + msg: "here".into(), + span: Some(name.span), + help: Some("instead of a cell-path, try using a closure: { get items }".into()), + inner: vec![], + }); + } + + if let Some(conflicting_name) = name_set + .iter() + .find(|elem| elem.as_ref().item == name.item.as_str()) + { + return Err(ShellError::GenericError { + error: "grouper arguments result in colliding column names".into(), + msg: "duplicate column names".into(), + span: Some(conflicting_name.span.append(name.span)), + help: Some( + "instead of a cell-path, try using a closure or renaming columns".into(), + ), + inner: vec![ShellError::ColumnDefinedTwice { + col_name: conflicting_name.item.clone(), + first_use: conflicting_name.span, + second_use: name.span, + }], + }); + } + + name_set.push(name); + } + + let column_names: Vec = name_set + .into_iter() + .map(|elem| elem.item) + .chain(["items".into()]) + .collect(); + Ok(column_names) +} + fn group_cell_path( - column_name: CellPath, + column_name: &CellPath, values: Vec, config: &nu_protocol::Config, ) -> Result>, ShellError> { @@ -305,8 +360,25 @@ fn group_closure( Ok(groups) } +enum Grouper { + CellPath { val: CellPath }, + Closure { val: Box }, +} + +impl FromValue for Grouper { + fn from_value(v: Value) -> Result { + match v { + Value::CellPath { val, .. } => Ok(Grouper::CellPath { val }), + Value::Closure { val, .. } => Ok(Grouper::Closure { val }), + _ => Err(ShellError::TypeMismatch { + err_message: "unsupported grouper type".to_string(), + span: v.span(), + }), + } + } +} + struct Grouped { - grouper: Option, groups: Tree, } @@ -325,41 +397,35 @@ impl Grouped { } Self { - grouper: Some("group".into()), groups: Tree::Leaf(groups), } } fn new( - grouper: &Value, + grouper: Spanned<&Grouper>, values: Vec, config: &nu_protocol::Config, engine_state: &EngineState, stack: &mut Stack, ) -> Result { - let span = grouper.span(); - let groups = match grouper { - Value::CellPath { val, .. } => group_cell_path(val.clone(), values, config)?, - Value::Closure { val, .. } => { - group_closure(values, span, Closure::clone(val), engine_state, stack)? - } - _ => { - return Err(ShellError::TypeMismatch { - err_message: "unsupported grouper type".to_string(), - span, - }) - } + let groups = match grouper.item { + Grouper::CellPath { val } => group_cell_path(val, values, config)?, + Grouper::Closure { val } => group_closure( + values, + grouper.span, + Closure::clone(val), + engine_state, + stack, + )?, }; - let grouper = grouper.as_cell_path().ok().map(CellPath::to_column_name); Ok(Self { - grouper, groups: Tree::Leaf(groups), }) } fn subgroup( &mut self, - grouper: &Value, + grouper: Spanned<&Grouper>, config: &nu_protocol::Config, engine_state: &EngineState, stack: &mut Stack, @@ -384,34 +450,33 @@ impl Grouped { Ok(()) } - fn into_table(self, head: Span) -> Value { - self._into_table(head, 0) + fn into_table(self, column_names: &[String], head: Span) -> Value { + self._into_table(head) .into_iter() - .map(|row| row.into_iter().rev().collect::().into_value(head)) + .map(|row| { + row.into_iter() + .rev() + .zip(column_names) + .map(|(val, key)| (key.clone(), val)) + .collect::() + .into_value(head) + }) .collect::>() .into_value(head) } - fn _into_table(self, head: Span, index: usize) -> Vec { - let grouper = self.grouper.unwrap_or_else(|| format!("group{index}")); + fn _into_table(self, head: Span) -> Vec> { match self.groups { Tree::Leaf(leaf) => leaf .into_iter() - .map(|(group, values)| { - [ - ("items".to_string(), values.into_value(head)), - (grouper.clone(), group.into_value(head)), - ] - .into_iter() - .collect() - }) - .collect::>(), + .map(|(group, values)| vec![(values.into_value(head)), (group.into_value(head))]) + .collect::>>(), Tree::Branch(branch) => branch .into_iter() .flat_map(|(group, items)| { - let mut inner = items._into_table(head, index + 1); + let mut inner = items._into_table(head); for row in &mut inner { - row.insert(grouper.clone(), group.clone().into_value(head)); + row.push(group.clone().into_value(head)); } inner })