diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 353cf3994..c2dce24a1 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -65,6 +65,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
             First,
             Flatten,
             Get,
+            GroupBy,
             Keep,
             KeepUntil,
             KeepWhile,
@@ -83,6 +84,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
             Skip,
             SkipUntil,
             SkipWhile,
+            Transpose,
             Uniq,
             Update,
             Where,
diff --git a/crates/nu-command/src/filters/group_by.rs b/crates/nu-command/src/filters/group_by.rs
new file mode 100644
index 000000000..5b36539f7
--- /dev/null
+++ b/crates/nu-command/src/filters/group_by.rs
@@ -0,0 +1,283 @@
+use nu_engine::{eval_block, CallExt};
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{CaptureBlock, Command, EngineState, Stack};
+use nu_protocol::{
+    Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
+    Value,
+};
+
+use indexmap::IndexMap;
+
+/// The `group-by` command: collects the input values into a record of lists, one per group.
+#[derive(Clone)]
+pub struct GroupBy;
+
+impl Command for GroupBy {
+    fn name(&self) -> &str {
+        "group-by"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("group-by").optional(
+            "grouper",
+            SyntaxShape::Any,
+            "the grouper value to use",
+        )
+    }
+
+    fn usage(&self) -> &str {
+        "Create a new table grouped."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        group_by(engine_state, stack, call, input)
+    }
+
+    #[allow(clippy::unwrap_used)]
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "group items by column named \"type\"",
+                example: r#"ls | group-by type"#,
+                result: None,
+            },
+            Example {
+                description: "you can also group by raw values by leaving out the argument",
+                example: "echo ['1' '3' '1' '3' '2' '1' '1'] | group-by",
+                result: Some(Value::Record {
+                    cols: vec!["1".to_string(), "3".to_string(), "2".to_string()],
+                    vals: vec![
+                        Value::List {
+                            vals: vec![
+                                Value::test_string("1"),
+                                Value::test_string("1"),
+                                Value::test_string("1"),
+                                Value::test_string("1"),
+                            ],
+                            span: Span::test_data(),
+                        },
+                        Value::List {
+                            vals: vec![Value::test_string("3"), Value::test_string("3")],
+                            span: Span::test_data(),
+                        },
+                        Value::List {
+                            vals: vec![Value::test_string("2")],
+                            span: Span::test_data(),
+                        },
+                    ],
+                    span: Span::test_data(),
+                }),
+            },
+        ]
+    }
+}
+
+/// How the rows of the input are assigned to groups.
+enum Grouper {
+    /// Key on the named column, or on the row's own string value when `None`.
+    ByColumn(Option<Spanned<String>>),
+    /// Key on the value a block produced for each row.
+    ByBlock,
+}
+
+/// Groups the pipeline input and returns the groups as a single record value.
+///
+/// The optional first positional argument selects the key of each row: a block is evaluated
+/// against the row, any other value is read as a column name, and with no argument the row's
+/// own string value is the key.
+///
+/// # Errors
+///
+/// Returns an error when the input is empty, when the block yields more than one value for a
+/// row, when the named column is missing from a row, or when a key fails `as_string`.
+pub fn group_by(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let name = call.head;
+
+    let grouper: Option<Value> = call.opt(engine_state, stack, 0)?;
+    let values: Vec<Value> = input.into_iter().collect();
+    let mut keys: Vec<Result<String, ShellError>> = vec![];
+    let mut group_strategy = Grouper::ByColumn(None);
+
+    // Inspect the first row through `first()`: indexing `values[0]` would panic on empty
+    // input before the "requires a table input" error could be reported.
+    let first_span = match values.first() {
+        Some(first) => first.span(),
+        None => {
+            return Err(ShellError::SpannedLabeledError(
+                "expected table from pipeline".into(),
+                "requires a table input".into(),
+                name,
+            ));
+        }
+    };
+
+    let value_list = Value::List {
+        vals: values.clone(),
+        span: name,
+    };
+
+    match grouper {
+        Some(Value::Block { .. }) => {
+            let block: Option<CaptureBlock> = call.opt(engine_state, stack, 0)?;
+            // Rows whose block fails, or yields nothing or an error value, share this key.
+            let error_key = "error";
+
+            for value in values {
+                if let Some(capture_block) = &block {
+                    let mut stack = stack.captures_to_stack(&capture_block.captures);
+                    let block = engine_state.get_block(capture_block.block_id);
+                    let pipeline =
+                        eval_block(engine_state, &mut stack, block, value.into_pipeline_data());
+
+                    match pipeline {
+                        Ok(s) => {
+                            let collection: Vec<Value> = s.into_iter().collect();
+
+                            if collection.len() > 1 {
+                                return Err(ShellError::SpannedLabeledError(
+                                    "expected one value from the block".into(),
+                                    "requires a table with one value for grouping".into(),
+                                    name,
+                                ));
+                            }
+
+                            let value = match collection.get(0) {
+                                Some(Value::Error { .. }) | None => Value::String {
+                                    val: error_key.to_string(),
+                                    span: name,
+                                },
+                                Some(return_value) => return_value.clone(),
+                            };
+
+                            keys.push(value.as_string());
+                        }
+                        Err(_) => {
+                            keys.push(Ok(error_key.into()));
+                        }
+                    }
+                }
+            }
+
+            group_strategy = Grouper::ByBlock;
+        }
+        Some(other) => {
+            group_strategy = Grouper::ByColumn(Some(Spanned {
+                item: other.as_string()?,
+                span: name,
+            }));
+        }
+        _ => {}
+    }
+
+    // From here on, prefer the span of the first input row when it has one.
+    let name = first_span.unwrap_or(name);
+
+    let group_value = match group_strategy {
+        Grouper::ByBlock => {
+            let map = keys;
+
+            let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) {
+                Some(Ok(key)) => Ok(key.clone()),
+                Some(Err(reason)) => Err(reason.clone()),
+                None => row.as_string(),
+            });
+
+            data_group(&value_list, &Some(block), name)
+        }
+        Grouper::ByColumn(column_name) => group(&column_name, &value_list, name),
+    };
+
+    Ok(PipelineData::Value(group_value?, None))
+}
+
+/// Partitions the items of `values` into a record: one column per group key, holding the
+/// list of rows in that group. Keys are kept in the order `IndexMap` first received them.
+///
+/// `grouper` receives each row's index and value and returns its key; when it is `None` the
+/// row's own string value is the key. The first key that is an error aborts the grouping.
+#[allow(clippy::type_complexity)]
+pub fn data_group(
+    values: &Value,
+    grouper: &Option<Box<dyn Fn(usize, &Value) -> Result<String, ShellError> + Send>>,
+    span: Span,
+) -> Result<Value, ShellError> {
+    let mut groups: IndexMap<String, Vec<Value>> = IndexMap::new();
+
+    for (idx, value) in values.clone().into_pipeline_data().into_iter().enumerate() {
+        let group_key = match grouper {
+            Some(grouper) => grouper(idx, &value),
+            None => value.as_string(),
+        };
+
+        groups.entry(group_key?).or_insert_with(Vec::new).push(value);
+    }
+
+    let mut cols = vec![];
+    let mut vals = vec![];
+
+    for (key, rows) in groups {
+        cols.push(key);
+        vals.push(Value::List { vals: rows, span });
+    }
+
+    Ok(Value::Record { cols, vals, span })
+}
+
+/// Groups the items of `values` by the given column, or by each row's own string value when
+/// `column_name` is `None`.
+///
+/// # Errors
+///
+/// Returns `ShellError::CantFindColumn` when a row lacks the requested column.
+pub fn group(
+    column_name: &Option<Spanned<String>>,
+    values: &Value,
+    span: Span,
+) -> Result<Value, ShellError> {
+    match column_name {
+        Some(column_name) => {
+            // The closure is `move`, so hand it its own copy of the column name.
+            let column_name = column_name.clone();
+            let block =
+                Box::new(
+                    move |_, row: &Value| match row.get_data_by_key(&column_name.item) {
+                        Some(group_key) => Ok(group_key.as_string()?),
+                        None => Err(ShellError::CantFindColumn(
+                            column_name.span,
+                            row.span().unwrap_or(column_name.span),
+                        )),
+                    },
+                );
+
+            data_group(values, &Some(block), span)
+        }
+        None => {
+            let block = Box::new(move |_, row: &Value| row.as_string());
+
+            data_group(values, &Some(block), span)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(GroupBy {})
+    }
+}
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 156c0057e..6a8410835 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -11,6 +11,7 @@ mod every;
 mod first;
 mod flatten;
 mod get;
+mod group_by;
 mod keep;
 mod last;
 mod length;
@@ -25,6 +26,7 @@ mod reverse;
 mod select;
 mod shuffle;
 mod skip;
+mod transpose;
 mod uniq;
 mod update;
 mod where_;
@@ -44,6 +46,7 @@ pub use every::Every;
 pub use first::First;
 pub use flatten::Flatten;
 pub use get::Get;
+pub use group_by::GroupBy;
 pub use keep::*;
 pub use last::Last;
 pub use length::Length;
@@ -58,6 +61,7 @@ pub use reverse::Reverse;
 pub use select::Select;
 pub use shuffle::Shuffle;
 pub use skip::*;
+pub use transpose::Transpose;
 pub use uniq::*;
 pub use update::Update;
 pub use where_::Where;
diff --git a/crates/nu-command/src/filters/transpose.rs b/crates/nu-command/src/filters/transpose.rs
new file mode 100644
index 000000000..ac7922ef4
--- /dev/null
+++ b/crates/nu-command/src/filters/transpose.rs
@@ -0,0 +1,183 @@
+use nu_engine::column::get_columns;
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    IntoInterruptiblePipelineData, PipelineData, ShellError, Signature, Spanned, SyntaxShape, Value,
+};
+
+/// The `transpose` command: turns the columns of the input table into rows.
+#[derive(Clone)]
+pub struct Transpose;
+
+/// Flags and positional arguments of one `transpose` call.
+pub struct TransposeArgs {
+    /// Names for the output columns, taken from the rest arguments.
+    rest: Vec<Spanned<String>>,
+    /// `--header-row`: name the output columns after the values of the first input column.
+    header_row: bool,
+    /// `--ignore-titles`: do not emit the input column names as values.
+    ignore_titles: bool,
+}
+
+impl Command for Transpose {
+    fn name(&self) -> &str {
+        "transpose"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("transpose")
+            .switch(
+                "header-row",
+                "treat the first row as column names",
+                Some('r'),
+            )
+            .switch(
+                "ignore-titles",
+                "don't transpose the column names into values",
+                Some('i'),
+            )
+            .rest(
+                "rest",
+                SyntaxShape::String,
+                "the names to give columns once transposed",
+            )
+    }
+
+    fn usage(&self) -> &str {
+        "Transposes the table contents so rows become columns and columns become rows."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        transpose(engine_state, stack, call, input)
+    }
+}
+
+/// Transposes the input: every input column becomes one output record, and every input row
+/// contributes one column to each of those records.
+///
+/// Output columns are named by the rest arguments, falling back to `Column<index>`. With
+/// `--header-row` they are named by each row's value under the first input column instead,
+/// and no record is emitted for that column. Unless `--ignore-titles` or `--header-row` is
+/// set, each record starts with the name of the input column it was built from.
+///
+/// # Errors
+///
+/// Returns an error when names are passed together with `--header-row`, or when a header
+/// value is missing or fails `as_string`.
+pub fn transpose(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let name = call.head;
+    let args = TransposeArgs {
+        header_row: call.has_flag("header-row"),
+        ignore_titles: call.has_flag("ignore-titles"),
+        rest: call.rest(engine_state, stack, 0)?,
+    };
+
+    let ctrlc = engine_state.ctrlc.clone();
+    let input: Vec<_> = input.into_iter().collect();
+
+    let descs = get_columns(&input);
+
+    let mut headers: Vec<String> = vec![];
+
+    if !args.rest.is_empty() && args.header_row {
+        return Err(ShellError::SpannedLabeledError(
+            "Can not provide header names and use header row".into(),
+            "using header row".into(),
+            name,
+        ));
+    }
+
+    if args.header_row {
+        // Each row's value under the first input column names one output column.
+        for row in &input {
+            let header = descs.first().and_then(|desc| row.get_data_by_key(desc));
+
+            match header {
+                Some(value) => match value.as_string() {
+                    Ok(title) => headers.push(title),
+                    Err(_) => {
+                        return Err(ShellError::SpannedLabeledError(
+                            "Header row needs string headers".into(),
+                            "used non-string headers".into(),
+                            name,
+                        ));
+                    }
+                },
+                None => {
+                    return Err(ShellError::SpannedLabeledError(
+                        "Header row is incomplete and can't be used".into(),
+                        "using incomplete header row".into(),
+                        name,
+                    ));
+                }
+            }
+        }
+    } else {
+        // One header more than there are rows, leaving room for the leading title column.
+        for i in 0..=input.len() {
+            let header = match args.rest.get(i) {
+                Some(given) => given.item.clone(),
+                None => format!("Column{}", i),
+            };
+            headers.push(header);
+        }
+    }
+
+    let descs: Vec<_> = if args.header_row {
+        descs.into_iter().skip(1).collect()
+    } else {
+        descs
+    };
+
+    Ok((descs.into_iter().map(move |desc| {
+        let mut column_num: usize = 0;
+        let mut cols = vec![];
+        let mut vals = vec![];
+
+        if !args.ignore_titles && !args.header_row {
+            // Leading cell: the name of the input column this record is built from.
+            cols.push(headers[column_num].clone());
+            vals.push(Value::string(desc.clone(), name));
+            column_num += 1;
+        }
+
+        // Borrow the rows instead of cloning the whole input once per output record.
+        for row in &input {
+            let cell = row.get_data_by_key(&desc);
+            cols.push(headers[column_num].clone());
+            vals.push(cell.unwrap_or_else(|| Value::nothing(name)));
+            column_num += 1;
+        }
+
+        Value::Record {
+            cols,
+            vals,
+            span: name,
+        }
+    }))
+    .into_pipeline_data(ctrlc))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(Transpose {})
+    }
+}