From 6f6340186a6f8c1afb69719339c15b1b1e933d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C8=98tefan?= <71919805+onthebridgetonowhere@users.noreply.github.com>
Date: Fri, 17 Dec 2021 21:44:51 +0100
Subject: [PATCH] Port flatten (#512)

* A first working version of flatten. Needs a lot of cleanup. Committing to have a working version

* Typo fix

* Flatten tests pass

* Final cleanup, ready for push

* Final cleanup, ready for push

* Final cleanup, ready for push

* Final cleanup, ready for push

* Update flatten.rs

Co-authored-by: JT <547158+jntrnr@users.noreply.github.com>
---
 crates/nu-command/src/default_context.rs |   1 +
 crates/nu-command/src/filters/flatten.rs | 226 +++++++++++++++++++++++
 crates/nu-command/src/filters/mod.rs     |   2 +
 src/tests.rs                             |  31 +++
 4 files changed, 260 insertions(+)
 create mode 100644 crates/nu-command/src/filters/flatten.rs

diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index d1639966c0..073f7dbec7 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -54,6 +54,7 @@ pub fn create_default_context() -> EngineState {
             DropNth,
             Each,
             First,
+            Flatten,
             Get,
             Keep,
             KeepUntil,
diff --git a/crates/nu-command/src/filters/flatten.rs b/crates/nu-command/src/filters/flatten.rs
new file mode 100644
index 0000000000..287dee0f51
--- /dev/null
+++ b/crates/nu-command/src/filters/flatten.rs
@@ -0,0 +1,226 @@
+use indexmap::IndexMap;
+use nu_engine::CallExt;
+use nu_protocol::ast::{Call, CellPath, PathMember};
+
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value,
+};
+
+/// The `flatten` filter command.
+#[derive(Clone)]
+pub struct Flatten;
+
+impl Command for Flatten {
+    fn name(&self) -> &str {
+        "flatten"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("flatten")
+            .rest(
+                "rest",
+                SyntaxShape::String,
+                "optionally flatten data by column",
+            )
+            .category(Category::Filters)
+    }
+
+    fn usage(&self) -> &str {
+        "Flatten the table."
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        flatten(engine_state, stack, call, input)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "flatten a table",
+                example: "[[N, u, s, h, e, l, l]] | flatten ",
+                result: None,
+            },
+            Example {
+                description: "flatten a table, get the first item",
+                example: "[[N, u, s, h, e, l, l]] | flatten | first",
+                result: None,
+            },
+            Example {
+                description: "flatten a column having a nested table",
+                example: "[[origin, people]; [Ecuador, ([[name, meal]; ['Andres', 'arepa']])]] | flatten | get meal",
+                result: None,
+            },
+            Example {
+                description: "restrict the flattening by passing column names",
+                example: "[[origin, crate, versions]; [World, ([[name]; ['nu-cli']]), ['0.21', '0.22']]] | flatten versions | last | get versions",
+                result: None,
+            },
+        ]
+    }
+}
+
+/// Reads the optional column names from `call` and flattens every item of `input`.
+fn flatten(
+    engine_state: &EngineState,
+    stack: &mut Stack,
+    call: &Call,
+    input: PipelineData,
+) -> Result<PipelineData, ShellError> {
+    let tag = call.head;
+    let columns: Vec<CellPath> = call.rest(engine_state, stack, 0)?;
+
+    input.flat_map(
+        move |item| flat_value(&columns, &item, tag),
+        engine_state.ctrlc.clone(),
+    )
+}
+
+/// A plain (non-table) list found in a record column, remembered so the record
+/// can be expanded into one output row per entry.
+enum TableInside<'a> {
+    Entries(&'a str, &'a Span, Vec<&'a Value>),
+}
+
+/// Returns `true` when `value` is a list whose elements are all records.
+///
+/// An empty list also counts as a table, because `all` is vacuously true.
+fn is_table(value: &Value) -> bool {
+    match value {
+        Value::List { vals, span: _ } => vals.iter().all(|f| f.as_record().is_ok()),
+        _ => false,
+    }
+}
+
+/// Flattens a single pipeline item into zero or more output values.
+///
+/// * A record gets its inner tables merged into the row as extra columns, and
+///   at most one plain list column expanded into one row per entry. When
+///   `columns` is non-empty, only the named columns are flattened.
+/// * A list that is not a table yields its elements.
+/// * Anything else (a table or a scalar) is passed through unchanged.
+fn flat_value(columns: &[CellPath], item: &Value, _name_tag: Span) -> Vec<Value> {
+    let tag = match item.span() {
+        Ok(x) => x,
+        Err(e) => return vec![Value::Error { error: e }],
+    };
+
+    let (record_cols, record_vals) = match item {
+        Value::Record { cols, vals, .. } => (cols, vals),
+        // A list of anything but records: its elements become the output.
+        Value::List { vals, .. } if !is_table(item) => return vals.to_vec(),
+        // Tables and scalars pass through untouched. Scalars must not be dropped,
+        // otherwise `[1, 2, 3] | flatten` would produce no output at all.
+        _ => return vec![item.clone()],
+    };
+
+    let mut out = IndexMap::<String, Value>::new();
+    let mut a_table = None;
+    let mut tables_explicitly_flattened = 0;
+
+    for (column, value) in record_cols.iter().zip(record_vals.iter()) {
+        let column_requested = columns.iter().find(|c| c.into_string() == *column);
+
+        match value {
+            Value::List { vals: rows, .. } if is_table(value) => {
+                // Columns were named, but not this one: keep the table nested.
+                if column_requested.is_none() && !columns.is_empty() {
+                    if out.contains_key(column) {
+                        out.insert(format!("{}_{}", column, column), value.clone());
+                    } else {
+                        out.insert(column.to_string(), value.clone());
+                    }
+                    continue;
+                }
+
+                // Merge every inner record into the row, prefixing clashing keys
+                // with the name of the column they came from.
+                for row in rows {
+                    if let Ok((inner_cols, inner_vals)) = row.as_record() {
+                        for (k, v) in inner_cols.iter().zip(inner_vals.iter()) {
+                            if out.contains_key(k) {
+                                out.insert(format!("{}_{}", column, k), v.clone());
+                            } else {
+                                out.insert(k.clone(), v.clone());
+                            }
+                        }
+                    }
+                }
+            }
+            Value::List { vals: entries, .. } => {
+                if tables_explicitly_flattened >= 1 && column_requested.is_some() {
+                    return vec![Value::Error {
+                        error: ShellError::UnsupportedInput(
+                            "can only flatten one inner table at the same time. tried flattening more than one column with inner tables... but is flattened already".to_string(),
+                            tag,
+                        ),
+                    }];
+                }
+
+                if !columns.is_empty() {
+                    let cell_path = match column_requested {
+                        Some(x) => match x.members.first() {
+                            Some(PathMember::String { val, span: _ }) => Some(val),
+                            Some(PathMember::Int { val: _, span: _ }) => None,
+                            None => None,
+                        },
+                        None => None,
+                    };
+
+                    if let Some(r) = cell_path {
+                        a_table = Some(TableInside::Entries(r, &tag, entries.iter().collect()));
+                        tables_explicitly_flattened += 1;
+                    } else {
+                        out.insert(column.to_string(), value.clone());
+                    }
+                } else if a_table.is_none() {
+                    // With no columns named, only the first plain list is expanded.
+                    a_table = Some(TableInside::Entries(column, &tag, entries.iter().collect()));
+                } else {
+                    // Only one list can be expanded; keep later ones nested rather than lose them.
+                    out.insert(column.to_string(), value.clone());
+                }
+            }
+            _ => {
+                out.insert(column.to_string(), value.clone());
+            }
+        }
+    }
+
+    // Turns the accumulated columns into an output row.
+    let to_record = |row: &IndexMap<String, Value>| Value::Record {
+        cols: row.keys().cloned().collect(),
+        vals: row.values().cloned().collect(),
+        span: tag,
+    };
+
+    match a_table {
+        // One output row per entry of the expanded list column.
+        Some(TableInside::Entries(column, _, entries)) => entries
+            .into_iter()
+            .map(|entry| {
+                let mut base = out.clone();
+                base.insert(column.to_string(), entry.clone());
+                to_record(&base)
+            })
+            .collect(),
+        None => vec![to_record(&out)],
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(Flatten {})
+    }
+}
diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs
index 1ebb2044b1..5770bc12f3 100644
--- a/crates/nu-command/src/filters/mod.rs
+++ b/crates/nu-command/src/filters/mod.rs
@@ -5,6 +5,7 @@ mod collect;
 mod drop;
 mod each;
 mod first;
+mod flatten;
 mod get;
 mod keep;
 mod last;
@@ -32,6 +33,7 @@ pub use collect::Collect;
 pub use drop::*;
 pub use each::Each;
 pub use first::First;
+pub use flatten::Flatten;
 pub use get::Get;
 pub use keep::*;
 pub use last::Last;
diff --git a/src/tests.rs b/src/tests.rs
index c998cfa63a..9a09af5e6a 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -1261,3 +1261,34 @@ fn comment_multiline() -> TestResult {
         "10",
     )
 }
+
+#[test]
+fn flatten_simple_list() -> TestResult {
+    run_test("[[N, u, s, h, e, l, l]] | flatten", "N\nu\ns\nh\ne\nl\nl")
+}
+
+#[test]
+fn flatten_get_simple_list() -> TestResult {
+    run_test("[[N, u, s, h, e, l, l]] | flatten | get 0", "N")
+}
+
+#[test]
+fn flatten_scalar_list() -> TestResult {
+    run_test("[1, 2, 3] | flatten | get 1", "2")
+}
+
+#[test]
+fn flatten_table_get() -> TestResult {
+    run_test(
+        "[[origin, people]; [Ecuador, ([[name, meal]; ['Andres', 'arepa']])]] | flatten | get meal",
+        "arepa",
+    )
+}
+
+#[test]
+fn flatten_table_column_get_last() -> TestResult {
+    run_test(
+        "[[origin, crate, versions]; [World, ([[name]; ['nu-cli']]), ['0.21', '0.22']]] | flatten versions | last | get versions",
+        "0.22",
+    )
+}