From 5d59234f8ddb830ebe228e56e0c10b1325e75484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Sun, 19 Sep 2021 15:37:54 -0500 Subject: [PATCH] Flexibility updating table's cells. (#4027) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Very often we need to work with tables (say extracted from unstructured data or some kind of final report, timeseries, and the like). Inevitably, we will have columns whose names, or even count, we can't know beforehand. Also, we may end up with certain cells having values we may want to remove as we explore. Here, `update cells` fundamentally goes over every cell in the table coming in and updates the cell's contents with the output of the block passed. Basic example here: ``` > [ [ ty1, t2, ty]; [ 1, a, $nothing] [(wrap), (0..<10), 1Mb] [ 1s, ({}), 1000000] [ $true, $false, ([[]])] ] | update cells { describe } ───┬───────────────────────┬───────────────────────────┬────────── # │ ty1 │ t2 │ ty ───┼───────────────────────┼───────────────────────────┼────────── 0 │ integer │ string │ nothing 1 │ row Column(table of ) │ range[[integer, integer)] │ filesize 2 │ string │ nothing │ integer 3 │ boolean │ boolean │ table of ───┴───────────────────────┴───────────────────────────┴────────── ``` and another one (in the examples) for cases where, say, we have generated a timeseries table and want to replace the zeros with empty strings before saving it out to something like CSV. 
``` > [ [2021-04-16, 2021-06-10, 2021-09-18, 2021-10-15, 2021-11-16, 2021-11-17, 2021-11-18]; [ 37, 0, 0, 0, 37, 0, 0] ] | update cells {|value| if ($value | into int) == 0 { "" } { $value } } ───┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬──────────── # │ 2021-04-16 │ 2021-06-10 │ 2021-09-18 │ 2021-10-15 │ 2021-11-16 │ 2021-11-17 │ 2021-11-18 ───┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼──────────── 0 │ 37 │ │ │ │ 37 │ │ ───┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴──────────── ``` --- crates/nu-command/src/commands/filters/mod.rs | 2 + .../src/commands/filters/update_cells.rs | 158 ++++++++++++++++++ crates/nu-command/src/commands/mod.rs | 6 +- crates/nu-command/src/default_context.rs | 1 + crates/nu-command/src/examples.rs | 8 +- 5 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 crates/nu-command/src/commands/filters/update_cells.rs diff --git a/crates/nu-command/src/commands/filters/mod.rs b/crates/nu-command/src/commands/filters/mod.rs index b93404dfaa..e44c5f7c7d 100644 --- a/crates/nu-command/src/commands/filters/mod.rs +++ b/crates/nu-command/src/commands/filters/mod.rs @@ -36,6 +36,7 @@ mod skip; pub(crate) mod sort_by; mod uniq; mod update; +mod update_cells; mod where_; mod wrap; mod zip_; @@ -78,6 +79,7 @@ pub use skip::{Skip, SkipUntil, SkipWhile}; pub use sort_by::SortBy; pub use uniq::Uniq; pub use update::Command as Update; +pub use update_cells::SubCommand as UpdateCells; pub use where_::Command as Where; pub use wrap::Wrap; pub use zip_::Command as Zip; diff --git a/crates/nu-command/src/commands/filters/update_cells.rs b/crates/nu-command/src/commands/filters/update_cells.rs new file mode 100644 index 0000000000..2032ace89f --- /dev/null +++ b/crates/nu-command/src/commands/filters/update_cells.rs @@ -0,0 +1,158 @@ +use crate::prelude::*; +use nu_engine::run_block; +use nu_engine::WholeStreamCommand; + +use 
nu_errors::ShellError; +use nu_protocol::{ + hir::{CapturedBlock, ExternalRedirection}, + Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value, +}; + +pub struct SubCommand; + +impl WholeStreamCommand for SubCommand { + fn name(&self) -> &str { + "update cells" + } + + fn signature(&self) -> Signature { + Signature::build("update cells").required( + "block", + SyntaxShape::Block, + "the block to run an update for each cell", + ) + } + + fn usage(&self) -> &str { + "Update the table cells." + } + + fn run(&self, args: CommandArgs) -> Result { + update_cells(args) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Update the zero value cells to empty strings.", + example: r#"[ + [2021-04-16, 2021-06-10, 2021-09-18, 2021-10-15, 2021-11-16, 2021-11-17, 2021-11-18]; + [ 37, 0, 0, 0, 37, 0, 0] +] | update cells {|value| + if ($value | into int) == 0 { + "" + } { + $value + } +}"#, + result: Some(vec![UntaggedValue::row(indexmap! { + "2021-04-16".to_string() => UntaggedValue::int(37).into(), + "2021-06-10".to_string() => Value::from(""), + "2021-09-18".to_string() => Value::from(""), + "2021-10-15".to_string() => Value::from(""), + "2021-11-16".to_string() => UntaggedValue::int(37).into(), + "2021-11-17".to_string() => Value::from(""), + "2021-11-18".to_string() => Value::from(""), + }) + .into()]), + }] + } +} + +fn update_cells(args: CommandArgs) -> Result { + let context = Arc::new(args.context.clone()); + let external_redirection = args.call_info.args.external_redirection; + + let block: CapturedBlock = args.req(0)?; + let block = Arc::new(block); + + Ok(args + .input + .flat_map(move |input| { + let block = block.clone(); + let context = context.clone(); + + if input.is_row() { + OutputStream::one(process_cells(block, context, input, external_redirection)) + } else { + match process_input(block, context, input, external_redirection) { + Ok(s) => s, + Err(e) => OutputStream::one(Value::error(e)), + } + } + }) + .into_output_stream()) +} + 
+pub fn process_input( + captured_block: Arc, + context: Arc, + input: Value, + external_redirection: ExternalRedirection, +) -> Result { + let input_clone = input.clone(); + // When we process a row, we need to know whether the block wants to have the contents of the row as + // a parameter to the block (so it gets assigned to a variable that can be used inside the block) or + // if it wants the contents as an input stream + + let input_stream = if !captured_block.block.params.positional.is_empty() { + InputStream::empty() + } else { + vec![Ok(input_clone)].into_iter().into_input_stream() + }; + + context.scope.enter_scope(); + context.scope.add_vars(&captured_block.captured.entries); + + if let Some((arg, _)) = captured_block.block.params.positional.first() { + context.scope.add_var(arg.name(), input); + } else { + context.scope.add_var("$it", input); + } + + let result = run_block( + &captured_block.block, + &context, + input_stream, + external_redirection, + ); + + context.scope.exit_scope(); + + result +} + +pub fn process_cells( + captured_block: Arc, + context: Arc, + input: Value, + external_redirection: ExternalRedirection, +) -> Value { + TaggedDictBuilder::build(input.tag(), |row| { + input.row_entries().for_each(|(column, cell_value)| { + let cell_processed = process_input( + captured_block.clone(), + context.clone(), + cell_value.clone(), + external_redirection, + ) + .map(|it| it.into_vec()) + .map_err(Value::error); + + match cell_processed { + Ok(value) => { + match value.get(0) { + Some(one) => { + row.insert_value(column, one.clone()); + } + None => { + row.insert_untagged(column, UntaggedValue::nothing()); + } + }; + } + Err(reason) => { + row.insert_value(column, reason); + } + } + }); + }) +} diff --git a/crates/nu-command/src/commands/mod.rs b/crates/nu-command/src/commands/mod.rs index 4c53d18279..b6f6cae59e 100644 --- a/crates/nu-command/src/commands/mod.rs +++ b/crates/nu-command/src/commands/mod.rs @@ -112,7 +112,11 @@ mod tests { fn 
only_examples() -> Vec { let mut commands = full_tests(); - commands.extend([whole_stream_command(Zip), whole_stream_command(Flatten)]); + commands.extend([ + whole_stream_command(UpdateCells), + whole_stream_command(Zip), + whole_stream_command(Flatten), + ]); commands } diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index dfec8ff1fb..e8cefef79e 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -134,6 +134,7 @@ pub fn create_default_context(interactive: bool) -> Result Result<(), ShellError> { whole_stream_command(BuildString {}), whole_stream_command(First {}), whole_stream_command(Get {}), + whole_stream_command(If {}), + whole_stream_command(IntoInt {}), whole_stream_command(Keep {}), whole_stream_command(Each {}), whole_stream_command(Last {}), @@ -253,6 +255,8 @@ pub fn test_anchors(cmd: Command) -> Result<(), ShellError> { whole_stream_command(BuildString {}), whole_stream_command(First {}), whole_stream_command(Get {}), + whole_stream_command(If {}), + whole_stream_command(IntoInt {}), whole_stream_command(Keep {}), whole_stream_command(Each {}), whole_stream_command(Last {}),