Flexibility updating table's cells. (#4027)

Very often we need to work with tables (say extracted from unstructured data or some
kind of final report, timeseries, and the like).

Inevitably, we will end up with columns whose names (or even whose number) we can't know
beforehand.

Also, we may end up with certain cells having values we may want to remove as we explore.

Here, `update cells` fundamentally goes over every cell in the table coming in and updates
the cell's contents with the output of the block passed. Basic example here:

```
> [

    [   ty1,       t2,       ty];

    [     1,        a, $nothing]
    [(wrap), (0..<10),      1Mb]
    [    1s,     ({}),  1000000]
    [ $true,   $false,   ([[]])]

] | update cells { describe }

───┬───────────────────────┬───────────────────────────┬──────────
 # │          ty1          │            t2             │    ty
───┼───────────────────────┼───────────────────────────┼──────────
 0 │ integer               │ string                    │ nothing
 1 │ row Column(table of ) │ range[[integer, integer)] │ filesize
 2 │ string                │ nothing                   │ integer
 3 │ boolean               │ boolean                   │ table of
───┴───────────────────────┴───────────────────────────┴──────────
```

Another one (included in the command's examples) covers cases where, say, we have generated a
timeseries table and want to replace the zeros with empty strings before saving it out to
something like CSV.

```
> [
    [2021-04-16, 2021-06-10, 2021-09-18, 2021-10-15, 2021-11-16, 2021-11-17, 2021-11-18];
    [        37,          0,          0,          0,         37,          0,          0]
] | update cells {|value|
  if ($value | into int) == 0 {
    ""
  } {
    $value
  }
}

───┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────┬────────────
 # │ 2021-04-16 │ 2021-06-10 │ 2021-09-18 │ 2021-10-15 │ 2021-11-16 │ 2021-11-17 │ 2021-11-18
───┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────┼────────────
 0 │         37 │            │            │            │         37 │            │
───┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────┴────────────
```
This commit is contained in:
Andrés N. Robalino 2021-09-19 15:37:54 -05:00 committed by GitHub
parent 4f7b423f36
commit 5d59234f8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 172 additions and 3 deletions

View File

@ -36,6 +36,7 @@ mod skip;
pub(crate) mod sort_by;
mod uniq;
mod update;
mod update_cells;
mod where_;
mod wrap;
mod zip_;
@ -78,6 +79,7 @@ pub use skip::{Skip, SkipUntil, SkipWhile};
pub use sort_by::SortBy;
pub use uniq::Uniq;
pub use update::Command as Update;
pub use update_cells::SubCommand as UpdateCells;
pub use where_::Command as Where;
pub use wrap::Wrap;
pub use zip_::Command as Zip;

View File

@ -0,0 +1,158 @@
use crate::prelude::*;
use nu_engine::run_block;
use nu_engine::WholeStreamCommand;
use nu_errors::ShellError;
use nu_protocol::{
hir::{CapturedBlock, ExternalRedirection},
Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value,
};
pub struct SubCommand;
impl WholeStreamCommand for SubCommand {
fn name(&self) -> &str {
"update cells"
}
fn signature(&self) -> Signature {
Signature::build("update cells").required(
"block",
SyntaxShape::Block,
"the block to run an update for each cell",
)
}
fn usage(&self) -> &str {
"Update the table cells."
}
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
update_cells(args)
}
fn examples(&self) -> Vec<Example> {
vec![Example {
description: "Update the zero value cells to empty strings.",
example: r#"[
[2021-04-16, 2021-06-10, 2021-09-18, 2021-10-15, 2021-11-16, 2021-11-17, 2021-11-18];
[ 37, 0, 0, 0, 37, 0, 0]
] | update cells {|value|
if ($value | into int) == 0 {
""
} {
$value
}
}"#,
result: Some(vec![UntaggedValue::row(indexmap! {
"2021-04-16".to_string() => UntaggedValue::int(37).into(),
"2021-06-10".to_string() => Value::from(""),
"2021-09-18".to_string() => Value::from(""),
"2021-10-15".to_string() => Value::from(""),
"2021-11-16".to_string() => UntaggedValue::int(37).into(),
"2021-11-17".to_string() => Value::from(""),
"2021-11-18".to_string() => Value::from(""),
})
.into()]),
}]
}
}
fn update_cells(args: CommandArgs) -> Result<OutputStream, ShellError> {
let context = Arc::new(args.context.clone());
let external_redirection = args.call_info.args.external_redirection;
let block: CapturedBlock = args.req(0)?;
let block = Arc::new(block);
Ok(args
.input
.flat_map(move |input| {
let block = block.clone();
let context = context.clone();
if input.is_row() {
OutputStream::one(process_cells(block, context, input, external_redirection))
} else {
match process_input(block, context, input, external_redirection) {
Ok(s) => s,
Err(e) => OutputStream::one(Value::error(e)),
}
}
})
.into_output_stream())
}
/// Runs the captured block once for a single value and returns the block's output.
///
/// How `input` reaches the block depends on the block's declared parameters:
///
/// * if the block declares at least one positional parameter, `input` is bound to the
///   first parameter's name and the block is given an empty input stream;
/// * otherwise `input` is bound to `$it` and is additionally fed to the block as a
///   one-element input stream.
///
/// The block's captured variables are added to a scope entered just for this call; that
/// scope is exited before returning, whether the block succeeded or not.
///
/// # Errors
///
/// Returns whatever error `run_block` reports for the block.
pub fn process_input(
    captured_block: Arc<CapturedBlock>,
    context: Arc<EvaluationContext>,
    input: Value,
    external_redirection: ExternalRedirection,
) -> Result<OutputStream, ShellError> {
    let first_param = captured_block.block.params.positional.first();

    // The value is copied only when the block reads it from its input stream; when the
    // block takes it as a parameter the value is moved straight into the scope below, so
    // no per-call clone is paid on that path.
    let input_stream = if first_param.is_some() {
        InputStream::empty()
    } else {
        vec![Ok(input.clone())].into_iter().into_input_stream()
    };

    context.scope.enter_scope();
    context.scope.add_vars(&captured_block.captured.entries);

    if let Some((arg, _)) = first_param {
        context.scope.add_var(arg.name(), input);
    } else {
        context.scope.add_var("$it", input);
    }

    let result = run_block(
        &captured_block.block,
        &context,
        input_stream,
        external_redirection,
    );

    // Leave the scope before handing the result (success or error) back to the caller.
    context.scope.exit_scope();

    result
}
pub fn process_cells(
captured_block: Arc<CapturedBlock>,
context: Arc<EvaluationContext>,
input: Value,
external_redirection: ExternalRedirection,
) -> Value {
TaggedDictBuilder::build(input.tag(), |row| {
input.row_entries().for_each(|(column, cell_value)| {
let cell_processed = process_input(
captured_block.clone(),
context.clone(),
cell_value.clone(),
external_redirection,
)
.map(|it| it.into_vec())
.map_err(Value::error);
match cell_processed {
Ok(value) => {
match value.get(0) {
Some(one) => {
row.insert_value(column, one.clone());
}
None => {
row.insert_untagged(column, UntaggedValue::nothing());
}
};
}
Err(reason) => {
row.insert_value(column, reason);
}
}
});
})
}

View File

@ -112,7 +112,11 @@ mod tests {
fn only_examples() -> Vec<Command> {
let mut commands = full_tests();
commands.extend([whole_stream_command(Zip), whole_stream_command(Flatten)]);
commands.extend([
whole_stream_command(UpdateCells),
whole_stream_command(Zip),
whole_stream_command(Flatten),
]);
commands
}

View File

@ -134,6 +134,7 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
whole_stream_command(Select),
whole_stream_command(Get),
whole_stream_command(Update),
whole_stream_command(UpdateCells),
whole_stream_command(Insert),
whole_stream_command(Into),
whole_stream_command(IntoBinary),

View File

@ -21,8 +21,8 @@ use crate::commands::{
};
use crate::commands::{
Append, BuildString, Collect, Each, Echo, First, Get, Keep, Last, Let, Math, MathMode, Nth,
Select, StrCollect, Wrap,
Append, BuildString, Collect, Each, Echo, First, Get, If, IntoInt, Keep, Last, Let, Math,
MathMode, Nth, Select, StrCollect, Wrap,
};
use nu_engine::{run_block, whole_stream_command, Command, EvaluationContext, WholeStreamCommand};
use nu_stream::InputStream;
@ -41,6 +41,8 @@ pub fn test_examples(cmd: Command) -> Result<(), ShellError> {
whole_stream_command(BuildString {}),
whole_stream_command(First {}),
whole_stream_command(Get {}),
whole_stream_command(If {}),
whole_stream_command(IntoInt {}),
whole_stream_command(Keep {}),
whole_stream_command(Each {}),
whole_stream_command(Last {}),
@ -253,6 +255,8 @@ pub fn test_anchors(cmd: Command) -> Result<(), ShellError> {
whole_stream_command(BuildString {}),
whole_stream_command(First {}),
whole_stream_command(Get {}),
whole_stream_command(If {}),
whole_stream_command(IntoInt {}),
whole_stream_command(Keep {}),
whole_stream_command(Each {}),
whole_stream_command(Last {}),