diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index a4288c685c..84abe6a643 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -61,6 +61,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState { SplitBy, Take, Merge, + MergeDeep, Move, TakeWhile, TakeUntil, diff --git a/crates/nu-command/src/filters/merge/common.rs b/crates/nu-command/src/filters/merge/common.rs new file mode 100644 index 0000000000..219250d113 --- /dev/null +++ b/crates/nu-command/src/filters/merge/common.rs @@ -0,0 +1,174 @@ +use nu_engine::command_prelude::*; + +#[derive(Copy, Clone)] +pub(crate) enum MergeStrategy { + /// Key-value pairs present in lhs and rhs are overwritten by values in rhs + Shallow, + /// Records are merged recursively, otherwise same behavior as shallow + Deep(ListMerge), +} + +#[derive(Copy, Clone)] +pub(crate) enum ListMerge { + /// Lists in lhs are overwritten by lists in rhs + Overwrite, + /// Lists of records are merged element-wise, other lists are overwritten by rhs + Elementwise, + /// All lists are concatenated together, lhs ++ rhs + Append, + /// All lists are concatenated together, rhs ++ lhs + Prepend, +} + +/// Test whether a value is a list of records. +/// +/// This includes tables and non-tables. +fn is_list_of_records(val: &Value) -> bool { + match val { + list @ Value::List { .. } if matches!(list.get_type(), Type::Table { .. }) => true, + // we want to include lists of records, but not lists of mixed types + Value::List { vals, .. } => vals + .iter() + .map(Value::get_type) + .all(|val| matches!(val, Type::Record { .. })), + _ => false, + } +} + +/// Typecheck a merge operation. +/// +/// Ensures that both arguments are records, tables, or lists of non-matching records. +pub(crate) fn typecheck_merge(lhs: &Value, rhs: &Value, head: Span) -> Result<(), ShellError> { + match (lhs.get_type(), rhs.get_type()) { + (Type::Record { .. }, Type::Record { .. }) => Ok(()), + (_, _) if is_list_of_records(lhs) && is_list_of_records(rhs) => Ok(()), + _ => Err(ShellError::PipelineMismatch { + exp_input_type: "input and argument to be both record or both table".to_string(), + dst_span: head, + src_span: lhs.span(), + }), + } +} + +pub(crate) fn do_merge( + lhs: Value, + rhs: Value, + strategy: MergeStrategy, + span: Span, +) -> Result { + match (strategy, lhs, rhs) { + // Propagate errors + (_, Value::Error { error, .. }, _) | (_, _, Value::Error { error, .. }) => Err(*error), + // Shallow merge records + ( + MergeStrategy::Shallow, + Value::Record { val: lhs, .. }, + Value::Record { val: rhs, .. }, + ) => Ok(Value::record( + merge_records(lhs.into_owned(), rhs.into_owned(), strategy, span)?, + span, + )), + // Deep merge records + ( + MergeStrategy::Deep(_), + Value::Record { val: lhs, .. }, + Value::Record { val: rhs, .. }, + ) => Ok(Value::record( + merge_records(lhs.into_owned(), rhs.into_owned(), strategy, span)?, + span, + )), + // Merge lists by appending + ( + MergeStrategy::Deep(ListMerge::Append), + Value::List { vals: lhs, .. }, + Value::List { vals: rhs, .. }, + ) => Ok(Value::list(lhs.into_iter().chain(rhs).collect(), span)), + // Merge lists by prepending + ( + MergeStrategy::Deep(ListMerge::Prepend), + Value::List { vals: lhs, .. }, + Value::List { vals: rhs, .. }, + ) => Ok(Value::list(rhs.into_iter().chain(lhs).collect(), span)), + // Merge lists of records elementwise (tables and non-tables) + // Match on shallow since this might be a top-level table + ( + MergeStrategy::Shallow | MergeStrategy::Deep(ListMerge::Elementwise), + lhs_list @ Value::List { .. }, + rhs_list @ Value::List { .. }, + ) if is_list_of_records(&lhs_list) && is_list_of_records(&rhs_list) => { + let lhs = lhs_list + .into_list() + .expect("Value matched as list above, but is not a list"); + let rhs = rhs_list + .into_list() + .expect("Value matched as list above, but is not a list"); + Ok(Value::list(merge_tables(lhs, rhs, strategy, span)?, span)) + } + // Use rhs value (shallow record merge, overwrite list merge, and general scalar merge) + (_, _, val) => Ok(val), + } +} + +/// Merge right-hand table into left-hand table, element-wise +/// +/// For example: +/// lhs = [{a: 12, b: 34}] +/// rhs = [{a: 56, c: 78}] +/// output = [{a: 56, b: 34, c: 78}] +fn merge_tables( + lhs: Vec, + rhs: Vec, + strategy: MergeStrategy, + span: Span, +) -> Result, ShellError> { + let mut table_iter = rhs.into_iter(); + + lhs.into_iter() + .map(move |inp| match (inp.into_record(), table_iter.next()) { + (Ok(rec), Some(to_merge)) => match to_merge.into_record() { + Ok(to_merge) => Ok(Value::record( + merge_records(rec.to_owned(), to_merge.to_owned(), strategy, span)?, + span, + )), + Err(error) => Ok(Value::error(error, span)), + }, + (Ok(rec), None) => Ok(Value::record(rec, span)), + (Err(error), _) => Ok(Value::error(error, span)), + }) + .collect() +} + +fn merge_records( + mut lhs: Record, + rhs: Record, + strategy: MergeStrategy, + span: Span, +) -> Result { + match strategy { + MergeStrategy::Shallow => { + for (col, rval) in rhs.into_iter() { + lhs.insert(col, rval); + } + } + strategy => { + for (col, rval) in rhs.into_iter() { + // in order to both avoid cloning (possibly nested) record values and maintain the ordering of record keys, we can swap a temporary value into the source record. + // if we were to remove the value, the ordering would be messed up as we might not insert back into the original index + // it's okay to swap a temporary value in, since we know it will be replaced by the end of the function call + // + // use an error here instead of something like null so if this somehow makes it into the output, the bug will be immediately obvious + let failed_error = ShellError::NushellFailed { + msg: "Merge failed to properly replace internal temporary value".to_owned(), + }; + + let value = match lhs.insert(&col, Value::error(failed_error, span)) { + Some(lval) => do_merge(lval, rval, strategy, span)?, + None => rval, + }; + + lhs.insert(col, value); + } + } + } + Ok(lhs) +} diff --git a/crates/nu-command/src/filters/merge/deep.rs b/crates/nu-command/src/filters/merge/deep.rs new file mode 100644 index 0000000000..42d4cefd9d --- /dev/null +++ b/crates/nu-command/src/filters/merge/deep.rs @@ -0,0 +1,157 @@ +use super::common::{do_merge, typecheck_merge, ListMerge, MergeStrategy}; +use nu_engine::command_prelude::*; + +#[derive(Clone)] +pub struct MergeDeep; + +impl Command for MergeDeep { + fn name(&self) -> &str { + "merge deep" + } + + fn description(&self) -> &str { + "Merge the input with a record or table, recursively merging values in matching columns." + } + + fn extra_description(&self) -> &str { + r#"The way that key-value pairs which exist in both the input and the argument are merged depends on their types. + +Scalar values (like numbers and strings) in the input are overwritten by the corresponding value from the argument. +Records in the input are merged similarly to the merge command, but recursing rather than overwriting inner records. + +The way lists and tables are merged is controlled by the `--strategy` flag: + - table: Merges tables element-wise, similarly to the merge command. Non-table lists are overwritten. + - overwrite: Lists and tables are overwritten with their corresponding value from the argument, similarly to scalars. + - append: Lists and tables in the input are appended with the corresponding list from the argument. + - prepend: Lists and tables in the input are prepended with the corresponding list from the argument."# + } + + fn signature(&self) -> nu_protocol::Signature { + Signature::build("merge deep") + .input_output_types(vec![ + (Type::record(), Type::record()), + (Type::table(), Type::table()), + ]) + .required( + "value", + SyntaxShape::OneOf(vec![ + SyntaxShape::Record(vec![]), + SyntaxShape::Table(vec![]), + SyntaxShape::List(SyntaxShape::Any.into()), + ]), + "The new value to merge with.", + ) + .category(Category::Filters) + .named("strategy", SyntaxShape::String, "The list merging strategy to use. One of: table (default), overwrite, append, prepend", Some('s')) + } + + fn examples(&self) -> Vec { + vec![ + Example { + example: "{a: 1, b: {c: 2, d: 3}} | merge deep {b: {d: 4, e: 5}}", + description: "Merge two records recursively", + result: Some(Value::test_record(record! { + "a" => Value::test_int(1), + "b" => Value::test_record(record! { + "c" => Value::test_int(2), + "d" => Value::test_int(4), + "e" => Value::test_int(5), + }) + })), + }, + Example { + example: r#"[{columnA: 0, columnB: [{B1: 1}]}] | merge deep [{columnB: [{B2: 2}]}]"#, + description: "Merge two tables", + result: Some(Value::test_list(vec![Value::test_record(record! { + "columnA" => Value::test_int(0), + "columnB" => Value::test_list(vec![ + Value::test_record(record! { + "B1" => Value::test_int(1), + "B2" => Value::test_int(2), + }) + ]), + })])), + }, + Example { + example: r#"{inner: [{a: 1}, {b: 2}]} | merge deep {inner: [{c: 3}]}"#, + description: "Merge two records and their inner tables", + result: Some(Value::test_record(record! { + "inner" => Value::test_list(vec![ + Value::test_record(record! { + "a" => Value::test_int(1), + "c" => Value::test_int(3), + }), + Value::test_record(record! { + "b" => Value::test_int(2), + }) + ]) + })), + }, + Example { + example: r#"{inner: [{a: 1}, {b: 2}]} | merge deep {inner: [{c: 3}]} --strategy=append"#, + description: "Merge two records, appending their inner tables", + result: Some(Value::test_record(record! { + "inner" => Value::test_list(vec![ + Value::test_record(record! { + "a" => Value::test_int(1), + }), + Value::test_record(record! { + "b" => Value::test_int(2), + }), + Value::test_record(record! { + "c" => Value::test_int(3), + }), + ]) + })), + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let head = call.head; + let merge_value: Value = call.req(engine_state, stack, 0)?; + let strategy_flag: Option = call.get_flag(engine_state, stack, "strategy")?; + let metadata = input.metadata(); + + // collect input before typechecking, so tables are detected as such + let input_span = input.span().unwrap_or(head); + let input = input.into_value(input_span)?; + + let strategy = match strategy_flag.as_deref() { + None | Some("table") => MergeStrategy::Deep(ListMerge::Elementwise), + Some("append") => MergeStrategy::Deep(ListMerge::Append), + Some("prepend") => MergeStrategy::Deep(ListMerge::Prepend), + Some("overwrite") => MergeStrategy::Deep(ListMerge::Overwrite), + Some(_) => { + return Err(ShellError::IncorrectValue { + msg: "The list merging strategy must be one one of: table, overwrite, append, prepend".to_string(), + val_span: call.get_flag_span(stack, "strategy").unwrap_or(head), + call_span: head, + }) + } + }; + + typecheck_merge(&input, &merge_value, head)?; + + let merged = do_merge(input, merge_value, strategy, head)?; + Ok(merged.into_pipeline_data_with_metadata(metadata)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_examples() { + use crate::test_examples; + + test_examples(MergeDeep {}) + } +} diff --git a/crates/nu-command/src/filters/merge.rs b/crates/nu-command/src/filters/merge/merge_.rs similarity index 52% rename from crates/nu-command/src/filters/merge.rs rename to crates/nu-command/src/filters/merge/merge_.rs index 588138fb42..5974f13d60 100644 --- a/crates/nu-command/src/filters/merge.rs +++ b/crates/nu-command/src/filters/merge/merge_.rs @@ -1,3 +1,4 @@ +use super::common::{do_merge, typecheck_merge, MergeStrategy}; use nu_engine::command_prelude::*; #[derive(Clone)] @@ -28,8 +29,10 @@ repeating this process with row 1, and so on."# ]) .required( "value", - // Both this and `update` should have a shape more like | than just . -Leon 2022-10-27 - SyntaxShape::Any, + SyntaxShape::OneOf(vec![ + SyntaxShape::Record(vec![]), + SyntaxShape::Table(vec![]), + ]), "The new value to merge with.", ) .category(Category::Filters) @@ -89,74 +92,17 @@ repeating this process with row 1, and so on."# let merge_value: Value = call.req(engine_state, stack, 0)?; let metadata = input.metadata(); - match (&input, merge_value) { - // table (list of records) - ( - PipelineData::Value(Value::List { .. }, ..) | PipelineData::ListStream { .. }, - Value::List { vals, .. }, - ) => { - let mut table_iter = vals.into_iter(); + // collect input before typechecking, so tables are detected as such + let input_span = input.span().unwrap_or(head); + let input = input.into_value(input_span)?; - let res = - input - .into_iter() - .map(move |inp| match (inp.as_record(), table_iter.next()) { - (Ok(inp), Some(to_merge)) => match to_merge.as_record() { - Ok(to_merge) => Value::record(do_merge(inp, to_merge), head), - Err(error) => Value::error(error, head), - }, - (_, None) => inp, - (Err(error), _) => Value::error(error, head), - }); + typecheck_merge(&input, &merge_value, head)?; - Ok(res.into_pipeline_data_with_metadata( - head, - engine_state.signals().clone(), - metadata, - )) - } - // record - ( - PipelineData::Value(Value::Record { val: inp, .. }, ..), - Value::Record { val: to_merge, .. }, - ) => Ok(Value::record(do_merge(inp, &to_merge), head).into_pipeline_data()), - // Propagate errors in the pipeline - (PipelineData::Value(Value::Error { error, .. }, ..), _) => Err(*error.clone()), - (PipelineData::Value(val, ..), ..) => { - // Only point the "value originates here" arrow at the merge value - // if it was generated from a block. Otherwise, point at the pipeline value. -Leon 2022-10-27 - let span = if val.span() == Span::test_data() { - Span::new(head.start, head.start) - } else { - val.span() - }; - - Err(ShellError::PipelineMismatch { - exp_input_type: "input, and argument, to be both record or both table" - .to_string(), - dst_span: head, - src_span: span, - }) - } - _ => Err(ShellError::PipelineMismatch { - exp_input_type: "input, and argument, to be both record or both table".to_string(), - dst_span: head, - src_span: Span::new(head.start, head.start), - }), - } + let merged = do_merge(input, merge_value, MergeStrategy::Shallow, head)?; + Ok(merged.into_pipeline_data_with_metadata(metadata)) } } -// TODO: rewrite to mutate the input record -fn do_merge(input_record: &Record, to_merge_record: &Record) -> Record { - let mut result = input_record.clone(); - - for (col, val) in to_merge_record { - result.insert(col, val.clone()); - } - result -} - #[cfg(test)] mod test { use super::*; diff --git a/crates/nu-command/src/filters/merge/mod.rs b/crates/nu-command/src/filters/merge/mod.rs new file mode 100644 index 0000000000..36a457b221 --- /dev/null +++ b/crates/nu-command/src/filters/merge/mod.rs @@ -0,0 +1,6 @@ +mod common; +pub mod deep; +pub mod merge_; + +pub use deep::MergeDeep; +pub use merge_::Merge; diff --git a/crates/nu-command/src/filters/mod.rs b/crates/nu-command/src/filters/mod.rs index 8eac0fc70b..373314fb18 100644 --- a/crates/nu-command/src/filters/mod.rs +++ b/crates/nu-command/src/filters/mod.rs @@ -87,6 +87,7 @@ pub use last::Last; pub use length::Length; pub use lines::Lines; pub use merge::Merge; +pub use merge::MergeDeep; pub use move_::Move; pub use par_each::ParEach; pub use prepend::Prepend; diff --git a/crates/nu-command/tests/commands/merge_deep.rs b/crates/nu-command/tests/commands/merge_deep.rs new file mode 100644 index 0000000000..a9cc62aba8 --- /dev/null +++ b/crates/nu-command/tests/commands/merge_deep.rs @@ -0,0 +1,144 @@ +use nu_test_support::nu; + +#[test] +fn table_strategy_table() { + assert_eq!( + nu!( + "{} | merge deep {} | to nuon", + "{inner: [{a: 1}, {b: 2}]}", + "{inner: [{c: 3}]}" + ) + .out, + "{inner: [{a: 1, c: 3}, {b: 2}]}" + ) +} + +#[test] +fn table_strategy_list() { + assert_eq!( + nu!( + "{} | merge deep {} | to nuon", + "{a: [1, 2, 3]}", + "{a: [4, 5, 6]}" + ) + .out, + "{a: [4, 5, 6]}" + ) +} + +#[test] +fn overwrite_strategy_table() { + assert_eq!( + nu!( + "{} | merge deep --strategy=overwrite {} | to nuon", + "{inner: [{a: 1}, {b: 2}]}", + "{inner: [[c]; [3]]}" + ) + .out, + "{inner: [[c]; [3]]}" + ) +} + +#[test] +fn overwrite_strategy_list() { + assert_eq!( + nu!( + "{} | merge deep --strategy=overwrite {} | to nuon", + "{a: [1, 2, 3]}", + "{a: [4, 5, 6]}" + ) + .out, + "{a: [4, 5, 6]}" + ) +} + +#[test] +fn append_strategy_table() { + assert_eq!( + nu!( + "{} | merge deep --strategy=append {} | to nuon", + "{inner: [{a: 1}, {b: 2}]}", + "{inner: [{c: 3}]}" + ) + .out, + "{inner: [{a: 1}, {b: 2}, {c: 3}]}" + ) +} + +#[test] +fn append_strategy_list() { + assert_eq!( + nu!( + "{} | merge deep --strategy=append {} | to nuon", + "{inner: [1, 2, 3]}", + "{inner: [4, 5, 6]}" + ) + .out, + "{inner: [1, 2, 3, 4, 5, 6]}" + ) +} + +#[test] +fn prepend_strategy_table() { + assert_eq!( + nu!( + "{} | merge deep --strategy=prepend {} | to nuon", + "{inner: [{a: 1}, {b: 2}]}", + "{inner: [{c: 3}]}" + ) + .out, + "{inner: [{c: 3}, {a: 1}, {b: 2}]}" + ) +} + +#[test] +fn prepend_strategy_list() { + assert_eq!( + nu!( + "{} | merge deep --strategy=prepend {} | to nuon", + "{inner: [1, 2, 3]}", + "{inner: [4, 5, 6]}" + ) + .out, + "{inner: [4, 5, 6, 1, 2, 3]}" + ) +} + +#[test] +fn record_nested_with_overwrite() { + assert_eq!( + nu!( + "{} | merge deep {} | to nuon", + "{a: {b: {c: {d: 123, e: 456}}}}", + "{a: {b: {c: {e: 654, f: 789}}}}" + ) + .out, + "{a: {b: {c: {d: 123, e: 654, f: 789}}}}" + ) +} + +#[test] +fn single_row_table() { + assert_eq!( + nu!( + "{} | merge deep {} | to nuon", + "[[a]; [{foo: [1, 2, 3]}]]", + "[[a]; [{bar: [4, 5, 6]}]]" + ) + .out, + "[[a]; [{foo: [1, 2, 3], bar: [4, 5, 6]}]]" + ) +} + +#[test] +fn multi_row_table() { + assert_eq!( + nu!( + "{} | merge deep {} | to nuon ", + "[[a b]; [{inner: {foo: abc}} {inner: {baz: ghi}}]]", + "[[a b]; [{inner: {bar: def}} {inner: {qux: jkl}}]]" + ) + .out, + "[[a, b]; [{inner: {foo: abc, bar: def}}, {inner: {baz: ghi, qux: jkl}}]]" + ) +} diff --git a/crates/nu-command/tests/commands/mod.rs b/crates/nu-command/tests/commands/mod.rs index a8ffdeb917..3de3888823 100644 --- a/crates/nu-command/tests/commands/mod.rs +++ b/crates/nu-command/tests/commands/mod.rs @@ -66,6 +66,7 @@ mod ls; mod match_; mod math; mod merge; +mod merge_deep; mod mktemp; mod move_; mod mut_;