From 03d455a68885fc95188cbd74f7daf2a57a47b783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Lazenga?= Date: Tue, 22 Apr 2025 00:19:08 +0100 Subject: [PATCH] Fix #13546: Outer joins incorrectly removing unmatched rows (#15472) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #13546 # Description Previously, outer joins would remove rows without join columns, since the "did not match" logic only executed when the row had the join column. To solve this, missing join columns are now treated the same as "exists but did not match" cases. The logic now executes both when the join column doesn't exist and when it exists but doesn't match, ensuring rows without join columns are preserved. If the join column is not defined at all, the previous behavior remains unchanged. Example: ``` For the tables: let left_side = [{a: a1 ref: 1} {a: a2 ref: 2} {a: a3}] let right_side = [[b ref]; [b1 1] [b2 2] [b3 3]] Running "$left_side | join -l $right_side ref" now outputs: ╭───┬────┬─────┬────╮ │ # │ a │ ref │ b │ ├───┼────┼─────┼────┤ │ 0 │ a1 │ 1 │ b1 │ │ 1 │ a2 │ 2 │ b2 │ │ 2 │ a3 │ │ │ ╰───┴────┴─────┴────╯ ``` # User-Facing Changes The ```join``` command will behave more similarly to SQL-style joins. In this case, rows that lack the join column are preserved. # Tests + Formatting Added 2 test cases. fmt + clippy OK. # After Submitting I don't believe anything is necessary. --- crates/nu-command/src/filters/join.rs | 75 ++++++++++++++---------- crates/nu-command/tests/commands/join.rs | 46 +++++++++++++++ 2 files changed, 89 insertions(+), 32 deletions(-) diff --git a/crates/nu-command/src/filters/join.rs b/crates/nu-command/src/filters/join.rs index 7f3bbae238..1dea038347 100644 --- a/crates/nu-command/src/filters/join.rs +++ b/crates/nu-command/src/filters/join.rs @@ -255,6 +255,16 @@ fn join_rows( config: &Config, span: Span, ) { + if !this + .iter() + .any(|this_record| match this_record.as_record() { + Ok(record) => record.contains(this_join_key), + Err(_) => false, + }) + { + // `this` table does not contain the join column; do nothing + return; + } for this_row in this { if let Value::Record { val: this_record, .. @@ -281,39 +291,40 @@ fn join_rows( result.push(Value::record(record, span)) } } - } else if !matches!(join_type, JoinType::Inner) { - // `other` table did not contain any rows matching - // `this` row on the join column; emit a single joined - // row with null values for columns not present, - let other_record = other_keys - .iter() - .map(|&key| { - let val = if Some(key.as_ref()) == shared_join_key { - this_record - .get(key) - .cloned() - .unwrap_or_else(|| Value::nothing(span)) - } else { - Value::nothing(span) - }; - - (key.clone(), val) - }) - .collect(); - - let record = match join_type { - JoinType::Inner | JoinType::Right => { - merge_records(&other_record, this_record, shared_join_key) - } - JoinType::Left => { - merge_records(this_record, &other_record, shared_join_key) - } - _ => panic!("not implemented"), - }; - - result.push(Value::record(record, span)) + continue; } - } // else { a row is missing a value for the join column } + } + if !matches!(join_type, JoinType::Inner) { + // Either `this` row is missing a value for the join column or + // `other` table did not contain any rows matching + // `this` row on the join column; emit a single joined + // row with null values for columns not present + let other_record = other_keys + .iter() + .map(|&key| { + let val = if Some(key.as_ref()) == shared_join_key { + this_record + .get(key) + .cloned() + .unwrap_or_else(|| Value::nothing(span)) + } else { + Value::nothing(span) + }; + + (key.clone(), val) + }) + .collect(); + + let record = match join_type { + JoinType::Inner | JoinType::Right => { + merge_records(&other_record, this_record, shared_join_key) + } + JoinType::Left => merge_records(this_record, &other_record, shared_join_key), + _ => panic!("not implemented"), + }; + + result.push(Value::record(record, span)) + } }; } } diff --git a/crates/nu-command/tests/commands/join.rs b/crates/nu-command/tests/commands/join.rs index a3c2bad11e..4483debce7 100644 --- a/crates/nu-command/tests/commands/join.rs +++ b/crates/nu-command/tests/commands/join.rs @@ -195,6 +195,52 @@ fn do_cases_where_result_differs_between_join_types(join_type: &str) { ), ], ), + ( + // a row in the left table does not have the join column + ( + "[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3}]", + "[{ref: 1 b: 1} {ref: 2 b: 2} {ref: 3 b: 3}]", + "ref", + ), + [ + ("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"), + ( + "--left", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null]]", + ), + ( + "--right", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, 3, 3]]", + ), + ( + "--outer", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null], [null, 3, 3]]", + ), + ], + ), + ( + // a row in the right table does not have the join column + ( + "[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3 ref: 3}]", + "[{ref: 1 b: 1} {ref: 2 b: 2} {b: 3}]", + "ref", + ), + [ + ("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"), + ( + "--left", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null]]", + ), + ( + "--right", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, null, 3]]", + ), + ( + "--outer", + "[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null], [null, null, 3]]", + ), + ], + ), ] { for (join_type_, expected) in join_types { if join_type_ == join_type {