Fix #13546: Outer joins incorrectly removing unmatched rows (#15472)

Fixes #13546 

# Description

Previously, outer joins would remove rows without join columns, since
the "did not match" logic only executed when the row had the join
column.
To solve this, missing join columns are now treated the same as "exists
but did not match" cases. The logic now executes both when the join
column doesn't exist and when it exists but doesn't match, ensuring rows
without join columns are preserved. If the join column is not defined at
all, the previous behavior remains unchanged.

Example:
```
For the tables:
let left_side = [{a: a1 ref: 1} {a: a2 ref: 2} {a: a3}]
let right_side = [[b ref]; [b1 1] [b2 2] [b3 3]]

Running "$left_side | join -l $right_side ref" now outputs:
╭───┬────┬─────┬────╮
│ # │ a  │ ref │ b  │
├───┼────┼─────┼────┤
│ 0 │ a1 │   1 │ b1 │
│ 1 │ a2 │   2 │ b2 │
│ 2 │ a3 │     │    │
╰───┴────┴─────┴────╯
```

# User-Facing Changes

The ```join``` command will behave more similarly to SQL-style joins. In
this case, rows that lack the join column are preserved.

# Tests + Formatting

Added 2 test cases.
fmt + clippy OK.

# After Submitting

I don't believe anything is necessary.
This commit is contained in:
André Lazenga 2025-04-22 00:19:08 +01:00 committed by GitHub
parent bae04352ca
commit 03d455a688
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 89 additions and 32 deletions

View File

@ -255,6 +255,16 @@ fn join_rows(
config: &Config,
span: Span,
) {
if !this
.iter()
.any(|this_record| match this_record.as_record() {
Ok(record) => record.contains(this_join_key),
Err(_) => false,
})
{
// `this` table does not contain the join column; do nothing
return;
}
for this_row in this {
if let Value::Record {
val: this_record, ..
@ -281,39 +291,40 @@ fn join_rows(
result.push(Value::record(record, span))
}
}
} else if !matches!(join_type, JoinType::Inner) {
// `other` table did not contain any rows matching
// `this` row on the join column; emit a single joined
// row with null values for columns not present,
let other_record = other_keys
.iter()
.map(|&key| {
let val = if Some(key.as_ref()) == shared_join_key {
this_record
.get(key)
.cloned()
.unwrap_or_else(|| Value::nothing(span))
} else {
Value::nothing(span)
};
(key.clone(), val)
})
.collect();
let record = match join_type {
JoinType::Inner | JoinType::Right => {
merge_records(&other_record, this_record, shared_join_key)
}
JoinType::Left => {
merge_records(this_record, &other_record, shared_join_key)
}
_ => panic!("not implemented"),
};
result.push(Value::record(record, span))
continue;
}
} // else { a row is missing a value for the join column }
}
if !matches!(join_type, JoinType::Inner) {
// Either `this` row is missing a value for the join column or
// `other` table did not contain any rows matching
// `this` row on the join column; emit a single joined
// row with null values for columns not present
let other_record = other_keys
.iter()
.map(|&key| {
let val = if Some(key.as_ref()) == shared_join_key {
this_record
.get(key)
.cloned()
.unwrap_or_else(|| Value::nothing(span))
} else {
Value::nothing(span)
};
(key.clone(), val)
})
.collect();
let record = match join_type {
JoinType::Inner | JoinType::Right => {
merge_records(&other_record, this_record, shared_join_key)
}
JoinType::Left => merge_records(this_record, &other_record, shared_join_key),
_ => panic!("not implemented"),
};
result.push(Value::record(record, span))
}
};
}
}

View File

@ -195,6 +195,52 @@ fn do_cases_where_result_differs_between_join_types(join_type: &str) {
),
],
),
(
// a row in the left table does not have the join column
(
"[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3}]",
"[{ref: 1 b: 1} {ref: 2 b: 2} {ref: 3 b: 3}]",
"ref",
),
[
("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"),
(
"--left",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null]]",
),
(
"--right",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, 3, 3]]",
),
(
"--outer",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, null, null], [null, 3, 3]]",
),
],
),
(
// a row in the right table does not have the join column
(
"[{a: 1 ref: 1} {a: 2 ref: 2} {a: 3 ref: 3}]",
"[{ref: 1 b: 1} {ref: 2 b: 2} {b: 3}]",
"ref",
),
[
("--inner", "[[a, ref, b]; [1, 1, 1], [2, 2, 2]]"),
(
"--left",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null]]",
),
(
"--right",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [null, null, 3]]",
),
(
"--outer",
"[[a, ref, b]; [1, 1, 1], [2, 2, 2], [3, 3, null], [null, null, 3]]",
),
],
),
] {
for (join_type_, expected) in join_types {
if join_type_ == join_type {