Disallow duplicated columns in table literals (#10875)

# Description
Pretty much all operations/commands in Nushell assume that the column
names/keys in a record and thus also in a table (which consists of a
list of records) are unique.
Access through a string-like cell path should refer to a single column
or key/value pair and our output through `table` will only show the last
mention of a repeated column name.

```nu
[[a a]; [1 2]]
╭─#─┬─a─╮
│ 0 │ 2 │
╰───┴───╯
```

While the record parsing already either errors with the
`ShellError::ColumnDefinedTwice` or silently overwrites the first
occurence with the second occurence, the table literal syntax `[[header
columns]; [val1 val2]]` currently still allowed the creation of tables
(and internally records with more than one entry with the same name.

This is not only confusing, but also breaks some assumptions around how
we can efficiently perform operations or in the past lead to outright
bugs (e.g. #8431 fixed by #8446).

This PR proposes to make this an error.
After this change another hole which allowed the construction of records
with non-unique column names will be plugged.

## Parts
- Fix `SE::ColumnDefinedTwice` error code
- Remove previous tests permitting duplicate columns
- Deny duplicate column in table literal eval
- Deny duplicate column in const eval
- Deny duplicate column in `from nuon`

# User-Facing Changes
`[[a a]; [1 2]]` will now return an error:

```
Error: nu:🐚:column_defined_twice

  × Record field or table column used twice
   ╭─[entry #2:1:1]
 1 │ [[a a]; [1 2]]
   ·   ┬ ┬
   ·   │ ╰── field redefined here
   ·   ╰── field first defined here
   ╰────
```

this may under rare circumstances block code from evaluating.

Furthermore this makes some NUON files invalid if they previously
contained tables with repeated column names.

# Tests + Formatting
Added tests for each of the different evaluation paths that materialize
tables.
This commit is contained in:
Stefan Holderbach 2023-11-01 21:25:35 +01:00 committed by GitHub
parent c1ca10ffd1
commit 0569a9c92e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 63 additions and 34 deletions

View File

@ -361,13 +361,13 @@ fn convert_to_value(
"subexpressions not supported in nuon".into(), "subexpressions not supported in nuon".into(),
expr.span, expr.span,
)), )),
Expr::Table(headers, cells) => { Expr::Table(mut headers, cells) => {
let mut cols = vec![]; let mut cols = vec![];
let mut output = vec![]; let mut output = vec![];
for key in headers { for key in headers.iter_mut() {
let key_str = match key.expr { let key_str = match &mut key.expr {
Expr::String(key_str) => key_str, Expr::String(key_str) => key_str,
_ => { _ => {
return Err(ShellError::OutsideSpannedLabeledError( return Err(ShellError::OutsideSpannedLabeledError(
@ -379,7 +379,14 @@ fn convert_to_value(
} }
}; };
cols.push(key_str); if let Some(idx) = cols.iter().position(|existing| existing == key_str) {
return Err(ShellError::ColumnDefinedTwice {
second_use: key.span,
first_use: headers[idx].span,
});
} else {
cols.push(std::mem::take(key_str));
}
} }
for row in cells { for row in cells {

View File

@ -106,29 +106,6 @@ fn reject_nested_field() {
assert_eq!(actual.out, "{a: {c: 5}}"); assert_eq!(actual.out, "{a: {c: 5}}");
} }
#[test]
fn reject_two_identical_elements() {
let actual = nu!("[[a, a]; [1, 2]] | reject a");
assert!(actual.out.contains("record 0 fields"));
}
#[test]
fn reject_large_vec_with_two_identical_elements() {
let actual = nu!("[[a, b, c, d, e, a]; [1323, 23, 45, 100, 2, 2423]] | reject a");
assert!(!actual.out.contains("1323"));
assert!(!actual.out.contains("2423"));
assert!(actual.out.contains('b'));
assert!(actual.out.contains('c'));
assert!(actual.out.contains('d'));
assert!(actual.out.contains('e'));
assert!(actual.out.contains("23"));
assert!(actual.out.contains("45"));
assert!(actual.out.contains("100"));
assert!(actual.out.contains('2'));
}
#[test] #[test]
fn reject_optional_column() { fn reject_optional_column() {
let actual = nu!("{} | reject foo? | to nuon"); let actual = nu!("{} | reject foo? | to nuon");

View File

@ -57,6 +57,18 @@ fn to_nuon_table() {
assert_eq!(actual.out, "true"); assert_eq!(actual.out, "true");
} }
#[test]
fn from_nuon_illegal_table() {
let actual = nu!(pipeline(
r#"
"[[repeated repeated]; [abc, xyz], [def, ijk]]"
| from nuon
"#
));
assert!(actual.err.contains("column_defined_twice"));
}
#[test] #[test]
fn to_nuon_bool() { fn to_nuon_bool() {
let actual = nu!(pipeline( let actual = nu!(pipeline(

View File

@ -571,7 +571,18 @@ pub fn eval_expression(
Expr::Table(headers, vals) => { Expr::Table(headers, vals) => {
let mut output_headers = vec![]; let mut output_headers = vec![];
for expr in headers { for expr in headers {
output_headers.push(eval_expression(engine_state, stack, expr)?.as_string()?); let header = eval_expression(engine_state, stack, expr)?.as_string()?;
if let Some(idx) = output_headers
.iter()
.position(|existing| existing == &header)
{
return Err(ShellError::ColumnDefinedTwice {
second_use: expr.span,
first_use: headers[idx].span,
});
} else {
output_headers.push(header);
}
} }
let mut output_rows = vec![]; let mut output_rows = vec![];

View File

@ -303,10 +303,18 @@ pub fn eval_constant(
Expr::Table(headers, vals) => { Expr::Table(headers, vals) => {
let mut output_headers = vec![]; let mut output_headers = vec![];
for expr in headers { for expr in headers {
output_headers.push(value_as_string( let header = value_as_string(eval_constant(working_set, expr)?, expr.span)?;
eval_constant(working_set, expr)?, if let Some(idx) = output_headers
expr.span, .iter()
)?); .position(|existing| existing == &header)
{
return Err(ShellError::ColumnDefinedTwice {
second_use: expr.span,
first_use: headers[idx].span,
});
} else {
output_headers.push(header);
}
} }
let mut output_rows = vec![]; let mut output_rows = vec![];

View File

@ -597,8 +597,8 @@ pub enum ShellError {
/// ## Resolution /// ## Resolution
/// ///
/// Check the record to ensure you aren't reusing the same field name /// Check the record to ensure you aren't reusing the same field name
#[error("Record field used twice")] #[error("Record field or table column used twice")]
#[diagnostic(code(nu::shell::not_a_list))] #[diagnostic(code(nu::shell::column_defined_twice))]
ColumnDefinedTwice { ColumnDefinedTwice {
#[label = "field redefined here"] #[label = "field redefined here"]
second_use: Span, second_use: Span,

View File

@ -1,5 +1,10 @@
use crate::tests::{fail_test, run_test, TestResult}; use crate::tests::{fail_test, run_test, TestResult};
#[test]
fn illegal_column_duplication() -> TestResult {
fail_test("[[lang, lang]; [nu, 100]]", "column_defined_twice")
}
#[test] #[test]
fn cell_path_subexpr1() -> TestResult { fn cell_path_subexpr1() -> TestResult {
run_test("([[lang, gems]; [nu, 100]]).lang | get 0", "nu") run_test("([[lang, gems]; [nu, 100]]).lang | get 0", "nu")

View File

@ -90,6 +90,15 @@ fn const_table() {
assert_eq!(actual.out, "table<a: int, b: int, c: int>"); assert_eq!(actual.out, "table<a: int, b: int, c: int>");
} }
#[test]
fn const_invalid_table() {
let inp = &["const x = [[a b a]; [10 20 30] [100 200 300]]"];
let actual = nu!(&inp.join("; "));
assert!(actual.err.contains("column_defined_twice"));
}
#[test] #[test]
fn const_string() { fn const_string() {
let inp = &[r#"const x = "abc""#, "$x"]; let inp = &[r#"const x = "abc""#, "$x"];