From 0569a9c92ec5479a45bee761e80479b879777500 Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Wed, 1 Nov 2023 21:25:35 +0100 Subject: [PATCH] Disallow duplicated columns in table literals (#10875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Pretty much all operations/commands in Nushell assume that the column names/keys in a record and thus also in a table (which consists of a list of records) are unique. Access through a string-like cell path should refer to a single column or key/value pair and our output through `table` will only show the last mention of a repeated column name. ```nu [[a a]; [1 2]] ╭─#─┬─a─╮ │ 0 │ 2 │ ╰───┴───╯ ``` While the record parsing already either errors with the `ShellError::ColumnDefinedTwice` or silently overwrites the first occurence with the second occurence, the table literal syntax `[[header columns]; [val1 val2]]` currently still allowed the creation of tables (and internally records with more than one entry with the same name. This is not only confusing, but also breaks some assumptions around how we can efficiently perform operations or in the past lead to outright bugs (e.g. #8431 fixed by #8446). This PR proposes to make this an error. After this change another hole which allowed the construction of records with non-unique column names will be plugged. ## Parts - Fix `SE::ColumnDefinedTwice` error code - Remove previous tests permitting duplicate columns - Deny duplicate column in table literal eval - Deny duplicate column in const eval - Deny duplicate column in `from nuon` # User-Facing Changes `[[a a]; [1 2]]` will now return an error: ``` Error: nu::shell::column_defined_twice × Record field or table column used twice ╭─[entry #2:1:1] 1 │ [[a a]; [1 2]] · ┬ ┬ · │ ╰── field redefined here · ╰── field first defined here ╰──── ``` this may under rare circumstances block code from evaluating. Furthermore this makes some NUON files invalid if they previously contained tables with repeated column names. # Tests + Formatting Added tests for each of the different evaluation paths that materialize tables. --- crates/nu-command/src/formats/from/nuon.rs | 15 ++++++++---- crates/nu-command/tests/commands/reject.rs | 23 ------------------- .../tests/format_conversions/nuon.rs | 12 ++++++++++ crates/nu-engine/src/eval.rs | 13 ++++++++++- crates/nu-protocol/src/eval_const.rs | 16 +++++++++---- crates/nu-protocol/src/shell_error.rs | 4 ++-- src/tests/test_table_operations.rs | 5 ++++ tests/const_/mod.rs | 9 ++++++++ 8 files changed, 63 insertions(+), 34 deletions(-) diff --git a/crates/nu-command/src/formats/from/nuon.rs b/crates/nu-command/src/formats/from/nuon.rs index 0ff034aef..34cfd1d65 100644 --- a/crates/nu-command/src/formats/from/nuon.rs +++ b/crates/nu-command/src/formats/from/nuon.rs @@ -361,13 +361,13 @@ fn convert_to_value( "subexpressions not supported in nuon".into(), expr.span, )), - Expr::Table(headers, cells) => { + Expr::Table(mut headers, cells) => { let mut cols = vec![]; let mut output = vec![]; - for key in headers { - let key_str = match key.expr { + for key in headers.iter_mut() { + let key_str = match &mut key.expr { Expr::String(key_str) => key_str, _ => { return Err(ShellError::OutsideSpannedLabeledError( @@ -379,7 +379,14 @@ fn convert_to_value( } }; - cols.push(key_str); + if let Some(idx) = cols.iter().position(|existing| existing == key_str) { + return Err(ShellError::ColumnDefinedTwice { + second_use: key.span, + first_use: headers[idx].span, + }); + } else { + cols.push(std::mem::take(key_str)); + } } for row in cells { diff --git a/crates/nu-command/tests/commands/reject.rs b/crates/nu-command/tests/commands/reject.rs index 714850d8f..c0393fa81 100644 --- a/crates/nu-command/tests/commands/reject.rs +++ b/crates/nu-command/tests/commands/reject.rs @@ -106,29 +106,6 @@ fn reject_nested_field() { assert_eq!(actual.out, "{a: {c: 5}}"); } -#[test] -fn reject_two_identical_elements() { - let actual = nu!("[[a, a]; [1, 2]] | reject a"); - - assert!(actual.out.contains("record 0 fields")); -} - -#[test] -fn reject_large_vec_with_two_identical_elements() { - let actual = nu!("[[a, b, c, d, e, a]; [1323, 23, 45, 100, 2, 2423]] | reject a"); - - assert!(!actual.out.contains("1323")); - assert!(!actual.out.contains("2423")); - assert!(actual.out.contains('b')); - assert!(actual.out.contains('c')); - assert!(actual.out.contains('d')); - assert!(actual.out.contains('e')); - assert!(actual.out.contains("23")); - assert!(actual.out.contains("45")); - assert!(actual.out.contains("100")); - assert!(actual.out.contains('2')); -} - #[test] fn reject_optional_column() { let actual = nu!("{} | reject foo? | to nuon"); diff --git a/crates/nu-command/tests/format_conversions/nuon.rs b/crates/nu-command/tests/format_conversions/nuon.rs index a7f85f357..7fd3854cd 100644 --- a/crates/nu-command/tests/format_conversions/nuon.rs +++ b/crates/nu-command/tests/format_conversions/nuon.rs @@ -57,6 +57,18 @@ fn to_nuon_table() { assert_eq!(actual.out, "true"); } +#[test] +fn from_nuon_illegal_table() { + let actual = nu!(pipeline( + r#" + "[[repeated repeated]; [abc, xyz], [def, ijk]]" + | from nuon + "# + )); + + assert!(actual.err.contains("column_defined_twice")); +} + #[test] fn to_nuon_bool() { let actual = nu!(pipeline( diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index fbdd4ba78..2b6d3e374 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -571,7 +571,18 @@ pub fn eval_expression( Expr::Table(headers, vals) => { let mut output_headers = vec![]; for expr in headers { - output_headers.push(eval_expression(engine_state, stack, expr)?.as_string()?); + let header = eval_expression(engine_state, stack, expr)?.as_string()?; + if let Some(idx) = output_headers + .iter() + .position(|existing| existing == &header) + { + return Err(ShellError::ColumnDefinedTwice { + second_use: expr.span, + first_use: headers[idx].span, + }); + } else { + output_headers.push(header); + } } let mut output_rows = vec![]; diff --git a/crates/nu-protocol/src/eval_const.rs b/crates/nu-protocol/src/eval_const.rs index ca8297f14..0fce15628 100644 --- a/crates/nu-protocol/src/eval_const.rs +++ b/crates/nu-protocol/src/eval_const.rs @@ -303,10 +303,18 @@ pub fn eval_constant( Expr::Table(headers, vals) => { let mut output_headers = vec![]; for expr in headers { - output_headers.push(value_as_string( - eval_constant(working_set, expr)?, - expr.span, - )?); + let header = value_as_string(eval_constant(working_set, expr)?, expr.span)?; + if let Some(idx) = output_headers + .iter() + .position(|existing| existing == &header) + { + return Err(ShellError::ColumnDefinedTwice { + second_use: expr.span, + first_use: headers[idx].span, + }); + } else { + output_headers.push(header); + } } let mut output_rows = vec![]; diff --git a/crates/nu-protocol/src/shell_error.rs b/crates/nu-protocol/src/shell_error.rs index 2c390632f..e581e68c7 100644 --- a/crates/nu-protocol/src/shell_error.rs +++ b/crates/nu-protocol/src/shell_error.rs @@ -597,8 +597,8 @@ pub enum ShellError { /// ## Resolution /// /// Check the record to ensure you aren't reusing the same field name - #[error("Record field used twice")] - #[diagnostic(code(nu::shell::not_a_list))] + #[error("Record field or table column used twice")] + #[diagnostic(code(nu::shell::column_defined_twice))] ColumnDefinedTwice { #[label = "field redefined here"] second_use: Span, diff --git a/src/tests/test_table_operations.rs b/src/tests/test_table_operations.rs index c14ca5091..43c68f423 100644 --- a/src/tests/test_table_operations.rs +++ b/src/tests/test_table_operations.rs @@ -1,5 +1,10 @@ use crate::tests::{fail_test, run_test, TestResult}; +#[test] +fn illegal_column_duplication() -> TestResult { + fail_test("[[lang, lang]; [nu, 100]]", "column_defined_twice") +} + #[test] fn cell_path_subexpr1() -> TestResult { run_test("([[lang, gems]; [nu, 100]]).lang | get 0", "nu") diff --git a/tests/const_/mod.rs b/tests/const_/mod.rs index feb13d691..9ef7667bc 100644 --- a/tests/const_/mod.rs +++ b/tests/const_/mod.rs @@ -90,6 +90,15 @@ fn const_table() { assert_eq!(actual.out, "table"); } +#[test] +fn const_invalid_table() { + let inp = &["const x = [[a b a]; [10 20 30] [100 200 300]]"]; + + let actual = nu!(&inp.join("; ")); + + assert!(actual.err.contains("column_defined_twice")); +} + #[test] fn const_string() { let inp = &[r#"const x = "abc""#, "$x"];