Optional members in cell paths: Attempt 2 (#8379)

This is a follow up from https://github.com/nushell/nushell/pull/7540. Please provide feedback if you have the time!

## Summary

This PR lets you use `?` to indicate that a member in a cell path is optional and Nushell should return `null` if that member cannot be accessed.

Unlike the previous PR, `?` is now a _postfix_ modifier for cell path members. A cell path of `.foo?.bar` means that `foo` is optional and `bar` is not.

`?` does _not_ suppress all errors; it is intended to help in situations where data has "holes", i.e. the data types are correct but something is missing. Type mismatches (like trying to do a string path access on a date) will still fail.

### Record Examples

```bash
{ foo: 123 }.foo # returns 123
{ foo: 123 }.bar # errors
{ foo: 123 }.bar? # returns null

{ foo: 123 } | get bar # errors
{ foo: 123 } | get bar? # returns null

{ foo: 123 }.bar.baz # errors
{ foo: 123 }.bar?.baz # errors because `baz` is not present on the result from `bar?`
{ foo: 123 }.bar.baz? # errors
{ foo: 123 }.bar?.baz? # returns null
```

### List Examples

```
〉[{foo: 1} {foo: 2} {}].foo
Error: nu::shell::column_not_found

  × Cannot find column
   ╭─[entry #30:1:1]
 1 │ [{foo: 1} {foo: 2} {}].foo
   ·                    ─┬  ─┬─
   ·                     │   ╰── cannot find column 'foo'
   ·                     ╰── value originates here
   ╰────

〉[{foo: 1} {foo: 2} {}].foo?
╭───┬───╮
│ 0 │ 1 │
│ 1 │ 2 │
│ 2 │   │
╰───┴───╯

〉[{foo: 1} {foo: 2} {}].foo?.2 | describe
nothing

〉[a b c].4? | describe
nothing

〉[{foo: 1} {foo: 2} {}] | where foo? == 1
╭───┬─────╮
│ # │ foo │
├───┼─────┤
│ 0 │   1 │
╰───┴─────╯
```

# Breaking changes

1. Column names with `?` in them now need to be quoted.
2. The `-i`/`--ignore-errors` flag has been removed from `get` and `select`
   1. After this PR, most `get` error handling can be done with `?` and/or `try`/`catch`.
3. Cell path accesses like this no longer work without a `?`:

```bash
〉[{a:1 b:2} {a:3}].b.0
2
```

We had some clever code that was able to recognize that since we only want row `0`, it's OK if other rows are missing column `b`. I removed that because it's tricky to maintain, and now that query needs to be written like:

```bash
〉[{a:1 b:2} {a:3}].b?.0
2
```

I think the regression is acceptable for now. I plan to do more work in the future to enable streaming of cell path accesses, and when that happens I'll be able to make `.b.0` work again.
2025-08-14 23:22:32 +02:00 · 2023-03-15 20:50:58 -07:00
parent d3be5ec750
commit 21b84a6d65
32 changed files with 510 additions and 277 deletions
--- a/src/tests/test_cell_path.rs
+++ b/src/tests/test_cell_path.rs
@ -17,6 +17,21 @@ fn record_single_field_success() -> TestResult {
    run_test("{foo: 'bar'}.foo == 'bar'", "true")
 }

+#[test]
+fn record_single_field_optional_success() -> TestResult {
+    run_test("{foo: 'bar'}.foo? == 'bar'", "true")
+}
+
+#[test]
+fn get_works_with_cell_path_success() -> TestResult {
+    run_test("{foo: 'bar'} | get foo?", "bar")
+}
+
+#[test]
+fn get_works_with_cell_path_missing_data() -> TestResult {
+    run_test("{foo: 'bar'} | get foobar? | to nuon", "null")
+}
+
 #[test]
 fn record_single_field_failure() -> TestResult {
    fail_test("{foo: 'bar'}.foobar", "")
@ -27,6 +42,21 @@ fn record_int_failure() -> TestResult {
    fail_test("{foo: 'bar'}.3", "")
 }

+#[test]
+fn record_single_field_optional() -> TestResult {
+    run_test("{foo: 'bar'}.foobar?  | to nuon", "null")
+}
+
+#[test]
+fn record_single_field_optional_does_not_short_circuit() -> TestResult {
+    fail_test("{foo: 'bar'}.foobar?.baz", "nothing")
+}
+
+#[test]
+fn record_multiple_optional_fields() -> TestResult {
+    run_test("{foo: 'bar'}.foobar?.baz? | to nuon", "null")
+}
+
 #[test]
 fn nested_record_field_success() -> TestResult {
    run_test("{foo: {bar: 'baz'} }.foo.bar == 'baz'", "true")
@ -37,6 +67,11 @@ fn nested_record_field_failure() -> TestResult {
    fail_test("{foo: {bar: 'baz'} }.foo.asdf", "")
 }

+#[test]
+fn nested_record_field_optional() -> TestResult {
+    run_test("{foo: {bar: 'baz'} }.foo.asdf?  | to nuon", "null")
+}
+
 #[test]
 fn record_with_nested_list_success() -> TestResult {
    run_test("{foo: [{bar: 'baz'}]}.foo.0.bar == 'baz'", "true")
@ -72,12 +107,27 @@ fn jagged_list_access_fails() -> TestResult {
    fail_test("[{}, {foo: 'bar'}].foo", "cannot find column")
 }

+#[test]
+fn jagged_list_optional_access_succeeds() -> TestResult {
+    run_test("[{foo: 'bar'}, {}].foo?.0", "bar")?;
+    run_test("[{foo: 'bar'}, {}].foo?.1  | to nuon", "null")?;
+
+    run_test("[{}, {foo: 'bar'}].foo?.0 | to nuon", "null")?;
+    run_test("[{}, {foo: 'bar'}].foo?.1", "bar")
+}
+
 // test that accessing a nonexistent row fails
 #[test]
 fn list_row_access_failure() -> TestResult {
    fail_test("[{foo: 'bar'}, {foo: 'baz'}].2", "")
 }

+#[test]
+fn list_row_optional_access_succeeds() -> TestResult {
+    run_test("[{foo: 'bar'}, {foo: 'baz'}].2? | to nuon", "null")?;
+    run_test("[{foo: 'bar'}, {foo: 'baz'}].3? | to nuon", "null")
+}
+
 // regression test for an old bug
 #[test]
 fn do_not_delve_too_deep_in_nested_lists() -> TestResult {
--- a/src/tests/test_table_operations.rs
+++ b/src/tests/test_table_operations.rs
@ -179,6 +179,33 @@ fn missing_column_errors() -> TestResult {
    )
 }

+#[test]
+fn missing_optional_column_fills_in_nothing() -> TestResult {
+    // Row 1 has no `size` column; the optional access `size?` yields $nothing for that row
+    run_test(
+        r#"[ { name: ABC, size: 20 }, { name: HIJ } ].size?.1 == $nothing"#,
+        "true",
+    )
+}
+
+#[test]
+fn missing_required_row_fails() -> TestResult {
+    // .3 fails because the table has only two rows, so row index 3 does not exist
+    fail_test(
+        r#"[ { name: ABC, size: 20 }, { name: HIJ } ].3"#,
+        "", // we just care if it errors
+    )
+}
+
+#[test]
+fn missing_optional_row_fills_in_nothing() -> TestResult {
+    // .3? returns $nothing because the table has only two rows, so row index 3 does not exist
+    run_test(
+        r#"[ { name: ABC, size: 20 }, { name: HIJ } ].3? == $nothing"#,
+        "true",
+    )
+}
+
 #[test]
 fn string_cell_path() -> TestResult {
    run_test(
@ -257,9 +284,9 @@ fn length_defaulted_columns() -> TestResult {

 #[test]
 fn nullify_errors() -> TestResult {
-    run_test("([{a:1} {a:2} {a:3}] | get -i foo | length) == 3", "true")?;
+    run_test("([{a:1} {a:2} {a:3}] | get foo? | length) == 3", "true")?;
    run_test(
-        "([{a:1} {a:2} {a:3}] | get -i foo | to nuon) == '[null, null, null]'",
+        "([{a:1} {a:2} {a:3}] | get foo? | to nuon) == '[null, null, null]'",
        "true",
    )
 }
@ -267,7 +294,7 @@ fn nullify_errors() -> TestResult {
 #[test]
 fn nullify_holes() -> TestResult {
    run_test(
-        "([{a:1} {b:2} {a:3}] | get -i a | to nuon) == '[1, null, 3]'",
+        "([{a:1} {b:2} {a:3}] | get a? | to nuon) == '[1, null, 3]'",
        "true",
    )
 }