Optional members in cell paths: Attempt 2 (#8379)

This is a follow-up to https://github.com/nushell/nushell/pull/7540. Please provide feedback if you have the time! ## Summary This PR lets you use `?` to indicate that a member in a cell path is optional and Nushell should return `null` if that member cannot be accessed. Unlike the previous PR, `?` is now a _postfix_ modifier for cell path members. A cell path of `.foo?.bar` means that `foo` is optional and `bar` is not. `?` does _not_ suppress all errors; it is intended to help in situations where data has "holes", i.e. the data types are correct but something is missing. Type mismatches (like trying to do a string path access on a date) will still fail. ### Record Examples ```bash { foo: 123 }.foo # returns 123 { foo: 123 }.bar # errors { foo: 123 }.bar? # returns null { foo: 123 } | get bar # errors { foo: 123 } | get bar? # returns null { foo: 123 }.bar.baz # errors { foo: 123 }.bar?.baz # errors because `baz` is not present on the result from `bar?` { foo: 123 }.bar.baz? # errors { foo: 123 }.bar?.baz? # returns null ``` ### List Examples ``` 〉[{foo: 1} {foo: 2} {}].foo Error: nu::shell::column_not_found × Cannot find column ╭─[entry #30:1:1] 1 │ [{foo: 1} {foo: 2} {}].foo · ─┬ ─┬─ · │ ╰── cannot find column 'foo' · ╰── value originates here ╰──── 〉[{foo: 1} {foo: 2} {}].foo? ╭───┬───╮ │ 0 │ 1 │ │ 1 │ 2 │ │ 2 │ │ ╰───┴───╯ 〉[{foo: 1} {foo: 2} {}].foo?.2 | describe nothing 〉[a b c].4? | describe nothing 〉[{foo: 1} {foo: 2} {}] | where foo? == 1 ╭───┬─────╮ │ # │ foo │ ├───┼─────┤ │ 0 │ 1 │ ╰───┴─────╯ ``` # Breaking changes 1. Column names with `?` in them now need to be quoted. 2. The `-i`/`--ignore-errors` flag has been removed from `get` and `select`. After this PR, most `get` error handling can be done with `?` and/or `try`/`catch`. 3. Cell path accesses like this no longer work without a `?`: ```bash 〉[{a:1 b:2} {a:3}].b.0 2 ``` We had some clever code that was able to recognize that since we only want row `0`, it's OK if other rows are missing column `b`. 
I removed that because it's tricky to maintain, and now that query needs to be written like: ```bash 〉[{a:1 b:2} {a:3}].b?.0 2 ``` I think the regression is acceptable for now. I plan to do more work in the future to enable streaming of cell path accesses, and when that happens I'll be able to make `.b.0` work again.
2025-08-09 11:25:41 +02:00 · 2023-03-15 20:50:58 -07:00
parent d3be5ec750
commit 21b84a6d65
32 changed files with 510 additions and 277 deletions
--- a/crates/nu-parser/src/eval.rs
+++ b/crates/nu-parser/src/eval.rs
@@ -31,7 +31,7 @@ pub fn eval_constant(
        Expr::FullCellPath(cell_path) => {
            let value = eval_constant(working_set, &cell_path.head)?;

-            match value.follow_cell_path(&cell_path.tail, false, false) {
+            match value.follow_cell_path(&cell_path.tail, false) {
                Ok(val) => Ok(val),
                // TODO: Better error conversion
                Err(shell_error) => Err(ParseError::LabeledError(
--- a/crates/nu-parser/src/parse_keywords.rs
+++ b/crates/nu-parser/src/parse_keywords.rs
@@ -941,10 +941,12 @@ pub fn parse_old_alias(
            PathMember::String {
                val: "scope".to_string(),
                span: Span::new(0, 0),
+                optional: false,
            },
            PathMember::String {
                val: "aliases".to_string(),
                span: Span::new(0, 0),
+                optional: false,
            },
        ];
        let expr = Expression {
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -2015,54 +2015,100 @@ pub fn parse_variable_expr(
 pub fn parse_cell_path(
    working_set: &mut StateWorkingSet,
    tokens: impl Iterator<Item = Token>,
-    mut expect_dot: bool,
+    expect_dot: bool,
    expand_aliases_denylist: &[usize],
-    span: Span,
 ) -> (Vec<PathMember>, Option<ParseError>) {
+    enum TokenType {
+        Dot,           // .
+        QuestionOrDot, // ? or .
+        PathMember,    // an int or string, like `1` or `foo`
+    }
+
+    // Parsing a cell path is essentially a state machine, and this is the state
+    let mut expected_token = if expect_dot {
+        TokenType::Dot
+    } else {
+        TokenType::PathMember
+    };
+
    let mut error = None;
    let mut tail = vec![];

    for path_element in tokens {
        let bytes = working_set.get_span_contents(path_element.span);

-        if expect_dot {
-            expect_dot = false;
-            if bytes.len() != 1 || bytes[0] != b'.' {
-                error = error.or_else(|| Some(ParseError::Expected('.'.into(), path_element.span)));
+        match expected_token {
+            TokenType::Dot => {
+                if bytes.len() != 1 || bytes[0] != b'.' {
+                    return (
+                        tail,
+                        Some(ParseError::Expected('.'.into(), path_element.span)),
+                    );
+                }
+                expected_token = TokenType::PathMember;
            }
-        } else {
-            expect_dot = true;
-
-            match parse_int(bytes, path_element.span) {
-                (
-                    Expression {
-                        expr: Expr::Int(val),
-                        span,
-                        ..
-                    },
-                    None,
-                ) => tail.push(PathMember::Int {
-                    val: val as usize,
-                    span,
-                }),
-                _ => {
-                    let (result, err) =
-                        parse_string(working_set, path_element.span, expand_aliases_denylist);
-                    error = error.or(err);
-                    match result {
+            TokenType::QuestionOrDot => {
+                if bytes.len() == 1 && bytes[0] == b'.' {
+                    expected_token = TokenType::PathMember;
+                } else if bytes.len() == 1 && bytes[0] == b'?' {
+                    if let Some(last) = tail.last_mut() {
+                        match last {
+                            PathMember::String {
+                                ref mut optional, ..
+                            } => *optional = true,
+                            PathMember::Int {
+                                ref mut optional, ..
+                            } => *optional = true,
+                        }
+                    }
+                    expected_token = TokenType::Dot;
+                } else {
+                    return (
+                        tail,
+                        Some(ParseError::Expected(". or ?".into(), path_element.span)),
+                    );
+                }
+            }
+            TokenType::PathMember => {
+                match parse_int(bytes, path_element.span) {
+                    (
                        Expression {
-                            expr: Expr::String(string),
+                            expr: Expr::Int(val),
                            span,
                            ..
-                        } => {
-                            tail.push(PathMember::String { val: string, span });
-                        }
-                        _ => {
-                            error =
-                                error.or_else(|| Some(ParseError::Expected("string".into(), span)));
+                        },
+                        None,
+                    ) => tail.push(PathMember::Int {
+                        val: val as usize,
+                        span,
+                        optional: false,
+                    }),
+                    _ => {
+                        let (result, err) =
+                            parse_string(working_set, path_element.span, expand_aliases_denylist);
+                        error = error.or(err);
+                        match result {
+                            Expression {
+                                expr: Expr::String(string),
+                                span,
+                                ..
+                            } => {
+                                tail.push(PathMember::String {
+                                    val: string,
+                                    span,
+                                    optional: false,
+                                });
+                            }
+                            _ => {
+                                return (
+                                    tail,
+                                    Some(ParseError::Expected("string".into(), path_element.span)),
+                                );
+                            }
                        }
                    }
                }
+                expected_token = TokenType::QuestionOrDot;
            }
        }
    }
@@ -2081,7 +2127,7 @@ pub fn parse_full_cell_path(
    let source = working_set.get_span_contents(span);
    let mut error = None;

-    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
+    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
    error = error.or(err);

    let mut tokens = tokens.into_iter().peekable();
@@ -2202,13 +2248,7 @@ pub fn parse_full_cell_path(
            );
        };

-        let (tail, err) = parse_cell_path(
-            working_set,
-            tokens,
-            expect_dot,
-            expand_aliases_denylist,
-            span,
-        );
+        let (tail, err) = parse_cell_path(working_set, tokens, expect_dot, expand_aliases_denylist);
        error = error.or(err);

        (
@@ -4597,13 +4637,13 @@ pub fn parse_value(
            let source = working_set.get_span_contents(span);
            let mut error = None;

-            let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
+            let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
            error = error.or(err);

            let tokens = tokens.into_iter().peekable();

            let (cell_path, err) =
-                parse_cell_path(working_set, tokens, false, expand_aliases_denylist, span);
+                parse_cell_path(working_set, tokens, false, expand_aliases_denylist);
            error = error.or(err);

            (