Optional members in cell paths: Attempt 2 (#8379)

This is a follow up from https://github.com/nushell/nushell/pull/7540.
Please provide feedback if you have the time!

## Summary

This PR lets you use `?` to indicate that a member in a cell path is
optional and Nushell should return `null` if that member cannot be
accessed.

Unlike the previous PR, `?` is now a _postfix_ modifier for cell path
members. A cell path of `.foo?.bar` means that `foo` is optional and
`bar` is not.

`?` does _not_ suppress all errors; it is intended to help in situations
where data has "holes", i.e. the data types are correct but something is
missing. Type mismatches (like trying to do a string path access on a
date) will still fail.

### Record Examples

```bash

{ foo: 123 }.foo # returns 123

{ foo: 123 }.bar # errors
{ foo: 123 }.bar? # returns null

{ foo: 123 } | get bar # errors
{ foo: 123 } | get bar? # returns null

{ foo: 123 }.bar.baz # errors
{ foo: 123 }.bar?.baz # errors because `baz` is not present on the result from `bar?`
{ foo: 123 }.bar.baz? # errors
{ foo: 123 }.bar?.baz? # returns null
```

### List Examples
```
〉[{foo: 1} {foo: 2} {}].foo
Error: nu:🐚:column_not_found

  × Cannot find column
   ╭─[entry #30:1:1]
 1 │ [{foo: 1} {foo: 2} {}].foo
   ·                    ─┬  ─┬─
   ·                     │   ╰── cannot find column 'foo'
   ·                     ╰── value originates here
   ╰────
〉[{foo: 1} {foo: 2} {}].foo?
╭───┬───╮
│ 0 │ 1 │
│ 1 │ 2 │
│ 2 │   │
╰───┴───╯
〉[{foo: 1} {foo: 2} {}].foo?.2 | describe
nothing

〉[a b c].4? | describe
nothing

〉[{foo: 1} {foo: 2} {}] | where foo? == 1
╭───┬─────╮
│ # │ foo │
├───┼─────┤
│ 0 │   1 │
╰───┴─────╯
```

# Breaking changes

1. Column names with `?` in them now need to be quoted.
2. The `-i`/`--ignore-errors` flag has been removed from `get` and
`select`
1. After this PR, most `get` error handling can be done with `?` and/or
`try`/`catch`.
4. Cell path accesses like this no longer work without a `?`:
```bash
〉[{a:1 b:2} {a:3}].b.0
2
```
We had some clever code that was able to recognize that since we only
want row `0`, it's OK if other rows are missing column `b`. I removed
that because it's tricky to maintain, and now that query needs to be
written like:


```bash
〉[{a:1 b:2} {a:3}].b?.0
2
```

I think the regression is acceptable for now. I plan to do more work in
the future to enable streaming of cell path accesses, and when that
happens I'll be able to make `.b.0` work again.
This commit is contained in:
Reilly Wood
2023-03-15 20:50:58 -07:00
committed by GitHub
parent d3be5ec750
commit 21b84a6d65
32 changed files with 510 additions and 277 deletions

View File

@ -31,7 +31,7 @@ pub fn eval_constant(
Expr::FullCellPath(cell_path) => {
let value = eval_constant(working_set, &cell_path.head)?;
match value.follow_cell_path(&cell_path.tail, false, false) {
match value.follow_cell_path(&cell_path.tail, false) {
Ok(val) => Ok(val),
// TODO: Better error conversion
Err(shell_error) => Err(ParseError::LabeledError(

View File

@ -941,10 +941,12 @@ pub fn parse_old_alias(
PathMember::String {
val: "scope".to_string(),
span: Span::new(0, 0),
optional: false,
},
PathMember::String {
val: "aliases".to_string(),
span: Span::new(0, 0),
optional: false,
},
];
let expr = Expression {

View File

@ -2015,54 +2015,100 @@ pub fn parse_variable_expr(
pub fn parse_cell_path(
working_set: &mut StateWorkingSet,
tokens: impl Iterator<Item = Token>,
mut expect_dot: bool,
expect_dot: bool,
expand_aliases_denylist: &[usize],
span: Span,
) -> (Vec<PathMember>, Option<ParseError>) {
enum TokenType {
Dot, // .
QuestionOrDot, // ? or .
PathMember, // an int or string, like `1` or `foo`
}
// Parsing a cell path is essentially a state machine, and this is the state
let mut expected_token = if expect_dot {
TokenType::Dot
} else {
TokenType::PathMember
};
let mut error = None;
let mut tail = vec![];
for path_element in tokens {
let bytes = working_set.get_span_contents(path_element.span);
if expect_dot {
expect_dot = false;
if bytes.len() != 1 || bytes[0] != b'.' {
error = error.or_else(|| Some(ParseError::Expected('.'.into(), path_element.span)));
match expected_token {
TokenType::Dot => {
if bytes.len() != 1 || bytes[0] != b'.' {
return (
tail,
Some(ParseError::Expected('.'.into(), path_element.span)),
);
}
expected_token = TokenType::PathMember;
}
} else {
expect_dot = true;
match parse_int(bytes, path_element.span) {
(
Expression {
expr: Expr::Int(val),
span,
..
},
None,
) => tail.push(PathMember::Int {
val: val as usize,
span,
}),
_ => {
let (result, err) =
parse_string(working_set, path_element.span, expand_aliases_denylist);
error = error.or(err);
match result {
TokenType::QuestionOrDot => {
if bytes.len() == 1 && bytes[0] == b'.' {
expected_token = TokenType::PathMember;
} else if bytes.len() == 1 && bytes[0] == b'?' {
if let Some(last) = tail.last_mut() {
match last {
PathMember::String {
ref mut optional, ..
} => *optional = true,
PathMember::Int {
ref mut optional, ..
} => *optional = true,
}
}
expected_token = TokenType::Dot;
} else {
return (
tail,
Some(ParseError::Expected(". or ?".into(), path_element.span)),
);
}
}
TokenType::PathMember => {
match parse_int(bytes, path_element.span) {
(
Expression {
expr: Expr::String(string),
expr: Expr::Int(val),
span,
..
} => {
tail.push(PathMember::String { val: string, span });
}
_ => {
error =
error.or_else(|| Some(ParseError::Expected("string".into(), span)));
},
None,
) => tail.push(PathMember::Int {
val: val as usize,
span,
optional: false,
}),
_ => {
let (result, err) =
parse_string(working_set, path_element.span, expand_aliases_denylist);
error = error.or(err);
match result {
Expression {
expr: Expr::String(string),
span,
..
} => {
tail.push(PathMember::String {
val: string,
span,
optional: false,
});
}
_ => {
return (
tail,
Some(ParseError::Expected("string".into(), path_element.span)),
);
}
}
}
}
expected_token = TokenType::QuestionOrDot;
}
}
}
@ -2081,7 +2127,7 @@ pub fn parse_full_cell_path(
let source = working_set.get_span_contents(span);
let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
error = error.or(err);
let mut tokens = tokens.into_iter().peekable();
@ -2202,13 +2248,7 @@ pub fn parse_full_cell_path(
);
};
let (tail, err) = parse_cell_path(
working_set,
tokens,
expect_dot,
expand_aliases_denylist,
span,
);
let (tail, err) = parse_cell_path(working_set, tokens, expect_dot, expand_aliases_denylist);
error = error.or(err);
(
@ -4597,13 +4637,13 @@ pub fn parse_value(
let source = working_set.get_span_contents(span);
let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.', b'?'], true);
error = error.or(err);
let tokens = tokens.into_iter().peekable();
let (cell_path, err) =
parse_cell_path(working_set, tokens, false, expand_aliases_denylist, span);
parse_cell_path(working_set, tokens, false, expand_aliases_denylist);
error = error.or(err);
(