mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 07:05:47 +02:00
Fix parsing record values containing colons (#13413)
This PR is an attempt to fix #8257 and fix #10985 (which is duplicate-ish)

# Description

The parser currently doesn't know how to deal with colons appearing while lexing whitespace-terminated tokens specifying a record value. Most notably, this means you can't use datetime literals in record value position (and as a consequence, `| to nuon | from nuon` roundtrips can fail), but it also means that bare words containing colons cause a non-useful error message.

`parser::parse_record` calls `lex::lex` with the `:` colon character in the `special_tokens` argument. This allows colons to terminate record keys, but as a side effect, it also causes colons to terminate record *values*. I added a new function `lex::lex_n_tokens`, which allows the caller to drive the lexing process more explicitly, and used it in `parser::parse_record` to let colons terminate record keys while not giving them special treatment when appearing in record values.

This PR description previously said: *Another approach suggested in one of the issues was to support an additional datetime literal format that doesn't require colons. I like that that wouldn't require new `lex::lex_internal` behaviour, but an advantage of my approach is that it also newly allows for string record values given as bare words containing colons. I think this eliminates another possible source of confusion.*

It was determined that this is undesirable, and in the current state of this PR, bare word record values with colons are rejected explicitly. The better error message is still a win.

# User-Facing Changes

In addition to the above, this PR also disables the use of "special" (non-item) tokens in record key and value position, and the use of a single bare `:` as a record key.
Examples of behaviour *before* this PR:

```nu
{ a: b }                    # Valid, same as { 'a': 'b' }
{ a: b:c }                  # Error: expected ':'
{ a: 2024-08-13T22:11:09 }  # Error: expected ':'
{ :: 1 }                    # Valid, same as { ':': 1 }
{ ;: 1 }                    # Valid, same as { ';': 1 }
{ a: || }                   # Valid, same as { 'a': '||' }
```

Examples of behaviour *after* this PR:

```nu
{ a: b }                    # (Unchanged) Valid, same as { 'a': 'b' }
{ a: b:c }                  # Error: colon in bare word specifying record value
{ a: 2024-08-13T22:11:09 }  # Valid, same as { a: (2024-08-13T22:11:09) }
{ :: 1 }                    # Error: colon in bare word specifying record key
{ ;: 1 }                    # Error: expected item in record key position
{ a: || }                   # Error: expected item in record value position
```

# Tests + Formatting

I added tests, but I'm not sure if they're sufficient and in the right place.

# After Submitting

I don't think documentation changes are needed for this, but please let me know if you disagree.
This commit is contained in:
@ -2478,3 +2478,56 @@ mod operator {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
mod record {
|
||||
use super::*;
|
||||
|
||||
use nu_protocol::ast::RecordItem;
|
||||
|
||||
#[rstest]
|
||||
#[case(b"{ :: x }", "Invalid literal")] // Key is bare colon
|
||||
#[case(b"{ a: x:y }", "Invalid literal")] // Value is bare word with colon
|
||||
#[case(b"{ a: x('y'):z }", "Invalid literal")] // Value is bare string interpolation with colon
|
||||
#[case(b"{ ;: x }", "Parse mismatch during operation.")] // Key is a non-item token
|
||||
#[case(b"{ a: || }", "Parse mismatch during operation.")] // Value is a non-item token
|
||||
fn refuse_confusing_record(#[case] expr: &[u8], #[case] error: &str) {
|
||||
dbg!(String::from_utf8_lossy(expr));
|
||||
let engine_state = EngineState::new();
|
||||
let mut working_set = StateWorkingSet::new(&engine_state);
|
||||
parse(&mut working_set, None, expr, false);
|
||||
assert_eq!(
|
||||
working_set.parse_errors.first().map(|e| e.to_string()),
|
||||
Some(error.to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case(b"{ a: 2024-07-23T22:54:54.532100627+02:00 b:xy }")]
|
||||
fn parse_datetime_in_record(#[case] expr: &[u8]) {
|
||||
dbg!(String::from_utf8_lossy(expr));
|
||||
let engine_state = EngineState::new();
|
||||
let mut working_set = StateWorkingSet::new(&engine_state);
|
||||
let block = parse(&mut working_set, None, expr, false);
|
||||
assert!(working_set.parse_errors.first().is_none());
|
||||
let pipeline_el_expr = &block
|
||||
.pipelines
|
||||
.first()
|
||||
.unwrap()
|
||||
.elements
|
||||
.first()
|
||||
.unwrap()
|
||||
.expr
|
||||
.expr;
|
||||
dbg!(pipeline_el_expr);
|
||||
match pipeline_el_expr {
|
||||
Expr::FullCellPath(v) => match &v.head.expr {
|
||||
Expr::Record(fields) => assert!(matches!(
|
||||
fields[0],
|
||||
RecordItem::Pair(_, Expression { ty: Type::Date, .. })
|
||||
)),
|
||||
_ => panic!("Expected record head"),
|
||||
},
|
||||
_ => panic!("Expected full cell path"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user