forked from extern/nushell
Syntax errors for string and int (#7952)
# Description Added a few syntax errors for ints and strings, and changed the parser to stop and show that error rather than continue trying to parse those tokens as some other shape. However, I don't see how to push this direction much further, and most of the classic confusing errors can't be changed. Flagged as WIP for the moment, but passes all checks and works better than current release: 1. I have yet to figure out how to make these errors refer back to the book, as I see some other errors do. 2. How to give a syntax error when a malformed int is the first token in the line? Currently it is parsed as an external command, and the user gets a confusing error message. 3. Would like to be more strict with *decimal* int literals (those lacking, e.g., a `0x` prefix). Need to tinker more with the order of parse shape calls; currently, float is tried after int, so '1.4' has to be passed. _(Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience.)_ ```bash 〉"\z" Error: ╭─[entry #3:1:1] 1 │ "\z" · ─┬─ · ╰── Syntax error in string, unrecognized character after escape '\'. ╰──── ``` Canonical presentation of a syntax error. ```bash 〉" \u{01ffbogus}" Error: × Invalid syntax ╭─[entry #2:1:1] 1 │ " \u{01ffbogus}" · ───────┬────── · ╰── Syntax error in string, expecting 1 to 6 hex digits in unicode escape '\u{X...}', max value 10FFFF. ╰──── ``` Malformed unicode escape in string, flagged as error. String parse can be opinionated, it's the last shape tried. ```bash 〉0x22bogus Error: nu::shell::external_command (link) × External command failed ╭─[entry #4:1:1] 1 │ 0x22bogus · ────┬──── · ╰── executable was not found ╰──── help: No such file or directory (os error 2) ``` A *correct* number in the first token would be evaluated, but an *incorrect* one is treated as an external command? Confusing to users. ```bash 〉0 + 0x22bogus Error: × Invalid syntax ╭─[entry #5:1:1] 1 │ 0 + 0x22bogus · ────┬──── · ╰── Syntax error in int, invalid digits in radix 16 int. 
╰──── ``` Can give a syntax error if the token is unambiguously an int literal, e.g. it has a 0b or 0x prefix and so could not be a float. ```bash 〉0 + 098bogus Error: nu::parser::unsupported_operation (link) × Types mismatched for operation. ╭─[entry #6:1:1] 1 │ 0 + 098bogus · ┬ ┬ ────┬─── · │ │ ╰── string · │ ╰── doesn't support these values. · ╰── int ╰──── help: Change int or string to be the right types and try again. ``` But *decimal* literal (no prefix) can't be too strict. Parser is going to try float later. So '1.4' must be passed. # User-Facing Changes First and foremost, more specific error messages for typos in string and int literals. Probably improves interactive user experience. But a script that was causing and then checking for a specific error might notice a different error message. _(List of all changes that impact the user experience here. This helps us keep track of breaking changes.)_ # Tests + Formatting Added positive and negative unit tests (run with `cargo test -p nu-parser`). Didn't add integration tests. Make sure you've run and fixed any issues with these commands: - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass # After Submitting If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. --------- Co-authored-by: Stefan Holderbach <sholderbach@users.noreply.github.com>
This commit is contained in:
@ -442,6 +442,10 @@ pub enum ParseError {
|
||||
)]
|
||||
NotAConstant(#[label = "Value is not a parse-time constant"] Span),
|
||||
|
||||
#[error("Invalid literal")] // <problem> in <entity>.
|
||||
#[diagnostic()]
|
||||
InvalidLiteral(String, String, #[label("{0} in {1}")] Span),
|
||||
|
||||
#[error("{0}")]
|
||||
#[diagnostic()]
|
||||
LabeledError(String, String, #[label("{1}")] Span),
|
||||
@ -520,6 +524,7 @@ impl ParseError {
|
||||
ParseError::ShellErrRedirect(s) => *s,
|
||||
ParseError::ShellOutErrRedirect(s) => *s,
|
||||
ParseError::UnknownOperator(_, _, s) => *s,
|
||||
ParseError::InvalidLiteral(_, _, s) => *s,
|
||||
ParseError::NotAConstant(s) => *s,
|
||||
}
|
||||
}
|
||||
|
@ -1351,9 +1351,9 @@ pub fn parse_int(token: &[u8], span: Span) -> (Expression, Option<ParseError>) {
|
||||
} else {
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Mismatch(
|
||||
Some(ParseError::InvalidLiteral(
|
||||
format!("invalid digits for radix {}", radix),
|
||||
"int".into(),
|
||||
"incompatible int".into(),
|
||||
span,
|
||||
)),
|
||||
)
|
||||
@ -1362,6 +1362,13 @@ pub fn parse_int(token: &[u8], span: Span) -> (Expression, Option<ParseError>) {
|
||||
|
||||
let token = strip_underscores(token);
|
||||
|
||||
if token.is_empty() {
|
||||
return (
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("int".into(), span)),
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(num) = token.strip_prefix("0b") {
|
||||
extract_int(num, span, 2)
|
||||
} else if let Some(num) = token.strip_prefix("0o") {
|
||||
@ -1408,16 +1415,21 @@ pub fn parse_float(token: &[u8], span: Span) -> (Expression, Option<ParseError>)
|
||||
}
|
||||
|
||||
pub fn parse_number(token: &[u8], span: Span) -> (Expression, Option<ParseError>) {
|
||||
if let (x, None) = parse_int(token, span) {
|
||||
(x, None)
|
||||
} else if let (x, None) = parse_float(token, span) {
|
||||
(x, None)
|
||||
} else {
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("number".into(), span)),
|
||||
)
|
||||
match parse_int(token, span) {
|
||||
(x, None) => {
|
||||
return (x, None);
|
||||
}
|
||||
(_, Some(ParseError::Expected(_, _))) => {}
|
||||
(x, e) => return (x, e),
|
||||
}
|
||||
if let (x, None) = parse_float(token, span) {
|
||||
return (x, None);
|
||||
}
|
||||
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("number".into(), span)),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_range(
|
||||
@ -1432,6 +1444,7 @@ pub fn parse_range(
|
||||
// and <range_operator> is ".." or "..<"
|
||||
// and one of the <from> or <to> bounds must be present (just '..' is not allowed since it
|
||||
// looks like parent directory)
|
||||
//bugbug range cannot be [..] because that looks like parent directory
|
||||
|
||||
let contents = working_set.get_span_contents(span);
|
||||
|
||||
@ -2140,11 +2153,7 @@ pub fn parse_datetime(
|
||||
if bytes.is_empty() || !bytes[0].is_ascii_digit() {
|
||||
return (
|
||||
garbage(span),
|
||||
Some(ParseError::Mismatch(
|
||||
"datetime".into(),
|
||||
"non-datetime".into(),
|
||||
span,
|
||||
)),
|
||||
Some(ParseError::Expected("datetime".into(), span)),
|
||||
);
|
||||
}
|
||||
|
||||
@ -2192,11 +2201,7 @@ pub fn parse_datetime(
|
||||
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Mismatch(
|
||||
"datetime".into(),
|
||||
"non-datetime".into(),
|
||||
span,
|
||||
)),
|
||||
Some(ParseError::Expected("datetime".into(), span)),
|
||||
)
|
||||
}
|
||||
|
||||
@ -2213,9 +2218,8 @@ pub fn parse_duration(
|
||||
Some(expression) => (expression, None),
|
||||
None => (
|
||||
garbage(span),
|
||||
Some(ParseError::Mismatch(
|
||||
"duration".into(),
|
||||
"non-duration unit".into(),
|
||||
Some(ParseError::Expected(
|
||||
"duration with valid units".into(),
|
||||
span,
|
||||
)),
|
||||
),
|
||||
@ -2339,13 +2343,13 @@ pub fn parse_filesize(
|
||||
|
||||
let bytes = working_set.get_span_contents(span);
|
||||
|
||||
//todo: parse_filesize_bytes should distinguish between not-that-type and syntax error in units
|
||||
match parse_filesize_bytes(bytes, span) {
|
||||
Some(expression) => (expression, None),
|
||||
None => (
|
||||
garbage(span),
|
||||
Some(ParseError::Mismatch(
|
||||
"filesize".into(),
|
||||
"non-filesize unit".into(),
|
||||
Some(ParseError::Expected(
|
||||
"filesize with valid units".into(),
|
||||
span,
|
||||
)),
|
||||
),
|
||||
@ -2454,7 +2458,7 @@ pub fn parse_glob_pattern(
|
||||
} else {
|
||||
(
|
||||
garbage(span),
|
||||
Some(ParseError::Expected("string".into(), span)),
|
||||
Some(ParseError::Expected("glob pattern string".into(), span)),
|
||||
)
|
||||
}
|
||||
}
|
||||
@ -2568,8 +2572,9 @@ pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>
|
||||
cur_idx += 1;
|
||||
}
|
||||
_ => {
|
||||
err = Some(ParseError::Expected(
|
||||
"closing '}' in unicode escape `\\u{n..}`".into(),
|
||||
err = Some(ParseError::InvalidLiteral(
|
||||
"missing '}' for unicode escape '\\u{X...}'".into(),
|
||||
"string".into(),
|
||||
Span::new(span.start + idx, span.end),
|
||||
));
|
||||
break 'us_loop;
|
||||
@ -2600,16 +2605,18 @@ pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>
|
||||
}
|
||||
}
|
||||
// fall through -- escape not accepted above, must be error.
|
||||
err = Some(ParseError::Expected(
|
||||
"unicode escape \\u{n..}".into(),
|
||||
err = Some(ParseError::InvalidLiteral(
|
||||
"invalid unicode escape '\\u{X...}', must be 1-6 hex digits, max value 10FFFF".into(),
|
||||
"string".into(),
|
||||
Span::new(span.start + idx, span.end),
|
||||
));
|
||||
break 'us_loop;
|
||||
}
|
||||
|
||||
_ => {
|
||||
err = Some(ParseError::Expected(
|
||||
"supported escape character".into(),
|
||||
err = Some(ParseError::InvalidLiteral(
|
||||
"unrecognized escape after '\\'".into(),
|
||||
"string".into(),
|
||||
Span::new(span.start + idx, span.end),
|
||||
));
|
||||
break 'us_loop;
|
||||
@ -4539,11 +4546,23 @@ pub fn parse_value(
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex
|
||||
// stream doesn't start with '{' -- parsing in the SyntaxShape::Any arm depends on this error variant.
|
||||
SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record => (
|
||||
garbage(span),
|
||||
Some(ParseError::Expected(
|
||||
"block, closure or record".into(),
|
||||
span,
|
||||
)),
|
||||
),
|
||||
|
||||
SyntaxShape::Any => {
|
||||
if bytes.starts_with(b"[") {
|
||||
//parse_value(working_set, span, &SyntaxShape::Table)
|
||||
parse_full_cell_path(working_set, None, span, expand_aliases_denylist)
|
||||
} else {
|
||||
/* Parser is very sensitive to the order of shapes tried. Recording the original order for posterity
|
||||
let shapes = [
|
||||
SyntaxShape::Binary,
|
||||
SyntaxShape::Int,
|
||||
@ -4557,11 +4576,34 @@ pub fn parse_value(
|
||||
SyntaxShape::Block,
|
||||
SyntaxShape::String,
|
||||
];
|
||||
*/
|
||||
let shapes = [
|
||||
SyntaxShape::Binary,
|
||||
SyntaxShape::Filesize,
|
||||
SyntaxShape::Duration,
|
||||
SyntaxShape::Range,
|
||||
SyntaxShape::DateTime, //FIXME requires 3 failed conversion attempts before failing
|
||||
SyntaxShape::Record,
|
||||
SyntaxShape::Closure(None),
|
||||
SyntaxShape::Block,
|
||||
SyntaxShape::Int,
|
||||
SyntaxShape::Number,
|
||||
SyntaxShape::String,
|
||||
];
|
||||
for shape in shapes.iter() {
|
||||
if let (s, None) =
|
||||
parse_value(working_set, span, shape, expand_aliases_denylist)
|
||||
{
|
||||
return (s, None);
|
||||
let (s, e) = parse_value(working_set, span, shape, expand_aliases_denylist);
|
||||
match (s, e) {
|
||||
(s, None) => {
|
||||
return (s, None);
|
||||
}
|
||||
(_, Some(ParseError::Expected(_, _))) => {
|
||||
// value didn't parse as this shape, try other options
|
||||
continue;
|
||||
}
|
||||
(s, e) => {
|
||||
// value did parse, but had syntax issues, don't try any more options.
|
||||
return (s, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
(
|
||||
|
Reference in New Issue
Block a user