Parse time type checking for range (#13595)

# Description

As part of fixing https://github.com/nushell/nushell/issues/13586, this
PR checks the types of the operands when creating a range. Stuff like
`0..(glob .)` will be rejected at parse time. Additionally, `0..$x` will
be treated as a range and rejected if `x` is not defined, rather than
being treated as a string. A separate PR will need to be made to do
reject streams at runtime, so that stuff like `0..(open /dev/random)`
doesn't hang.

Internally, this PR adds a `ParseError::UnsupportedOperationTernary`
variant, for when you have a range like `1..2..(glob .)`.

# User-Facing Changes

Users will now receive an error if any of the operands in the ranges
they construct have types that aren't compatible with `Type::Number`.

Additionally, if a piece of code looks like a range but some parse error
is encountered while parsing it, that piece of code will still be
treated as a range and the user will be shown the parse error. This
means that a piece of code like `0..$x` will be treated as a range no
matter what. Previously, if `x` weren't the expression would've been
treated as a string `"0..$x"`. I feel like it makes the language less
complicated if we make it less context-sensitive.

Here's an example of the error you get:
```
> 0..(glob .)
Error: nu::parser::unsupported_operation

  × range is not supported between int and any.
   ╭─[entry #1:1:1]
 1 │ 0..(glob .)
   · ─────┬─────┬┬
   ·      │     │╰── any
   ·      │     ╰── int
   ·      ╰── doesn't support these values
   ╰────
```

And as an image:

![image](https://github.com/user-attachments/assets/5c76168d-27db-481b-b541-861dac899dbf)

Note: I made the operands themselves (above, `(glob .)`) be garbage,
rather than the `..` operator itself. This doesn't match the behavior of
the math operators (if you do `1 + "foo"`, `+` gets highlighted red).
This is because with ranges, the range operators aren't `Expression`s
themselves, so they can't be turned into garbage. I felt like here, it
makes more sense to highlight the individual operand anyway.
This commit is contained in:
Yash Thakur
2024-08-13 03:05:34 -04:00
committed by GitHub
parent a432bf94ec
commit d5946a9667
4 changed files with 182 additions and 57 deletions

View File

@ -4,7 +4,7 @@ use crate::{
parse_keywords::*,
parse_patterns::parse_pattern,
parse_shape_specs::{parse_shape_name, parse_type, ShapeDescriptorUse},
type_check::{self, math_result_type, type_compatible},
type_check::{self, check_range_types, math_result_type, type_compatible},
Token, TokenContents,
};
use itertools::Itertools;
@ -109,14 +109,9 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) ->
}
working_set.parse_errors.truncate(starting_error_count);
parse_range(working_set, span);
if working_set.parse_errors.len() == starting_error_count {
return true;
}
let is_range = parse_range(working_set, span).is_some();
working_set.parse_errors.truncate(starting_error_count);
false
is_range
}
fn is_identifier(bytes: &[u8]) -> bool {
@ -1389,8 +1384,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span)
trace!("-- found leading range indicator");
let starting_error_count = working_set.parse_errors.len();
let range_expr = parse_range(working_set, spans[0]);
if working_set.parse_errors.len() == starting_error_count {
if let Some(range_expr) = parse_range(working_set, spans[0]) {
trace!("-- successfully parsed range");
return range_expr;
}
@ -1598,7 +1592,7 @@ pub fn parse_number(working_set: &mut StateWorkingSet, span: Span) -> Expression
garbage(working_set, span)
}
pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression {
pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Option<Expression> {
trace!("parsing: range");
// Range follows the following syntax: [<from>][<next_operator><next>]<range_operator>[<to>]
@ -1614,12 +1608,20 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
s
} else {
working_set.error(ParseError::NonUtf8(span));
return garbage(working_set, span);
return None;
};
if token.starts_with("...") {
working_set.error(ParseError::Expected(
"range operator ('..'), got spread ('...')",
span,
));
return None;
}
if !token.contains("..") {
working_set.error(ParseError::Expected("at least one range bound set", span));
return garbage(working_set, span);
return None;
}
// First, figure out what exact operators are used and determine their positions
@ -1633,7 +1635,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
"one range operator ('..' or '..<') and optionally one next operator ('..')",
span,
));
return garbage(working_set, span);
return None;
}
};
// Avoid calling sub-parsers on unmatched parens, to prevent quadratic time on things like ((((1..2))))
@ -1648,7 +1650,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
);
if let Some(_err) = err {
working_set.error(ParseError::Expected("Valid expression before ..", span));
return garbage(working_set, span);
return None;
}
}
@ -1665,7 +1667,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
"inclusive operator preceding second range bound",
span,
));
return garbage(working_set, span);
return None;
}
} else {
let op_str = if token.contains("..=") { "..=" } else { ".." };
@ -1698,7 +1700,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
if let (None, None) = (&from, &to) {
working_set.error(ParseError::Expected("at least one range bound set", span));
return garbage(working_set, span);
return None;
}
let (next, next_op_span) = if let Some(pos) = next_op_pos {
@ -1719,14 +1721,21 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
next_op_span,
};
let range = Range {
let mut range = Range {
from,
next,
to,
operator,
};
Expression::new(working_set, Expr::Range(Box::new(range)), span, Type::Range)
check_range_types(working_set, &mut range);
Some(Expression::new(
working_set,
Expr::Range(Box::new(range)),
span,
Type::Range,
))
}
pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -> Expression {
@ -1740,8 +1749,7 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -
} else {
let starting_error_count = working_set.parse_errors.len();
let expr = parse_range(working_set, span);
if starting_error_count == working_set.parse_errors.len() {
if let Some(expr) = parse_range(working_set, span) {
expr
} else {
working_set.parse_errors.truncate(starting_error_count);
@ -1812,9 +1820,7 @@ pub fn parse_paren_expr(
) -> Expression {
let starting_error_count = working_set.parse_errors.len();
let expr = parse_range(working_set, span);
if starting_error_count == working_set.parse_errors.len() {
if let Some(expr) = parse_range(working_set, span) {
expr
} else {
working_set.parse_errors.truncate(starting_error_count);
@ -4733,7 +4739,9 @@ pub fn parse_value(
SyntaxShape::Duration => parse_duration(working_set, span),
SyntaxShape::DateTime => parse_datetime(working_set, span),
SyntaxShape::Filesize => parse_filesize(working_set, span),
SyntaxShape::Range => parse_range(working_set, span),
SyntaxShape::Range => {
parse_range(working_set, span).unwrap_or_else(|| garbage(working_set, span))
}
SyntaxShape::Filepath => parse_filepath(working_set, span),
SyntaxShape::Directory => parse_directory(working_set, span),
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),