allow lists to have type annotations (#8529)

this pr refines #8270 and closes #8109

# description
examples:

the original syntax is okay
```nu
def okay [nums: list] {}         # the type of the list will be list<any>
```
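
for illustration only, a call with mixed element types should be accepted here, since a bare `list` is treated as `list<any>` (this sketch reuses the `okay` command above and is not taken from the pr):
```nu
okay [1 two 3.5]                 # int, string and float elements all fit list<any>
```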

empty annotations are allowed in any variation
the last two may be caught by a future formatter,
but they do not affect `nu` code currently
```nu
def okay [nums: list<>] {}       # okay

def okay [nums: list<     >] {}  # weird but also okay

def okay [nums: list<
>] {}                            # also weird but okay
```

types are allowed (see [notes](#notes) below)
```nu
def okay [nums: list<int>] {}    # `okay [a b c]` will throw an error

def okay [nums: list< int >] {}  # any amount of space within the angle brackets is okay

def err [nums: list <int>] {}    # this is not okay: `nums` and `<int>` will be
                                 # parsed as two separate params
```
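
for contrast, a sketch of a call whose elements do satisfy the annotation (the `sum` command name is only illustrative):
```nu
def sum [nums: list<int>] { $nums | math sum }
sum [1 2 3]                      # => 6
```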

nested annotations are allowed in many variations
```nu
def okay [items: list<list<int>>] {}

def okay [items: list<list>] {}
```
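
a usage sketch of a nested annotation (the `total` command is hypothetical, not part of this pr):
```nu
def total [rows: list<list<int>>] {
    $rows | each { |row| $row | math sum } | math sum
}
total [[1 2] [3 4]]              # => 10
```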

any unterminated annotation is caught
```nu
Error: nu::parser::unexpected_eof

  × Unexpected end of code.
   ╭─[source:1:1]
 1 │ def err [nums: list<int] {}
   ·                       ▲
   ·                       ╰── expected closing >
   ╰────
```

unknown types are flagged
```nu
Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<str>] {}
   ·                     ─┬─
   ·                      ╰── unknown type
   ╰────

Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<int, string>] {}
   ·                    ─────┬─────
   ·                          ╰── unknown type
   ╰────
```
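
the annotation expects the same type names used elsewhere in `nu`, so the first example is fixed by spelling out `string`; the second fails because, judging from the error above, only a single inner type is accepted. a quick sketch of the corrected signature:
```nu
def okay [nums: list<string>] {} # `string`, not `str`
```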

# notes
the error message for mismatched types is not as intuitive as it could be
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ┬
   ·                                    ╰── expected int
   ╰────
```
it should be something like this
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ──┬──
   ·                                      ╰── expected list<int>
   ╰────
```
this is currently not implemented

commit 8cf9bc9993 (parent d0aa69bfcb), authored by mike, 2023-03-24 14:54:06 +03:00, committed by GitHub
7 changed files with 311 additions and 18 deletions

@@ -31,6 +31,7 @@ pub enum BlockKind {
    Paren,
    CurlyBracket,
    SquareBracket,
    AngleBracket,
}

impl BlockKind {
@@ -39,6 +40,7 @@ impl BlockKind {
            BlockKind::Paren => b')',
            BlockKind::SquareBracket => b']',
            BlockKind::CurlyBracket => b'}',
            BlockKind::AngleBracket => b'>',
        }
    }
}
@@ -77,6 +79,7 @@ pub fn lex_item(
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    in_signature: bool,
) -> (Token, Option<ParseError>) {
    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string literal lexer mode until we encounter the
@@ -156,6 +159,12 @@ pub fn lex_item(
        } else if c == b'[' {
            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
        } else if c == b'<' && in_signature {
            block_level.push(BlockKind::AngleBracket);
        } else if c == b'>' && in_signature {
            if let Some(BlockKind::AngleBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == b']' {
            // We encountered a closing `]` delimiter. Pop off the opening `[`
            // delimiter.
@@ -299,12 +308,48 @@ pub fn lex_item(
    }
}

pub fn lex_signature(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        true,
    )
}

pub fn lex(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        false,
    )
}

fn lex_internal(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
    // within signatures we want to treat `<` and `>` specially
    in_signature: bool,
) -> (Vec<Token>, Option<ParseError>) {
    let mut error = None;
@@ -427,6 +472,7 @@ pub fn lex(
                span_offset,
                additional_whitespace,
                special_tokens,
                in_signature,
            );
            if error.is_none() {
                error = err;

@@ -16,7 +16,7 @@ pub use flatten::{
    flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
};
pub use known_external::KnownExternal;
pub use lex::{lex, Token, TokenContents};
pub use lex::{lex, lex_signature, Token, TokenContents};
pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
pub use parse_keywords::*;

@@ -1,6 +1,6 @@
use crate::{
    eval::{eval_constant, value_as_string},
    lex,
    lex::{lex, lex_signature},
    lite_parser::{lite_parse, LiteCommand, LiteElement},
    parse_mut,
    parse_patterns::{parse_match_pattern, parse_pattern},
@@ -3039,6 +3039,8 @@ pub fn parse_shape_name(
    bytes: &[u8],
    span: Span,
) -> (SyntaxShape, Option<ParseError>) {
    let mut error = None;

    let result = match bytes {
        b"any" => SyntaxShape::Any,
        b"binary" => SyntaxShape::Binary,
@@ -3060,7 +3062,11 @@ pub fn parse_shape_name(
        b"int" => SyntaxShape::Int,
        b"import-pattern" => SyntaxShape::ImportPattern,
        b"keyword" => SyntaxShape::Keyword(vec![], Box::new(SyntaxShape::Any)),
        b"list" => SyntaxShape::List(Box::new(SyntaxShape::Any)),
        _ if bytes.starts_with(b"list") => {
            let (sig, err) = parse_list_shape(working_set, bytes, span);
            error = error.or(err);
            sig
        }
        b"math" => SyntaxShape::MathExpression,
        b"nothing" => SyntaxShape::Nothing,
        b"number" => SyntaxShape::Number,
@@ -3104,7 +3110,51 @@ pub fn parse_shape_name(
        }
    };

    (result, None)
    (result, error)
}

fn parse_list_shape(
    working_set: &StateWorkingSet,
    bytes: &[u8],
    span: Span,
) -> (SyntaxShape, Option<ParseError>) {
    assert!(bytes.starts_with(b"list"));

    if bytes == b"list" {
        (SyntaxShape::List(Box::new(SyntaxShape::Any)), None)
    } else if bytes.starts_with(b"list<") {
        let start = span.start + 5;

        // if the annotation is unterminated, we want to return early to avoid
        // overflows with spans
        let end = if bytes.ends_with(b">") {
            span.end - 1
        } else {
            let err = ParseError::Unclosed(">".into(), span);
            return (SyntaxShape::List(Box::new(SyntaxShape::Any)), Some(err));
        };

        let inner_span = Span::new(start, end);
        let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span));

        // remove any extra whitespace, for example `list< string >` becomes `list<string>`
        let inner_bytes = inner_text.trim().as_bytes();

        // list<>
        if inner_bytes.is_empty() {
            (SyntaxShape::List(Box::new(SyntaxShape::Any)), None)
        } else {
            let (inner_sig, err) = parse_shape_name(working_set, inner_bytes, inner_span);
            (SyntaxShape::List(Box::new(inner_sig)), err)
        }
    } else {
        (
            SyntaxShape::List(Box::new(SyntaxShape::Any)),
            Some(ParseError::UnknownType(span)),
        )
    }
}

pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
@@ -3518,13 +3568,14 @@ pub fn parse_signature_helper(
    let mut error = None;
    let source = working_set.get_span_contents(span);

    let (output, err) = lex(
    let (output, err) = lex_signature(
        source,
        span.start,
        &[b'\n', b'\r'],
        &[b':', b'=', b','],
        false,
    );
    error = error.or(err);

    let mut args: Vec<Arg> = vec![];