allow lists to have type annotations (#8529)

this pr refines #8270 and closes #8109

# description
examples:

the original syntax is okay
```nu
def okay [nums: list] {}         # the type of the list will be list<any>
```
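
for illustration only, a call with mixed element types should be accepted here, since a bare `list` is treated as `list<any>` (this sketch reuses the `okay` command above and is not taken from the pr):
```nu
okay [1 two 3.5]                 # int, string and float elements all fit list<any>
```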

empty annotations are allowed in any variation
the last two may be caught by a future formatter,
but they do not affect `nu` code currently
```nu
def okay [nums: list<>] {}       # okay

def okay [nums: list<     >] {}  # weird but also okay

def okay [nums: list<
>] {}                            # also weird but okay
```

types are allowed (see [notes](#notes) below)
```nu
def okay [nums: list<int>] {}    # `okay [a b c]` will throw an error

def okay [nums: list< int >] {}  # any amount of space within the angle brackets is okay

def err [nums: list <int>] {}    # this is not okay: `nums` and `<int>` will be
                                 # parsed as two separate params
```
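
for contrast, a sketch of a call whose elements do satisfy the annotation (the `sum` command name is only illustrative):
```nu
def sum [nums: list<int>] { $nums | math sum }
sum [1 2 3]                      # => 6
```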

nested annotations are allowed in many variations
```nu
def okay [items: list<list<int>>] {}

def okay [items: list<list>] {}
```
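
a usage sketch of a nested annotation (the `total` command is hypothetical, not part of this pr):
```nu
def total [rows: list<list<int>>] {
    $rows | each { |row| $row | math sum } | math sum
}
total [[1 2] [3 4]]              # => 10
```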

any unterminated annotation is caught
```nu
Error: nu::parser::unexpected_eof

  × Unexpected end of code.
   ╭─[source:1:1]
 1 │ def err [nums: list<int] {}
   ·                       ▲
   ·                       ╰── expected closing >
   ╰────
```

unknown types are flagged
```nu
Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<str>] {}
   ·                     ─┬─
   ·                      ╰── unknown type
   ╰────

Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<int, string>] {}
   ·                    ─────┬─────
   ·                          ╰── unknown type
   ╰────
```
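
the annotation expects the same type names used elsewhere in `nu`, so the first example is fixed by spelling out `string`; the second fails because, judging from the error above, only a single inner type is accepted. a quick sketch of the corrected signature:
```nu
def okay [nums: list<string>] {} # `string`, not `str`
```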

# notes
the error message for mismatched types is not as intuitive as it could be
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ┬
   ·                                    ╰── expected int
   ╰────
```
it should be something like this
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ──┬──
   ·                                      ╰── expected list<int>
   ╰────
```
this is currently not implemented

commit 8cf9bc9993 (parent d0aa69bfcb), authored by mike, 2023-03-24 14:54:06 +03:00, committed by GitHub
7 changed files with 311 additions and 18 deletions

@@ -31,6 +31,7 @@ pub enum BlockKind {
    Paren,
    CurlyBracket,
    SquareBracket,
    AngleBracket,
}

impl BlockKind {
@@ -39,6 +40,7 @@ impl BlockKind {
            BlockKind::Paren => b')',
            BlockKind::SquareBracket => b']',
            BlockKind::CurlyBracket => b'}',
            BlockKind::AngleBracket => b'>',
        }
    }
}
@@ -77,6 +79,7 @@ pub fn lex_item(
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    in_signature: bool,
) -> (Token, Option<ParseError>) {
    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string literal lexer mode until we encounter the
@@ -156,6 +159,12 @@ pub fn lex_item(
        } else if c == b'[' {
            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
        } else if c == b'<' && in_signature {
            block_level.push(BlockKind::AngleBracket);
        } else if c == b'>' && in_signature {
            if let Some(BlockKind::AngleBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == b']' {
            // We encountered a closing `]` delimiter. Pop off the opening `[`
            // delimiter.
@@ -299,12 +308,48 @@ pub fn lex_item(
    }
}

pub fn lex_signature(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        true,
    )
}

pub fn lex(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        false,
    )
}

fn lex_internal(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
    // within signatures we want to treat `<` and `>` specially
    in_signature: bool,
) -> (Vec<Token>, Option<ParseError>) {
    let mut error = None;
@@ -427,6 +472,7 @@ pub fn lex(
                span_offset,
                additional_whitespace,
                special_tokens,
                in_signature,
            );
            if error.is_none() {
                error = err;

@@ -16,7 +16,7 @@ pub use flatten::{
    flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
};
pub use known_external::KnownExternal;
pub use lex::{lex, Token, TokenContents};
pub use lex::{lex, lex_signature, Token, TokenContents};
pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
pub use parse_keywords::*;

@@ -1,6 +1,6 @@
use crate::{
    eval::{eval_constant, value_as_string},
    lex,
    lex::{lex, lex_signature},
    lite_parser::{lite_parse, LiteCommand, LiteElement},
    parse_mut,
    parse_patterns::{parse_match_pattern, parse_pattern},
@@ -3039,6 +3039,8 @@ pub fn parse_shape_name(
    bytes: &[u8],
    span: Span,
) -> (SyntaxShape, Option<ParseError>) {
    let mut error = None;

    let result = match bytes {
        b"any" => SyntaxShape::Any,
        b"binary" => SyntaxShape::Binary,
@@ -3060,7 +3062,11 @@ pub fn parse_shape_name(
        b"int" => SyntaxShape::Int,
        b"import-pattern" => SyntaxShape::ImportPattern,
        b"keyword" => SyntaxShape::Keyword(vec![], Box::new(SyntaxShape::Any)),
        b"list" => SyntaxShape::List(Box::new(SyntaxShape::Any)),
        _ if bytes.starts_with(b"list") => {
            let (sig, err) = parse_list_shape(working_set, bytes, span);
            error = error.or(err);
            sig
        }
        b"math" => SyntaxShape::MathExpression,
        b"nothing" => SyntaxShape::Nothing,
        b"number" => SyntaxShape::Number,
@@ -3104,7 +3110,51 @@ pub fn parse_shape_name(
        }
    };

    (result, None)
    (result, error)
}

fn parse_list_shape(
    working_set: &StateWorkingSet,
    bytes: &[u8],
    span: Span,
) -> (SyntaxShape, Option<ParseError>) {
    assert!(bytes.starts_with(b"list"));

    if bytes == b"list" {
        (SyntaxShape::List(Box::new(SyntaxShape::Any)), None)
    } else if bytes.starts_with(b"list<") {
        let start = span.start + 5;

        // if the annotation is unterminated, we want to return early to avoid
        // overflows with spans
        let end = if bytes.ends_with(b">") {
            span.end - 1
        } else {
            let err = ParseError::Unclosed(">".into(), span);
            return (SyntaxShape::List(Box::new(SyntaxShape::Any)), Some(err));
        };

        let inner_span = Span::new(start, end);
        let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span));

        // remove any extra whitespace, for example `list< string >` becomes `list<string>`
        let inner_bytes = inner_text.trim().as_bytes();

        // list<>
        if inner_bytes.is_empty() {
            (SyntaxShape::List(Box::new(SyntaxShape::Any)), None)
        } else {
            let (inner_sig, err) = parse_shape_name(working_set, inner_bytes, inner_span);
            (SyntaxShape::List(Box::new(inner_sig)), err)
        }
    } else {
        (
            SyntaxShape::List(Box::new(SyntaxShape::Any)),
            Some(ParseError::UnknownType(span)),
        )
    }
}

pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
@@ -3518,13 +3568,14 @@ pub fn parse_signature_helper(
    let mut error = None;
    let source = working_set.get_span_contents(span);

    let (output, err) = lex(
    let (output, err) = lex_signature(
        source,
        span.start,
        &[b'\n', b'\r'],
        &[b':', b'=', b','],
        false,
    );
    error = error.or(err);

    let mut args: Vec<Arg> = vec![];