From c7224be3cd32634c733391a02dc4cf378fbe9e16 Mon Sep 17 00:00:00 2001 From: Bahex <17417311+Bahex@users.noreply.github.com> Date: Sun, 27 Apr 2025 08:18:21 +0300 Subject: [PATCH] refactor: parsing of type parameters: list, record, one_of etc --- crates/nu-parser/src/parse_shape_specs.rs | 440 +++++++++------------- tests/repl/test_signatures.rs | 2 +- 2 files changed, 184 insertions(+), 258 deletions(-) diff --git a/crates/nu-parser/src/parse_shape_specs.rs b/crates/nu-parser/src/parse_shape_specs.rs index 20a284502a..2d0de2f4f2 100644 --- a/crates/nu-parser/src/parse_shape_specs.rs +++ b/crates/nu-parser/src/parse_shape_specs.rs @@ -1,7 +1,9 @@ #![allow(clippy::byte_char_slices)] use crate::{lex::lex_signature, parser::parse_value, trim_quotes, TokenContents}; -use nu_protocol::{engine::StateWorkingSet, ParseError, Span, SyntaxShape, Type}; +use nu_protocol::{ + engine::StateWorkingSet, IntoSpanned, ParseError, Span, Spanned, SyntaxShape, Type, +}; #[derive(Debug, Clone, Copy, PartialEq)] pub enum ShapeDescriptorUse { @@ -58,19 +60,52 @@ pub fn parse_shape_name( b"filesize" => SyntaxShape::Filesize, b"glob" => SyntaxShape::GlobPattern, b"int" => SyntaxShape::Int, - _ if bytes.starts_with(b"list") => parse_list_shape(working_set, bytes, span, use_loc), b"nothing" => SyntaxShape::Nothing, - _ if bytes.starts_with(b"one_of") => parse_one_of_shape(working_set, bytes, span, use_loc), b"number" => SyntaxShape::Number, b"path" => SyntaxShape::Filepath, b"range" => SyntaxShape::Range, - _ if bytes.starts_with(b"record") => { - parse_collection_shape(working_set, bytes, span, use_loc) + _ if bytes.starts_with(b"one_of") + || bytes.starts_with(b"list") + || bytes.starts_with(b"record") + || bytes.starts_with(b"table") => + { + let (type_name, type_params) = split_generic_params(working_set, bytes, span); + match type_name { + b"one_of" => SyntaxShape::OneOf(match type_params { + Some(params) => parse_type_params(working_set, params, use_loc), + None => vec![], + }), + b"list" => SyntaxShape::List(Box::new(match type_params { + Some(params) => { + let mut parsed_params = parse_type_params(working_set, params, use_loc); + if parsed_params.len() > 1 { + working_set.error(ParseError::LabeledError( + "expected a single type parameter".into(), + "only one parameter allowed".into(), + params.span, + )); + SyntaxShape::Any + } else { + parsed_params.pop().unwrap_or(SyntaxShape::Any) + } + } + None => SyntaxShape::Any, + })), + b"record" => SyntaxShape::Record(match type_params { + Some(params) => parse_named_type_params(working_set, params, use_loc), + None => vec![], + }), + b"table" => SyntaxShape::Table(match type_params { + Some(params) => parse_named_type_params(working_set, params, use_loc), + None => vec![], + }), + _ => { + working_set.error(ParseError::UnknownType(span)); + SyntaxShape::Any + } + } } b"string" => SyntaxShape::String, - _ if bytes.starts_with(b"table") => { - parse_collection_shape(working_set, bytes, span, use_loc) - } _ => { if bytes.contains(&b'@') { let mut split = bytes.splitn(2, |b| b == &b'@'); @@ -121,259 +156,31 @@ pub fn parse_shape_name( result } -fn parse_collection_shape( +fn split_generic_params<'a>( working_set: &mut StateWorkingSet, - bytes: &[u8], + bytes: &'a [u8], span: Span, - use_loc: ShapeDescriptorUse, -) -> SyntaxShape { - assert!(bytes.starts_with(b"record") || bytes.starts_with(b"table")); - let is_table = bytes.starts_with(b"table"); - - let name = if is_table { "table" } else { "record" }; - let prefix = (if is_table { "table<" } else { "record<" }).as_bytes(); - let prefix_len = prefix.len(); - let mk_shape = |ty| -> SyntaxShape { - if is_table { - SyntaxShape::Table(ty) - } else { - SyntaxShape::Record(ty) - } +) -> (&'a [u8], Option>) { + let n = bytes.iter().position(|&c| c == b'<' || c == b'('); + let (open_delim_pos, close_delim) = match n.and_then(|n| Some((n, bytes.get(n)?))) { + Some((n, b'<')) => (n, b'>'), + Some((n, b'(')) => (n, b')'), + _ => return (bytes, None), }; - if bytes == name.as_bytes() { - mk_shape(vec![]) - } else if bytes.starts_with(prefix) { - let Some(inner_span) = prepare_inner_span(working_set, bytes, span, prefix_len, b'>') - else { - return SyntaxShape::Any; - }; + let type_name = &bytes[..(open_delim_pos)]; + let params = &bytes[(open_delim_pos + 1)..]; - // record<> or table<> - if inner_span.end - inner_span.start == 0 { - return mk_shape(vec![]); - } - let source = working_set.get_span_contents(inner_span); - let (tokens, err) = lex_signature( - source, - inner_span.start, - &[b'\n', b'\r'], - &[b':', b','], - true, - ); + let start = span.start + type_name.len() + 1; - if let Some(err) = err { - working_set.error(err); - // lexer errors cause issues with span overflows - return mk_shape(vec![]); - } - - let mut sig = vec![]; - let mut idx = 0; - - let key_error = |span| { - ParseError::LabeledError( - format!("`{name}` type annotations key not string"), - "must be a string".into(), - span, - ) - }; - - while idx < tokens.len() { - let TokenContents::Item = tokens[idx].contents else { - working_set.error(key_error(tokens[idx].span)); - return mk_shape(vec![]); - }; - - if working_set - .get_span_contents(tokens[idx].span) - .starts_with(b",") - { - idx += 1; - continue; - } - - let Some(key) = - parse_value(working_set, tokens[idx].span, &SyntaxShape::String).as_string() - else { - working_set.error(key_error(tokens[idx].span)); - return mk_shape(vec![]); - }; - - // we want to allow such an annotation - // `record` where the user leaves out the type - if idx + 1 == tokens.len() { - sig.push((key, SyntaxShape::Any)); - break; - } else { - idx += 1; - } - - let maybe_colon = working_set.get_span_contents(tokens[idx].span); - match maybe_colon { - b":" => { - if idx + 1 == tokens.len() { - working_set - .error(ParseError::Expected("type after colon", tokens[idx].span)); - break; - } else { - idx += 1; - } - } - // a key provided without a type - b"," => { - idx += 1; - sig.push((key, SyntaxShape::Any)); - continue; - } - // a key provided without a type - _ => { - sig.push((key, SyntaxShape::Any)); - continue; - } - } - - let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); - let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span, use_loc); - sig.push((key, shape)); - idx += 1; - } - - mk_shape(sig) - } else { - working_set.error(ParseError::UnknownType(span)); - - SyntaxShape::Any - } -} - -fn parse_one_of_shape( - working_set: &mut StateWorkingSet, - bytes: &[u8], - span: Span, - use_loc: ShapeDescriptorUse, -) -> SyntaxShape { - assert!(bytes.starts_with(b"one_of")); - - let name = "one_of"; - let prefix = b"one_of("; - let prefix_len = prefix.len(); - let mk_shape = |ty| -> SyntaxShape { SyntaxShape::OneOf(ty) }; - - if bytes == name.as_bytes() { - mk_shape(vec![]) - } else if bytes.starts_with(prefix) { - let Some(inner_span) = prepare_inner_span(working_set, bytes, span, prefix_len, b')') - else { - return SyntaxShape::Any; - }; - - // one_of() - if inner_span.end - inner_span.start == 0 { - return mk_shape(vec![]); - } - let source = working_set.get_span_contents(inner_span); - let (tokens, err) = lex_signature( - source, - inner_span.start, - &[b'\n', b'\r'], - &[b':', b','], - true, - ); - - if let Some(err) = err { - working_set.error(err); - // lexer errors cause issues with span overflows - return mk_shape(vec![]); - } - - let mut sig = vec![]; - let mut idx = 0; - - let key_error = |span| { - ParseError::LabeledError( - format!("`{name}` type annotations key not string"), - "must be a string".into(), - span, - ) - }; - - while idx < tokens.len() { - let TokenContents::Item = tokens[idx].contents else { - working_set.error(key_error(tokens[idx].span)); - return mk_shape(vec![]); - }; - - if working_set - .get_span_contents(tokens[idx].span) - .starts_with(b",") - { - idx += 1; - continue; - } - - let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); - let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span, use_loc); - sig.push(shape); - idx += 1; - } - - mk_shape(sig) - } else { - working_set.error(ParseError::UnknownType(span)); - SyntaxShape::Any - } -} - -fn parse_list_shape( - working_set: &mut StateWorkingSet, - bytes: &[u8], - span: Span, - use_loc: ShapeDescriptorUse, -) -> SyntaxShape { - assert!(bytes.starts_with(b"list")); - - if bytes == b"list" { - SyntaxShape::List(Box::new(SyntaxShape::Any)) - } else if bytes.starts_with(b"list<") { - let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 5, b'>') else { - return SyntaxShape::Any; - }; - - let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span)); - // remove any extra whitespace, for example `list< string >` becomes `list` - let inner_bytes = inner_text.trim().as_bytes().to_vec(); - - // list<> - if inner_bytes.is_empty() { - SyntaxShape::List(Box::new(SyntaxShape::Any)) - } else { - let inner_sig = parse_shape_name(working_set, &inner_bytes, inner_span, use_loc); - - SyntaxShape::List(Box::new(inner_sig)) - } - } else { - working_set.error(ParseError::UnknownType(span)); - - SyntaxShape::List(Box::new(SyntaxShape::Any)) - } -} - -fn prepare_inner_span( - working_set: &mut StateWorkingSet, - bytes: &[u8], - span: Span, - prefix_len: usize, - terminator: u8, -) -> Option { - let start = span.start + prefix_len; - - if bytes.ends_with(&[terminator]) { + if params.ends_with(&[close_delim]) { let end = span.end - 1; - Some(Span::new(start, end)) - } else if bytes.contains(&terminator) { - let angle_start = bytes.split(|it| it == &terminator).collect::>()[0].len() + 1; - let span = Span::new(span.start + angle_start, span.end); + ( + type_name, + Some((¶ms[..(params.len() - 1)]).into_spanned(Span::new(start, end))), + ) + } else if let Some(close_delim_pos) = params.iter().position(|it| it == &close_delim) { + let span = Span::new(span.start + close_delim_pos, span.end); working_set.error(ParseError::LabeledError( "Extra characters in the parameter name".into(), @@ -381,9 +188,128 @@ fn prepare_inner_span( span, )); - None + (bytes, None) } else { - working_set.error(ParseError::Unclosed((terminator as char).into(), span)); - None + working_set.error(ParseError::Unclosed((close_delim as char).into(), span)); + (bytes, None) } } + +fn parse_named_type_params( + working_set: &mut StateWorkingSet, + Spanned { item: source, span }: Spanned<&[u8]>, + use_loc: ShapeDescriptorUse, +) -> Vec<(String, SyntaxShape)> { + let (tokens, err) = lex_signature(source, span.start, &[b'\n', b'\r'], &[b':', b','], true); + + if let Some(err) = err { + working_set.error(err); + return Vec::new(); + } + + let mut sig = Vec::new(); + let mut idx = 0; + + let key_error = |span| { + ParseError::LabeledError( + // format!("`{name}` type annotations key not string"), + "annotation key not string".into(), + "must be a string".into(), + span, + ) + }; + + while idx < tokens.len() { + let TokenContents::Item = tokens[idx].contents else { + working_set.error(key_error(tokens[idx].span)); + return Vec::new(); + }; + + if working_set + .get_span_contents(tokens[idx].span) + .starts_with(b",") + { + idx += 1; + continue; + } + + let Some(key) = + parse_value(working_set, tokens[idx].span, &SyntaxShape::String).as_string() + else { + working_set.error(key_error(tokens[idx].span)); + return Vec::new(); + }; + + // we want to allow such an annotation + // `record` where the user leaves out the type + if idx + 1 == tokens.len() { + sig.push((key, SyntaxShape::Any)); + break; + } else { + idx += 1; + } + + let maybe_colon = working_set.get_span_contents(tokens[idx].span); + match maybe_colon { + b":" => { + if idx + 1 == tokens.len() { + working_set.error(ParseError::Expected("type after colon", tokens[idx].span)); + break; + } else { + idx += 1; + } + } + // a key provided without a type + b"," => { + idx += 1; + sig.push((key, SyntaxShape::Any)); + continue; + } + // a key provided without a type + _ => { + sig.push((key, SyntaxShape::Any)); + continue; + } + } + + let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); + let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span, use_loc); + sig.push((key, shape)); + idx += 1; + } + + sig +} + +fn parse_type_params( + working_set: &mut StateWorkingSet, + Spanned { item: source, span }: Spanned<&[u8]>, + use_loc: ShapeDescriptorUse, +) -> Vec { + let (tokens, err) = lex_signature(source, span.start, &[b'\n', b'\r'], &[b':', b','], true); + + if let Some(err) = err { + working_set.error(err); + return Vec::new(); + } + + let mut sig = vec![]; + let mut idx = 0; + + while idx < tokens.len() { + if working_set + .get_span_contents(tokens[idx].span) + .starts_with(b",") + { + idx += 1; + continue; + } + + let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); + let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span, use_loc); + sig.push(shape); + idx += 1; + } + + sig +} diff --git a/tests/repl/test_signatures.rs b/tests/repl/test_signatures.rs index 7ff5df4641..8d9bd071bf 100644 --- a/tests/repl/test_signatures.rs +++ b/tests/repl/test_signatures.rs @@ -109,7 +109,7 @@ fn list_annotations_space_before() -> TestResult { #[test] fn list_annotations_unknown_separators() -> TestResult { let input = "def run [list: list] {$list | length}; run [2 5 4]"; - let expected = "unknown type"; + let expected = "only one parameter allowed"; fail_test(input, expected) }