From 77ca73f414ece8360e6da1812b4c3eff786cb0ad Mon Sep 17 00:00:00 2001 From: mike <98623181+1Kinoti@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:16:55 +0300 Subject: [PATCH] allow records to have type annotations (#8914) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description follow up to #8529 cleaned up version of #8892 - the original syntax is okay ```nu def okay [rec: record] {} ``` - you can now add type annotations for fields if you know them before hand ```nu def okay [rec: record] {} ``` - you can specify multiple fields ```nu def okay [person: record] {} # an optional comma is allowed def okay [person: record] {} ``` - if annotations are specified, any use of the command will be type checked against the specified type ```nu def unwrap [result: record] {} unwrap {ok: 2, value: "value"} # errors with Error: nu::parser::type_mismatch × Type mismatch. ╭─[entry #4:1:1] 1 │ unwrap {ok: 2, value: "value"} · ───────┬───── · ╰── expected record, found record ╰──── ``` > here the error is in the `ok` field, since `any` is coerced into any type > as a result `unwrap {ok: true, value: "value"}` is okay - the key must be a string, either quoted or unquoted ```nu def err [rec: record<{}: list>] {} # errors with Error: × `record` type annotations key not string ╭─[entry #7:1:1] 1 │ def unwrap [result: record<{}: bool, value: any>] {} · ─┬ · ╰── must be a string ╰──── ``` - a key doesn't have to have a type in which case it is assumed to be `any` ```nu def okay [person: record] {} def okay [person: record] {} ``` - however, if you put a colon, you have to specify a type ```nu def err [person: record] {} # errors with Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[entry #12:1:1] 1 │ def unwrap [res: record] { $res } · ┬ · ╰── expected type after colon ╰──── ``` # User-Facing Changes **[BREAKING CHANGES]** - this change adds a field to `SyntaxShape::Record` so any plugins that used it will have to update and include the field. though if you are unsure of the type the record expects, `SyntaxShape::Record(vec![])` will suffice --- .../src/core_commands/error_make.rs | 6 +- crates/nu-command/src/env/load_env.rs | 2 +- crates/nu-parser/src/parser.rs | 207 +++++++++++++----- crates/nu-parser/src/type_check.rs | 30 +-- crates/nu-protocol/src/syntax_shape.rs | 26 ++- crates/nu-protocol/src/ty.rs | 37 ++-- src/tests/test_engine.rs | 2 +- src/tests/test_signatures.rs | 130 +++++++++++ 8 files changed, 347 insertions(+), 93 deletions(-) diff --git a/crates/nu-cmd-lang/src/core_commands/error_make.rs b/crates/nu-cmd-lang/src/core_commands/error_make.rs index 2c4f8c6bf..0ad64a42f 100644 --- a/crates/nu-cmd-lang/src/core_commands/error_make.rs +++ b/crates/nu-cmd-lang/src/core_commands/error_make.rs @@ -16,7 +16,11 @@ impl Command for ErrorMake { fn signature(&self) -> Signature { Signature::build("error make") .input_output_types(vec![(Type::Nothing, Type::Error)]) - .required("error_struct", SyntaxShape::Record, "the error to create") + .required( + "error_struct", + SyntaxShape::Record(vec![]), + "the error to create", + ) .switch( "unspanned", "remove the origin label from the error", diff --git a/crates/nu-command/src/env/load_env.rs b/crates/nu-command/src/env/load_env.rs index 618feeaa7..0a84fa100 100644 --- a/crates/nu-command/src/env/load_env.rs +++ b/crates/nu-command/src/env/load_env.rs @@ -23,7 +23,7 @@ impl Command for LoadEnv { .allow_variants_without_examples(true) .optional( "update", - SyntaxShape::Record, + SyntaxShape::Record(vec![]), "the record to use for updates", ) .category(Category::FileSystem) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 86afa0794..3fb0dcf5f 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -2775,7 +2775,7 @@ pub fn parse_shape_name( b"operator" => SyntaxShape::Operator, b"path" => SyntaxShape::Filepath, b"range" => SyntaxShape::Range, - b"record" => SyntaxShape::Record, + _ if bytes.starts_with(b"record") => parse_collection_shape(working_set, bytes, span), b"signature" => SyntaxShape::Signature, b"string" => SyntaxShape::String, b"table" => SyntaxShape::Table, @@ -2814,38 +2814,129 @@ pub fn parse_shape_name( result } +fn parse_collection_shape( + working_set: &mut StateWorkingSet, + bytes: &[u8], + span: Span, +) -> SyntaxShape { + assert!(bytes.starts_with(b"record")); + let name = "record"; + let mk_shape = SyntaxShape::Record; + + if bytes == name.as_bytes() { + mk_shape(vec![]) + } else if bytes.starts_with(b"record<") { + let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 7) else { + return SyntaxShape::Any; + }; + + // record<> or table<> + if inner_span.end - inner_span.start == 0 { + return mk_shape(vec![]); + } + let source = working_set.get_span_contents(inner_span); + let (tokens, err) = lex_signature( + source, + inner_span.start, + &[b'\n', b'\r'], + &[b':', b','], + true, + ); + + if let Some(err) = err { + working_set.error(err); + // lexer errors cause issues with span overflows + return mk_shape(vec![]); + } + + let mut sig = vec![]; + let mut idx = 0; + + let key_error = |span| { + ParseError::LabeledError( + format!("`{name}` type annotations key not string"), + "must be a string".into(), + span, + ) + }; + + while idx < tokens.len() { + let TokenContents::Item = tokens[idx].contents else { + working_set.error(key_error(tokens[idx].span)); + return mk_shape(vec![]) + }; + + let key_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); + if key_bytes.first().copied() == Some(b',') { + idx += 1; + continue; + } + + let Some(key) = parse_value(working_set, tokens[idx].span, &SyntaxShape::String).as_string() else { + working_set.error(key_error(tokens[idx].span)); + return mk_shape(vec![]); + }; + + // we want to allow such an annotation + // `record` where the user leaves out the type + if idx + 1 == tokens.len() { + sig.push((key, SyntaxShape::Any)); + break; + } else { + idx += 1; + } + + let maybe_colon = working_set.get_span_contents(tokens[idx].span).to_vec(); + match maybe_colon.as_slice() { + b":" => { + if idx + 1 == tokens.len() { + working_set.error(ParseError::Expected( + "type after colon".into(), + tokens[idx].span, + )); + break; + } else { + idx += 1; + } + } + // a key provided without a type + b"," => { + idx += 1; + sig.push((key, SyntaxShape::Any)); + continue; + } + // a key provided without a type + _ => { + sig.push((key, SyntaxShape::Any)); + continue; + } + } + + let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec(); + let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span); + sig.push((key, shape)); + idx += 1; + } + + mk_shape(sig) + } else { + working_set.error(ParseError::UnknownType(span)); + + SyntaxShape::Any + } +} + fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) -> SyntaxShape { assert!(bytes.starts_with(b"list")); if bytes == b"list" { SyntaxShape::List(Box::new(SyntaxShape::Any)) } else if bytes.starts_with(b"list<") { - let start = span.start + 5; - - // if the annotation is unterminated, we want to return early to avoid - // overflows with spans - let end = if bytes.ends_with(b">") { - span.end - 1 - // extra characters after the > - } else if bytes.contains(&b'>') { - let angle_start = bytes.split(|it| it == &b'>').collect::>()[0].len() + 1; - let span = Span::new(span.start + angle_start, span.end); - - working_set.error(ParseError::LabeledError( - "Extra characters in the parameter name".into(), - "extra characters".into(), - span, - )); + let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 5) else { return SyntaxShape::Any; - } else { - working_set.error(ParseError::Unclosed(">".into(), span)); - return SyntaxShape::List(Box::new(SyntaxShape::Any)); }; - let inner_span = Span::new(start, end); - let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span)); - // remove any extra whitespace, for example `list< string >` becomes `list` let inner_bytes = inner_text.trim().as_bytes().to_vec(); @@ -2864,6 +2955,34 @@ fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) } } +fn prepare_inner_span( + working_set: &mut StateWorkingSet, + bytes: &[u8], + span: Span, + prefix_len: usize, +) -> Option { + let start = span.start + prefix_len; + + if bytes.ends_with(b">") { + let end = span.end - 1; + Some(Span::new(start, end)) + } else if bytes.contains(&b'>') { + let angle_start = bytes.split(|it| it == &b'>').collect::>()[0].len() + 1; + let span = Span::new(span.start + angle_start, span.end); + + working_set.error(ParseError::LabeledError( + "Extra characters in the parameter name".into(), + "extra characters".into(), + span, + )); + + None + } else { + working_set.error(ParseError::Unclosed(">".into(), span)); + None + } +} + pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type { match bytes { b"binary" => Type::Binary, @@ -3602,43 +3721,13 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) -> expression.ty.clone(), ); } - Type::List(param_ty) => { - if let Type::List(expr_ty) = &expression.ty { - if param_ty == expr_ty - || **param_ty == Type::Any - { - working_set.set_variable_type( - var_id, - expression.ty.clone(), - ); - } else { - working_set.error(ParseError::AssignmentMismatch( - "Default value wrong type" - .into(), - format!( - "expected default value to be `{var_type}`", - ), - expression.span, - ), - ) - } - } else { - working_set.error(ParseError::AssignmentMismatch( - "Default value wrong type".into(), - format!( - "expected default value to be `{var_type}`", - ), - expression.span, - )) - } - } - t => { - if t != &expression.ty { + _ => { + if !type_compatible(var_type, &expression.ty) { working_set.error( ParseError::AssignmentMismatch( "Default value wrong type".into(), format!( - "expected default value to be `{t}`" + "expected default value to be `{var_type}`" ), expression.span, ), @@ -3686,7 +3775,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) -> "Default value is the wrong type" .into(), format!( - "default value should be {t}" + "expected default value to be `{t}`" ), expression_span, ), @@ -4485,7 +4574,7 @@ pub fn parse_value( // Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex // stream doesn't start with '{'} -- parsing in SyntaxShape::Any arm depends on this error variant. - SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record => { + SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record(_) => { working_set.error(ParseError::Expected( "block, closure or record".into(), span, @@ -4505,7 +4594,7 @@ pub fn parse_value( SyntaxShape::Duration, SyntaxShape::Range, SyntaxShape::DateTime, //FIXME requires 3 failed conversion attempts before failing - SyntaxShape::Record, + SyntaxShape::Record(vec![]), SyntaxShape::Closure(None), SyntaxShape::Block, SyntaxShape::Int, diff --git a/crates/nu-parser/src/type_check.rs b/crates/nu-parser/src/type_check.rs index 32c04fad2..3ad88e70e 100644 --- a/crates/nu-parser/src/type_check.rs +++ b/crates/nu-parser/src/type_check.rs @@ -5,6 +5,21 @@ use nu_protocol::{ }; pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool { + // Structural subtyping + let is_compatible = |expected: &[(String, Type)], found: &[(String, Type)]| { + // the expected type is `any` + if expected.is_empty() { + true + } else if expected.len() != found.len() { + false + } else { + expected + .iter() + .zip(found.iter()) + .all(|(lhs, rhs)| lhs.0 == rhs.0 && type_compatible(&lhs.1, &rhs.1)) + } + }; + match (lhs, rhs) { (Type::List(c), Type::List(d)) => type_compatible(c, d), (Type::Number, Type::Int) => true, @@ -13,20 +28,7 @@ pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool { (Type::Any, _) => true, (_, Type::Any) => true, (Type::Record(fields_lhs), Type::Record(fields_rhs)) => { - // Structural subtyping - 'outer: for field_lhs in fields_lhs { - for field_rhs in fields_rhs { - if field_lhs.0 == field_rhs.0 { - if type_compatible(&field_lhs.1, &field_rhs.1) { - continue 'outer; - } else { - return false; - } - } - } - return false; - } - true + is_compatible(fields_lhs, fields_rhs) } (lhs, rhs) => lhs == rhs, } diff --git a/crates/nu-protocol/src/syntax_shape.rs b/crates/nu-protocol/src/syntax_shape.rs index ab1fff61d..195cd2bfa 100644 --- a/crates/nu-protocol/src/syntax_shape.rs +++ b/crates/nu-protocol/src/syntax_shape.rs @@ -95,7 +95,7 @@ pub enum SyntaxShape { Range, /// A record value, eg `{x: 1, y: 2}` - Record, + Record(Vec<(String, SyntaxShape)>), /// A math expression which expands shorthand forms on the lefthand side, eg `foo > 1` /// The shorthand allows us to more easily reach columns inside of the row being passed in @@ -151,7 +151,13 @@ impl SyntaxShape { SyntaxShape::OneOf(_) => Type::Any, SyntaxShape::Operator => Type::Any, SyntaxShape::Range => Type::Any, - SyntaxShape::Record => Type::Record(vec![]), // FIXME: What role should fields play in the Record type? + SyntaxShape::Record(entries) => { + let ty = entries + .iter() + .map(|(key, val)| (key.clone(), val.to_type())) + .collect(); + Type::Record(ty) + } SyntaxShape::RowCondition => Type::Bool, SyntaxShape::Boolean => Type::Bool, SyntaxShape::Signature => Type::Signature, @@ -194,7 +200,21 @@ impl Display for SyntaxShape { SyntaxShape::Binary => write!(f, "binary"), SyntaxShape::Table => write!(f, "table"), SyntaxShape::List(x) => write!(f, "list<{x}>"), - SyntaxShape::Record => write!(f, "record"), + SyntaxShape::Record(entries) => { + if entries.is_empty() { + write!(f, "record") + } else { + write!( + f, + "record<{}>", + entries + .iter() + .map(|(x, y)| format!("{x}: {y}")) + .collect::>() + .join(", "), + ) + } + } SyntaxShape::Filesize => write!(f, "filesize"), SyntaxShape::Duration => write!(f, "duration"), SyntaxShape::DateTime => write!(f, "datetime"), diff --git a/crates/nu-protocol/src/ty.rs b/crates/nu-protocol/src/ty.rs index 754afd88d..b9653fbfe 100644 --- a/crates/nu-protocol/src/ty.rs +++ b/crates/nu-protocol/src/ty.rs @@ -35,25 +35,28 @@ pub enum Type { impl Type { pub fn is_subtype(&self, other: &Type) -> bool { + // Structural subtyping + let is_subtype_collection = |this: &[(String, Type)], that: &[(String, Type)]| { + if this.is_empty() || that.is_empty() { + true + } else if this.len() != that.len() { + false + } else { + this.iter() + .zip(that.iter()) + .all(|(lhs, rhs)| lhs.0 == rhs.0 && lhs.1.is_subtype(&rhs.1)) + } + }; + match (self, other) { (t, u) if t == u => true, (Type::Float, Type::Number) => true, (Type::Int, Type::Number) => true, (_, Type::Any) => true, (Type::List(t), Type::List(u)) if t.is_subtype(u) => true, // List is covariant - - // TODO: Currently Record types specify their field types. If we are - // going to continue to do that, then it might make sense to define - // a "structural subtyping" whereby r1 is a subtype of r2 is the - // fields of r1 are a "subset" of the fields of r2 (names are a - // subset and agree on types). However, if we do that, then we need - // a way to specify the supertype of all Records. For now, we define - // any Record to be a subtype of any other Record. This allows - // Record(vec![]) to be used as an ad-hoc supertype of all Records - // in command signatures. This comment applies to Tables also, with - // "columns" in place of "fields". - (Type::Record(_), Type::Record(_)) => true, - (Type::Table(_), Type::Table(_)) => true, + (Type::Record(this), Type::Record(that)) | (Type::Table(this), Type::Table(that)) => { + is_subtype_collection(this, that) + } _ => false, } } @@ -87,7 +90,13 @@ impl Type { Type::List(x) => SyntaxShape::List(Box::new(x.to_shape())), Type::Number => SyntaxShape::Number, Type::Nothing => SyntaxShape::Nothing, - Type::Record(_) => SyntaxShape::Record, + Type::Record(entries) => { + let entries = entries + .iter() + .map(|(key, val)| (key.clone(), val.to_shape())) + .collect(); + SyntaxShape::Record(entries) + } Type::Table(_) => SyntaxShape::Table, Type::ListStream => SyntaxShape::List(Box::new(SyntaxShape::Any)), Type::Any => SyntaxShape::Any, diff --git a/src/tests/test_engine.rs b/src/tests/test_engine.rs index 0de39f491..df9b03aa9 100644 --- a/src/tests/test_engine.rs +++ b/src/tests/test_engine.rs @@ -318,7 +318,7 @@ fn default_value11() -> TestResult { fn default_value12() -> TestResult { fail_test( r#"def foo [--x:int = "a"] { $x }"#, - "default value should be int", + "expected default value to be `int`", ) } diff --git a/src/tests/test_signatures.rs b/src/tests/test_signatures.rs index 571d14086..d21e2afa5 100644 --- a/src/tests/test_signatures.rs +++ b/src/tests/test_signatures.rs @@ -132,3 +132,133 @@ fn list_annotations_with_extra_characters() -> TestResult { let expected = "Extra characters in the parameter name"; fail_test(input, expected) } + +#[test] +fn record_annotations_none() -> TestResult { + let input = "def run [rec: record] { $rec }; run {} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations() -> TestResult { + let input = "def run [rec: record] { $rec }; run {age: 3} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_two_types() -> TestResult { + let input = "def run [rec: record] { $rec }; run {name: nushell age: 3} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_two_types_comma_sep() -> TestResult { + let input = "def run [rec: record] { $rec }; run {name: nushell age: 3} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_key_with_no_type() -> TestResult { + let input = "def run [rec: record] { $rec }; run {name: nushell} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_two_types_one_with_no_type() -> TestResult { + let input = + "def run [rec: record] { $rec }; run {name: nushell age: 3} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_two_types_both_with_no_types() -> TestResult { + let input = "def run [rec: record] { $rec }; run {name: nushell age: 3} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_nested() -> TestResult { + let input = "def run [ + err: record< + msg: string, + label: record< + text: string + start: int, + end: int, + >> + ] { + $err + }; run { + msg: 'error message' + label: { + text: 'here is the error' + start: 0 + end: 69 + } + } | describe"; + let expected = "record>"; + run_test(input, expected) +} + +#[test] +fn record_annotations_type_inference_1() -> TestResult { + let input = "def run [rec: record] { $rec }; run {age: 2wk} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_type_inference_2() -> TestResult { + let input = "def run [rec: record] { $rec }; run {size: 2mb} | describe"; + let expected = "record"; + run_test(input, expected) +} + +#[test] +fn record_annotations_not_terminated() -> TestResult { + let input = "def run [rec: record TestResult { + let input = "def run [rec: record] { $rec }"; + let expected = "expected closing >"; + fail_test(input, expected) +} + +#[test] +fn record_annotations_no_type_after_colon() -> TestResult { + let input = "def run [rec: record] { $rec }"; + let expected = "type after colon"; + fail_test(input, expected) +} + +#[test] +fn record_annotations_type_mismatch_key() -> TestResult { + let input = "def run [rec: record] { $rec }; run {nme: nushell}"; + let expected = "expected record, found record"; + fail_test(input, expected) +} + +#[test] +fn record_annotations_type_mismatch_shape() -> TestResult { + let input = "def run [rec: record] { $rec }; run {age: 2wk}"; + let expected = "expected record, found record"; + fail_test(input, expected) +} + +#[test] +fn record_annotations_with_extra_characters() -> TestResult { + let input = "def run [list: recordextra] {$list | length}; run [1 2 3]"; + let expected = "Extra characters in the parameter name"; + fail_test(input, expected) +}