Custom command input/output types (#9690)

# Description This adds input/output types to custom commands. These are input/output pairs that relate an input type to an output type. For example (a single int-to-int input/output pair): ``` def foo []: int -> int { ... } ``` You can also have multiple input/output pairs: ``` def bar []: [int -> string, string -> list<string>] { ... } ``` These types are checked at definition time in the parser. If the block does not match the type, the user will get a parser error. The `:` that begins the input/output signatures should immediately follow the argument signature as shown above. The PR also improves type parsing by re-using the shape parser. The shape parser is now the canonical way to parse types/shapes in user code. This PR also splits `extern` into `extern`/`extern-wrapped` because of the parser limitation that a multi-span argument (which Signature now is) can't precede an optional argument. `extern-wrapped` now takes the required block that was previously optional. # User-Facing Changes Splitting `extern` into `extern` and `extern-wrapped` is a breaking change. # Tests + Formatting  # After Submitting
2023-07-15 09:51:28 +12:00 · 2023-07-15 09:51:28 +12:00 · 53ae03bd63
commit 53ae03bd63
parent ba766de5d1
9 changed files with 282 additions and 73 deletions
--- a/crates/nu-cmd-lang/src/core_commands/extern_.rs
+++ b/crates/nu-cmd-lang/src/core_commands/extern_.rs
@ -19,7 +19,6 @@ impl Command for Extern {
            .input_output_types(vec![(Type::Nothing, Type::Nothing)])
            .required("def_name", SyntaxShape::String, "definition name")
            .required("params", SyntaxShape::Signature, "parameters")
            .optional("body", SyntaxShape::Block, "wrapper function block")
            .category(Category::Core)
    }
--- a/crates/nu-cmd-lang/src/core_commands/extern_wrapped.rs
+++ b/crates/nu-cmd-lang/src/core_commands/extern_wrapped.rs
@ -0,0 +1,52 @@
 use nu_protocol::ast::Call;
 use nu_protocol::engine::{Command, EngineState, Stack};
 use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type};
 #[derive(Clone)]
 pub struct ExternWrapped;
 impl Command for ExternWrapped {
    fn name(&self) -> &str {
        "extern-wrapped"
    }
    fn usage(&self) -> &str {
        "Define a signature for an external command."
    }
    fn signature(&self) -> nu_protocol::Signature {
        Signature::build("extern-wrapped")
            .input_output_types(vec![(Type::Nothing, Type::Nothing)])
            .required("def_name", SyntaxShape::String, "definition name")
            .required("params", SyntaxShape::Signature, "parameters")
            .required("body", SyntaxShape::Block, "wrapper function block")
            .category(Category::Core)
    }
    fn extra_usage(&self) -> &str {
        r#"This command is a parser keyword. For details, check:
  https://www.nushell.sh/book/thinking_in_nu.html"#
    }
    fn is_parser_keyword(&self) -> bool {
        true
    }
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        _call: &Call,
        _input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        Ok(PipelineData::empty())
    }
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Write a signature for an external command",
            example: r#"extern-wrapped echo [text: string] { print $text }"#,
            result: None,
        }]
    }
 }
--- a/crates/nu-cmd-lang/src/core_commands/mod.rs
+++ b/crates/nu-cmd-lang/src/core_commands/mod.rs
@ -17,6 +17,7 @@ mod export_extern;
 mod export_module;
 mod export_use;
 mod extern_;
 mod extern_wrapped;
 mod for_;
 mod hide;
 mod hide_env;
@ -55,6 +56,7 @@ pub use export_extern::ExportExtern;
 pub use export_module::ExportModule;
 pub use export_use::ExportUse;
 pub use extern_::Extern;
 pub use extern_wrapped::ExternWrapped;
 pub use for_::For;
 pub use hide::Hide;
 pub use hide_env::HideEnv;
--- a/crates/nu-cmd-lang/src/default_context.rs
+++ b/crates/nu-cmd-lang/src/default_context.rs
@ -35,6 +35,7 @@ pub fn create_default_context() -> EngineState {
            ExportUse,
            ExportModule,
            Extern,
            ExternWrapped,
            For,
            Hide,
            HideEnv,
--- a/crates/nu-command/tests/commands/def.rs
+++ b/crates/nu-command/tests/commands/def.rs
@ -158,8 +158,9 @@ fn def_with_paren_params() {
 #[test]
 fn extern_with_block() {
-    let actual =
+    let actual = nu!(
-        nu!("extern foo [...rest] { print ($rest | str join ',' ) }; foo --bar baz -- -q -u -x");
+        "extern-wrapped foo [...rest] { print ($rest | str join ',' ) }; foo --bar baz -- -q -u -x"
    );
    assert_eq!(actual.out, "--bar,baz,--,-q,-u,-x");
 }
--- a/crates/nu-parser/src/parse_keywords.rs
+++ b/crates/nu-parser/src/parse_keywords.rs
@ -1,4 +1,8 @@
-use crate::{parse_block, parser_path::ParserPath, type_check::type_compatible};
+use crate::{
    parse_block,
    parser_path::ParserPath,
    type_check::{check_block_input_output, type_compatible},
 };
 use itertools::Itertools;
 use log::trace;
 use nu_path::canonicalize_with;
@ -43,6 +47,7 @@ pub const UNALIASABLE_PARSER_KEYWORDS: &[&[u8]] = &[
    b"export def",
    b"for",
    b"extern",
    b"extern-wrapped",
    b"export extern",
    b"alias",
    b"export alias",
@ -181,7 +186,7 @@ pub fn parse_def_predecl(working_set: &mut StateWorkingSet, spans: &[Span]) {
                working_set.error(ParseError::DuplicateCommandDef(spans[1]));
            }
        }
-    } else if decl_name == b"extern" && spans.len() >= 3 {
+    } else if (decl_name == b"extern" || decl_name == b"extern-wrapped") && spans.len() >= 3 {
        let name_expr = parse_string(working_set, spans[1]);
        let name = name_expr.as_string();
@ -485,19 +490,20 @@ pub fn parse_def(
            block.signature = signature;
            block.redirect_env = def_call == b"def-env";
-            // Sadly we can't use this here as the inference would have to happen before
+            if block.signature.input_output_types.is_empty() {
-            // all the definitions had been fully parsed.
+                block
                    .signature
                    .input_output_types
                    .push((Type::Any, Type::Any));
            }
-            // infer the return type based on the output of the block
+            let block = working_set.get_block(block_id);
            // let block = working_set.get_block(block_id);
-            // let input_type = block.input_type(working_set);
+            let typecheck_errors = check_block_input_output(working_set, block);
-            // let output_type = block.output_type();
+
-            // block.signature.input_output_types = vec![(input_type, output_type)];
+            working_set
-            block
+                .parse_errors
-                .signature
+                .extend_from_slice(&typecheck_errors);
                .input_output_types
                .push((Type::Any, Type::Any));
        } else {
            working_set.error(ParseError::InternalError(
                "Predeclaration failed to add declaration".into(),
@ -529,15 +535,17 @@ pub fn parse_extern(
    // Checking that the function is used with the correct name
    // Maybe this is not necessary but it is a sanity check
-    let (name_span, split_id) =
+    let (name_span, split_id) = if spans.len() > 1
-        if spans.len() > 1 && working_set.get_span_contents(spans[0]) == b"export" {
+        && (working_set.get_span_contents(spans[0]) == b"export"
-            (spans[1], 2)
+            || working_set.get_span_contents(spans[0]) == b"export-wrapped")
-        } else {
+    {
-            (spans[0], 1)
+        (spans[1], 2)
-        };
+    } else {
        (spans[0], 1)
    };
    let extern_call = working_set.get_span_contents(name_span).to_vec();
-    if extern_call != b"extern" {
+    if extern_call != b"extern" && extern_call != b"extern-wrapped" {
        working_set.error(ParseError::UnknownState(
            "internal error: Wrong call name for extern function".into(),
            span(spans),
@ -932,7 +940,7 @@ pub fn parse_export_in_block(
    let full_name = if lite_command.parts.len() > 1 {
        let sub = working_set.get_span_contents(lite_command.parts[1]);
        match sub {
-            b"alias" | b"def" | b"def-env" | b"extern" | b"use" | b"module" => {
+            b"alias" | b"def" | b"def-env" | b"extern" | b"extern-wrapped" | b"use" | b"module" => {
                [b"export ", sub].concat()
            }
            _ => b"export".to_vec(),
@ -1175,7 +1183,7 @@ pub fn parse_export_in_module(
                result
            }
-            b"extern" => {
+            b"extern" | b"extern-wrapped" => {
                let lite_command = LiteCommand {
                    comments: lite_command.comments.clone(),
                    parts: spans[1..].to_vec(),
@ -1581,9 +1589,11 @@ pub fn parse_module_block(
                                None, // using commands named as the module locally is OK
                            ))
                        }
-                        b"extern" => block
+                        b"extern" | b"extern-wrapped" => {
-                            .pipelines
+                            block
-                            .push(parse_extern(working_set, command, None)),
+                                .pipelines
                                .push(parse_extern(working_set, command, None))
                        }
                        b"alias" => {
                            block.pipelines.push(parse_alias(
                                working_set,
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -682,6 +682,14 @@ pub fn parse_multispan_value(
            arg
        }
        SyntaxShape::Signature => {
            trace!("parsing: signature");
            let sig = parse_full_signature(working_set, &spans[*spans_idx..]);
            *spans_idx = spans.len() - 1;
            sig
        }
        SyntaxShape::Keyword(keyword, arg) => {
            trace!(
                "parsing: keyword({}) {:?}",
@ -2932,36 +2940,7 @@ fn prepare_inner_span(
 }
 pub fn parse_type(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) -> Type {
-    match bytes {
+    parse_shape_name(working_set, bytes, span).to_type()
        b"binary" => Type::Binary,
        b"block" => {
            working_set.error(ParseError::LabeledErrorWithHelp {
                error: "Blocks are not support as first-class values".into(),
                label: "blocks are not supported as values".into(),
                help: "Use 'closure' instead of 'block'".into(),
                span,
            });
            Type::Any
        }
        b"bool" => Type::Bool,
        b"cellpath" => Type::CellPath,
        b"closure" => Type::Closure,
        b"date" => Type::Date,
        b"duration" => Type::Duration,
        b"error" => Type::Error,
        b"filesize" => Type::Filesize,
        b"float" | b"decimal" => Type::Float,
        b"int" => Type::Int,
        b"list" => Type::List(Box::new(Type::Any)),
        b"number" => Type::Number,
        b"range" => Type::Range,
        b"record" => Type::Record(vec![]),
        b"string" => Type::String,
        b"table" => Type::Table(vec![]), //FIXME
        _ => Type::Any,
    }
 }
 pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
@ -3199,6 +3178,95 @@ pub fn expand_to_cell_path(
    }
 }
 pub fn parse_input_output_types(
    working_set: &mut StateWorkingSet,
    spans: &[Span],
 ) -> Vec<(Type, Type)> {
    let mut full_span = span(spans);
    let mut bytes = working_set.get_span_contents(full_span);
    if bytes.starts_with(b"[") {
        bytes = &bytes[1..];
        full_span.start += 1;
    }
    if bytes.ends_with(b"]") {
        bytes = &bytes[..(bytes.len() - 1)];
        full_span.end -= 1;
    }
    let (tokens, parse_error) = lex(bytes, full_span.start, &[b','], &[], true);
    if let Some(parse_error) = parse_error {
        working_set.parse_errors.push(parse_error);
    }
    let mut output = vec![];
    let mut idx = 0;
    while idx < tokens.len() {
        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
        let input_type = parse_type(working_set, &type_bytes, tokens[idx].span);
        idx += 1;
        if idx >= tokens.len() {
            working_set.error(ParseError::Expected(
                "arrow (->)",
                Span::new(tokens[idx - 1].span.end, tokens[idx - 1].span.end),
            ));
            break;
        }
        let arrow = working_set.get_span_contents(tokens[idx].span);
        if arrow != b"->" {
            working_set.error(ParseError::Expected("arrow (->)", tokens[idx].span));
        }
        idx += 1;
        if idx >= tokens.len() {
            working_set.error(ParseError::MissingType(Span::new(
                tokens[idx - 1].span.end,
                tokens[idx - 1].span.end,
            )));
            break;
        }
        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
        let output_type = parse_type(working_set, &type_bytes, tokens[idx].span);
        output.push((input_type, output_type));
        idx += 1;
    }
    output
 }
 /// Parses an argument signature, optionally followed by input/output types.
 ///
 /// `spans[0]` holds the argument signature. When its text ends with `:`, the
 /// colon is excluded from the signature span, the remaining spans are parsed
 /// as input/output type pairs and stored on the signature, and the expression
 /// span is extended to the end of those remaining spans. Otherwise only
 /// `spans[0]` is parsed.
 ///
 /// Indexes `spans[0]` directly, so `spans` must not be empty.
 pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
    let head = spans[0];
    let has_io_annotation = working_set.get_span_contents(head).ends_with(b":");
    if !has_io_annotation {
        return parse_signature(working_set, head);
    }
    let tail = &spans[1..];
    // Leave the trailing `:` out of the span given to the signature parser.
    let mut signature_expr = parse_signature(working_set, Span::new(head.start, head.end - 1));
    let io_types = parse_input_output_types(working_set, tail);
    // Only a successfully parsed signature is updated; any other expression
    // is returned untouched.
    if let Expr::Signature(sig) = &mut signature_expr.expr {
        sig.input_output_types = io_types;
        signature_expr.span.end = span(tail).end;
    }
    signature_expr
 }
 pub fn parse_row_condition(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
    let var_id = working_set.add_variable(b"$it".to_vec(), span(spans), Type::Any, false);
    let expression = parse_math_expression(working_set, spans, Some(var_id));
@ -5026,8 +5094,8 @@ pub fn parse_expression(
        // For now, check for special parses of certain keywords
        match bytes.as_slice() {
-            b"def" | b"extern" | b"for" | b"module" | b"use" | b"source" | b"alias" | b"export"
+            b"def" | b"extern" | b"extern-wrapped" | b"for" | b"module" | b"use" | b"source"
-            | b"hide" => {
+            | b"alias" | b"export" | b"hide" => {
                working_set.error(ParseError::BuiltinCommandInPipeline(
                    String::from_utf8(bytes)
                        .expect("builtin commands bytes should be able to convert to string"),
@ -5194,7 +5262,7 @@ pub fn parse_builtin_commands(
    match name {
        b"def" | b"def-env" => parse_def(working_set, lite_command, None),
-        b"extern" => parse_extern(working_set, lite_command, None),
+        b"extern" | b"extern-wrapped" => parse_extern(working_set, lite_command, None),
        b"let" => parse_let(working_set, &lite_command.parts),
        b"const" => parse_const(working_set, &lite_command.parts),
        b"mut" => parse_mut(working_set, &lite_command.parts),
@ -5582,7 +5650,10 @@ pub fn parse_block(
    block.span = Some(span);
-    type_check::check_block_input_output(working_set, &block);
+    let errors = type_check::check_block_input_output(working_set, &block);
    if !errors.is_empty() {
        working_set.parse_errors.extend_from_slice(&errors);
    }
    block
 }
--- a/crates/nu-parser/src/type_check.rs
+++ b/crates/nu-parser/src/type_check.rs
@ -949,12 +949,14 @@ pub fn math_result_type(
 }
 pub fn check_pipeline_type(
-    working_set: &mut StateWorkingSet,
+    working_set: &StateWorkingSet,
    pipeline: &Pipeline,
    input_type: Type,
-) -> Type {
+) -> (Type, Option<Vec<ParseError>>) {
    let mut current_type = input_type;
    let mut output_errors: Option<Vec<ParseError>> = None;
    'elem: for elem in &pipeline.elements {
        match elem {
            PipelineElement::Expression(
@ -997,7 +999,12 @@ pub fn check_pipeline_type(
                }
                if !decl.signature().input_output_types.is_empty() {
-                    working_set.error(ParseError::InputMismatch(current_type, call.head))
+                    if let Some(output_errors) = &mut output_errors {
                        output_errors.push(ParseError::InputMismatch(current_type, call.head))
                    } else {
                        output_errors =
                            Some(vec![ParseError::InputMismatch(current_type, call.head)]);
                    }
                }
                current_type = Type::Any;
            }
@ -1010,27 +1017,38 @@ pub fn check_pipeline_type(
        }
    }
-    current_type
+    (current_type, output_errors)
 }
-pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block) {
+pub fn check_block_input_output(working_set: &StateWorkingSet, block: &Block) -> Vec<ParseError> {
    // let inputs = block.input_types();
    let mut output_errors = vec![];
    for (input_type, output_type) in &block.signature.input_output_types {
        let mut current_type = input_type.clone();
        let mut current_output_type = Type::Nothing;
        for pipeline in &block.pipelines {
-            current_output_type = check_pipeline_type(working_set, pipeline, current_type);
+            let (checked_output_type, err) =
                check_pipeline_type(working_set, pipeline, current_type);
            current_output_type = checked_output_type;
            current_type = Type::Nothing;
            if let Some(err) = err {
                output_errors.extend_from_slice(&err);
            }
        }
        if !type_compatible(output_type, &current_output_type)
            && output_type != &Type::Any
            && current_output_type != Type::Any
        {
-            working_set.error(ParseError::OutputMismatch(
+            let span = if block.pipelines.is_empty() {
-                output_type.clone(),
+                if let Some(span) = block.span {
                    span
                } else {
                    continue;
                }
            } else {
                block
                    .pipelines
                    .last()
@ -1038,8 +1056,10 @@ pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block
                    .elements
                    .last()
                    .expect("internal error: we should have elements")
-                    .span(),
+                    .span()
-            ))
+            };
            output_errors.push(ParseError::OutputMismatch(output_type.clone(), span))
        }
    }
@ -1047,8 +1067,14 @@ pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block
        let mut current_type = Type::Any;
        for pipeline in &block.pipelines {
-            let _ = check_pipeline_type(working_set, pipeline, current_type);
+            let (_, err) = check_pipeline_type(working_set, pipeline, current_type);
            current_type = Type::Nothing;
            if let Some(err) = err {
                output_errors.extend_from_slice(&err);
            }
        }
    }
    output_errors
 }
--- a/src/tests/test_parser.rs
+++ b/src/tests/test_parser.rs
@ -576,3 +576,50 @@ fn filesize_is_not_hex() -> TestResult {
 fn let_variable_type_mismatch() -> TestResult {
    fail_test(r#"let x: int = "foo""#, "expected int, found string")
 }
 // A single `nothing -> int` pair: calling `foo` without pipeline input is expected to yield "3".
 #[test]
 fn def_with_input_output_1() -> TestResult {
    run_test(r#"def foo []: nothing -> int { 3 }; foo"#, "3")
 }
 // A bracketed list of pairs: an int piped into `foo` is expected to yield "3".
 #[test]
 fn def_with_input_output_2() -> TestResult {
    run_test(
        r#"def foo []: [int -> int, string -> int] { 3 }; 10 | foo"#,
        "3",
    )
 }
 // Same definition as above, exercised with a string piped in.
 #[test]
 fn def_with_input_output_3() -> TestResult {
    run_test(
        r#"def foo []: [int -> int, string -> int] { 3 }; "bob" | foo"#,
        "3",
    )
 }
 // Calling `foo` with no pipeline input, when only int and string inputs are declared,
 // is expected to fail with a message containing "command doesn't support".
 #[test]
 fn def_with_input_output_mismatch_1() -> TestResult {
    fail_test(
        r#"def foo []: [int -> int, string -> int] { 3 }; foo"#,
        "command doesn't support",
    )
 }
 // Piping a record into `foo`, when only int and string inputs are declared,
 // is expected to fail with the same message.
 #[test]
 fn def_with_input_output_mismatch_2() -> TestResult {
    fail_test(
        r#"def foo []: [int -> int, string -> int] { 3 }; {x: 2} | foo"#,
        "command doesn't support",
    )
 }
 // An input type with no `->` after it is expected to fail with "expected arrow".
 #[test]
 fn def_with_input_output_broken_1() -> TestResult {
    fail_test(r#"def foo []: int { 3 }"#, "expected arrow")
 }
 // An arrow with no output type after it is expected to fail with "expected type".
 #[test]
 fn def_with_input_output_broken_2() -> TestResult {
    fail_test(r#"def foo []: int -> { 3 }"#, "expected type")
 }