Custom command input/output types (#9690)

# Description This adds input/output types to custom commands. These are input/output pairs that relate an input type to an output type. For example (a single int-to-int input/output pair): ``` def foo []: int -> int { ... } ``` You can also have multiple input/output pairs: ``` def bar []: [int -> string, string -> list<string>] { ... } ``` These types are checked at definition time in the parser. If the block does not match the type, the user will get a parser error. The `:` that begins the input/output signatures should immediately follow the argument signature, as shown above. The PR also improves type parsing by re-using the shape parser. The shape parser is now the canonical way to parse types/shapes in user code. This PR also splits `extern` into `extern`/`extern-wrapped` because of the parser limitation that a multi-span argument (which Signature now is) can't precede an optional argument. `extern-wrapped` now takes the required block that was previously optional. # User-Facing Changes Splitting `extern` into `extern` and `extern-wrapped` is a breaking change. # Tests + Formatting  # After Submitting
2025-08-09 03:54:58 +02:00 · 2023-07-15 09:51:28 +12:00
parent ba766de5d1
commit 53ae03bd63
9 changed files with 282 additions and 73 deletions
--- a/crates/nu-cmd-lang/src/core_commands/extern_.rs
+++ b/crates/nu-cmd-lang/src/core_commands/extern_.rs
@ -19,7 +19,6 @@ impl Command for Extern {
            .input_output_types(vec![(Type::Nothing, Type::Nothing)])
            .required("def_name", SyntaxShape::String, "definition name")
            .required("params", SyntaxShape::Signature, "parameters")
-            .optional("body", SyntaxShape::Block, "wrapper function block")
            .category(Category::Core)
    }

--- a/crates/nu-cmd-lang/src/core_commands/extern_wrapped.rs
+++ b/crates/nu-cmd-lang/src/core_commands/extern_wrapped.rs
@ -0,0 +1,52 @@
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type};
+
+#[derive(Clone)]
+pub struct ExternWrapped;
+
+impl Command for ExternWrapped {
+    fn name(&self) -> &str {
+        "extern-wrapped"
+    }
+
+    fn usage(&self) -> &str {
+        "Define a signature for an external command."
+    }
+
+    fn signature(&self) -> nu_protocol::Signature {
+        Signature::build("extern-wrapped")
+            .input_output_types(vec![(Type::Nothing, Type::Nothing)])
+            .required("def_name", SyntaxShape::String, "definition name")
+            .required("params", SyntaxShape::Signature, "parameters")
+            .required("body", SyntaxShape::Block, "wrapper function block")
+            .category(Category::Core)
+    }
+
+    fn extra_usage(&self) -> &str {
+        r#"This command is a parser keyword. For details, check:
+  https://www.nushell.sh/book/thinking_in_nu.html"#
+    }
+
+    fn is_parser_keyword(&self) -> bool {
+        true
+    }
+
+    fn run(
+        &self,
+        _engine_state: &EngineState,
+        _stack: &mut Stack,
+        _call: &Call,
+        _input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        Ok(PipelineData::empty())
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Write a signature for an external command",
+            example: r#"extern-wrapped echo [text: string] { print $text }"#,
+            result: None,
+        }]
+    }
+}
--- a/crates/nu-cmd-lang/src/core_commands/mod.rs
+++ b/crates/nu-cmd-lang/src/core_commands/mod.rs
@ -17,6 +17,7 @@ mod export_extern;
 mod export_module;
 mod export_use;
 mod extern_;
+mod extern_wrapped;
 mod for_;
 mod hide;
 mod hide_env;
@ -55,6 +56,7 @@ pub use export_extern::ExportExtern;
 pub use export_module::ExportModule;
 pub use export_use::ExportUse;
 pub use extern_::Extern;
+pub use extern_wrapped::ExternWrapped;
 pub use for_::For;
 pub use hide::Hide;
 pub use hide_env::HideEnv;
--- a/crates/nu-cmd-lang/src/default_context.rs
+++ b/crates/nu-cmd-lang/src/default_context.rs
@ -35,6 +35,7 @@ pub fn create_default_context() -> EngineState {
            ExportUse,
            ExportModule,
            Extern,
+            ExternWrapped,
            For,
            Hide,
            HideEnv,
--- a/crates/nu-command/tests/commands/def.rs
+++ b/crates/nu-command/tests/commands/def.rs
@ -158,8 +158,9 @@ fn def_with_paren_params() {

 #[test]
 fn extern_with_block() {
-    let actual =
-        nu!("extern foo [...rest] { print ($rest | str join ',' ) }; foo --bar baz -- -q -u -x");
+    let actual = nu!(
+        "extern-wrapped foo [...rest] { print ($rest | str join ',' ) }; foo --bar baz -- -q -u -x"
+    );

    assert_eq!(actual.out, "--bar,baz,--,-q,-u,-x");
 }
--- a/crates/nu-parser/src/parse_keywords.rs
+++ b/crates/nu-parser/src/parse_keywords.rs
@ -1,4 +1,8 @@
-use crate::{parse_block, parser_path::ParserPath, type_check::type_compatible};
+use crate::{
+    parse_block,
+    parser_path::ParserPath,
+    type_check::{check_block_input_output, type_compatible},
+};
 use itertools::Itertools;
 use log::trace;
 use nu_path::canonicalize_with;
@ -43,6 +47,7 @@ pub const UNALIASABLE_PARSER_KEYWORDS: &[&[u8]] = &[
    b"export def",
    b"for",
    b"extern",
+    b"extern-wrapped",
    b"export extern",
    b"alias",
    b"export alias",
@ -181,7 +186,7 @@ pub fn parse_def_predecl(working_set: &mut StateWorkingSet, spans: &[Span]) {
                working_set.error(ParseError::DuplicateCommandDef(spans[1]));
            }
        }
-    } else if decl_name == b"extern" && spans.len() >= 3 {
+    } else if (decl_name == b"extern" || decl_name == b"extern-wrapped") && spans.len() >= 3 {
        let name_expr = parse_string(working_set, spans[1]);
        let name = name_expr.as_string();

@ -485,19 +490,20 @@ pub fn parse_def(
            block.signature = signature;
            block.redirect_env = def_call == b"def-env";

-            // Sadly we can't use this here as the inference would have to happen before
-            // all the definitions had been fully parsed.
-
-            // infer the return type based on the output of the block
-            // let block = working_set.get_block(block_id);
-
-            // let input_type = block.input_type(working_set);
-            // let output_type = block.output_type();
-            // block.signature.input_output_types = vec![(input_type, output_type)];
+            if block.signature.input_output_types.is_empty() {
                block
                    .signature
                    .input_output_types
                    .push((Type::Any, Type::Any));
+            }
+
+            let block = working_set.get_block(block_id);
+
+            let typecheck_errors = check_block_input_output(working_set, block);
+
+            working_set
+                .parse_errors
+                .extend_from_slice(&typecheck_errors);
        } else {
            working_set.error(ParseError::InternalError(
                "Predeclaration failed to add declaration".into(),
@ -529,15 +535,17 @@ pub fn parse_extern(
    // Checking that the function is used with the correct name
    // Maybe this is not necessary but it is a sanity check

-    let (name_span, split_id) =
-        if spans.len() > 1 && working_set.get_span_contents(spans[0]) == b"export" {
+    let (name_span, split_id) = if spans.len() > 1
+        && (working_set.get_span_contents(spans[0]) == b"export"
+            || working_set.get_span_contents(spans[0]) == b"export-wrapped")
+    {
        (spans[1], 2)
    } else {
        (spans[0], 1)
    };

    let extern_call = working_set.get_span_contents(name_span).to_vec();
-    if extern_call != b"extern" {
+    if extern_call != b"extern" && extern_call != b"extern-wrapped" {
        working_set.error(ParseError::UnknownState(
            "internal error: Wrong call name for extern function".into(),
            span(spans),
@ -932,7 +940,7 @@ pub fn parse_export_in_block(
    let full_name = if lite_command.parts.len() > 1 {
        let sub = working_set.get_span_contents(lite_command.parts[1]);
        match sub {
-            b"alias" | b"def" | b"def-env" | b"extern" | b"use" | b"module" => {
+            b"alias" | b"def" | b"def-env" | b"extern" | b"extern-wrapped" | b"use" | b"module" => {
                [b"export ", sub].concat()
            }
            _ => b"export".to_vec(),
@ -1175,7 +1183,7 @@ pub fn parse_export_in_module(

                result
            }
-            b"extern" => {
+            b"extern" | b"extern-wrapped" => {
                let lite_command = LiteCommand {
                    comments: lite_command.comments.clone(),
                    parts: spans[1..].to_vec(),
@ -1581,9 +1589,11 @@ pub fn parse_module_block(
                                None, // using commands named as the module locally is OK
                            ))
                        }
-                        b"extern" => block
+                        b"extern" | b"extern-wrapped" => {
+                            block
                                .pipelines
-                            .push(parse_extern(working_set, command, None)),
+                                .push(parse_extern(working_set, command, None))
+                        }
                        b"alias" => {
                            block.pipelines.push(parse_alias(
                                working_set,
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -682,6 +682,14 @@ pub fn parse_multispan_value(

            arg
        }
+        SyntaxShape::Signature => {
+            trace!("parsing: signature");
+
+            let sig = parse_full_signature(working_set, &spans[*spans_idx..]);
+            *spans_idx = spans.len() - 1;
+
+            sig
+        }
        SyntaxShape::Keyword(keyword, arg) => {
            trace!(
                "parsing: keyword({}) {:?}",
@ -2932,36 +2940,7 @@ fn prepare_inner_span(
 }

 pub fn parse_type(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) -> Type {
-    match bytes {
-        b"binary" => Type::Binary,
-        b"block" => {
-            working_set.error(ParseError::LabeledErrorWithHelp {
-                error: "Blocks are not support as first-class values".into(),
-                label: "blocks are not supported as values".into(),
-                help: "Use 'closure' instead of 'block'".into(),
-                span,
-            });
-
-            Type::Any
-        }
-        b"bool" => Type::Bool,
-        b"cellpath" => Type::CellPath,
-        b"closure" => Type::Closure,
-        b"date" => Type::Date,
-        b"duration" => Type::Duration,
-        b"error" => Type::Error,
-        b"filesize" => Type::Filesize,
-        b"float" | b"decimal" => Type::Float,
-        b"int" => Type::Int,
-        b"list" => Type::List(Box::new(Type::Any)),
-        b"number" => Type::Number,
-        b"range" => Type::Range,
-        b"record" => Type::Record(vec![]),
-        b"string" => Type::String,
-        b"table" => Type::Table(vec![]), //FIXME
-
-        _ => Type::Any,
-    }
+    parse_shape_name(working_set, bytes, span).to_type()
 }

 pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
@ -3199,6 +3178,95 @@ pub fn expand_to_cell_path(
    }
 }

+pub fn parse_input_output_types(
+    working_set: &mut StateWorkingSet,
+    spans: &[Span],
+) -> Vec<(Type, Type)> {
+    let mut full_span = span(spans);
+
+    let mut bytes = working_set.get_span_contents(full_span);
+
+    if bytes.starts_with(b"[") {
+        bytes = &bytes[1..];
+        full_span.start += 1;
+    }
+
+    if bytes.ends_with(b"]") {
+        bytes = &bytes[..(bytes.len() - 1)];
+        full_span.end -= 1;
+    }
+
+    let (tokens, parse_error) = lex(bytes, full_span.start, &[b','], &[], true);
+
+    if let Some(parse_error) = parse_error {
+        working_set.parse_errors.push(parse_error);
+    }
+
+    let mut output = vec![];
+
+    let mut idx = 0;
+    while idx < tokens.len() {
+        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
+        let input_type = parse_type(working_set, &type_bytes, tokens[idx].span);
+
+        idx += 1;
+        if idx >= tokens.len() {
+            working_set.error(ParseError::Expected(
+                "arrow (->)",
+                Span::new(tokens[idx - 1].span.end, tokens[idx - 1].span.end),
+            ));
+            break;
+        }
+
+        let arrow = working_set.get_span_contents(tokens[idx].span);
+        if arrow != b"->" {
+            working_set.error(ParseError::Expected("arrow (->)", tokens[idx].span));
+        }
+
+        idx += 1;
+        if idx >= tokens.len() {
+            working_set.error(ParseError::MissingType(Span::new(
+                tokens[idx - 1].span.end,
+                tokens[idx - 1].span.end,
+            )));
+            break;
+        }
+
+        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
+        let output_type = parse_type(working_set, &type_bytes, tokens[idx].span);
+
+        output.push((input_type, output_type));
+
+        idx += 1;
+    }
+
+    output
+}
+
+pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
+    let arg_signature = working_set.get_span_contents(spans[0]);
+
+    if arg_signature.ends_with(b":") {
+        let mut arg_signature =
+            parse_signature(working_set, Span::new(spans[0].start, spans[0].end - 1));
+
+        let input_output_types = parse_input_output_types(working_set, &spans[1..]);
+
+        if let Expression {
+            expr: Expr::Signature(sig),
+            span: expr_span,
+            ..
+        } = &mut arg_signature
+        {
+            sig.input_output_types = input_output_types;
+            expr_span.end = span(&spans[1..]).end;
+        }
+        arg_signature
+    } else {
+        parse_signature(working_set, spans[0])
+    }
+}
+
 pub fn parse_row_condition(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
    let var_id = working_set.add_variable(b"$it".to_vec(), span(spans), Type::Any, false);
    let expression = parse_math_expression(working_set, spans, Some(var_id));
@ -5026,8 +5094,8 @@ pub fn parse_expression(

        // For now, check for special parses of certain keywords
        match bytes.as_slice() {
-            b"def" | b"extern" | b"for" | b"module" | b"use" | b"source" | b"alias" | b"export"
-            | b"hide" => {
+            b"def" | b"extern" | b"extern-wrapped" | b"for" | b"module" | b"use" | b"source"
+            | b"alias" | b"export" | b"hide" => {
                working_set.error(ParseError::BuiltinCommandInPipeline(
                    String::from_utf8(bytes)
                        .expect("builtin commands bytes should be able to convert to string"),
@ -5194,7 +5262,7 @@ pub fn parse_builtin_commands(

    match name {
        b"def" | b"def-env" => parse_def(working_set, lite_command, None),
-        b"extern" => parse_extern(working_set, lite_command, None),
+        b"extern" | b"extern-wrapped" => parse_extern(working_set, lite_command, None),
        b"let" => parse_let(working_set, &lite_command.parts),
        b"const" => parse_const(working_set, &lite_command.parts),
        b"mut" => parse_mut(working_set, &lite_command.parts),
@ -5582,7 +5650,10 @@ pub fn parse_block(

    block.span = Some(span);

-    type_check::check_block_input_output(working_set, &block);
+    let errors = type_check::check_block_input_output(working_set, &block);
+    if !errors.is_empty() {
+        working_set.parse_errors.extend_from_slice(&errors);
+    }

    block
 }
--- a/crates/nu-parser/src/type_check.rs
+++ b/crates/nu-parser/src/type_check.rs
@ -949,12 +949,14 @@ pub fn math_result_type(
 }

 pub fn check_pipeline_type(
-    working_set: &mut StateWorkingSet,
+    working_set: &StateWorkingSet,
    pipeline: &Pipeline,
    input_type: Type,
-) -> Type {
+) -> (Type, Option<Vec<ParseError>>) {
    let mut current_type = input_type;

+    let mut output_errors: Option<Vec<ParseError>> = None;
+
    'elem: for elem in &pipeline.elements {
        match elem {
            PipelineElement::Expression(
@ -997,7 +999,12 @@ pub fn check_pipeline_type(
                }

                if !decl.signature().input_output_types.is_empty() {
-                    working_set.error(ParseError::InputMismatch(current_type, call.head))
+                    if let Some(output_errors) = &mut output_errors {
+                        output_errors.push(ParseError::InputMismatch(current_type, call.head))
+                    } else {
+                        output_errors =
+                            Some(vec![ParseError::InputMismatch(current_type, call.head)]);
+                    }
                }
                current_type = Type::Any;
            }
@ -1010,27 +1017,38 @@ pub fn check_pipeline_type(
        }
    }

-    current_type
+    (current_type, output_errors)
 }

-pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block) {
+pub fn check_block_input_output(working_set: &StateWorkingSet, block: &Block) -> Vec<ParseError> {
    // let inputs = block.input_types();
+    let mut output_errors = vec![];

    for (input_type, output_type) in &block.signature.input_output_types {
        let mut current_type = input_type.clone();
        let mut current_output_type = Type::Nothing;

        for pipeline in &block.pipelines {
-            current_output_type = check_pipeline_type(working_set, pipeline, current_type);
+            let (checked_output_type, err) =
+                check_pipeline_type(working_set, pipeline, current_type);
+            current_output_type = checked_output_type;
            current_type = Type::Nothing;
+            if let Some(err) = err {
+                output_errors.extend_from_slice(&err);
+            }
        }

        if !type_compatible(output_type, &current_output_type)
            && output_type != &Type::Any
            && current_output_type != Type::Any
        {
-            working_set.error(ParseError::OutputMismatch(
-                output_type.clone(),
+            let span = if block.pipelines.is_empty() {
+                if let Some(span) = block.span {
+                    span
+                } else {
+                    continue;
+                }
+            } else {
                block
                    .pipelines
                    .last()
@ -1038,8 +1056,10 @@ pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block
                    .elements
                    .last()
                    .expect("internal error: we should have elements")
-                    .span(),
-            ))
+                    .span()
+            };
+
+            output_errors.push(ParseError::OutputMismatch(output_type.clone(), span))
        }
    }

@ -1047,8 +1067,14 @@ pub fn check_block_input_output(working_set: &mut StateWorkingSet, block: &Block
        let mut current_type = Type::Any;

        for pipeline in &block.pipelines {
-            let _ = check_pipeline_type(working_set, pipeline, current_type);
+            let (_, err) = check_pipeline_type(working_set, pipeline, current_type);
            current_type = Type::Nothing;
+
+            if let Some(err) = err {
+                output_errors.extend_from_slice(&err);
            }
        }
+    }
+
+    output_errors
 }
--- a/src/tests/test_parser.rs
+++ b/src/tests/test_parser.rs
@ -576,3 +576,50 @@ fn filesize_is_not_hex() -> TestResult {
 fn let_variable_type_mismatch() -> TestResult {
    fail_test(r#"let x: int = "foo""#, "expected int, found string")
 }
+
+#[test]
+fn def_with_input_output_1() -> TestResult {
+    run_test(r#"def foo []: nothing -> int { 3 }; foo"#, "3")
+}
+
+#[test]
+fn def_with_input_output_2() -> TestResult {
+    run_test(
+        r#"def foo []: [int -> int, string -> int] { 3 }; 10 | foo"#,
+        "3",
+    )
+}
+
+#[test]
+fn def_with_input_output_3() -> TestResult {
+    run_test(
+        r#"def foo []: [int -> int, string -> int] { 3 }; "bob" | foo"#,
+        "3",
+    )
+}
+
+#[test]
+fn def_with_input_output_mismatch_1() -> TestResult {
+    fail_test(
+        r#"def foo []: [int -> int, string -> int] { 3 }; foo"#,
+        "command doesn't support",
+    )
+}
+
+#[test]
+fn def_with_input_output_mismatch_2() -> TestResult {
+    fail_test(
+        r#"def foo []: [int -> int, string -> int] { 3 }; {x: 2} | foo"#,
+        "command doesn't support",
+    )
+}
+
+#[test]
+fn def_with_input_output_broken_1() -> TestResult {
+    fail_test(r#"def foo []: int { 3 }"#, "expected arrow")
+}
+
+#[test]
+fn def_with_input_output_broken_2() -> TestResult {
+    fail_test(r#"def foo []: int -> { 3 }"#, "expected type")
+}