From 9daa5f9177cf76864f9918760eaf6d15fa61da06 Mon Sep 17 00:00:00 2001
From: Jess <34199683+ratherforky@users.noreply.github.com>
Date: Sat, 7 Dec 2024 15:55:15 +0000
Subject: [PATCH] Fix silent failure of parsing input output types (#14510)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- This PR should fix/close:
  - #11266
  - #12893
  - #13736
  - #13748
  - #14170
- It doesn't fix #13736 though unfortunately. The issue there is at a
different level to this fix (I think probably in the lexing somewhere,
which I haven't touched).

# The Problem

The linked issues have many examples of the problem and the related
confusion it causes, but I'll give some more examples here for
illustration. It boils down to the following:

This doesn't type check (good):
```nu
def foo []: string -> int { false }
```

This does (bad):
```nu
def foo [] : string -> int { false }
```

Because the parser is completely ignoring all the characters. This also
compiles in 0.100.0:
```nu
def blue [] Da ba Dee da Ba da { false }
```

And this also means commands which have a completely fine type, but an
extra space before `:`, lose that type information and end up as `any ->
any`, e.g.
```nu
def foo [] : int -> int {$in + 3}
```
```bash
$ foo --help
Input/output types:
  ╭───┬───────┬────────╮
  │ # │ input │ output │
  ├───┼───────┼────────┤
  │ 0 │ any   │ any    │
  ╰───┴───────┴────────╯
```

# The Fix

Special thank you to @texastoland whose draft PR (#12358) I referenced
heavily while making this fix.

That PR seeks to fix the invalid parsing by disallowing whitespace
between `[]` and `:` in declarations, e.g. `def foo [] : int -> any {}`

This PR instead allows the whitespace while properly parsing the type
signature. I think this is the better choice for a few reasons:
- The parsing is still straightforward and the information is all there
anyway,
- It's more consistent with type annotations in other places, e.g. `do
{|nums : list<int>| $nums | describe} [ 1 2 3 ]` from the [Type
Signatures doc
page](https://www.nushell.sh/lang-guide/chapters/types/type_signatures.html)
- It's more consistent with the new nu parser, which allows `let x :
bool = false` (current nu doesn't, but this PR doesn't change that)
- It will be less disruptive and should only break code where the types
are actually wrong (if your types were correct, but you had a space
before the `:`, those declarations will still compile and now have more
type information vs. throwing an error in all cases and requiring spaces
to be deleted)
- It's the more intuitive syntax for most functional programmers like
myself (haskell/lean/coq/agda and many more either allow or require
whitespace for type annotations)

I don't use Rust a lot, so I tried to keep most things the same and the
rest I wrote as if it was Haskell (if you squint a bit). Code
review/suggestions very welcome. I added all the tests I could think of
and `toolkit check pr` gives it the all-clear.

# User-Facing Changes

This PR meets part of the goal of #13849, but doesn't do anything about
parsing signatures twice and doesn't do much to improve error messages,
it just enforces the existing errors and error messages.

This will no doubt be a breaking change, mostly because the code is
already broken and users don't realise yet (one of my personal scripts
stopped compiling after this fix because I thought `def foo [] -> string
{}` was valid syntax). It shouldn't break any type-correct code though.
---
 crates/nu-parser/src/parser.rs        | 78 ++++++++++++++++++++-------
 crates/nu-parser/tests/test_parser.rs | 37 +++++++++++++
 crates/nu-std/testing.nu              |  6 +--
 3 files changed, 99 insertions(+), 22 deletions(-)
diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs
index eecd7863a9..07ab35dc36 100644
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@@ -3361,26 +3361,66 @@ pub fn parse_input_output_types(
 }
 
 pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
-    let arg_signature = working_set.get_span_contents(spans[0]);
-
-    if arg_signature.ends_with(b":") {
-        let mut arg_signature =
-            parse_signature(working_set, Span::new(spans[0].start, spans[0].end - 1));
-
-        let input_output_types = parse_input_output_types(working_set, &spans[1..]);
-
-        if let Expression {
-            expr: Expr::Signature(sig),
-            span: expr_span,
-            ..
-        } = &mut arg_signature
-        {
-            sig.input_output_types = input_output_types;
-            expr_span.end = Span::concat(&spans[1..]).end;
+    match spans.len() {
+        // This case should never happen. It corresponds to declarations like `def foo {}`,
+        // which should throw a 'Missing required positional argument.' before getting to this point
+        0 => {
+            working_set.error(ParseError::InternalError(
+                "failed to catch missing positional arguments".to_string(),
+                Span::concat(spans),
+            ));
+            garbage(working_set, Span::concat(spans))
+        }
+
+        // e.g. `[ b"[foo: string]" ]`
+        1 => parse_signature(working_set, spans[0]),
+
+        // This case is needed to distinguish between e.g.
+        // `[ b"[]", b"{ true }" ]` vs `[ b"[]:", b"int" ]`
+        2 if working_set.get_span_contents(spans[1]).starts_with(b"{") => {
+            parse_signature(working_set, spans[0])
+        }
+
+        // This should handle every other case, e.g.
+        // `[ b"[]:", b"int" ]`
+        // `[ b"[]", b":", b"int" ]`
+        // `[ b"[]", b":", b"int", b"->", b"bool" ]`
+        _ => {
+            let (mut arg_signature, input_output_types_pos) =
+                if working_set.get_span_contents(spans[0]).ends_with(b":") {
+                    (
+                        parse_signature(working_set, Span::new(spans[0].start, spans[0].end - 1)),
+                        1,
+                    )
+                } else if working_set.get_span_contents(spans[1]) == b":" {
+                    (parse_signature(working_set, spans[0]), 2)
+                } else {
+                    // This should be an error case, but we call parse_signature anyway
+                    // so it can handle the various possible errors
+                    // e.g. `[ b"[]", b"int" ]` or `[
+                    working_set.error(ParseError::Expected(
+                        "colon (:) before type signature",
+                        Span::concat(&spans[1..]),
+                    ));
+                    // (garbage(working_set, Span::concat(spans)), 1)
+
+                    (parse_signature(working_set, spans[0]), 1)
+                };
+
+            let input_output_types =
+                parse_input_output_types(working_set, &spans[input_output_types_pos..]);
+
+            if let Expression {
+                expr: Expr::Signature(sig),
+                span: expr_span,
+                ..
+            } = &mut arg_signature
+            {
+                sig.input_output_types = input_output_types;
+                expr_span.end = Span::concat(&spans[input_output_types_pos..]).end;
+            }
+            arg_signature
         }
-        arg_signature
-    } else {
-        parse_signature(working_set, spans[0])
     }
 }
 
diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs
index 11cca19d1f..2ceabf8a1a 100644
--- a/crates/nu-parser/tests/test_parser.rs
+++ b/crates/nu-parser/tests/test_parser.rs
@@ -2460,6 +2460,7 @@ mod input_types {
 
     #[rstest]
     #[case::input_output(b"def q []: int -> int {1}", false)]
+    #[case::input_output(b"def q [x: bool]: int -> int {2}", false)]
     #[case::input_output(b"def q []: string -> string {'qwe'}", false)]
     #[case::input_output(b"def q []: nothing -> nothing {null}", false)]
     #[case::input_output(b"def q []: list<string> -> list<string> {[]}", false)]
@@ -2479,6 +2480,42 @@ mod input_types {
     #[case::input_output(b"def q []: nothing -> record<c: int e: int {{c: 1 e: 1}}", true)]
     #[case::input_output(b"def q []: record<c: int e: int -> record<a: int> {{a: 1}}", true)]
     #[case::input_output(b"def q []: nothing -> record<a: record<a: int> {{a: {a: 1}}}", true)]
+    #[case::input_output(b"def q []: int []}", true)]
+    #[case::input_output(b"def q []: bool {[]", true)]
+    // Type signature variants with whitespace between inputs and `:`
+    #[case::input_output(b"def q [] : int -> int {1}", false)]
+    #[case::input_output(b"def q [x: bool] : int -> int {2}", false)]
+    #[case::input_output(b"def q []\t   : string -> string {'qwe'}", false)]
+    #[case::input_output(b"def q []  \t : nothing -> nothing {null}", false)]
+    #[case::input_output(b"def q [] \t: list<string> -> list<string> {[]}", false)]
+    #[case::input_output(
+        b"def q []\t: record<a: int b: int> -> record<c: int e: int> {{c: 1 e: 1}}",
+        false
+    )]
+    #[case::input_output(
+        b"def q [] : table<a: int b: int> -> table<c: int e: int> {[{c: 1 e: 1}]}",
+        false
+    )]
+    #[case::input_output(
+        b"def q [] : nothing -> record<c: record<a: int b: int> e: int> {{c: {a: 1 b: 2} e: 1}}",
+        false
+    )]
+    #[case::input_output(b"def q [] : nothing -> list<string {[]}", true)]
+    #[case::input_output(b"def q [] : nothing -> record<c: int e: int {{c: 1 e: 1}}", true)]
+    #[case::input_output(b"def q [] : record<c: int e: int -> record<a: int> {{a: 1}}", true)]
+    #[case::input_output(b"def q [] : nothing -> record<a: record<a: int> {{a: {a: 1}}}", true)]
+    #[case::input_output(b"def q [] : int []}", true)]
+    #[case::input_output(b"def q [] : bool {[]", true)]
+    // No input-output type signature
+    #[case::input_output(b"def qq [] {[]}", false)]
+    #[case::input_output(b"def q [] []}", true)]
+    #[case::input_output(b"def q [] {", true)]
+    #[case::input_output(b"def q []: []}", true)]
+    #[case::input_output(b"def q [] int {}", true)]
+    #[case::input_output(b"def q [x: string, y: int] {{c: 1 e: 1}}", false)]
+    #[case::input_output(b"def q [x: string, y: int]: {}", true)]
+    #[case::input_output(b"def q [x: string, y: int] {a: {a: 1}}", true)]
+    #[case::input_output(b"def foo {3}", true)]
     #[case::vardecl(b"let a: int = 1", false)]
     #[case::vardecl(b"let a: string = 'qwe'", false)]
     #[case::vardecl(b"let a: nothing = null", false)]
diff --git a/crates/nu-std/testing.nu b/crates/nu-std/testing.nu
index 7052983a21..e409920569 100644
--- a/crates/nu-std/testing.nu
+++ b/crates/nu-std/testing.nu
@@ -28,7 +28,7 @@ def valid-annotations [] {
 # Returns a table containing the list of function names together with their annotations (comments above the declaration)
 def get-annotated [
     file: path
-] path -> table<function_name: string, annotation: string> {
+]: path -> table<function_name: string, annotation: string> {
     let raw_file = (
         open $file
         | lines
@@ -59,7 +59,7 @@ def get-annotated [
 # Annotations that allow multiple functions are of type list<string>
 # Other annotations are of type string
 # Result gets merged with the template record so that the output shape remains consistent regardless of the table content
-def create-test-record [] nothing -> record<before-each: string, after-each: string, before-all: string, after-all: string, test: list<string>, test-skip: list<string>> {
+def create-test-record []: nothing -> record<before-each: string, after-each: string, before-all: string, after-all: string, test: list<string>, test-skip: list<string>> {
     let input = $in
 
     let template_record = {
@@ -187,7 +187,7 @@ export def ($test_function_name) [] {
 def run-tests-for-module [
     module: record<file: path name: string before-each: string after-each: string before-all: string after-all: string test: list test-skip: list>
     threads: int
-] -> table<file: path, name: string, test: string, result: string> {
+]: nothing -> table<file: path, name: string, test: string, result: string> {
     let global_context = if not ($module.before-all|is-empty) {
             log info $"Running before-all for module ($module.name)"
             run-test {