From eb3c2c9e76767f4b500a313973a0213a7fee1032 Mon Sep 17 00:00:00 2001
From: Leonhard Kipp <leonhard.kipp@web.de>
Date: Thu, 7 Jan 2021 18:14:51 +0100
Subject: [PATCH] Add comments to next LiteCommand (#2846)

This commit attaches the comments preceding a command to the LiteCommand's new
field `comments`.

This can be useful, for example, when defining a function with `def`. Nushell
could pick up the comments and display them when the user types `help my_def_func`.

Example:
```shell
# My echo
# It's much better :)
def my_echo [arg] { echo $arg }
```
The LiteCommand for `def` will now contain the comments `My echo` and `It's much
better :)`.

A comment is not associated with the next command if one or more empty lines
separate them.
Example:
```shell
# This comment gets discarded, because of the following empty line

echo 42
```

This new functionality is similar to DocStrings. One might introduce a special
notation for such DocStrings, so that the parser can better differentiate
between discardable comments and useful documentation.
---
 crates/nu-cli/src/commands/command.rs |   2 +-
 crates/nu-cli/tests/commands/def.rs   |  19 ++
 crates/nu-cli/tests/commands/mod.rs   |   1 +
 crates/nu-parser/src/lex.rs           | 245 ++++++++++++++++++++++----
 crates/nu-parser/src/parse.rs         |   6 +-
 5 files changed, 235 insertions(+), 38 deletions(-)
 create mode 100644 crates/nu-cli/tests/commands/def.rs
diff --git a/crates/nu-cli/src/commands/command.rs b/crates/nu-cli/src/commands/command.rs
index 4e95bb1111..7eaf5c8592 100644
--- a/crates/nu-cli/src/commands/command.rs
+++ b/crates/nu-cli/src/commands/command.rs
@@ -250,7 +250,7 @@ impl WholeStreamCommand for Block {
     }
 
     fn usage(&self) -> &str {
-        ""
+        &self.params.usage
     }
 
     async fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
diff --git a/crates/nu-cli/tests/commands/def.rs b/crates/nu-cli/tests/commands/def.rs
new file mode 100644
index 0000000000..e0f218b1eb
--- /dev/null
+++ b/crates/nu-cli/tests/commands/def.rs
@@ -0,0 +1,19 @@
+use nu_test_support::nu;
+use nu_test_support::playground::Playground;
+use std::fs;
+#[test]
+fn def_with_comment() {
+    Playground::setup("def_with_comment", |dirs, _| {
+        let data = r#"
+#My echo
+def e [arg] {echo $arg}
+            "#;
+        fs::write(dirs.root().join("def_test"), data).expect("Unable to write file");
+        let actual = nu!(
+            cwd: dirs.root(),
+            "source def_test; help e | to json"
+        );
+
+        assert!(actual.out.contains("My echo\\n\\n"));
+    });
+}
diff --git a/crates/nu-cli/tests/commands/mod.rs b/crates/nu-cli/tests/commands/mod.rs
index 897d858d7e..4b904ed803 100644
--- a/crates/nu-cli/tests/commands/mod.rs
+++ b/crates/nu-cli/tests/commands/mod.rs
@@ -7,6 +7,7 @@ mod cd;
 mod compact;
 mod count;
 mod cp;
+mod def;
 mod default;
 mod drop;
 mod each;
diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index f460b00b79..4f2f81cd96 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -18,7 +18,7 @@ impl Token {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum TokenContents {
     /// A baseline token is an atomic chunk of source code. This means that the
     /// token contains the entirety of string literals, as well as the entirety
@@ -28,6 +28,7 @@ pub enum TokenContents {
     /// until the closing `}` (after taking comments and string literals into
     /// consideration).
     Baseline(String),
+    Comment(String),
     Pipe,
     Semicolon,
     EOL,
@@ -38,11 +39,27 @@ pub enum TokenContents {
 #[derive(Debug, Clone)]
 pub struct LiteCommand {
     pub parts: Vec<Spanned<String>>,
+    ///Preceding comments. Each String in the vec is one line. The comment literal is not included.
+    pub comments: Option<Vec<Spanned<String>>>,
 }
 
 impl LiteCommand {
     fn new() -> LiteCommand {
-        LiteCommand { parts: vec![] }
+        LiteCommand {
+            parts: vec![],
+            comments: None,
+        }
+    }
+
+    pub fn comments_joined(&self) -> String {
+        match &self.comments {
+            None => "".to_string(),
+            Some(text) => text
+                .iter()
+                .map(|s| s.item.clone())
+                .collect::<Vec<_>>()
+                .join("\n"),
+        }
     }
 
     pub fn is_empty(&self) -> bool {
@@ -147,18 +164,6 @@ impl LiteGroup {
         self.pipelines.push(item)
     }
 
-    pub fn is_comment(&self) -> bool {
-        if !self.is_empty()
-            && !self.pipelines[0].is_empty()
-            && !self.pipelines[0].commands.is_empty()
-            && !self.pipelines[0].commands[0].parts.is_empty()
-        {
-            self.pipelines[0].commands[0].parts[0].item.starts_with('#')
-        } else {
-            false
-        }
-    }
-
     #[cfg(test)]
     pub(crate) fn span(&self) -> Span {
         let start = if !self.pipelines.is_empty() {
@@ -362,17 +367,6 @@ pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option
     (token_contents.spanned(span), None)
 }
 
-/// We encountered a `#` character. Keep consuming characters until we encounter
-/// a newline character (but don't consume it).
-fn skip_comment(input: &mut Input) {
-    while let Some((_, c)) = input.peek() {
-        if *c == '\n' || *c == '\r' {
-            break;
-        }
-        input.next();
-    }
-}
-
 /// Try to parse a list of tokens into a block.
 pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
     // Accumulate chunks of tokens into groups.
@@ -387,6 +381,9 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
     // The current command
     let mut command = LiteCommand::new();
 
+    let mut prev_comments = None;
+    let mut prev_comment_indent = 0;
+
     let mut prev_token: Option<Token> = None;
 
     // The parsing process repeats:
@@ -394,6 +391,21 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
     // - newline (`\n` or `\r`)
     // - pipes (`|`)
     // - semicolon
+    fn finish_command(
+        prev_comments: &mut Option<Vec<Spanned<String>>>,
+        command: &mut LiteCommand,
+        pipeline: &mut LitePipeline,
+    ) {
+        if let Some(prev_comments_) = prev_comments {
+            //Add previous comments to this command
+            command.comments = Some(prev_comments_.clone());
+            //Reset
+            *prev_comments = None;
+        }
+        pipeline.push(command.clone());
+        *command = LiteCommand::new();
+    }
+
     for token in tokens {
         match &token.contents {
             TokenContents::EOL => {
@@ -409,13 +421,21 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
                     if let TokenContents::Pipe = prev.contents {
                         continue;
                     }
+                    if let TokenContents::EOL = prev.contents {
+                        //If we have an empty line we discard previous comments as they are not
+                        //part of a command
+                        //Example nu Code:
+                        //#I am a comment getting discarded
+                        //
+                        //def e [] {echo hi}
+                        prev_comments = None
+                    }
                 }
 
                 // If we have an open command, push it into the current
                 // pipeline.
                 if command.has_content() {
-                    pipeline.push(command);
-                    command = LiteCommand::new();
+                    finish_command(&mut prev_comments, &mut command, &mut pipeline);
                 }
 
                 // If we have an open pipeline, push it into the current group.
@@ -437,8 +457,7 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
                 // If the current command has content, accumulate it into
                 // the current pipeline and start a new command.
                 if command.has_content() {
-                    pipeline.push(command);
-                    command = LiteCommand::new();
+                    finish_command(&mut prev_comments, &mut command, &mut pipeline);
                 } else {
                     // If the current command doesn't have content, return an
                     // error that indicates that the `|` was unexpected.
@@ -457,8 +476,7 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
                 // If the current command has content, accumulate it into the
                 // current pipeline and start a new command.
                 if command.has_content() {
-                    pipeline.push(command);
-                    command = LiteCommand::new();
+                    finish_command(&mut prev_comments, &mut command, &mut pipeline);
                 }
 
                 // If the current pipeline has content, accumulate it into the
@@ -474,13 +492,34 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
 
                 command.push(bare.to_string().spanned(token.span));
             }
+            TokenContents::Comment(comment) => {
+                if prev_comments.is_none() {
+                    //Calculate amount of space indent
+                    if let Some((i, _)) = comment.chars().enumerate().find(|(_, ch)| *ch != ' ') {
+                        prev_comment_indent = i;
+                    }
+                }
+                let comment: String = comment
+                    .chars()
+                    .enumerate()
+                    .skip_while(|(i, ch)| *i < prev_comment_indent && *ch == ' ')
+                    .map(|(_, ch)| ch)
+                    .collect();
+
+                //Because we skipped some spaces at start, the span needs to be adjusted
+                let comment_span = Span::new(token.span.end() - comment.len(), token.span.end());
+
+                prev_comments
+                    .get_or_insert(vec![])
+                    .push(comment.spanned(comment_span));
+            }
         }
         prev_token = Some(token);
     }
 
     // If the current command has content, accumulate it into the current pipeline.
     if command.has_content() {
-        pipeline.push(command);
+        finish_command(&mut prev_comments, &mut command, &mut pipeline)
     }
 
     // If the current pipeline has content, accumulate it into the current group.
@@ -567,10 +606,26 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>)
                 Span::new(span_offset + idx, span_offset + idx + 1),
             ));
         } else if *c == '#' {
-            // If the next character is `#`, we're at the beginning of a line
-            // comment. The comment continues until the next newline.
-
-            skip_comment(&mut char_indices);
+            let comment_start = *idx + 1;
+            let mut comment = String::new();
+            //Don't copy '#' into comment string
+            char_indices.next();
+            while let Some((_, c)) = char_indices.peek() {
+                if *c == '\n' {
+                    break;
+                }
+                comment.push(*c);
+                //Advance char_indices
+                let _ = char_indices.next();
+            }
+            let token = Token::new(
+                TokenContents::Comment(comment.clone()),
+                Span::new(
+                    span_offset + comment_start,
+                    span_offset + comment_start + comment.len(),
+                ),
+            );
+            output.push(token);
         } else if c.is_whitespace() {
             // If the next character is non-newline whitespace, skip it.
 
@@ -702,6 +757,23 @@ mod tests {
             assert_eq!(result[0].span, span(0, 10));
         }
 
+        #[test]
+        fn lex_comment() {
+            let input = r#"
+#A comment
+def e [] {echo hi}
+                "#;
+
+            let (result, err) = lex(input, 0);
+            assert!(err.is_none());
+            //result[0] == EOL
+            assert_eq!(result[1].span, span(2, 11));
+            assert_eq!(
+                result[1].contents,
+                TokenContents::Comment("A comment".to_string())
+            );
+        }
+
         #[test]
         fn ignore_future() {
             let input = "foo 'bar";
@@ -804,5 +876,106 @@ mod tests {
                 "\"foo' --test\""
             );
         }
+        #[test]
+        fn command_with_comment() {
+            let code = r#"
+# My echo
+# * It's much better :)
+def my_echo [arg] { echo $arg }
+            "#;
+            let (result, err) = lex(code, 0);
+            assert!(err.is_none());
+            let (result, err) = block(result);
+            assert!(err.is_none());
+
+            assert_eq!(result.block.len(), 1);
+            assert_eq!(result.block[0].pipelines.len(), 1);
+            assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
+            assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4);
+            assert_eq!(
+                result.block[0].pipelines[0].commands[0].comments,
+                Some(vec![
+                    //Leading space is trimmed
+                    "My echo".to_string().spanned(Span::new(3, 10)),
+                    "* It's much better :)"
+                        .to_string()
+                        .spanned(Span::new(13, 34))
+                ])
+            );
+        }
+        #[test]
+        fn discarded_comment() {
+            let code = r#"
+# This comment gets discarded, because of the following empty line
+
+echo 42
+            "#;
+            let (result, err) = lex(code, 0);
+            assert!(err.is_none());
+            // assert_eq!(format!("{:?}", result), "");
+            let (result, err) = block(result);
+            assert!(err.is_none());
+            assert_eq!(result.block.len(), 1);
+            assert_eq!(result.block[0].pipelines.len(), 1);
+            assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
+            assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
+            assert_eq!(result.block[0].pipelines[0].commands[0].comments, None);
+        }
+    }
+
+    #[test]
+    fn no_discarded_white_space_start_of_comment() {
+        let code = r#"
+#No white_space at firt line ==> No white_space discarded
+#   Starting space is not discarded
+echo 42
+            "#;
+        let (result, err) = lex(code, 0);
+        assert!(err.is_none());
+        // assert_eq!(format!("{:?}", result), "");
+        let (result, err) = block(result);
+        assert!(err.is_none());
+        assert_eq!(result.block.len(), 1);
+        assert_eq!(result.block[0].pipelines.len(), 1);
+        assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
+        assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
+        assert_eq!(
+            result.block[0].pipelines[0].commands[0].comments,
+            Some(vec![
+                "No white_space at firt line ==> No white_space discarded"
+                    .to_string()
+                    .spanned(Span::new(2, 58)),
+                "   Starting space is not discarded"
+                    .to_string()
+                    .spanned(Span::new(60, 94)),
+            ])
+        );
+    }
+
+    #[test]
+    fn multiple_discarded_white_space_start_of_comment() {
+        let code = r#"
+#  Discard 2 spaces
+# Discard 1 space
+#  Discard 2 spaces
+echo 42
+            "#;
+        let (result, err) = lex(code, 0);
+        assert!(err.is_none());
+        // assert_eq!(format!("{:?}", result), "");
+        let (result, err) = block(result);
+        assert!(err.is_none());
+        assert_eq!(result.block.len(), 1);
+        assert_eq!(result.block[0].pipelines.len(), 1);
+        assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
+        assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
+        assert_eq!(
+            result.block[0].pipelines[0].commands[0].comments,
+            Some(vec![
+                "Discard 2 spaces".to_string().spanned(Span::new(4, 20)),
+                "Discard 1 space".to_string().spanned(Span::new(23, 38)),
+                "Discard 2 spaces".to_string().spanned(Span::new(42, 58)),
+            ])
+        );
     }
 }
diff --git a/crates/nu-parser/src/parse.rs b/crates/nu-parser/src/parse.rs
index e3a054115f..e7d848a9c6 100644
--- a/crates/nu-parser/src/parse.rs
+++ b/crates/nu-parser/src/parse.rs
@@ -2161,7 +2161,11 @@ fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<Parse
         }
 
         let name = trim_quotes(&call.parts[1].item);
-        let (signature, err) = parse_signature(&name, &call.parts[2], scope);
+        let (mut signature, err) = parse_signature(&name, &call.parts[2], scope);
+
+        //Add commands comments to signature usage
+        signature.usage = call.comments_joined();
+
         if err.is_some() {
             return err;
         };