Add comments to next LiteCommand (#2846)

This commit adds a new field `comments` to `LiteCommand`, which holds the
comments preceding that command.

This can be useful, for example, when defining a function with `def`. Nushell
could pick up the comments and display them when the user types `help my_def_func`.

Example
```shell
# My echo
# * It's much better :)
def my_echo [arg] { echo $arg }
```
The LiteCommand def will now contain the comments `My echo` and `* It's much
better :)`.

The comment is not associated with the next command if there is one (or more)
empty line between them.
Example
```shell
# This comment is discarded because of the empty line below

echo 42
```

This new functionality is similar to DocStrings. One might introduce a special
notation for such DocStrings, so that the parser can differentiate better
between discardable comments and useful documentation.
This commit is contained in:
Leonhard Kipp 2021-01-07 18:14:51 +01:00 committed by GitHub
parent 3d29e3efbf
commit eb3c2c9e76
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 235 additions and 38 deletions

View File

@ -250,7 +250,7 @@ impl WholeStreamCommand for Block {
}
fn usage(&self) -> &str {
""
&self.params.usage
}
async fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {

View File

@ -0,0 +1,19 @@
use nu_test_support::nu;
use nu_test_support::playground::Playground;
use std::fs;
#[test]
fn def_with_comment() {
    Playground::setup("def_with_comment", |dirs, _| {
        // A single comment line directly above the definition of `e`.
        let script = r#"
#My echo
def e [arg] {echo $arg}
"#;
        // Write the script so it can be `source`d from the playground root.
        fs::write(dirs.root().join("def_test"), script).expect("Unable to write file");

        let actual = nu!(
            cwd: dirs.root(),
            "source def_test; help e | to json"
        );

        // The preceding comment becomes the usage text of `e` and shows up
        // (JSON-escaped) in the `help` output.
        assert!(actual.out.contains("My echo\\n\\n"));
    });
}

View File

@ -7,6 +7,7 @@ mod cd;
mod compact;
mod count;
mod cp;
mod def;
mod default;
mod drop;
mod each;

View File

@ -18,7 +18,7 @@ impl Token {
}
}
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum TokenContents {
/// A baseline token is an atomic chunk of source code. This means that the
/// token contains the entirety of string literals, as well as the entirety
@ -28,6 +28,7 @@ pub enum TokenContents {
/// until the closing `}` (after taking comments and string literals into
/// consideration).
Baseline(String),
Comment(String),
Pipe,
Semicolon,
EOL,
@ -38,11 +39,27 @@ pub enum TokenContents {
///A command parsed out of a pipeline: its tokens plus any comments that
///immediately preceded it.
#[derive(Debug, Clone)]
pub struct LiteCommand {
///The baseline tokens of the command (command name and arguments).
pub parts: Vec<Spanned<String>>,
///Preceding comments. Each String in the vec is one line. The comment literal is not included.
pub comments: Option<Vec<Spanned<String>>>,
}
impl LiteCommand {
fn new() -> LiteCommand {
LiteCommand { parts: vec![] }
LiteCommand {
parts: vec![],
comments: None,
}
}
/// Joins all preceding comment lines into a single newline-separated string.
/// Returns an empty string when the command has no comments.
pub fn comments_joined(&self) -> String {
    match &self.comments {
        None => String::new(),
        // Borrow each line instead of cloning it; `join` allocates the
        // result string once.
        Some(lines) => lines
            .iter()
            .map(|s| s.item.as_str())
            .collect::<Vec<_>>()
            .join("\n"),
    }
}
pub fn is_empty(&self) -> bool {
@ -147,18 +164,6 @@ impl LiteGroup {
self.pipelines.push(item)
}
///Returns true if the group's very first token starts with `#`, i.e. the
///group begins with a comment. Returns false for empty groups/pipelines.
pub fn is_comment(&self) -> bool {
//Guard every indexing step so the `[0]` accesses below cannot panic.
if !self.is_empty()
&& !self.pipelines[0].is_empty()
&& !self.pipelines[0].commands.is_empty()
&& !self.pipelines[0].commands[0].parts.is_empty()
{
self.pipelines[0].commands[0].parts[0].item.starts_with('#')
} else {
false
}
}
#[cfg(test)]
pub(crate) fn span(&self) -> Span {
let start = if !self.pipelines.is_empty() {
@ -362,17 +367,6 @@ pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option
(token_contents.spanned(span), None)
}
/// We encountered a `#` character. Keep consuming characters until we encounter
/// a newline character (but don't consume it).
fn skip_comment(input: &mut Input) {
    // Peek first so the terminating newline stays in the stream for the caller.
    while let Some((_, c)) = input.peek() {
        match *c {
            '\n' | '\r' => break,
            _ => {
                input.next();
            }
        }
    }
}
/// Try to parse a list of tokens into a block.
pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
// Accumulate chunks of tokens into groups.
@ -387,6 +381,9 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
// The current command
let mut command = LiteCommand::new();
let mut prev_comments = None;
let mut prev_comment_indent = 0;
let mut prev_token: Option<Token> = None;
// The parsing process repeats:
@ -394,6 +391,21 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
// - newline (`\n` or `\r`)
// - pipes (`|`)
// - semicolon
/// Finishes the currently open `command`: attaches any pending comments to
/// it, moves it into `pipeline`, and leaves a fresh empty command behind.
/// Also clears the pending-comment buffer.
fn finish_command(
    prev_comments: &mut Option<Vec<Spanned<String>>>,
    command: &mut LiteCommand,
    pipeline: &mut LitePipeline,
) {
    // `take` moves the comments out and resets the buffer to None in one
    // step — no clone needed. A freshly started command always has
    // `comments: None`, so assigning unconditionally matches the old
    // only-assign-when-Some behavior.
    command.comments = prev_comments.take();
    // Move the finished command into the pipeline (instead of cloning it),
    // replacing it with a new empty one.
    pipeline.push(std::mem::replace(command, LiteCommand::new()));
}
for token in tokens {
match &token.contents {
TokenContents::EOL => {
@ -409,13 +421,21 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
if let TokenContents::Pipe = prev.contents {
continue;
}
if let TokenContents::EOL = prev.contents {
//If we have an empty line we discard previous comments as they are not
//part of a command
//Example nu Code:
//#I am a comment getting discarded
//
//def e [] {echo hi}
prev_comments = None
}
}
// If we have an open command, push it into the current
// pipeline.
if command.has_content() {
pipeline.push(command);
command = LiteCommand::new();
finish_command(&mut prev_comments, &mut command, &mut pipeline);
}
// If we have an open pipeline, push it into the current group.
@ -437,8 +457,7 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
// If the current command has content, accumulate it into
// the current pipeline and start a new command.
if command.has_content() {
pipeline.push(command);
command = LiteCommand::new();
finish_command(&mut prev_comments, &mut command, &mut pipeline);
} else {
// If the current command doesn't have content, return an
// error that indicates that the `|` was unexpected.
@ -457,8 +476,7 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
// If the current command has content, accumulate it into the
// current pipeline and start a new command.
if command.has_content() {
pipeline.push(command);
command = LiteCommand::new();
finish_command(&mut prev_comments, &mut command, &mut pipeline);
}
// If the current pipeline has content, accumulate it into the
@ -474,13 +492,34 @@ pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
command.push(bare.to_string().spanned(token.span));
}
TokenContents::Comment(comment) => {
if prev_comments.is_none() {
//Calculate amount of space indent
if let Some((i, _)) = comment.chars().enumerate().find(|(_, ch)| *ch != ' ') {
prev_comment_indent = i;
}
}
let comment: String = comment
.chars()
.enumerate()
.skip_while(|(i, ch)| *i < prev_comment_indent && *ch == ' ')
.map(|(_, ch)| ch)
.collect();
//Because we skipped some spaces at start, the span needs to be adjusted
let comment_span = Span::new(token.span.end() - comment.len(), token.span.end());
prev_comments
.get_or_insert(vec![])
.push(comment.spanned(comment_span));
}
}
prev_token = Some(token);
}
// If the current command has content, accumulate it into the current pipeline.
if command.has_content() {
pipeline.push(command);
finish_command(&mut prev_comments, &mut command, &mut pipeline)
}
// If the current pipeline has content, accumulate it into the current group.
@ -567,10 +606,26 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>)
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if *c == '#' {
// If the next character is `#`, we're at the beginning of a line
// comment. The comment continues until the next newline.
skip_comment(&mut char_indices);
let comment_start = *idx + 1;
let mut comment = String::new();
//Don't copy '#' into comment string
char_indices.next();
while let Some((_, c)) = char_indices.peek() {
if *c == '\n' {
break;
}
comment.push(*c);
//Advance char_indices
let _ = char_indices.next();
}
let token = Token::new(
TokenContents::Comment(comment.clone()),
Span::new(
span_offset + comment_start,
span_offset + comment_start + comment.len(),
),
);
output.push(token);
} else if c.is_whitespace() {
// If the next character is non-newline whitespace, skip it.
@ -702,6 +757,23 @@ mod tests {
assert_eq!(result[0].span, span(0, 10));
}
#[test]
fn lex_comment() {
    let input = r#"
#A comment
def e [] {echo hi}
"#;
    let (tokens, err) = lex(input, 0);
    assert!(err.is_none());

    // tokens[0] is the leading EOL; tokens[1] must be the comment token.
    // Its span covers only the comment text, not the `#` literal.
    let comment_token = &tokens[1];
    assert_eq!(comment_token.span, span(2, 11));
    assert_eq!(
        comment_token.contents,
        TokenContents::Comment("A comment".to_string())
    );
}
#[test]
fn ignore_future() {
let input = "foo 'bar";
@ -804,5 +876,106 @@ mod tests {
"\"foo' --test\""
);
}
#[test]
fn command_with_comment() {
    let code = r#"
# My echo
# * It's much better :)
def my_echo [arg] { echo $arg }
"#;
    let (tokens, err) = lex(code, 0);
    assert!(err.is_none());
    let (parsed, err) = block(tokens);
    assert!(err.is_none());

    assert_eq!(parsed.block.len(), 1);
    let pipelines = &parsed.block[0].pipelines;
    assert_eq!(pipelines.len(), 1);
    assert_eq!(pipelines[0].commands.len(), 1);

    let cmd = &pipelines[0].commands[0];
    assert_eq!(cmd.parts.len(), 4);
    assert_eq!(
        cmd.comments,
        Some(vec![
            // The single space after `#` sets the indent and is trimmed
            // from every comment line.
            "My echo".to_string().spanned(Span::new(3, 10)),
            "* It's much better :)"
                .to_string()
                .spanned(Span::new(13, 34)),
        ])
    );
}
#[test]
fn discarded_comment() {
    // A comment separated from the next command by an empty line must NOT be
    // attached to that command. The empty line in the fixture below is what
    // this test is about (the test's own comment text says so), so it must
    // be present.
    let code = r#"
# This comment gets discarded, because of the following empty line

echo 42
"#;
    let (tokens, err) = lex(code, 0);
    assert!(err.is_none());
    let (parsed, err) = block(tokens);
    assert!(err.is_none());

    assert_eq!(parsed.block.len(), 1);
    assert_eq!(parsed.block[0].pipelines.len(), 1);
    assert_eq!(parsed.block[0].pipelines[0].commands.len(), 1);

    let cmd = &parsed.block[0].pipelines[0].commands[0];
    assert_eq!(cmd.parts.len(), 2);
    // The blank line between the comment and `echo` discards the comment.
    assert_eq!(cmd.comments, None);
}
}
#[test]
//The first comment line starts directly after the `#`, so the computed indent
//is zero and the leading space of the second comment line is NOT stripped.
fn no_discarded_white_space_start_of_comment() {
let code = r#"
#No white_space at firt line ==> No white_space discarded
# Starting space is not discarded
echo 42
"#;
let (result, err) = lex(code, 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
assert_eq!(
result.block[0].pipelines[0].commands[0].comments,
Some(vec![
"No white_space at firt line ==> No white_space discarded"
.to_string()
.spanned(Span::new(2, 58)),
//NOTE(review): span (60, 94) implies a 34-character comment, but the
//literal below has 32 characters — whitespace may have been lost in
//transit; verify against the original fixture.
" Starting space is not discarded"
.to_string()
.spanned(Span::new(60, 94)),
])
);
}
#[test]
fn multiple_discarded_white_space_start_of_comment() {
    // The indent of the FIRST comment line (two spaces here) decides how much
    // leading whitespace is stripped from every following comment line.
    // The double spaces in lines 1 and 3 are required by the asserted spans:
    // (4, 20) and (42, 58) only work out if two spaces precede the text.
    let code = r#"
#  Discard 2 spaces
# Discard 1 space
#  Discard 2 spaces
echo 42
"#;
    let (tokens, err) = lex(code, 0);
    assert!(err.is_none());
    let (parsed, err) = block(tokens);
    assert!(err.is_none());

    assert_eq!(parsed.block.len(), 1);
    assert_eq!(parsed.block[0].pipelines.len(), 1);
    assert_eq!(parsed.block[0].pipelines[0].commands.len(), 1);

    let cmd = &parsed.block[0].pipelines[0].commands[0];
    assert_eq!(cmd.parts.len(), 2);
    assert_eq!(
        cmd.comments,
        Some(vec![
            "Discard 2 spaces".to_string().spanned(Span::new(4, 20)),
            // Only one space available: skip_while stops at the text.
            "Discard 1 space".to_string().spanned(Span::new(23, 38)),
            "Discard 2 spaces".to_string().spanned(Span::new(42, 58)),
        ])
    );
}
}

View File

@ -2161,7 +2161,11 @@ fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<Parse
}
let name = trim_quotes(&call.parts[1].item);
let (signature, err) = parse_signature(&name, &call.parts[2], scope);
let (mut signature, err) = parse_signature(&name, &call.parts[2], scope);
//Add commands comments to signature usage
signature.usage = call.comments_joined();
if err.is_some() {
return err;
};