forked from extern/nushell

Add better comment skipping (#359)

This change adds an explicit skip_comment flag to lex(): when it is true, comment tokens are dropped during lexing instead of being handed on to the parser. The hunks below update the lexer itself, the parser call sites (all of which now pass true, except parse_signature_helper), the lexer and lite_parse tests, and src/tests.rs, which gains two integration tests for comments inside records.

parent d30dfc63c4
commit 143855b662
@@ -207,6 +207,7 @@ pub fn lex(
     span_offset: usize,
     additional_whitespace: &[u8],
     special_tokens: &[u8],
+    skip_comment: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
     let mut error = None;
 
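As a quick illustration of the new parameter, here is a test-style sketch (not part of this commit) that lexes the same input with and without comment skipping. The import path and the Token/TokenContents types are assumed to match the lexer tests shown further down.

```rust
// Illustrative sketch only; import paths are assumed, not taken from the commit.
use nu_parser::{lex, TokenContents};

#[test]
fn comment_tokens_depend_on_skip_comment() {
    let file = b"let x = 300 # a comment \n $x + 444";

    // skip_comment = false: the comment is kept as a TokenContents::Comment token.
    let (kept, err) = lex(file, 0, &[], &[], false);
    assert!(err.is_none());
    assert!(kept
        .iter()
        .any(|t| matches!(t.contents, TokenContents::Comment)));

    // skip_comment = true: no Comment token is produced for the same input.
    let (skipped, err) = lex(file, 0, &[], &[], true);
    assert!(err.is_none());
    assert!(!skipped
        .iter()
        .any(|t| matches!(t.contents, TokenContents::Comment)));
}
```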
@@ -277,6 +278,7 @@ pub fn lex(
             while let Some(input) = input.get(curr_offset) {
                 curr_offset += 1;
                 if *input == b'\n' || *input == b'\r' {
+                    if !skip_comment {
                     output.push(Token::new(
                         TokenContents::Comment,
                         Span::new(start, curr_offset - 1),
@@ -289,12 +291,13 @@ pub fn lex(
                         TokenContents::Eol,
                         Span::new(curr_offset - 1, curr_offset),
                     ));
+                    }
                     start = curr_offset;

                     break;
                 }
             }
-            if start != curr_offset {
+            if start != curr_offset && !skip_comment {
                 output.push(Token::new(
                     TokenContents::Comment,
                     Span::new(span_offset + start, span_offset + curr_offset),

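Taken together, the two hunks above give the comment-handling code in lex() roughly the following shape. This is reconstructed from the diff for readability; indentation and the unshown lines between the hunks are approximate, not a verbatim copy of the file.

```rust
// Reconstructed sketch of the comment branch inside lex() after this change.
while let Some(input) = input.get(curr_offset) {
    curr_offset += 1;
    if *input == b'\n' || *input == b'\r' {
        if !skip_comment {
            output.push(Token::new(
                TokenContents::Comment,
                Span::new(start, curr_offset - 1),
            ));
            output.push(Token::new(
                TokenContents::Eol,
                Span::new(curr_offset - 1, curr_offset),
            ));
        }
        start = curr_offset;

        break;
    }
}
// A comment that runs to end of input (no trailing newline) is only emitted
// when comments are being kept.
if start != curr_offset && !skip_comment {
    output.push(Token::new(
        TokenContents::Comment,
        Span::new(span_offset + start, span_offset + curr_offset),
    ));
}
```

Note that, as committed here, the Eol token that follows a comment sits inside the same `!skip_comment` guard as the Comment token itself.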
@@ -488,7 +488,7 @@ pub fn parse_module_block(
 
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, span.start, &[], &[]);
+    let (output, err) = lex(source, span.start, &[], &[], true);
     error = error.or(err);
 
     let (output, err) = lite_parse(&output);

@@ -1311,7 +1311,7 @@ pub fn parse_full_cell_path(
     let source = working_set.get_span_contents(span);
     let mut error = None;
 
-    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']);
+    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
     error = error.or(err);
 
     let mut tokens = tokens.into_iter().peekable();

@@ -1336,7 +1336,7 @@ pub fn parse_full_cell_path(
 
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[]);
+    let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[], true);
     error = error.or(err);
 
     let (output, err) = lite_parse(&output);

@@ -2062,7 +2062,7 @@ pub fn parse_signature_helper(
     let mut error = None;
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':']);
+    let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':'], false);
     error = error.or(err);
 
     let mut args: Vec<Arg> = vec![];

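parse_signature_helper is the one parser call site above that passes false, so comment tokens inside a signature survive lexing. A plausible reason, not stated in the commit, is that comments placed next to parameters are later used as their descriptions. A hypothetical check of that lexing behaviour (names and input are illustrative, not from the commit):

```rust
// Hypothetical sketch: signature-style text lexed the way parse_signature_helper
// lexes it (skip_comment = false) keeps its comment token.
let sig_body = b"x: int # the x value";
let (tokens, err) = lex(sig_body, 0, &[b'\n', b'\r', b','], &[b':'], false);
assert!(err.is_none());
assert!(tokens
    .iter()
    .any(|t| matches!(t.contents, TokenContents::Comment)));
```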
@@ -2391,7 +2391,7 @@ pub fn parse_list_expression(
     let span = Span { start, end };
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[]);
+    let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[], true);
     error = error.or(err);
 
     let (output, err) = lite_parse(&output);

@@ -2463,7 +2463,7 @@ pub fn parse_table_expression(
 
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[]);
+    let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[], true);
     error = error.or(err);
 
     let (output, err) = lite_parse(&output);

@@ -2578,7 +2578,7 @@ pub fn parse_block_expression(
 
     let source = working_set.get_span_contents(span);
 
-    let (output, err) = lex(source, start, &[], &[]);
+    let (output, err) = lex(source, start, &[], &[], true);
     error = error.or(err);
 
     working_set.enter_scope();

@@ -2797,7 +2797,7 @@ pub fn parse_value(
     let source = working_set.get_span_contents(span);
     let mut error = None;
 
-    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']);
+    let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
     error = error.or(err);
 
     let tokens = tokens.into_iter().peekable();

@@ -3232,7 +3232,7 @@ pub fn parse_record(
     let span = Span { start, end };
     let source = working_set.get_span_contents(span);
 
-    let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':']);
+    let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
     error = error.or(err);
 
     let mut output = vec![];

@@ -3598,7 +3598,7 @@ pub fn parse(
 
     working_set.add_file(name, contents);
 
-    let (output, err) = lex(contents, span_offset, &[], &[]);
+    let (output, err) = lex(contents, span_offset, &[], &[], true);
     error = error.or(err);
 
     let (output, err) = lite_parse(&output);

@@ -5,7 +5,7 @@ use nu_protocol::Span;
 fn lex_basic() {
     let file = b"let x = 4";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     assert!(output.1.is_none());
 }

@@ -14,7 +14,7 @@ fn lex_basic() {
 fn lex_newline() {
     let file = b"let x = 300\nlet y = 500;";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     assert!(output.0.contains(&Token {
         contents: TokenContents::Eol,

@@ -26,7 +26,7 @@ fn lex_newline() {
 fn lex_empty() {
     let file = b"";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     assert!(output.0.is_empty());
     assert!(output.1.is_none());

@@ -37,7 +37,7 @@ fn lex_parenthesis() {
     // The whole parenthesis is an item for the lexer
     let file = b"let x = (300 + (322 * 444));";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     assert_eq!(
         output.0.get(3).unwrap(),

@@ -52,7 +52,7 @@ fn lex_parenthesis() {
 fn lex_comment() {
     let file = b"let x = 300 # a comment \n $x + 444";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], false);
 
     assert_eq!(
         output.0.get(4).unwrap(),

@@ -67,7 +67,7 @@ fn lex_comment() {
 fn lex_is_incomplete() {
     let file = b"let x = 300 | ;";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     let err = output.1.unwrap();
     assert!(matches!(err, ParseError::ExtraTokens(_)));

@@ -77,7 +77,7 @@ fn lex_is_incomplete() {
 fn lex_incomplete_paren() {
     let file = b"let x = (300 + ( 4 + 1)";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     let err = output.1.unwrap();
     assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == ")"));

@@ -87,7 +87,7 @@ fn lex_incomplete_paren() {
 fn lex_incomplete_quote() {
     let file = b"let x = '300 + 4 + 1";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], true);
 
     let err = output.1.unwrap();
     assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'"));

@@ -102,7 +102,7 @@ fn lex_comments() {
     // let y = 1 # comment
     let file = b"let z = 4 #comment \n let x = 4 # comment\n let y = 1 # comment";
 
-    let output = lex(file, 0, &[], &[]);
+    let output = lex(file, 0, &[], &[], false);
 
     assert_eq!(
         output.0.get(4).unwrap(),

@@ -2,7 +2,7 @@ use nu_parser::{lex, lite_parse, LiteBlock, ParseError};
 use nu_protocol::Span;
 
 fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
-    let (output, err) = lex(input, 0, &[], &[]);
+    let (output, err) = lex(input, 0, &[], &[], false);
     if let Some(err) = err {
         return Err(err);
     }

src/tests.rs (+23 lines)
@@ -1120,3 +1120,26 @@ fn config_var_2() -> TestResult {
         "40.0 KB",
     )
 }
+
+#[test]
+fn comment_skipping_1() -> TestResult {
+    run_test(
+        r#"let x = {
+            y: 20
+            # foo
+        }; $x.y"#,
+        "20",
+    )
+}
+
+#[test]
+fn comment_skipping_2() -> TestResult {
+    run_test(
+        r#"let x = {
+            y: 20
+            # foo
+            z: 40
+        }; $x.z"#,
+        "40",
+    )
+}
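The two new integration tests exercise comment skipping end to end: parse_record, updated above to lex with skip_comment set to true, now tolerates a `# foo` line inside a record literal, and field access on the surrounding record ($x.y, $x.z) still returns the expected values.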