Add better comment skipping (#359)

This commit is contained in:
JT 2021-11-22 07:13:09 +13:00 committed by GitHub
parent d30dfc63c4
commit 143855b662
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 58 additions and 32 deletions

View File

@ -207,6 +207,7 @@ pub fn lex(
span_offset: usize, span_offset: usize,
additional_whitespace: &[u8], additional_whitespace: &[u8],
special_tokens: &[u8], special_tokens: &[u8],
skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) { ) -> (Vec<Token>, Option<ParseError>) {
let mut error = None; let mut error = None;
@ -277,24 +278,26 @@ pub fn lex(
while let Some(input) = input.get(curr_offset) { while let Some(input) = input.get(curr_offset) {
curr_offset += 1; curr_offset += 1;
if *input == b'\n' || *input == b'\r' { if *input == b'\n' || *input == b'\r' {
output.push(Token::new( if !skip_comment {
TokenContents::Comment, output.push(Token::new(
Span::new(start, curr_offset - 1), TokenContents::Comment,
)); Span::new(start, curr_offset - 1),
));
// Adding an end of line token after a comment // Adding an end of line token after a comment
// This helps during lite_parser to avoid losing a command // This helps during lite_parser to avoid losing a command
// in a statement // in a statement
output.push(Token::new( output.push(Token::new(
TokenContents::Eol, TokenContents::Eol,
Span::new(curr_offset - 1, curr_offset), Span::new(curr_offset - 1, curr_offset),
)); ));
}
start = curr_offset; start = curr_offset;
break; break;
} }
} }
if start != curr_offset { if start != curr_offset && !skip_comment {
output.push(Token::new( output.push(Token::new(
TokenContents::Comment, TokenContents::Comment,
Span::new(span_offset + start, span_offset + curr_offset), Span::new(span_offset + start, span_offset + curr_offset),

View File

@ -488,7 +488,7 @@ pub fn parse_module_block(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[], &[]); let (output, err) = lex(source, span.start, &[], &[], true);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);

View File

@ -1311,7 +1311,7 @@ pub fn parse_full_cell_path(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let mut error = None; let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']); let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
error = error.or(err); error = error.or(err);
let mut tokens = tokens.into_iter().peekable(); let mut tokens = tokens.into_iter().peekable();
@ -1336,7 +1336,7 @@ pub fn parse_full_cell_path(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[]); let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[], true);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
@ -2062,7 +2062,7 @@ pub fn parse_signature_helper(
let mut error = None; let mut error = None;
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':']); let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[b':'], false);
error = error.or(err); error = error.or(err);
let mut args: Vec<Arg> = vec![]; let mut args: Vec<Arg> = vec![];
@ -2391,7 +2391,7 @@ pub fn parse_list_expression(
let span = Span { start, end }; let span = Span { start, end };
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[]); let (output, err) = lex(source, span.start, &[b'\n', b'\r', b','], &[], true);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
@ -2463,7 +2463,7 @@ pub fn parse_table_expression(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[]); let (output, err) = lex(source, start, &[b'\n', b'\r', b','], &[], true);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);
@ -2578,7 +2578,7 @@ pub fn parse_block_expression(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (output, err) = lex(source, start, &[], &[]); let (output, err) = lex(source, start, &[], &[], true);
error = error.or(err); error = error.or(err);
working_set.enter_scope(); working_set.enter_scope();
@ -2797,7 +2797,7 @@ pub fn parse_value(
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let mut error = None; let mut error = None;
let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.']); let (tokens, err) = lex(source, span.start, &[b'\n', b'\r'], &[b'.'], true);
error = error.or(err); error = error.or(err);
let tokens = tokens.into_iter().peekable(); let tokens = tokens.into_iter().peekable();
@ -3232,7 +3232,7 @@ pub fn parse_record(
let span = Span { start, end }; let span = Span { start, end };
let source = working_set.get_span_contents(span); let source = working_set.get_span_contents(span);
let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':']); let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
error = error.or(err); error = error.or(err);
let mut output = vec![]; let mut output = vec![];
@ -3598,7 +3598,7 @@ pub fn parse(
working_set.add_file(name, contents); working_set.add_file(name, contents);
let (output, err) = lex(contents, span_offset, &[], &[]); let (output, err) = lex(contents, span_offset, &[], &[], true);
error = error.or(err); error = error.or(err);
let (output, err) = lite_parse(&output); let (output, err) = lite_parse(&output);

View File

@ -5,7 +5,7 @@ use nu_protocol::Span;
fn lex_basic() { fn lex_basic() {
let file = b"let x = 4"; let file = b"let x = 4";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
assert!(output.1.is_none()); assert!(output.1.is_none());
} }
@ -14,7 +14,7 @@ fn lex_basic() {
fn lex_newline() { fn lex_newline() {
let file = b"let x = 300\nlet y = 500;"; let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
assert!(output.0.contains(&Token { assert!(output.0.contains(&Token {
contents: TokenContents::Eol, contents: TokenContents::Eol,
@ -26,7 +26,7 @@ fn lex_newline() {
fn lex_empty() { fn lex_empty() {
let file = b""; let file = b"";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
assert!(output.0.is_empty()); assert!(output.0.is_empty());
assert!(output.1.is_none()); assert!(output.1.is_none());
@ -37,7 +37,7 @@ fn lex_parenthesis() {
// The whole parenthesis is an item for the lexer // The whole parenthesis is an item for the lexer
let file = b"let x = (300 + (322 * 444));"; let file = b"let x = (300 + (322 * 444));";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
assert_eq!( assert_eq!(
output.0.get(3).unwrap(), output.0.get(3).unwrap(),
@ -52,7 +52,7 @@ fn lex_parenthesis() {
fn lex_comment() { fn lex_comment() {
let file = b"let x = 300 # a comment \n $x + 444"; let file = b"let x = 300 # a comment \n $x + 444";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], false);
assert_eq!( assert_eq!(
output.0.get(4).unwrap(), output.0.get(4).unwrap(),
@ -67,7 +67,7 @@ fn lex_comment() {
fn lex_is_incomplete() { fn lex_is_incomplete() {
let file = b"let x = 300 | ;"; let file = b"let x = 300 | ;";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap(); let err = output.1.unwrap();
assert!(matches!(err, ParseError::ExtraTokens(_))); assert!(matches!(err, ParseError::ExtraTokens(_)));
@ -77,7 +77,7 @@ fn lex_is_incomplete() {
fn lex_incomplete_paren() { fn lex_incomplete_paren() {
let file = b"let x = (300 + ( 4 + 1)"; let file = b"let x = (300 + ( 4 + 1)";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap(); let err = output.1.unwrap();
assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == ")")); assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == ")"));
@ -87,7 +87,7 @@ fn lex_incomplete_paren() {
fn lex_incomplete_quote() { fn lex_incomplete_quote() {
let file = b"let x = '300 + 4 + 1"; let file = b"let x = '300 + 4 + 1";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], true);
let err = output.1.unwrap(); let err = output.1.unwrap();
assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'")); assert!(matches!(err, ParseError::UnexpectedEof(v, _) if v == "'"));
@ -102,7 +102,7 @@ fn lex_comments() {
// let y = 1 # comment // let y = 1 # comment
let file = b"let z = 4 #comment \n let x = 4 # comment\n let y = 1 # comment"; let file = b"let z = 4 #comment \n let x = 4 # comment\n let y = 1 # comment";
let output = lex(file, 0, &[], &[]); let output = lex(file, 0, &[], &[], false);
assert_eq!( assert_eq!(
output.0.get(4).unwrap(), output.0.get(4).unwrap(),

View File

@ -2,7 +2,7 @@ use nu_parser::{lex, lite_parse, LiteBlock, ParseError};
use nu_protocol::Span; use nu_protocol::Span;
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> { fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, &[], &[]); let (output, err) = lex(input, 0, &[], &[], false);
if let Some(err) = err { if let Some(err) = err {
return Err(err); return Err(err);
} }

View File

@ -1120,3 +1120,26 @@ fn config_var_2() -> TestResult {
"40.0 KB", "40.0 KB",
) )
} }
/// Runs a script that builds a record whose last field is followed by a
/// comment-only line, then reads field `y` and expects the output "20".
#[test]
fn comment_skipping_1() -> TestResult {
    // The `# foo` line sits between the final field and the closing brace.
    let script = r#"let x = {
y: 20
# foo
}; $x.y"#;
    let expected = "20";

    run_test(script, expected)
}
/// Runs a script that builds a record with a comment-only line between two
/// fields, then reads the field after the comment (`z`) and expects "40".
#[test]
fn comment_skipping_2() -> TestResult {
    // The `# foo` line separates field `y` from field `z`.
    let script = r#"let x = {
y: 20
# foo
z: 40
}; $x.z"#;
    let expected = "40";

    run_test(script, expected)
}