Make lexing configurable wrt newlines (#3682)

This commit is contained in:
JT 2021-06-25 17:50:24 +12:00 committed by GitHub
parent cac2875c96
commit 93b5f3f421
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 111 additions and 54 deletions

View File

@ -39,7 +39,7 @@ impl OptionsParser for NuParser {
fn parse(&self, input: &str) -> Result<Options, ShellError> {
let options = Options::default();
let (lite_result, _err) = nu_parser::lex(input, 0);
let (lite_result, _err) = nu_parser::lex(input, 0, nu_parser::NewlineMode::Normal);
let (lite_result, _err) = nu_parser::parse_block(lite_result);
let (parsed, err) = nu_parser::classify_block(&lite_result, &self.context.scope);

View File

@ -463,7 +463,7 @@ mod tests {
#[quickcheck]
fn quickcheck_parse(data: String) -> bool {
let (tokens, err) = nu_parser::lex(&data, 0);
let (tokens, err) = nu_parser::lex(&data, 0, nu_parser::NewlineMode::Normal);
let (lite_block, err2) = nu_parser::parse_block(tokens);
if err.is_none() && err2.is_none() {
let context = EvaluationContext::basic();

View File

@ -149,7 +149,7 @@ impl rustyline::validate::Validator for NuValidator {
) -> rustyline::Result<rustyline::validate::ValidationResult> {
let src = ctx.input();
let (tokens, err) = nu_parser::lex(src, 0);
let (tokens, err) = nu_parser::lex(src, 0, nu_parser::NewlineMode::Normal);
if let Some(err) = err {
if let nu_errors::ParseErrorReason::Eof { .. } = err.reason() {
return Ok(rustyline::validate::ValidationResult::Incomplete);

View File

@ -214,7 +214,7 @@ fn parse_line(line: &str, ctx: &EvaluationContext) -> Result<ClassifiedBlock, Sh
line
};
let (lite_result, err) = nu_parser::lex(line, 0);
let (lite_result, err) = nu_parser::lex(line, 0, nu_parser::NewlineMode::Normal);
if let Some(err) = err {
return Err(err.into());
}

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use nu_parser::NewlineMode;
use nu_source::{Span, Tag};
use crate::command::CommandCompleter;
@ -24,7 +25,7 @@ impl NuCompleter {
) -> (usize, Vec<Suggestion>) {
use engine::LocationType;
let tokens = nu_parser::lex(line, 0).0;
let tokens = nu_parser::lex(line, 0, NewlineMode::Normal).0;
let locations = Some(nu_parser::parse_block(tokens).0)
.map(|block| nu_parser::classify_block(&block, context.scope()))

View File

@ -336,7 +336,7 @@ mod tests {
scope: &dyn ParserScope,
pos: usize,
) -> Vec<LocationType> {
let (tokens, _) = lex(line, 0);
let (tokens, _) = lex(line, 0, nu_parser::NewlineMode::Normal);
let (lite_block, _) = parse_block(tokens);
scope.enter_scope();

View File

@ -94,7 +94,7 @@ mod tests {
#[test]
fn parses_longform_flag_containing_equal_sign() {
let input = "bundle add rails --group=development";
let (tokens, _) = lex(input, 0);
let (tokens, _) = lex(input, 0, lex::lexer::NewlineMode::Normal);
let (root_node, _) = parse_block(tokens);
assert_eq!(root_node.block.len(), 1);

View File

@ -26,6 +26,14 @@ impl Token {
}
}
/// Selects how the lexer treats newline characters in its input.
///
/// The mode is a plain, fieldless flag, so it is `Copy`: callers can pass it
/// by value and keep using their own binding afterwards.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NewlineMode {
    /// Treat newlines as a group separator: the lexer emits an end-of-line
    /// token for each newline it encounters.
    Normal,
    /// Treat newlines as just another whitespace: no end-of-line token is
    /// emitted for them.
    Whitespace,
}
#[derive(Clone, Copy)]
enum BlockKind {
Paren,
@ -427,7 +435,11 @@ pub fn parse_block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
/// Breaks the input string into a vector of tokens. This tokenization only tries to classify separators like
/// semicolons, pipes, etc from external bare values (values that haven't been classified further)
/// Takes in a string and an offset, which is used to offset the spans created (for when this function is used to parse inner strings)
pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>) {
pub fn lex(
input: &str,
span_offset: usize,
newline_mode: NewlineMode,
) -> (Vec<Token>, Option<ParseError>) {
// Break the input slice into an iterator of Unicode characters.
let mut char_indices = input.char_indices().peekable();
let mut error = None;
@ -489,10 +501,12 @@ pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>)
let idx = *idx;
let _ = char_indices.next();
if newline_mode == NewlineMode::Normal {
output.push(Token::new(
TokenContents::Eol,
Span::new(span_offset + idx, span_offset + idx + 1),
));
}
} else if *c == '#' {
// If the next character is `#`, we're at the beginning of a line
// comment. The comment continues until the next newline.

View File

@ -15,7 +15,7 @@ mod bare {
fn simple_1() {
let input = "foo bar baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 3));
@ -25,7 +25,7 @@ mod bare {
fn simple_2() {
let input = "'foo bar' baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
@ -35,7 +35,7 @@ mod bare {
fn simple_3() {
let input = "'foo\" bar' baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 10));
@ -45,7 +45,7 @@ mod bare {
fn simple_4() {
let input = "[foo bar] baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
@ -55,7 +55,7 @@ mod bare {
fn simple_5() {
let input = "'foo 'bar baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
@ -65,7 +65,7 @@ mod bare {
fn simple_6() {
let input = "''foo baz";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 5));
@ -75,7 +75,7 @@ mod bare {
fn simple_7() {
let input = "'' foo";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 2));
@ -85,7 +85,7 @@ mod bare {
fn simple_8() {
let input = " '' foo";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(1, 3));
@ -95,7 +95,7 @@ mod bare {
fn simple_9() {
let input = " 'foo' foo";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(1, 6));
@ -105,7 +105,7 @@ mod bare {
fn simple_10() {
let input = "[foo, bar]";
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 10));
@ -118,7 +118,7 @@ mod bare {
def e [] {echo hi}
"#;
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
//result[0] == EOL
@ -141,7 +141,7 @@ def e [] {echo hi}
def e2 [] {echo hello}
"#;
let (result, err) = lex(input, 0);
let (result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
let span1 = span(2, 11);
@ -166,7 +166,7 @@ def e2 [] {echo hello}
# shouldn't return error
echo hi
}"#;
let (_result, err) = lex(input, 0);
let (_result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
}
@ -176,7 +176,7 @@ def e2 [] {echo hello}
# should "not return error
echo hi
}"#;
let (_result, err) = lex(input, 0);
let (_result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
}
@ -186,7 +186,7 @@ def e2 [] {echo hello}
# should not [return error
echo hi
}"#;
let (_result, err) = lex(input, 0);
let (_result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
}
@ -196,7 +196,7 @@ def e2 [] {echo hello}
# should not return {error
echo hi
}"#;
let (_result, err) = lex(input, 0);
let (_result, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_none());
}
@ -204,7 +204,7 @@ def e2 [] {echo hello}
fn ignore_future() {
let input = "foo 'bar";
let (result, _) = lex(input, 0);
let (result, _) = lex(input, 0, NewlineMode::Normal);
assert_eq!(result[0].span, span(0, 3));
}
@ -213,7 +213,7 @@ def e2 [] {echo hello}
fn invalid_1() {
let input = "'foo bar";
let (_, err) = lex(input, 0);
let (_, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_some());
}
@ -222,7 +222,7 @@ def e2 [] {echo hello}
fn invalid_2() {
let input = "'bar";
let (_, err) = lex(input, 0);
let (_, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_some());
}
@ -231,7 +231,7 @@ def e2 [] {echo hello}
fn invalid_4() {
let input = " 'bar";
let (_, err) = lex(input, 0);
let (_, err) = lex(input, 0, NewlineMode::Normal);
assert!(err.is_some());
}
@ -244,7 +244,7 @@ mod lite_parse {
#[test]
fn pipeline() {
let (result, err) = lex("cmd1 | cmd2 ; deploy", 0);
let (result, err) = lex("cmd1 | cmd2 ; deploy", 0, NewlineMode::Normal);
assert!(err.is_none());
let (result, err) = parse_block(result);
assert!(err.is_none());
@ -255,7 +255,7 @@ mod lite_parse {
#[test]
fn simple_1() {
let (result, err) = lex("foo", 0);
let (result, err) = lex("foo", 0, NewlineMode::Normal);
assert!(err.is_none());
let (result, err) = parse_block(result);
assert!(err.is_none());
@ -271,7 +271,7 @@ mod lite_parse {
#[test]
fn simple_offset() {
let (result, err) = lex("foo", 10);
let (result, err) = lex("foo", 10, NewlineMode::Normal);
assert!(err.is_none());
let (result, err) = parse_block(result);
assert!(err.is_none());
@ -286,7 +286,7 @@ mod lite_parse {
#[test]
fn incomplete_result() {
let (result, err) = lex("my_command \"foo' --test", 10);
let (result, err) = lex("my_command \"foo' --test", 10, NewlineMode::Normal);
assert!(matches!(
err.unwrap().reason(),
nu_errors::ParseErrorReason::Eof { .. }
@ -314,7 +314,7 @@ mod lite_parse {
# * It's much better :)
def my_echo [arg] { echo $arg }
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
let (result, err) = parse_block(result);
assert!(err.is_none());
@ -352,7 +352,7 @@ def my_echo [arg] { echo $arg }
# * It's even better!
def my_echo2 [arg] { echo $arg }
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
let (result, err) = parse_block(result);
assert!(err.is_none());
@ -404,7 +404,7 @@ def my_echo2 [arg] { echo $arg }
echo 42
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
// assert_eq!(format!("{:?}", result), "");
let (result, err) = parse_block(result);
@ -425,7 +425,7 @@ echo 42
echo 42
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
// assert_eq!(format!("{:?}", result), "");
let (result, err) = parse_block(result);
@ -445,7 +445,7 @@ fn no_discarded_white_space_start_of_comment() {
# Starting space is not discarded
echo 42
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
// assert_eq!(format!("{:?}", result), "");
let (result, err) = parse_block(result);
@ -479,7 +479,7 @@ fn multiple_discarded_white_space_start_of_comment() {
# Discard 2 spaces
echo 42
"#;
let (result, err) = lex(code, 0);
let (result, err) = lex(code, 0, NewlineMode::Normal);
assert!(err.is_none());
// assert_eq!(format!("{:?}", result), "");
let (result, err) = parse_block(result);

View File

@ -10,7 +10,7 @@ mod parse;
mod scope;
mod shapes;
pub use lex::lexer::{lex, parse_block};
pub use lex::lexer::{lex, parse_block, NewlineMode};
pub use lex::tokens::{LiteBlock, LiteCommand, LiteGroup, LitePipeline};
pub use parse::{classify_block, garbage, parse, parse_full_column_path, parse_math_expression};
pub use scope::ParserScope;

View File

@ -18,7 +18,7 @@ use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape, UnspannedPa
use nu_source::{HasSpan, Span, Spanned, SpannedItem};
use num_bigint::BigInt;
use crate::parse::def::parse_parameter;
use crate::{lex::lexer::NewlineMode, parse::def::parse_parameter};
use crate::{
lex::lexer::{lex, parse_block},
ParserScope,
@ -489,7 +489,7 @@ fn parse_subexpression(
.collect();
// We haven't done much with the inner string, so let's go ahead and work with it
let (tokens, err) = lex(&string, lite_arg.span.start() + 1);
let (tokens, err) = lex(&string, lite_arg.span.start() + 1, NewlineMode::Whitespace);
if error.is_none() {
error = err;
};
@ -792,7 +792,11 @@ fn parse_table(
error = err;
}
let (tokens, err) = lex(&string, lite_inner.parts[0].span.start() + 1);
let (tokens, err) = lex(
&string,
lite_inner.parts[0].span.start() + 1,
NewlineMode::Whitespace,
);
if err.is_some() {
return (garbage(lite_inner.span()), err);
}
@ -816,7 +820,7 @@ fn parse_table(
if error.is_none() {
error = err;
}
let (tokens, err) = lex(&string, arg.span.start() + 1);
let (tokens, err) = lex(&string, arg.span.start() + 1, NewlineMode::Whitespace);
if err.is_some() {
return (garbage(arg.span), err);
}
@ -1005,7 +1009,8 @@ fn parse_arg(
let string: String = chars.collect();
// We haven't done much with the inner string, so let's go ahead and work with it
let (tokens, err) = lex(&string, lite_arg.span.start() + 1);
let (tokens, err) =
lex(&string, lite_arg.span.start() + 1, NewlineMode::Whitespace);
if err.is_some() {
return (garbage(lite_arg.span), err);
}
@ -1071,7 +1076,8 @@ fn parse_arg(
let string: String = chars.into_iter().collect();
// We haven't done much with the inner string, so let's go ahead and work with it
let (mut tokens, err) = lex(&string, lite_arg.span.start() + 1);
let (mut tokens, err) =
lex(&string, lite_arg.span.start() + 1, NewlineMode::Normal);
if error.is_none() {
error = err;
}
@ -2172,7 +2178,7 @@ pub fn parse(
scope: &dyn ParserScope,
) -> (Arc<Block>, Option<ParseError>) {
let mut error = None;
let (output, err) = lex(input, span_offset);
let (output, err) = lex(input, span_offset, NewlineMode::Normal);
if error.is_none() {
error = err;
}

View File

@ -1,7 +1,7 @@
use std::sync::Arc;
use crate::{
lex::tokens::LiteCommand,
lex::{lexer::NewlineMode, tokens::LiteCommand},
parse::{classify_block, util::trim_quotes},
};
@ -52,7 +52,8 @@ pub(crate) fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> O
scope.enter_scope();
let (tokens, err) = lex(&string, call.parts[3].span.start() + 1);
let (tokens, err) =
lex(&string, call.parts[3].span.start() + 1, NewlineMode::Normal);
if err.is_some() {
return err;
};

View File

@ -18,7 +18,7 @@ use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape};
use nu_source::{Span, Spanned};
use crate::lex::{
lexer::{lex, Token},
lexer::{lex, NewlineMode, Token},
tokens::TokenContents,
};
@ -58,7 +58,11 @@ pub fn parse_signature(
"signature vec span start: {}",
signature_vec.span.start() + 1
);
let (tokens, error) = lex(&string, signature_vec.span.start() + 1);
let (tokens, error) = lex(
&string,
signature_vec.span.start() + 1,
NewlineMode::Whitespace,
);
err = err.or(error);
//After normal lexing, tokens also need to be split on ',' and ':'

View File

@ -1028,6 +1028,37 @@ fn pipeline_params_inner() {
assert_eq!(actual.out, "126");
}
// Runs a script that binds a table literal whose header and each row sit on
// their own line, then reads the `size` column of row index 1.
// NOTE(review): `pipeline(...)` may normalize the raw string (e.g. join lines)
// before it reaches the lexer — confirm the newlines actually survive.
#[test]
fn better_table_lex() {
let actual = nu!(
cwd: ".", pipeline(
r#"
let table = [
[name, size];
[small, 7]
[medium, 10]
[large, 12]
];
$table.1.size
"#)
);
// Row index 1 is the `medium` row, whose size is 10.
assert_eq!(actual.out, "10");
}
// Runs a parenthesized subexpression whose contents are split across two
// lines in the raw string and checks the pipeline's final output.
// NOTE(review): `pipeline(...)` may normalize the raw string (e.g. join lines)
// before it reaches the lexer — confirm the newline actually survives.
#[test]
fn better_subexpr_lex() {
let actual = nu!(
cwd: ".", pipeline(
r#"
(echo boo
sam | str length | math sum)
"#)
);
// "6" matches `boo` and `sam` both being echoed as one command (3 + 3).
assert_eq!(actual.out, "6");
}
mod parse {
use nu_test_support::nu;