Remove lexmode

This commit is contained in:
JT 2021-07-17 10:11:15 +12:00
parent 03a93bd089
commit 6f1a5c8e02
4 changed files with 39 additions and 77 deletions

View File

@ -38,35 +38,15 @@ impl BlockKind {
}
}
/// Describes which extra bytes, if any, are reported as whitespace or as
/// special by the membership helpers below.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum LexMode {
    /// No extra bytes: both membership helpers always answer `false`.
    Normal,
    /// Caller-supplied byte sets.
    Custom {
        /// Bytes reported by `whitespace_contains`.
        whitespace: Vec<u8>,
        /// Bytes reported by `special_contains`.
        special: Vec<u8>,
    },
}

impl LexMode {
    /// Returns `true` only for a `Custom` mode whose `whitespace` list holds `b`.
    pub fn whitespace_contains(&self, b: u8) -> bool {
        if let LexMode::Custom { whitespace, .. } = self {
            whitespace.iter().any(|&byte| byte == b)
        } else {
            false
        }
    }

    /// Returns `true` only for a `Custom` mode whose `special` list holds `b`.
    pub fn special_contains(&self, b: u8) -> bool {
        if let LexMode::Custom { special, .. } = self {
            special.iter().any(|&byte| byte == b)
        } else {
            false
        }
    }
}
// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#`, any
// whitespace, or a caller-supplied extra whitespace or special byte.
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
fn is_item_terminator(
block_level: &[BlockKind],
c: u8,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> bool {
block_level.is_empty()
&& (c == b' '
|| c == b'\t'
@ -74,26 +54,23 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> b
|| c == b'|'
|| c == b';'
|| c == b'#'
|| lex_mode.whitespace_contains(c)
|| lex_mode.special_contains(c))
|| additional_whitespace.contains(&c)
|| special_tokens.contains(&c))
}
// A special token is a single byte that stands alone as its own token. For example,
// when parsing a signature you may want `:` to separate tokens and also to be
// emitted as its own token, signaling that a type is about to be parsed, as in the
// example `foo:bar`
fn is_special_item(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
block_level.is_empty()
&& (match lex_mode {
LexMode::Custom { special, .. } => special.contains(&c),
_ => false,
})
/// Reports whether `c` should be emitted as a stand-alone special token.
///
/// This is only the case when `block_level` is empty and `c` is one of the
/// bytes listed in `special_tokens`.
fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool {
    // Any open block on the stack means the byte is never special.
    if !block_level.is_empty() {
        return false;
    }

    special_tokens.iter().any(|&token| token == c)
}
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
lex_mode: &LexMode,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
@ -128,20 +105,20 @@ pub fn lex_item(
quote_start = None;
}
} else if c == b'#' {
if is_item_terminator(&block_level, c, &lex_mode) {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
in_comment = true;
} else if c == b'\n' {
in_comment = false;
if is_item_terminator(&block_level, c, &lex_mode) {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
} else if in_comment {
if is_item_terminator(&block_level, c, &lex_mode) {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
} else if is_special_item(&block_level, c, &lex_mode) && token_start == *curr_offset {
} else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset {
*curr_offset += 1;
break;
} else if c == b'\'' || c == b'"' {
@ -172,7 +149,7 @@ pub fn lex_item(
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_item_terminator(&block_level, c, &lex_mode) {
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
@ -214,7 +191,8 @@ pub fn lex_item(
pub fn lex(
input: &[u8],
span_offset: usize,
lex_mode: &LexMode,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> (Vec<Token>, Option<ParseError>) {
let mut error = None;
@ -271,7 +249,7 @@ pub fn lex(
let idx = curr_offset;
curr_offset += 1;
if !lex_mode.whitespace_contains(c) {
if !additional_whitespace.contains(&c) {
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
}
} else if c == b'#' {
@ -297,13 +275,18 @@ pub fn lex(
Span::new(start, curr_offset),
));
}
} else if c == b' ' || c == b'\t' || lex_mode.whitespace_contains(c) {
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
// If the next character is non-newline whitespace, skip it.
curr_offset += 1;
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset, &lex_mode);
let (span, err) = lex_item(
input,
&mut curr_offset,
additional_whitespace,
special_tokens,
);
if error.is_none() {
error = err;
}
@ -322,7 +305,7 @@ mod lex_tests {
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, &LexMode::Normal);
let output = lex(file, 0, &[], &[]);
assert!(output.1.is_none());
}
@ -331,7 +314,7 @@ mod lex_tests {
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, &LexMode::Normal);
let output = lex(file, 0, &[], &[]);
println!("{:#?}", output.0);
assert!(output.0.contains(&Token {
@ -344,7 +327,7 @@ mod lex_tests {
fn lex_empty() {
let file = b"";
let output = lex(file, 0, &LexMode::Normal);
let output = lex(file, 0, &[], &[]);
assert!(output.0.is_empty());
assert!(output.1.is_none());

View File

@ -10,7 +10,7 @@ mod span;
pub use declaration::Declaration;
pub use eval::Engine;
pub use lex::{lex, LexMode, Token, TokenContents};
pub use lex::{lex, Token, TokenContents};
pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
pub use parse_error::ParseError;
pub use parser::{

View File

@ -128,7 +128,7 @@ mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, &crate::LexMode::Normal);
let (output, err) = lex(input, 0, &[], &[]);
if let Some(err) = err {
return Err(err);
}

View File

@ -798,7 +798,7 @@ impl ParserWorkingSet {
let source = self.get_span_contents(span);
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
let (output, err) = lex(&source, start, &[], &[]);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -958,14 +958,7 @@ impl ParserWorkingSet {
let span = Span { start, end };
let source = &self.file_contents[..span.end];
let (output, err) = lex(
&source,
span.start,
&crate::LexMode::Custom {
whitespace: vec![b'\n', b','],
special: vec![b':', b'?'],
},
);
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[b':', b'?']);
error = error.or(err);
let mut args: Vec<Arg> = vec![];
@ -1086,14 +1079,7 @@ impl ParserWorkingSet {
let span = Span { start, end };
let source = &self.file_contents[..span.end];
let (output, err) = lex(
&source,
span.start,
&crate::LexMode::Custom {
whitespace: vec![b'\n', b','],
special: vec![],
},
);
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[]);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -1157,14 +1143,7 @@ impl ParserWorkingSet {
let source = &self.file_contents[..end];
let (output, err) = lex(
&source,
start,
&crate::LexMode::Custom {
whitespace: vec![b'\n', b','],
special: vec![],
},
);
let (output, err) = lex(&source, start, &[b'\n', b','], &[]);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -1254,7 +1233,7 @@ impl ParserWorkingSet {
let source = &self.file_contents[..end];
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
let (output, err) = lex(&source, start, &[], &[]);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -1718,7 +1697,7 @@ impl ParserWorkingSet {
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
let mut error = None;
let (output, err) = lex(&contents, 0, &crate::LexMode::Normal);
let (output, err) = lex(&contents, 0, &[], &[]);
error = error.or(err);
self.add_file(fname.into(), contents);
@ -1737,7 +1716,7 @@ impl ParserWorkingSet {
self.add_file("source".into(), source.into());
let (output, err) = lex(source, 0, &crate::LexMode::Normal);
let (output, err) = lex(source, 0, &[], &[]);
error = error.or(err);
let (output, err) = lite_parse(&output);