forked from extern/nushell
Remove lexmode
This commit is contained in:
parent
03a93bd089
commit
6f1a5c8e02
77
src/lex.rs
77
src/lex.rs
@ -38,35 +38,15 @@ impl BlockKind {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
||||
pub enum LexMode {
|
||||
Normal,
|
||||
Custom {
|
||||
whitespace: Vec<u8>,
|
||||
special: Vec<u8>,
|
||||
},
|
||||
}
|
||||
|
||||
impl LexMode {
|
||||
pub fn whitespace_contains(&self, b: u8) -> bool {
|
||||
match self {
|
||||
LexMode::Custom { ref whitespace, .. } => whitespace.contains(&b),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn special_contains(&self, b: u8) -> bool {
|
||||
match self {
|
||||
LexMode::Custom { ref special, .. } => special.contains(&b),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A baseline token is terminated if it's not nested inside of a paired
|
||||
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
||||
// whitespace.
|
||||
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
||||
fn is_item_terminator(
|
||||
block_level: &[BlockKind],
|
||||
c: u8,
|
||||
additional_whitespace: &[u8],
|
||||
special_tokens: &[u8],
|
||||
) -> bool {
|
||||
block_level.is_empty()
|
||||
&& (c == b' '
|
||||
|| c == b'\t'
|
||||
@ -74,26 +54,23 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> b
|
||||
|| c == b'|'
|
||||
|| c == b';'
|
||||
|| c == b'#'
|
||||
|| lex_mode.whitespace_contains(c)
|
||||
|| lex_mode.special_contains(c))
|
||||
|| additional_whitespace.contains(&c)
|
||||
|| special_tokens.contains(&c))
|
||||
}
|
||||
|
||||
// A special token is a single byte that stands alone as its own token. For example,
|
||||
// when parsing a signature you may want to have `:` be able to separate tokens and also
|
||||
// to be handled as its own token, signalling that a type is about to be parsed, as in the example
|
||||
// `foo:bar`
|
||||
fn is_special_item(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
||||
block_level.is_empty()
|
||||
&& (match lex_mode {
|
||||
LexMode::Custom { special, .. } => special.contains(&c),
|
||||
_ => false,
|
||||
})
|
||||
fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool {
|
||||
block_level.is_empty() && special_tokens.contains(&c)
|
||||
}
|
||||
|
||||
pub fn lex_item(
|
||||
input: &[u8],
|
||||
curr_offset: &mut usize,
|
||||
lex_mode: &LexMode,
|
||||
additional_whitespace: &[u8],
|
||||
special_tokens: &[u8],
|
||||
) -> (Span, Option<ParseError>) {
|
||||
// This variable tracks the starting character of a string literal, so that
|
||||
// we remain inside the string literal lexer mode until we encounter the
|
||||
@ -128,20 +105,20 @@ pub fn lex_item(
|
||||
quote_start = None;
|
||||
}
|
||||
} else if c == b'#' {
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||
break;
|
||||
}
|
||||
in_comment = true;
|
||||
} else if c == b'\n' {
|
||||
in_comment = false;
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||
break;
|
||||
}
|
||||
} else if in_comment {
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||
break;
|
||||
}
|
||||
} else if is_special_item(&block_level, c, &lex_mode) && token_start == *curr_offset {
|
||||
} else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset {
|
||||
*curr_offset += 1;
|
||||
break;
|
||||
} else if c == b'\'' || c == b'"' {
|
||||
@ -172,7 +149,7 @@ pub fn lex_item(
|
||||
if let Some(BlockKind::Paren) = block_level.last() {
|
||||
let _ = block_level.pop();
|
||||
}
|
||||
} else if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -214,7 +191,8 @@ pub fn lex_item(
|
||||
pub fn lex(
|
||||
input: &[u8],
|
||||
span_offset: usize,
|
||||
lex_mode: &LexMode,
|
||||
additional_whitespace: &[u8],
|
||||
special_tokens: &[u8],
|
||||
) -> (Vec<Token>, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
@ -271,7 +249,7 @@ pub fn lex(
|
||||
|
||||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
if !lex_mode.whitespace_contains(c) {
|
||||
if !additional_whitespace.contains(&c) {
|
||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||
}
|
||||
} else if c == b'#' {
|
||||
@ -297,13 +275,18 @@ pub fn lex(
|
||||
Span::new(start, curr_offset),
|
||||
));
|
||||
}
|
||||
} else if c == b' ' || c == b'\t' || lex_mode.whitespace_contains(c) {
|
||||
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
|
||||
// If the next character is non-newline whitespace, skip it.
|
||||
curr_offset += 1;
|
||||
} else {
|
||||
// Otherwise, try to consume an unclassified token.
|
||||
|
||||
let (span, err) = lex_item(input, &mut curr_offset, &lex_mode);
|
||||
let (span, err) = lex_item(
|
||||
input,
|
||||
&mut curr_offset,
|
||||
additional_whitespace,
|
||||
special_tokens,
|
||||
);
|
||||
if error.is_none() {
|
||||
error = err;
|
||||
}
|
||||
@ -322,7 +305,7 @@ mod lex_tests {
|
||||
fn lex_basic() {
|
||||
let file = b"let x = 4";
|
||||
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
let output = lex(file, 0, &[], &[]);
|
||||
|
||||
assert!(output.1.is_none());
|
||||
}
|
||||
@ -331,7 +314,7 @@ mod lex_tests {
|
||||
fn lex_newline() {
|
||||
let file = b"let x = 300\nlet y = 500;";
|
||||
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
let output = lex(file, 0, &[], &[]);
|
||||
|
||||
println!("{:#?}", output.0);
|
||||
assert!(output.0.contains(&Token {
|
||||
@ -344,7 +327,7 @@ mod lex_tests {
|
||||
fn lex_empty() {
|
||||
let file = b"";
|
||||
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
let output = lex(file, 0, &[], &[]);
|
||||
|
||||
assert!(output.0.is_empty());
|
||||
assert!(output.1.is_none());
|
||||
|
@ -10,7 +10,7 @@ mod span;
|
||||
|
||||
pub use declaration::Declaration;
|
||||
pub use eval::Engine;
|
||||
pub use lex::{lex, LexMode, Token, TokenContents};
|
||||
pub use lex::{lex, Token, TokenContents};
|
||||
pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
|
||||
pub use parse_error::ParseError;
|
||||
pub use parser::{
|
||||
|
@ -128,7 +128,7 @@ mod tests {
|
||||
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
||||
|
||||
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
||||
let (output, err) = lex(input, 0, &crate::LexMode::Normal);
|
||||
let (output, err) = lex(input, 0, &[], &[]);
|
||||
if let Some(err) = err {
|
||||
return Err(err);
|
||||
}
|
||||
|
@ -798,7 +798,7 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = self.get_span_contents(span);
|
||||
|
||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, &[], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -958,14 +958,7 @@ impl ParserWorkingSet {
|
||||
let span = Span { start, end };
|
||||
let source = &self.file_contents[..span.end];
|
||||
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
span.start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![b':', b'?'],
|
||||
},
|
||||
);
|
||||
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[b':', b'?']);
|
||||
error = error.or(err);
|
||||
|
||||
let mut args: Vec<Arg> = vec![];
|
||||
@ -1086,14 +1079,7 @@ impl ParserWorkingSet {
|
||||
let span = Span { start, end };
|
||||
let source = &self.file_contents[..span.end];
|
||||
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
span.start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![],
|
||||
},
|
||||
);
|
||||
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -1157,14 +1143,7 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = &self.file_contents[..end];
|
||||
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![],
|
||||
},
|
||||
);
|
||||
let (output, err) = lex(&source, start, &[b'\n', b','], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -1254,7 +1233,7 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = &self.file_contents[..end];
|
||||
|
||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, &[], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -1718,7 +1697,7 @@ impl ParserWorkingSet {
|
||||
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
let (output, err) = lex(&contents, 0, &crate::LexMode::Normal);
|
||||
let (output, err) = lex(&contents, 0, &[], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
self.add_file(fname.into(), contents);
|
||||
@ -1737,7 +1716,7 @@ impl ParserWorkingSet {
|
||||
|
||||
self.add_file("source".into(), source.into());
|
||||
|
||||
let (output, err) = lex(source, 0, &crate::LexMode::Normal);
|
||||
let (output, err) = lex(source, 0, &[], &[]);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
Loading…
Reference in New Issue
Block a user