forked from extern/nushell
Remove lexmode
This commit is contained in:
parent
03a93bd089
commit
6f1a5c8e02
77
src/lex.rs
77
src/lex.rs
@ -38,35 +38,15 @@ impl BlockKind {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
|
||||||
pub enum LexMode {
|
|
||||||
Normal,
|
|
||||||
Custom {
|
|
||||||
whitespace: Vec<u8>,
|
|
||||||
special: Vec<u8>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LexMode {
|
|
||||||
pub fn whitespace_contains(&self, b: u8) -> bool {
|
|
||||||
match self {
|
|
||||||
LexMode::Custom { ref whitespace, .. } => whitespace.contains(&b),
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn special_contains(&self, b: u8) -> bool {
|
|
||||||
match self {
|
|
||||||
LexMode::Custom { ref special, .. } => special.contains(&b),
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// A baseline token is terminated if it's not nested inside of a paired
|
// A baseline token is terminated if it's not nested inside of a paired
|
||||||
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
||||||
// whitespace.
|
// whitespace.
|
||||||
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
fn is_item_terminator(
|
||||||
|
block_level: &[BlockKind],
|
||||||
|
c: u8,
|
||||||
|
additional_whitespace: &[u8],
|
||||||
|
special_tokens: &[u8],
|
||||||
|
) -> bool {
|
||||||
block_level.is_empty()
|
block_level.is_empty()
|
||||||
&& (c == b' '
|
&& (c == b' '
|
||||||
|| c == b'\t'
|
|| c == b'\t'
|
||||||
@ -74,26 +54,23 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> b
|
|||||||
|| c == b'|'
|
|| c == b'|'
|
||||||
|| c == b';'
|
|| c == b';'
|
||||||
|| c == b'#'
|
|| c == b'#'
|
||||||
|| lex_mode.whitespace_contains(c)
|
|| additional_whitespace.contains(&c)
|
||||||
|| lex_mode.special_contains(c))
|
|| special_tokens.contains(&c))
|
||||||
}
|
}
|
||||||
|
|
||||||
// A special token is one that is a byte that stands alone as its own token. For example
|
// A special token is one that is a byte that stands alone as its own token. For example
|
||||||
// when parsing a signature you may want to have `:` be able to separate tokens and also
|
// when parsing a signature you may want to have `:` be able to separate tokens and also
|
||||||
// to be handled as its own token to notify you you're about to parse a type in the example
|
// to be handled as its own token to notify you you're about to parse a type in the example
|
||||||
// `foo:bar`
|
// `foo:bar`
|
||||||
fn is_special_item(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool {
|
||||||
block_level.is_empty()
|
block_level.is_empty() && special_tokens.contains(&c)
|
||||||
&& (match lex_mode {
|
|
||||||
LexMode::Custom { special, .. } => special.contains(&c),
|
|
||||||
_ => false,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex_item(
|
pub fn lex_item(
|
||||||
input: &[u8],
|
input: &[u8],
|
||||||
curr_offset: &mut usize,
|
curr_offset: &mut usize,
|
||||||
lex_mode: &LexMode,
|
additional_whitespace: &[u8],
|
||||||
|
special_tokens: &[u8],
|
||||||
) -> (Span, Option<ParseError>) {
|
) -> (Span, Option<ParseError>) {
|
||||||
// This variable tracks the starting character of a string literal, so that
|
// This variable tracks the starting character of a string literal, so that
|
||||||
// we remain inside the string literal lexer mode until we encounter the
|
// we remain inside the string literal lexer mode until we encounter the
|
||||||
@ -128,20 +105,20 @@ pub fn lex_item(
|
|||||||
quote_start = None;
|
quote_start = None;
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
in_comment = true;
|
in_comment = true;
|
||||||
} else if c == b'\n' {
|
} else if c == b'\n' {
|
||||||
in_comment = false;
|
in_comment = false;
|
||||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if in_comment {
|
} else if in_comment {
|
||||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if is_special_item(&block_level, c, &lex_mode) && token_start == *curr_offset {
|
} else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset {
|
||||||
*curr_offset += 1;
|
*curr_offset += 1;
|
||||||
break;
|
break;
|
||||||
} else if c == b'\'' || c == b'"' {
|
} else if c == b'\'' || c == b'"' {
|
||||||
@ -172,7 +149,7 @@ pub fn lex_item(
|
|||||||
if let Some(BlockKind::Paren) = block_level.last() {
|
if let Some(BlockKind::Paren) = block_level.last() {
|
||||||
let _ = block_level.pop();
|
let _ = block_level.pop();
|
||||||
}
|
}
|
||||||
} else if is_item_terminator(&block_level, c, &lex_mode) {
|
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -214,7 +191,8 @@ pub fn lex_item(
|
|||||||
pub fn lex(
|
pub fn lex(
|
||||||
input: &[u8],
|
input: &[u8],
|
||||||
span_offset: usize,
|
span_offset: usize,
|
||||||
lex_mode: &LexMode,
|
additional_whitespace: &[u8],
|
||||||
|
special_tokens: &[u8],
|
||||||
) -> (Vec<Token>, Option<ParseError>) {
|
) -> (Vec<Token>, Option<ParseError>) {
|
||||||
let mut error = None;
|
let mut error = None;
|
||||||
|
|
||||||
@ -271,7 +249,7 @@ pub fn lex(
|
|||||||
|
|
||||||
let idx = curr_offset;
|
let idx = curr_offset;
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
if !lex_mode.whitespace_contains(c) {
|
if !additional_whitespace.contains(&c) {
|
||||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||||
}
|
}
|
||||||
} else if c == b'#' {
|
} else if c == b'#' {
|
||||||
@ -297,13 +275,18 @@ pub fn lex(
|
|||||||
Span::new(start, curr_offset),
|
Span::new(start, curr_offset),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
} else if c == b' ' || c == b'\t' || lex_mode.whitespace_contains(c) {
|
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
|
||||||
// If the next character is non-newline whitespace, skip it.
|
// If the next character is non-newline whitespace, skip it.
|
||||||
curr_offset += 1;
|
curr_offset += 1;
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, try to consume an unclassified token.
|
// Otherwise, try to consume an unclassified token.
|
||||||
|
|
||||||
let (span, err) = lex_item(input, &mut curr_offset, &lex_mode);
|
let (span, err) = lex_item(
|
||||||
|
input,
|
||||||
|
&mut curr_offset,
|
||||||
|
additional_whitespace,
|
||||||
|
special_tokens,
|
||||||
|
);
|
||||||
if error.is_none() {
|
if error.is_none() {
|
||||||
error = err;
|
error = err;
|
||||||
}
|
}
|
||||||
@ -322,7 +305,7 @@ mod lex_tests {
|
|||||||
fn lex_basic() {
|
fn lex_basic() {
|
||||||
let file = b"let x = 4";
|
let file = b"let x = 4";
|
||||||
|
|
||||||
let output = lex(file, 0, &LexMode::Normal);
|
let output = lex(file, 0, &[], &[]);
|
||||||
|
|
||||||
assert!(output.1.is_none());
|
assert!(output.1.is_none());
|
||||||
}
|
}
|
||||||
@ -331,7 +314,7 @@ mod lex_tests {
|
|||||||
fn lex_newline() {
|
fn lex_newline() {
|
||||||
let file = b"let x = 300\nlet y = 500;";
|
let file = b"let x = 300\nlet y = 500;";
|
||||||
|
|
||||||
let output = lex(file, 0, &LexMode::Normal);
|
let output = lex(file, 0, &[], &[]);
|
||||||
|
|
||||||
println!("{:#?}", output.0);
|
println!("{:#?}", output.0);
|
||||||
assert!(output.0.contains(&Token {
|
assert!(output.0.contains(&Token {
|
||||||
@ -344,7 +327,7 @@ mod lex_tests {
|
|||||||
fn lex_empty() {
|
fn lex_empty() {
|
||||||
let file = b"";
|
let file = b"";
|
||||||
|
|
||||||
let output = lex(file, 0, &LexMode::Normal);
|
let output = lex(file, 0, &[], &[]);
|
||||||
|
|
||||||
assert!(output.0.is_empty());
|
assert!(output.0.is_empty());
|
||||||
assert!(output.1.is_none());
|
assert!(output.1.is_none());
|
||||||
|
@ -10,7 +10,7 @@ mod span;
|
|||||||
|
|
||||||
pub use declaration::Declaration;
|
pub use declaration::Declaration;
|
||||||
pub use eval::Engine;
|
pub use eval::Engine;
|
||||||
pub use lex::{lex, LexMode, Token, TokenContents};
|
pub use lex::{lex, Token, TokenContents};
|
||||||
pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
|
pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
|
||||||
pub use parse_error::ParseError;
|
pub use parse_error::ParseError;
|
||||||
pub use parser::{
|
pub use parser::{
|
||||||
|
@ -128,7 +128,7 @@ mod tests {
|
|||||||
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
||||||
|
|
||||||
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
||||||
let (output, err) = lex(input, 0, &crate::LexMode::Normal);
|
let (output, err) = lex(input, 0, &[], &[]);
|
||||||
if let Some(err) = err {
|
if let Some(err) = err {
|
||||||
return Err(err);
|
return Err(err);
|
||||||
}
|
}
|
||||||
|
@ -798,7 +798,7 @@ impl ParserWorkingSet {
|
|||||||
|
|
||||||
let source = self.get_span_contents(span);
|
let source = self.get_span_contents(span);
|
||||||
|
|
||||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
let (output, err) = lex(&source, start, &[], &[]);
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let (output, err) = lite_parse(&output);
|
let (output, err) = lite_parse(&output);
|
||||||
@ -958,14 +958,7 @@ impl ParserWorkingSet {
|
|||||||
let span = Span { start, end };
|
let span = Span { start, end };
|
||||||
let source = &self.file_contents[..span.end];
|
let source = &self.file_contents[..span.end];
|
||||||
|
|
||||||
let (output, err) = lex(
|
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[b':', b'?']);
|
||||||
&source,
|
|
||||||
span.start,
|
|
||||||
&crate::LexMode::Custom {
|
|
||||||
whitespace: vec![b'\n', b','],
|
|
||||||
special: vec![b':', b'?'],
|
|
||||||
},
|
|
||||||
);
|
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let mut args: Vec<Arg> = vec![];
|
let mut args: Vec<Arg> = vec![];
|
||||||
@ -1086,14 +1079,7 @@ impl ParserWorkingSet {
|
|||||||
let span = Span { start, end };
|
let span = Span { start, end };
|
||||||
let source = &self.file_contents[..span.end];
|
let source = &self.file_contents[..span.end];
|
||||||
|
|
||||||
let (output, err) = lex(
|
let (output, err) = lex(&source, span.start, &[b'\n', b','], &[]);
|
||||||
&source,
|
|
||||||
span.start,
|
|
||||||
&crate::LexMode::Custom {
|
|
||||||
whitespace: vec![b'\n', b','],
|
|
||||||
special: vec![],
|
|
||||||
},
|
|
||||||
);
|
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let (output, err) = lite_parse(&output);
|
let (output, err) = lite_parse(&output);
|
||||||
@ -1157,14 +1143,7 @@ impl ParserWorkingSet {
|
|||||||
|
|
||||||
let source = &self.file_contents[..end];
|
let source = &self.file_contents[..end];
|
||||||
|
|
||||||
let (output, err) = lex(
|
let (output, err) = lex(&source, start, &[b'\n', b','], &[]);
|
||||||
&source,
|
|
||||||
start,
|
|
||||||
&crate::LexMode::Custom {
|
|
||||||
whitespace: vec![b'\n', b','],
|
|
||||||
special: vec![],
|
|
||||||
},
|
|
||||||
);
|
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let (output, err) = lite_parse(&output);
|
let (output, err) = lite_parse(&output);
|
||||||
@ -1254,7 +1233,7 @@ impl ParserWorkingSet {
|
|||||||
|
|
||||||
let source = &self.file_contents[..end];
|
let source = &self.file_contents[..end];
|
||||||
|
|
||||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
let (output, err) = lex(&source, start, &[], &[]);
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let (output, err) = lite_parse(&output);
|
let (output, err) = lite_parse(&output);
|
||||||
@ -1718,7 +1697,7 @@ impl ParserWorkingSet {
|
|||||||
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
||||||
let mut error = None;
|
let mut error = None;
|
||||||
|
|
||||||
let (output, err) = lex(&contents, 0, &crate::LexMode::Normal);
|
let (output, err) = lex(&contents, 0, &[], &[]);
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
self.add_file(fname.into(), contents);
|
self.add_file(fname.into(), contents);
|
||||||
@ -1737,7 +1716,7 @@ impl ParserWorkingSet {
|
|||||||
|
|
||||||
self.add_file("source".into(), source.into());
|
self.add_file("source".into(), source.into());
|
||||||
|
|
||||||
let (output, err) = lex(source, 0, &crate::LexMode::Normal);
|
let (output, err) = lex(source, 0, &[], &[]);
|
||||||
error = error.or(err);
|
error = error.or(err);
|
||||||
|
|
||||||
let (output, err) = lite_parse(&output);
|
let (output, err) = lite_parse(&output);
|
||||||
|
Loading…
Reference in New Issue
Block a user