forked from extern/nushell
Revert "Removed file_id in Span, compact file sources"
This commit is contained in:
parent
049477a9bd
commit
80e0cd4e00
39
src/lex.rs
39
src/lex.rs
@ -51,7 +51,11 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
|
||||
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
|
||||
}
|
||||
|
||||
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
|
||||
pub fn lex_item(
|
||||
input: &[u8],
|
||||
curr_offset: &mut usize,
|
||||
file_id: usize,
|
||||
) -> (Span, Option<ParseError>) {
|
||||
// This variable tracks the starting character of a string literal, so that
|
||||
// we remain inside the string literal lexer mode until we encounter the
|
||||
// closing quote.
|
||||
@ -133,7 +137,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
|
||||
*curr_offset += 1;
|
||||
}
|
||||
|
||||
let span = Span::new(token_start, *curr_offset);
|
||||
let span = Span::new(token_start, *curr_offset, file_id);
|
||||
|
||||
// If there is still unclosed opening delimiters, close them and add
|
||||
// synthetic closing characters to the accumulated token.
|
||||
@ -167,6 +171,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
|
||||
|
||||
pub fn lex(
|
||||
input: &[u8],
|
||||
file_id: usize,
|
||||
span_offset: usize,
|
||||
lex_mode: LexMode,
|
||||
) -> (Vec<Token>, Option<ParseError>) {
|
||||
@ -193,7 +198,7 @@ pub fn lex(
|
||||
curr_offset += 1;
|
||||
output.push(Token::new(
|
||||
TokenContents::Item,
|
||||
Span::new(span_offset + prev_idx, span_offset + idx + 1),
|
||||
Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
|
||||
));
|
||||
continue;
|
||||
}
|
||||
@ -202,7 +207,7 @@ pub fn lex(
|
||||
// Otherwise, it's just a regular `|` token.
|
||||
output.push(Token::new(
|
||||
TokenContents::Pipe,
|
||||
Span::new(span_offset + idx, span_offset + idx + 1),
|
||||
Span::new(span_offset + idx, span_offset + idx + 1, file_id),
|
||||
));
|
||||
is_complete = false;
|
||||
} else if c == b';' {
|
||||
@ -212,13 +217,14 @@ pub fn lex(
|
||||
error = Some(ParseError::ExtraTokens(Span::new(
|
||||
curr_offset,
|
||||
curr_offset + 1,
|
||||
file_id,
|
||||
)));
|
||||
}
|
||||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
output.push(Token::new(
|
||||
TokenContents::Semicolon,
|
||||
Span::new(idx, idx + 1),
|
||||
Span::new(idx, idx + 1, file_id),
|
||||
));
|
||||
} else if c == b'\n' || c == b'\r' {
|
||||
// If the next character is a newline, we're looking at an EOL (end of line) token.
|
||||
@ -226,7 +232,10 @@ pub fn lex(
|
||||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
if lex_mode == LexMode::Normal {
|
||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||
output.push(Token::new(
|
||||
TokenContents::Eol,
|
||||
Span::new(idx, idx + 1, file_id),
|
||||
));
|
||||
}
|
||||
} else if c == b'#' {
|
||||
// If the next character is `#`, we're at the beginning of a line
|
||||
@ -238,7 +247,7 @@ pub fn lex(
|
||||
if *input == b'\n' {
|
||||
output.push(Token::new(
|
||||
TokenContents::Comment,
|
||||
Span::new(start, curr_offset),
|
||||
Span::new(start, curr_offset, file_id),
|
||||
));
|
||||
start = curr_offset;
|
||||
|
||||
@ -248,7 +257,7 @@ pub fn lex(
|
||||
if start != curr_offset {
|
||||
output.push(Token::new(
|
||||
TokenContents::Comment,
|
||||
Span::new(start, curr_offset),
|
||||
Span::new(start, curr_offset, file_id),
|
||||
));
|
||||
}
|
||||
} else if c == b' ' || c == b'\t' {
|
||||
@ -257,7 +266,7 @@ pub fn lex(
|
||||
} else {
|
||||
// Otherwise, try to consume an unclassified token.
|
||||
|
||||
let (span, err) = lex_item(input, &mut curr_offset);
|
||||
let (span, err) = lex_item(input, &mut curr_offset, file_id);
|
||||
if error.is_none() {
|
||||
error = err;
|
||||
}
|
||||
@ -276,7 +285,7 @@ mod lex_tests {
|
||||
fn lex_basic() {
|
||||
let file = b"let x = 4";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
|
||||
assert!(output.1.is_none());
|
||||
}
|
||||
@ -285,12 +294,16 @@ mod lex_tests {
|
||||
fn lex_newline() {
|
||||
let file = b"let x = 300\nlet y = 500;";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
|
||||
println!("{:#?}", output.0);
|
||||
assert!(output.0.contains(&Token {
|
||||
contents: TokenContents::Eol,
|
||||
span: Span { start: 11, end: 12 }
|
||||
span: Span {
|
||||
start: 11,
|
||||
end: 12,
|
||||
file_id: 0
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
@ -298,7 +311,7 @@ mod lex_tests {
|
||||
fn lex_empty() {
|
||||
let file = b"";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, 0, LexMode::Normal);
|
||||
|
||||
assert!(output.0.is_empty());
|
||||
assert!(output.1.is_none());
|
||||
|
@ -128,7 +128,7 @@ mod tests {
|
||||
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
||||
|
||||
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
||||
let (output, err) = lex(input, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
|
||||
if let Some(err) = err {
|
||||
return Err(err);
|
||||
}
|
||||
@ -194,7 +194,11 @@ mod tests {
|
||||
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
|
||||
assert_eq!(
|
||||
lite_block.block[0].commands[0].comments[0],
|
||||
Span { start: 21, end: 39 }
|
||||
Span {
|
||||
start: 21,
|
||||
end: 39,
|
||||
file_id: 0
|
||||
}
|
||||
);
|
||||
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
|
||||
|
||||
|
@ -8,17 +8,10 @@ fn main() -> std::io::Result<()> {
|
||||
working_set.add_decl((b"foo").to_vec(), sig);
|
||||
|
||||
let file = std::fs::read(&path)?;
|
||||
let (output, err) = working_set.parse_file(&path, file);
|
||||
let (output, err) = working_set.parse_file(&path, &file);
|
||||
//let (output, err) = working_set.parse_source(path.as_bytes());
|
||||
println!("{}", output.len());
|
||||
println!("error: {:?}", err);
|
||||
// println!("{}", size_of::<Statement>());
|
||||
|
||||
// let mut buffer = String::new();
|
||||
// let stdin = std::io::stdin();
|
||||
// let mut handle = stdin.lock();
|
||||
|
||||
// handle.read_to_string(&mut buffer)?;
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
|
@ -109,6 +109,7 @@ pub enum Expr {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Expression {
|
||||
expr: Expr,
|
||||
ty: Type,
|
||||
span: Span,
|
||||
}
|
||||
impl Expression {
|
||||
@ -116,7 +117,7 @@ impl Expression {
|
||||
Expression {
|
||||
expr: Expr::Garbage,
|
||||
span,
|
||||
//ty: Type::Unknown,
|
||||
ty: Type::Unknown,
|
||||
}
|
||||
}
|
||||
pub fn precedence(&self) -> usize {
|
||||
@ -263,12 +264,13 @@ fn span(spans: &[Span]) -> Span {
|
||||
|
||||
if length == 0 {
|
||||
Span::unknown()
|
||||
} else if length == 1 {
|
||||
} else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
|
||||
spans[0]
|
||||
} else {
|
||||
Span {
|
||||
start: spans[0].start,
|
||||
end: spans[length - 1].end,
|
||||
file_id: spans[0].file_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -340,6 +342,7 @@ impl ParserWorkingSet {
|
||||
let short_flag_span = Span {
|
||||
start: orig.start + 1 + short_flag.0,
|
||||
end: orig.start + 1 + short_flag.0 + 1,
|
||||
file_id: orig.file_id,
|
||||
};
|
||||
if let Some(flag) = sig.get_short_flag(short_flag_char) {
|
||||
// If we require an arg and are in a batch of short flags, error
|
||||
@ -416,7 +419,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Call(Box::new(call)),
|
||||
//ty: Type::Unknown,
|
||||
ty: Type::Unknown,
|
||||
span: span(spans),
|
||||
},
|
||||
error,
|
||||
@ -432,6 +435,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -447,6 +451,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -462,6 +467,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Int(v),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -476,6 +482,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Int(x),
|
||||
ty: Type::Int,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -503,9 +510,14 @@ impl ParserWorkingSet {
|
||||
let bytes = self.get_span_contents(span);
|
||||
|
||||
if let Some(var_id) = self.find_variable(bytes) {
|
||||
let ty = *self
|
||||
.get_variable(var_id)
|
||||
.expect("internal error: invalid VarId");
|
||||
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Var(var_id),
|
||||
ty,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -535,16 +547,21 @@ impl ParserWorkingSet {
|
||||
Span {
|
||||
start: end,
|
||||
end: end + 1,
|
||||
file_id: span.file_id,
|
||||
},
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
let span = Span { start, end };
|
||||
let span = Span {
|
||||
start,
|
||||
end,
|
||||
file_id: span.file_id,
|
||||
};
|
||||
|
||||
let source = self.get_span_contents(span);
|
||||
let source = self.get_file_contents(span.file_id);
|
||||
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -556,6 +573,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Subexpression(Box::new(output)),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
error,
|
||||
@ -581,16 +599,21 @@ impl ParserWorkingSet {
|
||||
Span {
|
||||
start: end,
|
||||
end: end + 1,
|
||||
file_id: span.file_id,
|
||||
},
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
let span = Span { start, end };
|
||||
let span = Span {
|
||||
start,
|
||||
end,
|
||||
file_id: span.file_id,
|
||||
};
|
||||
|
||||
let source = &self.file_contents[..end];
|
||||
let source = self.get_file_contents(span.file_id);
|
||||
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -604,6 +627,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Block(Box::new(output)),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
error,
|
||||
@ -714,6 +738,7 @@ impl ParserWorkingSet {
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Operator(operator),
|
||||
ty: Type::Unknown,
|
||||
span,
|
||||
},
|
||||
None,
|
||||
@ -778,6 +803,7 @@ impl ParserWorkingSet {
|
||||
expr_stack.push(Expression {
|
||||
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
|
||||
span: op_span,
|
||||
ty: Type::Unknown,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -803,6 +829,7 @@ impl ParserWorkingSet {
|
||||
let binary_op_span = span(&[lhs.span, rhs.span]);
|
||||
expr_stack.push(Expression {
|
||||
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
|
||||
ty: Type::Unknown,
|
||||
span: binary_op_span,
|
||||
});
|
||||
}
|
||||
@ -862,7 +889,7 @@ impl ParserWorkingSet {
|
||||
error = error.or(err);
|
||||
|
||||
let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
|
||||
let var_id = self.add_variable(var_name, Type::Unknown);
|
||||
let var_id = self.add_variable(var_name, expression.ty);
|
||||
|
||||
(Statement::VarDecl(VarDecl { var_id, expression }), error)
|
||||
} else {
|
||||
@ -901,13 +928,13 @@ impl ParserWorkingSet {
|
||||
(block, error)
|
||||
}
|
||||
|
||||
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
||||
pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
let file_id = self.add_file(fname.into(), contents.into());
|
||||
|
||||
self.add_file(fname.into(), contents);
|
||||
let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
error = error.or(err);
|
||||
@ -921,9 +948,9 @@ impl ParserWorkingSet {
|
||||
pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
self.add_file("source".into(), source.into());
|
||||
let file_id = self.add_file("source".into(), source.into());
|
||||
|
||||
let (output, err) = lex(source, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
@ -2,8 +2,7 @@ use crate::{Signature, Span};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
pub struct ParserState {
|
||||
files: Vec<(String, usize, usize)>,
|
||||
file_contents: Vec<u8>,
|
||||
files: Vec<(String, Vec<u8>)>,
|
||||
vars: Vec<Type>,
|
||||
decls: Vec<Signature>,
|
||||
}
|
||||
@ -42,7 +41,6 @@ impl ParserState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
files: vec![],
|
||||
file_contents: vec![],
|
||||
vars: vec![],
|
||||
decls: vec![],
|
||||
}
|
||||
@ -55,7 +53,6 @@ impl ParserState {
|
||||
// Take the mutable reference and extend the permanent state from the working set
|
||||
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
|
||||
this.files.extend(working_set.files);
|
||||
this.file_contents.extend(working_set.file_contents);
|
||||
this.decls.extend(working_set.decls);
|
||||
this.vars.extend(working_set.vars);
|
||||
|
||||
@ -85,27 +82,20 @@ impl ParserState {
|
||||
self.decls.get(decl_id)
|
||||
}
|
||||
|
||||
pub fn next_span_start(&self) -> usize {
|
||||
self.file_contents.len()
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
|
||||
let next_span_start = self.next_span_start();
|
||||
|
||||
self.file_contents.extend(&contents);
|
||||
|
||||
let next_span_end = self.next_span_start();
|
||||
|
||||
self.files.push((filename, next_span_start, next_span_end));
|
||||
self.files.push((filename, contents));
|
||||
|
||||
self.num_files() - 1
|
||||
}
|
||||
|
||||
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
|
||||
&self.files[idx].1
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParserWorkingSet {
|
||||
files: Vec<(String, usize, usize)>,
|
||||
pub(crate) file_contents: Vec<u8>,
|
||||
files: Vec<(String, Vec<u8>)>,
|
||||
vars: Vec<Type>, // indexed by VarId
|
||||
decls: Vec<Signature>, // indexed by DeclId
|
||||
permanent_state: Option<Arc<ParserState>>,
|
||||
@ -116,7 +106,6 @@ impl ParserWorkingSet {
|
||||
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
|
||||
Self {
|
||||
files: vec![],
|
||||
file_contents: vec![],
|
||||
vars: vec![],
|
||||
decls: vec![],
|
||||
permanent_state,
|
||||
@ -148,36 +137,35 @@ impl ParserWorkingSet {
|
||||
decl_id
|
||||
}
|
||||
|
||||
pub fn next_span_start(&self) -> usize {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
permanent_state.next_span_start() + self.file_contents.len()
|
||||
} else {
|
||||
self.file_contents.len()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
|
||||
let next_span_start = self.next_span_start();
|
||||
|
||||
self.file_contents.extend(&contents);
|
||||
|
||||
let next_span_end = self.next_span_start();
|
||||
|
||||
self.files.push((filename, next_span_start, next_span_end));
|
||||
self.files.push((filename, contents));
|
||||
|
||||
self.num_files() - 1
|
||||
}
|
||||
|
||||
pub fn get_span_contents(&self, span: Span) -> &[u8] {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
let permanent_end = permanent_state.next_span_start();
|
||||
if permanent_end <= span.start {
|
||||
&self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
|
||||
let num_permanent_files = permanent_state.num_files();
|
||||
if span.file_id < num_permanent_files {
|
||||
&permanent_state.get_file_contents(span.file_id)[span.start..span.end]
|
||||
} else {
|
||||
&permanent_state.file_contents[span.start..span.end]
|
||||
&self.files[span.file_id - num_permanent_files].1[span.start..span.end]
|
||||
}
|
||||
} else {
|
||||
&self.file_contents[span.start..span.end]
|
||||
&self.files[span.file_id].1[span.start..span.end]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
|
||||
if let Some(permanent_state) = &self.permanent_state {
|
||||
let num_permanent_files = permanent_state.num_files();
|
||||
if file_id < num_permanent_files {
|
||||
&permanent_state.get_file_contents(file_id)
|
||||
} else {
|
||||
&self.files[file_id - num_permanent_files].1
|
||||
}
|
||||
} else {
|
||||
&self.files[file_id].1
|
||||
}
|
||||
}
|
||||
|
||||
|
15
src/span.rs
15
src/span.rs
@ -2,14 +2,23 @@
|
||||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
pub file_id: usize,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize) -> Span {
|
||||
Span { start, end }
|
||||
pub fn new(start: usize, end: usize, file_id: usize) -> Span {
|
||||
Span {
|
||||
start,
|
||||
end,
|
||||
file_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unknown() -> Span {
|
||||
Span { start: 0, end: 0 }
|
||||
Span {
|
||||
start: usize::MAX,
|
||||
end: usize::MAX,
|
||||
file_id: usize::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user