Revert "Revert "Removed file_id in Span, compact file sources""

This commit is contained in:
JT 2021-07-03 15:35:15 +12:00 committed by GitHub
parent 03ce896f6f
commit a6e0f0bb74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 82 additions and 116 deletions

View File

@ -51,11 +51,7 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
}
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
file_id: usize,
) -> (Span, Option<ParseError>) {
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
// closing quote.
@ -137,7 +133,7 @@ pub fn lex_item(
*curr_offset += 1;
}
let span = Span::new(token_start, *curr_offset, file_id);
let span = Span::new(token_start, *curr_offset);
// If there are still unclosed opening delimiters, close them and add
// synthetic closing characters to the accumulated token.
@ -171,7 +167,6 @@ pub fn lex_item(
pub fn lex(
input: &[u8],
file_id: usize,
span_offset: usize,
lex_mode: LexMode,
) -> (Vec<Token>, Option<ParseError>) {
@ -198,7 +193,7 @@ pub fn lex(
curr_offset += 1;
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
Span::new(span_offset + prev_idx, span_offset + idx + 1),
));
continue;
}
@ -207,7 +202,7 @@ pub fn lex(
// Otherwise, it's just a regular `|` token.
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1, file_id),
Span::new(span_offset + idx, span_offset + idx + 1),
));
is_complete = false;
} else if c == b';' {
@ -217,14 +212,13 @@ pub fn lex(
error = Some(ParseError::ExtraTokens(Span::new(
curr_offset,
curr_offset + 1,
file_id,
)));
}
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Semicolon,
Span::new(idx, idx + 1, file_id),
Span::new(idx, idx + 1),
));
} else if c == b'\n' || c == b'\r' {
// If the next character is a newline, we're looking at an EOL (end of line) token.
@ -232,10 +226,7 @@ pub fn lex(
let idx = curr_offset;
curr_offset += 1;
if lex_mode == LexMode::Normal {
output.push(Token::new(
TokenContents::Eol,
Span::new(idx, idx + 1, file_id),
));
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
}
} else if c == b'#' {
// If the next character is `#`, we're at the beginning of a line
@ -247,7 +238,7 @@ pub fn lex(
if *input == b'\n' {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset, file_id),
Span::new(start, curr_offset),
));
start = curr_offset;
@ -257,7 +248,7 @@ pub fn lex(
if start != curr_offset {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset, file_id),
Span::new(start, curr_offset),
));
}
} else if c == b' ' || c == b'\t' {
@ -266,7 +257,7 @@ pub fn lex(
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset, file_id);
let (span, err) = lex_item(input, &mut curr_offset);
if error.is_none() {
error = err;
}
@ -285,7 +276,7 @@ mod lex_tests {
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, 0, LexMode::Normal);
let output = lex(file, 0, LexMode::Normal);
assert!(output.1.is_none());
}
@ -294,16 +285,12 @@ mod lex_tests {
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, 0, LexMode::Normal);
let output = lex(file, 0, LexMode::Normal);
println!("{:#?}", output.0);
assert!(output.0.contains(&Token {
contents: TokenContents::Eol,
span: Span {
start: 11,
end: 12,
file_id: 0
}
span: Span { start: 11, end: 12 }
}));
}
@ -311,7 +298,7 @@ mod lex_tests {
fn lex_empty() {
let file = b"";
let output = lex(file, 0, 0, LexMode::Normal);
let output = lex(file, 0, LexMode::Normal);
assert!(output.0.is_empty());
assert!(output.1.is_none());

View File

@ -128,7 +128,7 @@ mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
let (output, err) = lex(input, 0, crate::LexMode::Normal);
if let Some(err) = err {
return Err(err);
}
@ -194,11 +194,7 @@ mod tests {
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(
lite_block.block[0].commands[0].comments[0],
Span {
start: 21,
end: 39,
file_id: 0
}
Span { start: 21, end: 39 }
);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);

View File

@ -8,10 +8,17 @@ fn main() -> std::io::Result<()> {
working_set.add_decl((b"foo").to_vec(), sig);
let file = std::fs::read(&path)?;
let (output, err) = working_set.parse_file(&path, &file);
let (output, err) = working_set.parse_file(&path, file);
//let (output, err) = working_set.parse_source(path.as_bytes());
println!("{}", output.len());
println!("error: {:?}", err);
// println!("{}", size_of::<Statement>());
// let mut buffer = String::new();
// let stdin = std::io::stdin();
// let mut handle = stdin.lock();
// handle.read_to_string(&mut buffer)?;
Ok(())
} else {

View File

@ -109,7 +109,6 @@ pub enum Expr {
#[derive(Debug, Clone)]
pub struct Expression {
expr: Expr,
ty: Type,
span: Span,
}
impl Expression {
@ -117,7 +116,7 @@ impl Expression {
Expression {
expr: Expr::Garbage,
span,
ty: Type::Unknown,
//ty: Type::Unknown,
}
}
pub fn precedence(&self) -> usize {
@ -264,13 +263,12 @@ fn span(spans: &[Span]) -> Span {
if length == 0 {
Span::unknown()
} else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
} else if length == 1 {
spans[0]
} else {
Span {
start: spans[0].start,
end: spans[length - 1].end,
file_id: spans[0].file_id,
}
}
}
@ -342,7 +340,6 @@ impl ParserWorkingSet {
let short_flag_span = Span {
start: orig.start + 1 + short_flag.0,
end: orig.start + 1 + short_flag.0 + 1,
file_id: orig.file_id,
};
if let Some(flag) = sig.get_short_flag(short_flag_char) {
// If we require an arg and are in a batch of short flags, error
@ -419,7 +416,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Call(Box::new(call)),
ty: Type::Unknown,
//ty: Type::Unknown,
span: span(spans),
},
error,
@ -435,7 +432,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -451,7 +447,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -467,7 +462,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -482,7 +476,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(x),
ty: Type::Int,
span,
},
None,
@ -510,14 +503,9 @@ impl ParserWorkingSet {
let bytes = self.get_span_contents(span);
if let Some(var_id) = self.find_variable(bytes) {
let ty = *self
.get_variable(var_id)
.expect("internal error: invalid VarId");
(
Expression {
expr: Expr::Var(var_id),
ty,
span,
},
None,
@ -547,21 +535,16 @@ impl ParserWorkingSet {
Span {
start: end,
end: end + 1,
file_id: span.file_id,
},
))
});
}
let span = Span {
start,
end,
file_id: span.file_id,
};
let span = Span { start, end };
let source = self.get_file_contents(span.file_id);
let source = self.get_span_contents(span);
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
let (output, err) = lex(&source, start, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -573,7 +556,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Subexpression(Box::new(output)),
ty: Type::Unknown,
span,
},
error,
@ -599,21 +581,16 @@ impl ParserWorkingSet {
Span {
start: end,
end: end + 1,
file_id: span.file_id,
},
))
});
}
let span = Span {
start,
end,
file_id: span.file_id,
};
let span = Span { start, end };
let source = self.get_file_contents(span.file_id);
let source = &self.file_contents[..end];
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
let (output, err) = lex(&source, start, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -627,7 +604,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Block(Box::new(output)),
ty: Type::Unknown,
span,
},
error,
@ -738,7 +714,6 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Operator(operator),
ty: Type::Unknown,
span,
},
None,
@ -803,7 +778,6 @@ impl ParserWorkingSet {
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
span: op_span,
ty: Type::Unknown,
});
}
}
@ -829,7 +803,6 @@ impl ParserWorkingSet {
let binary_op_span = span(&[lhs.span, rhs.span]);
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
ty: Type::Unknown,
span: binary_op_span,
});
}
@ -889,7 +862,7 @@ impl ParserWorkingSet {
error = error.or(err);
let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
let var_id = self.add_variable(var_name, expression.ty);
let var_id = self.add_variable(var_name, Type::Unknown);
(Statement::VarDecl(VarDecl { var_id, expression }), error)
} else {
@ -928,14 +901,14 @@ impl ParserWorkingSet {
(block, error)
}
pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
let mut error = None;
let file_id = self.add_file(fname.into(), contents.into());
let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
error = error.or(err);
self.add_file(fname.into(), contents);
let (output, err) = lite_parse(&output);
error = error.or(err);
@ -948,9 +921,9 @@ impl ParserWorkingSet {
pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None;
let file_id = self.add_file("source".into(), source.into());
self.add_file("source".into(), source.into());
let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
let (output, err) = lex(source, 0, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);

View File

@ -2,7 +2,8 @@ use crate::{Signature, Span};
use std::{collections::HashMap, sync::Arc};
pub struct ParserState {
files: Vec<(String, Vec<u8>)>,
files: Vec<(String, usize, usize)>,
file_contents: Vec<u8>,
vars: Vec<Type>,
decls: Vec<Signature>,
}
@ -41,6 +42,7 @@ impl ParserState {
pub fn new() -> Self {
Self {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
}
@ -53,6 +55,7 @@ impl ParserState {
// Take the mutable reference and extend the permanent state from the working set
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
this.files.extend(working_set.files);
this.file_contents.extend(working_set.file_contents);
this.decls.extend(working_set.decls);
this.vars.extend(working_set.vars);
@ -82,20 +85,27 @@ impl ParserState {
self.decls.get(decl_id)
}
#[allow(unused)]
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
self.files.push((filename, contents));
self.num_files() - 1
pub fn next_span_start(&self) -> usize {
self.file_contents.len()
}
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
&self.files[idx].1
#[allow(unused)]
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start();
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.num_files() - 1
}
}
pub struct ParserWorkingSet {
files: Vec<(String, Vec<u8>)>,
files: Vec<(String, usize, usize)>,
pub(crate) file_contents: Vec<u8>,
vars: Vec<Type>, // indexed by VarId
decls: Vec<Signature>, // indexed by DeclId
permanent_state: Option<Arc<ParserState>>,
@ -106,6 +116,7 @@ impl ParserWorkingSet {
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
Self {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
permanent_state,
@ -137,35 +148,36 @@ impl ParserWorkingSet {
decl_id
}
pub fn next_span_start(&self) -> usize {
if let Some(permanent_state) = &self.permanent_state {
permanent_state.next_span_start() + self.file_contents.len()
} else {
self.file_contents.len()
}
}
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
self.files.push((filename, contents));
let next_span_start = self.next_span_start();
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.num_files() - 1
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state {
let num_permanent_files = permanent_state.num_files();
if span.file_id < num_permanent_files {
&permanent_state.get_file_contents(span.file_id)[span.start..span.end]
let permanent_end = permanent_state.next_span_start();
if permanent_end <= span.start {
&self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
} else {
&self.files[span.file_id - num_permanent_files].1[span.start..span.end]
&permanent_state.file_contents[span.start..span.end]
}
} else {
&self.files[span.file_id].1[span.start..span.end]
}
}
pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state {
let num_permanent_files = permanent_state.num_files();
if file_id < num_permanent_files {
&permanent_state.get_file_contents(file_id)
} else {
&self.files[file_id - num_permanent_files].1
}
} else {
&self.files[file_id].1
&self.file_contents[span.start..span.end]
}
}

View File

@ -2,23 +2,14 @@
pub struct Span {
pub start: usize,
pub end: usize,
pub file_id: usize,
}
impl Span {
pub fn new(start: usize, end: usize, file_id: usize) -> Span {
Span {
start,
end,
file_id,
}
pub fn new(start: usize, end: usize) -> Span {
Span { start, end }
}
pub fn unknown() -> Span {
Span {
start: usize::MAX,
end: usize::MAX,
file_id: usize::MAX,
}
Span { start: 0, end: 0 }
}
}