Revert "Removed file_id in Span, compact file sources"

This commit is contained in:
JT 2021-07-03 15:11:24 +12:00 committed by GitHub
parent 049477a9bd
commit 80e0cd4e00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 114 additions and 80 deletions

View File

@ -51,7 +51,11 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
}
pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
file_id: usize,
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
// closing quote.
@ -133,7 +137,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
*curr_offset += 1;
}
let span = Span::new(token_start, *curr_offset);
let span = Span::new(token_start, *curr_offset, file_id);
// If there are still unclosed opening delimiters, close them and add
// synthetic closing characters to the accumulated token.
@ -167,6 +171,7 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseErr
pub fn lex(
input: &[u8],
file_id: usize,
span_offset: usize,
lex_mode: LexMode,
) -> (Vec<Token>, Option<ParseError>) {
@ -193,7 +198,7 @@ pub fn lex(
curr_offset += 1;
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + prev_idx, span_offset + idx + 1),
Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
));
continue;
}
@ -202,7 +207,7 @@ pub fn lex(
// Otherwise, it's just a regular `|` token.
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1),
Span::new(span_offset + idx, span_offset + idx + 1, file_id),
));
is_complete = false;
} else if c == b';' {
@ -212,13 +217,14 @@ pub fn lex(
error = Some(ParseError::ExtraTokens(Span::new(
curr_offset,
curr_offset + 1,
file_id,
)));
}
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Semicolon,
Span::new(idx, idx + 1),
Span::new(idx, idx + 1, file_id),
));
} else if c == b'\n' || c == b'\r' {
// If the next character is a newline, we're looking at an EOL (end of line) token.
@ -226,7 +232,10 @@ pub fn lex(
let idx = curr_offset;
curr_offset += 1;
if lex_mode == LexMode::Normal {
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
output.push(Token::new(
TokenContents::Eol,
Span::new(idx, idx + 1, file_id),
));
}
} else if c == b'#' {
// If the next character is `#`, we're at the beginning of a line
@ -238,7 +247,7 @@ pub fn lex(
if *input == b'\n' {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset),
Span::new(start, curr_offset, file_id),
));
start = curr_offset;
@ -248,7 +257,7 @@ pub fn lex(
if start != curr_offset {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset),
Span::new(start, curr_offset, file_id),
));
}
} else if c == b' ' || c == b'\t' {
@ -257,7 +266,7 @@ pub fn lex(
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset);
let (span, err) = lex_item(input, &mut curr_offset, file_id);
if error.is_none() {
error = err;
}
@ -276,7 +285,7 @@ mod lex_tests {
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, LexMode::Normal);
let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.1.is_none());
}
@ -285,12 +294,16 @@ mod lex_tests {
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, LexMode::Normal);
let output = lex(file, 0, 0, LexMode::Normal);
println!("{:#?}", output.0);
assert!(output.0.contains(&Token {
contents: TokenContents::Eol,
span: Span { start: 11, end: 12 }
span: Span {
start: 11,
end: 12,
file_id: 0
}
}));
}
@ -298,7 +311,7 @@ mod lex_tests {
fn lex_empty() {
let file = b"";
let output = lex(file, 0, LexMode::Normal);
let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.0.is_empty());
assert!(output.1.is_none());

View File

@ -128,7 +128,7 @@ mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, crate::LexMode::Normal);
let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
if let Some(err) = err {
return Err(err);
}
@ -194,7 +194,11 @@ mod tests {
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(
lite_block.block[0].commands[0].comments[0],
Span { start: 21, end: 39 }
Span {
start: 21,
end: 39,
file_id: 0
}
);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);

View File

@ -8,17 +8,10 @@ fn main() -> std::io::Result<()> {
working_set.add_decl((b"foo").to_vec(), sig);
let file = std::fs::read(&path)?;
let (output, err) = working_set.parse_file(&path, file);
let (output, err) = working_set.parse_file(&path, &file);
//let (output, err) = working_set.parse_source(path.as_bytes());
println!("{}", output.len());
println!("error: {:?}", err);
// println!("{}", size_of::<Statement>());
// let mut buffer = String::new();
// let stdin = std::io::stdin();
// let mut handle = stdin.lock();
// handle.read_to_string(&mut buffer)?;
Ok(())
} else {

View File

@ -109,6 +109,7 @@ pub enum Expr {
/// A single expression produced by the parser: what kind of expression it is,
/// the type attached to it, and the span it was built from.
#[derive(Debug, Clone)]
pub struct Expression {
/// The kind of expression and its payload (for example `Expr::Int`,
/// `Expr::Var`, `Expr::Call`, or `Expr::Garbage`).
expr: Expr,
/// The type attached to this expression. The parser sets `Type::Int` for
/// integer literals, copies the stored type for variable references, and
/// uses `Type::Unknown` for calls, subexpressions, blocks, operators,
/// binary operations, and garbage expressions.
ty: Type,
/// The span recorded for this expression.
span: Span,
}
impl Expression {
@ -116,7 +117,7 @@ impl Expression {
Expression {
expr: Expr::Garbage,
span,
//ty: Type::Unknown,
ty: Type::Unknown,
}
}
pub fn precedence(&self) -> usize {
@ -263,12 +264,13 @@ fn span(spans: &[Span]) -> Span {
if length == 0 {
Span::unknown()
} else if length == 1 {
} else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
spans[0]
} else {
Span {
start: spans[0].start,
end: spans[length - 1].end,
file_id: spans[0].file_id,
}
}
}
@ -340,6 +342,7 @@ impl ParserWorkingSet {
let short_flag_span = Span {
start: orig.start + 1 + short_flag.0,
end: orig.start + 1 + short_flag.0 + 1,
file_id: orig.file_id,
};
if let Some(flag) = sig.get_short_flag(short_flag_char) {
// If we require an arg and are in a batch of short flags, error
@ -416,7 +419,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Call(Box::new(call)),
//ty: Type::Unknown,
ty: Type::Unknown,
span: span(spans),
},
error,
@ -432,6 +435,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -447,6 +451,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -462,6 +467,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(v),
ty: Type::Int,
span,
},
None,
@ -476,6 +482,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Int(x),
ty: Type::Int,
span,
},
None,
@ -503,9 +510,14 @@ impl ParserWorkingSet {
let bytes = self.get_span_contents(span);
if let Some(var_id) = self.find_variable(bytes) {
let ty = *self
.get_variable(var_id)
.expect("internal error: invalid VarId");
(
Expression {
expr: Expr::Var(var_id),
ty,
span,
},
None,
@ -535,16 +547,21 @@ impl ParserWorkingSet {
Span {
start: end,
end: end + 1,
file_id: span.file_id,
},
))
});
}
let span = Span { start, end };
let span = Span {
start,
end,
file_id: span.file_id,
};
let source = self.get_span_contents(span);
let source = self.get_file_contents(span.file_id);
let (output, err) = lex(&source, start, crate::LexMode::Normal);
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -556,6 +573,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Subexpression(Box::new(output)),
ty: Type::Unknown,
span,
},
error,
@ -581,16 +599,21 @@ impl ParserWorkingSet {
Span {
start: end,
end: end + 1,
file_id: span.file_id,
},
))
});
}
let span = Span { start, end };
let span = Span {
start,
end,
file_id: span.file_id,
};
let source = &self.file_contents[..end];
let source = self.get_file_contents(span.file_id);
let (output, err) = lex(&source, start, crate::LexMode::Normal);
let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
@ -604,6 +627,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Block(Box::new(output)),
ty: Type::Unknown,
span,
},
error,
@ -714,6 +738,7 @@ impl ParserWorkingSet {
(
Expression {
expr: Expr::Operator(operator),
ty: Type::Unknown,
span,
},
None,
@ -778,6 +803,7 @@ impl ParserWorkingSet {
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
span: op_span,
ty: Type::Unknown,
});
}
}
@ -803,6 +829,7 @@ impl ParserWorkingSet {
let binary_op_span = span(&[lhs.span, rhs.span]);
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
ty: Type::Unknown,
span: binary_op_span,
});
}
@ -862,7 +889,7 @@ impl ParserWorkingSet {
error = error.or(err);
let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
let var_id = self.add_variable(var_name, Type::Unknown);
let var_id = self.add_variable(var_name, expression.ty);
(Statement::VarDecl(VarDecl { var_id, expression }), error)
} else {
@ -901,13 +928,13 @@ impl ParserWorkingSet {
(block, error)
}
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None;
let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
error = error.or(err);
let file_id = self.add_file(fname.into(), contents.into());
self.add_file(fname.into(), contents);
let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
@ -921,9 +948,9 @@ impl ParserWorkingSet {
pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None;
self.add_file("source".into(), source.into());
let file_id = self.add_file("source".into(), source.into());
let (output, err) = lex(source, 0, crate::LexMode::Normal);
let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);

View File

@ -2,8 +2,7 @@ use crate::{Signature, Span};
use std::{collections::HashMap, sync::Arc};
pub struct ParserState {
files: Vec<(String, usize, usize)>,
file_contents: Vec<u8>,
files: Vec<(String, Vec<u8>)>,
vars: Vec<Type>,
decls: Vec<Signature>,
}
@ -42,7 +41,6 @@ impl ParserState {
pub fn new() -> Self {
Self {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
}
@ -55,7 +53,6 @@ impl ParserState {
// Take the mutable reference and extend the permanent state from the working set
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
this.files.extend(working_set.files);
this.file_contents.extend(working_set.file_contents);
this.decls.extend(working_set.decls);
this.vars.extend(working_set.vars);
@ -85,27 +82,20 @@ impl ParserState {
self.decls.get(decl_id)
}
pub fn next_span_start(&self) -> usize {
self.file_contents.len()
}
#[allow(unused)]
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start();
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.files.push((filename, contents));
self.num_files() - 1
}
/// Returns the bytes stored for the file at position `idx` in `files`.
///
/// # Panics
///
/// Panics if `idx` is not a valid index into `files`.
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
    // Each entry is a `(filename, contents)` pair; only the contents are needed.
    let (_, contents) = &self.files[idx];
    contents
}
}
pub struct ParserWorkingSet {
files: Vec<(String, usize, usize)>,
pub(crate) file_contents: Vec<u8>,
files: Vec<(String, Vec<u8>)>,
vars: Vec<Type>, // indexed by VarId
decls: Vec<Signature>, // indexed by DeclId
permanent_state: Option<Arc<ParserState>>,
@ -116,7 +106,6 @@ impl ParserWorkingSet {
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
Self {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
permanent_state,
@ -148,36 +137,35 @@ impl ParserWorkingSet {
decl_id
}
pub fn next_span_start(&self) -> usize {
if let Some(permanent_state) = &self.permanent_state {
permanent_state.next_span_start() + self.file_contents.len()
} else {
self.file_contents.len()
}
}
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start();
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.files.push((filename, contents));
self.num_files() - 1
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state {
let permanent_end = permanent_state.next_span_start();
if permanent_end <= span.start {
&self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
let num_permanent_files = permanent_state.num_files();
if span.file_id < num_permanent_files {
&permanent_state.get_file_contents(span.file_id)[span.start..span.end]
} else {
&permanent_state.file_contents[span.start..span.end]
&self.files[span.file_id - num_permanent_files].1[span.start..span.end]
}
} else {
&self.file_contents[span.start..span.end]
&self.files[span.file_id].1[span.start..span.end]
}
}
/// Returns the full contents of the file identified by `file_id`.
///
/// When a permanent state is attached, ids below its file count are looked
/// up there; any other id is rebased by that count and looked up in this
/// working set's own `files`. Without a permanent state the id indexes
/// `files` directly.
///
/// # Panics
///
/// Panics if the (rebased) id is not a valid index into the list it is
/// resolved against.
pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
    match &self.permanent_state {
        Some(permanent_state) => {
            let num_permanent_files = permanent_state.num_files();
            if file_id < num_permanent_files {
                // Already a `&[u8]`; no extra borrow is needed here.
                permanent_state.get_file_contents(file_id)
            } else {
                // Working-set files are numbered after the permanent ones.
                &self.files[file_id - num_permanent_files].1
            }
        }
        None => &self.files[file_id].1,
    }
}

View File

@ -2,14 +2,23 @@
pub struct Span {
pub start: usize,
pub end: usize,
pub file_id: usize,
}
impl Span {
pub fn new(start: usize, end: usize) -> Span {
Span { start, end }
pub fn new(start: usize, end: usize, file_id: usize) -> Span {
Span {
start,
end,
file_id,
}
}
pub fn unknown() -> Span {
Span { start: 0, end: 0 }
Span {
start: usize::MAX,
end: usize::MAX,
file_id: usize::MAX,
}
}
}