diff --git a/src/lex.rs b/src/lex.rs
index 84f91f8cc0..b10f67055f 100644
--- a/src/lex.rs
+++ b/src/lex.rs
@@ -51,11 +51,7 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
         && (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
 }
 
-pub fn lex_item(
-    input: &[u8],
-    curr_offset: &mut usize,
-    file_id: usize,
-) -> (Span, Option<ParseError>) {
+pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option<ParseError>) {
     // This variable tracks the starting character of a string literal, so that
     // we remain inside the string literal lexer mode until we encounter the
     // closing quote.
@@ -137,7 +133,7 @@ pub fn lex_item(
         *curr_offset += 1;
     }
 
-    let span = Span::new(token_start, *curr_offset, file_id);
+    let span = Span::new(token_start, *curr_offset);
 
     // If there is still unclosed opening delimiters, close them and add
     // synthetic closing characters to the accumulated token.
@@ -171,7 +167,6 @@ pub fn lex(
 
 pub fn lex(
     input: &[u8],
-    file_id: usize,
     span_offset: usize,
     lex_mode: LexMode,
 ) -> (Vec<Token>, Option<ParseError>) {
@@ -198,7 +193,7 @@ pub fn lex(
                 curr_offset += 1;
                 output.push(Token::new(
                     TokenContents::Item,
-                    Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
+                    Span::new(span_offset + prev_idx, span_offset + idx + 1),
                 ));
                 continue;
             }
@@ -207,7 +202,7 @@ pub fn lex(
             // Otherwise, it's just a regular `|` token.
             output.push(Token::new(
                 TokenContents::Pipe,
-                Span::new(span_offset + idx, span_offset + idx + 1, file_id),
+                Span::new(span_offset + idx, span_offset + idx + 1),
             ));
             is_complete = false;
         } else if c == b';' {
@@ -217,14 +212,13 @@ pub fn lex(
                 error = Some(ParseError::ExtraTokens(Span::new(
                     curr_offset,
                     curr_offset + 1,
-                    file_id,
                 )));
             }
             let idx = curr_offset;
             curr_offset += 1;
             output.push(Token::new(
                 TokenContents::Semicolon,
-                Span::new(idx, idx + 1, file_id),
+                Span::new(idx, idx + 1),
             ));
         } else if c == b'\n' || c == b'\r' {
             // If the next character is a newline, we're looking at an EOL (end of line) token.
@@ -232,10 +226,7 @@ pub fn lex(
             let idx = curr_offset;
             curr_offset += 1;
             if lex_mode == LexMode::Normal {
-                output.push(Token::new(
-                    TokenContents::Eol,
-                    Span::new(idx, idx + 1, file_id),
-                ));
+                output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
             }
         } else if c == b'#' {
             // If the next character is `#`, we're at the beginning of a line
@@ -247,7 +238,7 @@ pub fn lex(
                 if *input == b'\n' {
                     output.push(Token::new(
                         TokenContents::Comment,
-                        Span::new(start, curr_offset, file_id),
+                        Span::new(start, curr_offset),
                     ));
                     start = curr_offset;
 
@@ -257,7 +248,7 @@ pub fn lex(
             if start != curr_offset {
                 output.push(Token::new(
                     TokenContents::Comment,
-                    Span::new(start, curr_offset, file_id),
+                    Span::new(start, curr_offset),
                 ));
             }
         } else if c == b' ' || c == b'\t' {
@@ -266,7 +257,7 @@ pub fn lex(
         } else {
             // Otherwise, try to consume an unclassified token.
-            let (span, err) = lex_item(input, &mut curr_offset, file_id);
+            let (span, err) = lex_item(input, &mut curr_offset);
             if error.is_none() {
                 error = err;
             }
@@ -285,7 +276,7 @@ mod lex_tests {
     fn lex_basic() {
         let file = b"let x = 4";
 
-        let output = lex(file, 0, 0, LexMode::Normal);
+        let output = lex(file, 0, LexMode::Normal);
 
         assert!(output.1.is_none());
     }
@@ -294,16 +285,12 @@
     fn lex_newline() {
         let file = b"let x = 300\nlet y = 500;";
 
-        let output = lex(file, 0, 0, LexMode::Normal);
+        let output = lex(file, 0, LexMode::Normal);
 
         println!("{:#?}", output.0);
         assert!(output.0.contains(&Token {
             contents: TokenContents::Eol,
-            span: Span {
-                start: 11,
-                end: 12,
-                file_id: 0
-            }
+            span: Span { start: 11, end: 12 }
         }));
     }
 
@@ -311,7 +298,7 @@
     fn lex_empty() {
         let file = b"";
 
-        let output = lex(file, 0, 0, LexMode::Normal);
+        let output = lex(file, 0, LexMode::Normal);
 
         assert!(output.0.is_empty());
         assert!(output.1.is_none());
diff --git a/src/lite_parse.rs b/src/lite_parse.rs
index a3f2d9e0cb..9e3e15a3d9 100644
--- a/src/lite_parse.rs
+++ b/src/lite_parse.rs
@@ -128,7 +128,7 @@ mod tests {
     use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
 
     fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
-        let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
+        let (output, err) = lex(input, 0, crate::LexMode::Normal);
         if let Some(err) = err {
             return Err(err);
         }
@@ -194,11 +194,7 @@ mod tests {
         assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
         assert_eq!(
             lite_block.block[0].commands[0].comments[0],
-            Span {
-                start: 21,
-                end: 39,
-                file_id: 0
-            }
+            Span { start: 21, end: 39 }
         );
 
         assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
diff --git a/src/main.rs b/src/main.rs
index 4c39ef36e0..a3199ad260 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,6 @@
-use engine_q::{ParserWorkingSet, Signature, SyntaxShape};
+use std::{io::Read, mem::size_of};
+
+use engine_q::{ParserWorkingSet, Signature, Statement, SyntaxShape};
 
 fn main() -> std::io::Result<()> {
     if let Some(path) = std::env::args().nth(1) {
@@ -8,10 +10,17 @@
         working_set.add_decl((b"foo").to_vec(), sig);
 
         let file = std::fs::read(&path)?;
-        let (output, err) = working_set.parse_file(&path, &file);
+        let (output, err) = working_set.parse_file(&path, file);
         //let (output, err) = working_set.parse_source(path.as_bytes());
         println!("{}", output.len());
         println!("error: {:?}", err);
+        // println!("{}", size_of::<Statement>());
+
+        // let mut buffer = String::new();
+        // let stdin = std::io::stdin();
+        // let mut handle = stdin.lock();
+
+        // handle.read_to_string(&mut buffer)?;
 
         Ok(())
     } else {
diff --git a/src/parser.rs b/src/parser.rs
index 07b010f446..7baa5a2d78 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -109,7 +109,6 @@ pub enum Expr {
 #[derive(Debug, Clone)]
 pub struct Expression {
     expr: Expr,
-    ty: Type,
     span: Span,
 }
 impl Expression {
@@ -117,7 +116,7 @@ impl Expression {
         Expression {
             expr: Expr::Garbage,
             span,
-            ty: Type::Unknown,
+            //ty: Type::Unknown,
         }
     }
     pub fn precedence(&self) -> usize {
@@ -264,13 +263,12 @@ fn span(spans: &[Span]) -> Span {

     if length == 0 {
         Span::unknown()
-    } else if length == 1 || spans[0].file_id != spans[length - 1].file_id {
+    } else if length == 1 {
         spans[0]
     } else {
         Span {
             start: spans[0].start,
             end: spans[length - 1].end,
-            file_id: spans[0].file_id,
         }
     }
 }
@@ -342,7 +340,6 @@ impl ParserWorkingSet {
                         let short_flag_span = Span {
                             start: orig.start + 1 + short_flag.0,
                             end: orig.start + 1 + short_flag.0 + 1,
-                            file_id: orig.file_id,
                         };
                         if let Some(flag) = sig.get_short_flag(short_flag_char) {
                             // If we require an arg and are in a batch of short flags, error
@@ -419,7 +416,7 @@ impl ParserWorkingSet {
         (
             Expression {
                 expr: Expr::Call(Box::new(call)),
-                ty: Type::Unknown,
+                //ty: Type::Unknown,
                 span: span(spans),
             },
             error,
         )
@@ -435,7 +432,7 @@ impl ParserWorkingSet {
            (
                Expression {
                    expr: Expr::Int(v),
-                   ty: Type::Int,
                    span,
                },
                None,
            )
@@ -451,7 +447,7 @@ impl ParserWorkingSet {
            (
                Expression {
                    expr: Expr::Int(v),
-                   ty: Type::Int,
                    span,
                },
                None,
            )
@@ -467,7 +462,7 @@ impl ParserWorkingSet {
            (
                Expression {
                    expr: Expr::Int(v),
-                   ty: Type::Int,
                    span,
                },
                None,
            )
@@ -482,7 +476,7 @@ impl ParserWorkingSet {
            (
                Expression {
                    expr: Expr::Int(x),
-                   ty: Type::Int,
                    span,
                },
                None,
            )
@@ -510,14 +503,9 @@ impl ParserWorkingSet {
        let bytes = self.get_span_contents(span);

        if let Some(var_id) = self.find_variable(bytes) {
-            let ty = *self
-                .get_variable(var_id)
-                .expect("internal error: invalid VarId");
-
            (
                Expression {
                    expr: Expr::Var(var_id),
-                    ty,
                    span,
                },
                None,
            )
@@ -547,21 +535,16 @@ impl ParserWorkingSet {
                    Span {
                        start: end,
                        end: end + 1,
-                        file_id: span.file_id,
                    },
                ))
            });
        }

-        let span = Span {
-            start,
-            end,
-            file_id: span.file_id,
-        };
+        let span = Span { start, end };

-        let source = self.get_file_contents(span.file_id);
+        let source = self.get_span_contents(span);

-        let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
+        let (output, err) = lex(&source, start, crate::LexMode::Normal);
        error = error.or(err);

        let (output, err) = lite_parse(&output);
@@ -573,7 +556,6 @@ impl ParserWorkingSet {
        (
            Expression {
                expr: Expr::Subexpression(Box::new(output)),
-                ty: Type::Unknown,
                span,
            },
            error,
        )
@@ -599,21 +581,16 @@ impl ParserWorkingSet {
                    Span {
                        start: end,
                        end: end + 1,
-                        file_id: span.file_id,
                    },
                ))
            });
        }

-        let span = Span {
-            start,
-            end,
-            file_id: span.file_id,
-        };
+        let span = Span { start, end };

-        let source = self.get_file_contents(span.file_id);
+        let source = &self.file_contents[..end];

-        let (output, err) = lex(&source[..end], span.file_id, start, crate::LexMode::Normal);
+        let (output, err) = lex(&source, start, crate::LexMode::Normal);
        error = error.or(err);

        let (output, err) = lite_parse(&output);
@@ -627,7 +604,6 @@ impl ParserWorkingSet {
        (
            Expression {
                expr: Expr::Block(Box::new(output)),
-                ty: Type::Unknown,
                span,
            },
            error,
        )
@@ -738,7 +714,6 @@ impl ParserWorkingSet {
        (
            Expression {
                expr: Expr::Operator(operator),
-                ty: Type::Unknown,
                span,
            },
            None,
        )
@@ -803,7 +778,6 @@ impl ParserWorkingSet {
                    expr_stack.push(Expression {
                        expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
                        span: op_span,
-                        ty: Type::Unknown,
                    });
                }
            }
@@ -829,7 +803,6 @@ impl ParserWorkingSet {
            let binary_op_span = span(&[lhs.span, rhs.span]);
            expr_stack.push(Expression {
                expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
-                ty: Type::Unknown,
                span: binary_op_span,
            });
        }
@@ -889,7 +862,7 @@ impl ParserWorkingSet {
            error = error.or(err);

            let var_name: Vec<_> = self.get_span_contents(spans[1]).into();
-            let var_id = self.add_variable(var_name, expression.ty);
+            let var_id = self.add_variable(var_name, Type::Unknown);

            (Statement::VarDecl(VarDecl { var_id, expression }), error)
        } else {
@@ -928,14 +901,14 @@ impl ParserWorkingSet {
        (block, error)
    }

-    pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
+    pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
        let mut error = None;

-        let file_id = self.add_file(fname.into(), contents.into());
-
-        let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
+        let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
         error = error.or(err);
 
+        self.add_file(fname.into(), contents);
+
         let (output, err) = lite_parse(&output);
         error = error.or(err);
 
@@ -948,9 +921,9 @@ impl ParserWorkingSet {
     pub fn parse_source(&mut self, source: &[u8]) -> (Block, Option<ParseError>) {
         let mut error = None;
 
-        let file_id = self.add_file("source".into(), source.into());
+        self.add_file("source".into(), source.into());
 
-        let (output, err) = lex(source, file_id, 0, crate::LexMode::Normal);
+        let (output, err) = lex(source, 0, crate::LexMode::Normal);
         error = error.or(err);
 
         let (output, err) = lite_parse(&output);
diff --git a/src/parser_state.rs b/src/parser_state.rs
index bd0dcfb41f..e37260736e 100644
--- a/src/parser_state.rs
+++ b/src/parser_state.rs
@@ -2,7 +2,8 @@ use crate::{Signature, Span};
 use std::{collections::HashMap, sync::Arc};
 
 pub struct ParserState {
-    files: Vec<(String, Vec<u8>)>,
+    files: Vec<(String, usize, usize)>,
+    file_contents: Vec<u8>,
     vars: Vec<Type>,
     decls: Vec<Signature>,
 }
@@ -41,6 +42,7 @@ impl ParserState {
     pub fn new() -> Self {
         Self {
             files: vec![],
+            file_contents: vec![],
             vars: vec![],
             decls: vec![],
         }
@@ -53,6 +55,7 @@ impl ParserState {
         // Take the mutable reference and extend the permanent state from the working set
         if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
             this.files.extend(working_set.files);
+            this.file_contents.extend(working_set.file_contents);
             this.decls.extend(working_set.decls);
             this.vars.extend(working_set.vars);
 
@@ -82,20 +85,27 @@ impl ParserState {
         self.decls.get(decl_id)
     }
 
-    #[allow(unused)]
-    pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
-        self.files.push((filename, contents));
-
-        self.num_files() - 1
+    pub fn next_span_start(&self) -> usize {
+        self.file_contents.len()
     }
 
-    pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
-        &self.files[idx].1
+    #[allow(unused)]
+    pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
+        let next_span_start = self.next_span_start();
+
+        self.file_contents.extend(&contents);
+
+        let next_span_end = self.next_span_start();
+
+        self.files.push((filename, next_span_start, next_span_end));
+
+        self.num_files() - 1
     }
 }
 
 pub struct ParserWorkingSet {
-    files: Vec<(String, Vec<u8>)>,
+    files: Vec<(String, usize, usize)>,
+    pub(crate) file_contents: Vec<u8>,
     vars: Vec<Type>,       // indexed by VarId
     decls: Vec<Signature>, // indexed by DeclId
     permanent_state: Option<Arc<ParserState>>,
@@ -106,6 +116,7 @@ impl ParserWorkingSet {
     pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
         Self {
             files: vec![],
+            file_contents: vec![],
             vars: vec![],
             decls: vec![],
             permanent_state,
@@ -137,35 +148,36 @@ impl ParserWorkingSet {
         decl_id
     }
 
+    pub fn next_span_start(&self) -> usize {
+        if let Some(permanent_state) = &self.permanent_state {
+            permanent_state.next_span_start() + self.file_contents.len()
+        } else {
+            self.file_contents.len()
+        }
+    }
+
     pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
-        self.files.push((filename, contents));
+        let next_span_start = self.next_span_start();
+
+        self.file_contents.extend(&contents);
+
+        let next_span_end = self.next_span_start();
+
+        self.files.push((filename, next_span_start, next_span_end));
 
         self.num_files() - 1
     }
 
     pub fn get_span_contents(&self, span: Span) -> &[u8] {
         if let Some(permanent_state) = &self.permanent_state {
-            let num_permanent_files = permanent_state.num_files();
-            if span.file_id < num_permanent_files {
-                &permanent_state.get_file_contents(span.file_id)[span.start..span.end]
+            let permanent_end = permanent_state.next_span_start();
+            if permanent_end <= span.start {
+                &self.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
             } else {
-                &self.files[span.file_id - num_permanent_files].1[span.start..span.end]
+                &permanent_state.file_contents[span.start..span.end]
             }
         } else {
-            &self.files[span.file_id].1[span.start..span.end]
-        }
-    }
-
-    pub fn get_file_contents(&self, file_id: usize) -> &[u8] {
-        if let Some(permanent_state) = &self.permanent_state {
-            let num_permanent_files = permanent_state.num_files();
-            if file_id < num_permanent_files {
-                &permanent_state.get_file_contents(file_id)
-            } else {
-                &self.files[file_id - num_permanent_files].1
-            }
-        } else {
-            &self.files[file_id].1
+            &self.file_contents[span.start..span.end]
         }
     }
 
diff --git a/src/span.rs b/src/span.rs
index 8c3f8664e4..4d436245d0 100644
--- a/src/span.rs
+++ b/src/span.rs
@@ -2,23 +2,14 @@
 pub struct Span {
     pub start: usize,
     pub end: usize,
-    pub file_id: usize,
 }
 
 impl Span {
-    pub fn new(start: usize, end: usize, file_id: usize) -> Span {
-        Span {
-            start,
-            end,
-            file_id,
-        }
+    pub fn new(start: usize, end: usize) -> Span {
+        Span { start, end }
     }
 
     pub fn unknown() -> Span {
-        Span {
-            start: usize::MAX,
-            end: usize::MAX,
-            file_id: usize::MAX,
-        }
+        Span { start: 0, end: 0 }
     }
 }
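
The sketch below is an illustrative, self-contained model of the span scheme this patch moves to, not the crate's actual API: the `WorkingSet` type, the `main()` demo, the file names, and the use of `extend_from_slice` are stand-ins, while `Span { start, end }`, `files: Vec<(String, usize, usize)>`, `file_contents`, `next_span_start`, `add_file`, and `get_span_contents` mirror names from the diff. The idea is that spans are absolute offsets into one shared buffer, so `Span` no longer needs a `file_id` and resolving a span is a single slice.

// Illustrative stand-in, not the crate's ParserWorkingSet: spans are absolute
// offsets into a single shared buffer, and files only record (name, start, end)
// ranges inside that buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Span {
    start: usize,
    end: usize,
}

struct WorkingSet {
    files: Vec<(String, usize, usize)>,
    file_contents: Vec<u8>,
}

impl WorkingSet {
    fn new() -> Self {
        Self {
            files: vec![],
            file_contents: vec![],
        }
    }

    // The next span starts wherever the shared buffer currently ends.
    fn next_span_start(&self) -> usize {
        self.file_contents.len()
    }

    fn add_file(&mut self, name: String, contents: &[u8]) -> usize {
        let start = self.next_span_start();
        self.file_contents.extend_from_slice(contents);
        let end = self.next_span_start();
        self.files.push((name, start, end));
        self.files.len() - 1
    }

    // Resolving a span is a single slice; no per-file lookup is needed.
    fn get_span_contents(&self, span: Span) -> &[u8] {
        &self.file_contents[span.start..span.end]
    }
}

fn main() {
    let mut ws = WorkingSet::new();
    ws.add_file("a.nu".into(), b"let x = 4");
    ws.add_file("b.nu".into(), b"let y = 500");
    // Offsets keep growing across files, so a span taken from the second file
    // resolves without knowing which file it came from.
    let span = Span { start: 9, end: 20 };
    assert_eq!(ws.get_span_contents(span), &b"let y = 500"[..]);
    println!("{}", String::from_utf8_lossy(ws.get_span_contents(span)));
}

Because offsets only grow as files are appended, spans created by a working set remain valid after its buffer is merged into the permanent state; that is what `ParserWorkingSet::next_span_start` in the patch accounts for by adding the permanent buffer's length to its own.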