first commit

JT 2021-06-30 13:42:56 +12:00
commit 29d2449fb3
9 changed files with 871 additions and 0 deletions

Cargo.toml (Normal file, 8 lines added)

@@ -0,0 +1,8 @@
[package]
name = "engine-q"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

src/lex.rs (Normal file, 319 lines added)

@@ -0,0 +1,319 @@
use crate::{ParseError, Span};
#[derive(Debug, PartialEq, Eq)]
pub enum TokenContents {
Item,
Comment,
Pipe,
Semicolon,
Eol,
}
#[derive(Debug, PartialEq, Eq)]
pub struct Token {
pub contents: TokenContents,
pub span: Span,
}
impl Token {
pub fn new(contents: TokenContents, span: Span) -> Token {
Token { contents, span }
}
}
#[derive(Clone, Copy, Debug)]
pub enum BlockKind {
Paren,
CurlyBracket,
SquareBracket,
}
impl BlockKind {
fn closing(self) -> u8 {
match self {
BlockKind::Paren => b')',
BlockKind::SquareBracket => b']',
BlockKind::CurlyBracket => b'}',
}
}
}
#[derive(PartialEq, Eq, Debug)]
pub enum LexMode {
Normal,
}
// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#` or any
// whitespace.
fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool {
block_level.is_empty()
&& (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#')
}
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
file_id: usize,
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
// closing quote.
let mut quote_start: Option<u8> = None;
let mut in_comment = false;
let token_start = *curr_offset;
// This Vec tracks paired delimiters
let mut block_level: Vec<BlockKind> = vec![];
// The process of slurping up a baseline token repeats:
//
// - String literal, which begins with `'`, `"` or `\``, and continues until
// the same character is encountered again.
// - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
// the matching closing delimiter is found, skipping comments and string
// literals.
// - When not nested inside of a delimiter pair, when a terminating
// character (whitespace, `|`, `;` or `#`) is encountered, the baseline
// token is done.
// - Otherwise, accumulate the character into the current baseline token.
while let Some(c) = input.get(*curr_offset) {
let c = *c;
if quote_start.is_some() {
// If we encountered the closing quote character for the current
// string, we're done with the current string.
if Some(c) == quote_start {
quote_start = None;
}
} else if c == b'#' {
if is_item_terminator(&block_level, c) {
break;
}
in_comment = true;
} else if c == b'\n' {
in_comment = false;
if is_item_terminator(&block_level, c) {
break;
}
} else if in_comment {
if is_item_terminator(&block_level, c) {
break;
}
} else if c == b'\'' || c == b'"' {
// We encountered the opening quote of a string literal.
quote_start = Some(c);
} else if c == b'[' {
// We encountered an opening `[` delimiter.
block_level.push(BlockKind::SquareBracket);
} else if c == b']' {
// We encountered a closing `]` delimiter. Pop off the opening `[`
// delimiter.
if let Some(BlockKind::SquareBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == b'{' {
// We encountered an opening `{` delimiter.
block_level.push(BlockKind::CurlyBracket);
} else if c == b'}' {
// We encountered a closing `}` delimiter. Pop off the opening `{`.
if let Some(BlockKind::CurlyBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == b'(' {
// We encountered an opening `(` delimiter.
block_level.push(BlockKind::Paren);
} else if c == b')' {
// We encountered a closing `)` delimiter. Pop off the opening `(`.
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_item_terminator(&block_level, c) {
break;
}
*curr_offset += 1;
}
let span = Span::new(token_start, *curr_offset, file_id);
// If there are still unclosed opening delimiters, report an unexpected
// end of input, naming the closing delimiter we expected to find.
if let Some(block) = block_level.last() {
let delim = block.closing();
let cause = ParseError::UnexpectedEof((delim as char).to_string(), span);
return (span, Some(cause));
}
if let Some(delim) = quote_start {
// The string literal was left unterminated, so report an unexpected end of
// input that names the expected closing quote. Anyone consuming this partial
// parse (e.g., completions) can use it to tell which character was missing.
return (
span,
Some(ParseError::UnexpectedEof((delim as char).to_string(), span)),
);
}
// If we didn't accumulate any characters, report an unexpected end of input.
if *curr_offset - token_start == 0 {
return (
span,
Some(ParseError::UnexpectedEof("command".to_string(), span)),
);
}
(span, None)
}
pub fn lex(
input: &[u8],
file_id: usize,
span_offset: usize,
lex_mode: LexMode,
) -> (Vec<Token>, Option<ParseError>) {
let mut error = None;
let mut curr_offset = span_offset;
let mut output = vec![];
let mut is_complete = true;
while let Some(c) = input.get(curr_offset) {
let c = *c;
if c == b'|' {
// If the next character is `|`, it's either `|` or `||`.
let idx = curr_offset;
let prev_idx = idx;
curr_offset += 1;
// If the next character is `|`, we're looking at a `||`.
if let Some(c) = input.get(curr_offset) {
if *c == b'|' {
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + prev_idx, span_offset + idx + 1, file_id),
));
continue;
}
}
// Otherwise, it's just a regular `|` token.
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1, file_id),
));
is_complete = false;
} else if c == b';' {
// If the next character is a `;`, we're looking at a semicolon token.
if !is_complete && error.is_none() {
error = Some(ParseError::ExtraTokens(Span::new(
curr_offset,
curr_offset + 1,
file_id,
)));
}
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Semicolon,
Span::new(idx, idx + 1, file_id),
));
} else if c == b'\n' || c == b'\r' {
// If the next character is a newline, we're looking at an EOL (end of line) token.
let idx = curr_offset;
curr_offset += 1;
if lex_mode == LexMode::Normal {
output.push(Token::new(
TokenContents::Eol,
Span::new(idx, idx + 1, file_id),
));
}
} else if c == b'#' {
// If the next character is `#`, we're at the beginning of a line
// comment. The comment continues until the next newline.
let mut start = curr_offset;
while let Some(input) = input.get(curr_offset) {
curr_offset += 1;
if *input == b'\n' {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset, file_id),
));
start = curr_offset;
break;
}
}
if start != curr_offset {
output.push(Token::new(
TokenContents::Comment,
Span::new(start, curr_offset, file_id),
));
}
} else if c == b' ' || c == b'\t' {
// If the next character is non-newline whitespace, skip it.
curr_offset += 1;
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(input, &mut curr_offset, file_id);
if error.is_none() {
error = err;
}
is_complete = true;
output.push(Token::new(TokenContents::Item, span));
}
}
(output, error)
}
#[cfg(test)]
mod lex_tests {
use super::*;
#[test]
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.1.is_none());
}
#[test]
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, 0, LexMode::Normal);
println!("{:#?}", output.0);
assert!(output.0.contains(&Token {
contents: TokenContents::Eol,
span: Span {
start: 11,
end: 12,
file_id: 0
}
}));
}
#[test]
fn lex_empty() {
let file = b"";
let output = lex(file, 0, 0, LexMode::Normal);
assert!(output.0.is_empty());
assert!(output.1.is_none());
}
}
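
As a rough illustration of the lexer above (not part of the commit), the sketch below feeds a short pipeline through lex and prints each token kind; the crate path engine_q is an assumption based on the name field in Cargo.toml.

// Sketch only, not part of the commit; assumes the library crate builds as `engine_q`.
use engine_q::{lex, LexMode, TokenContents};

fn main() {
    let source = b"ls | where size > 10; echo done";
    // Same arguments as the unit tests above: file_id 0, span_offset 0.
    let (tokens, err) = lex(source, 0, 0, LexMode::Normal);
    assert!(err.is_none());
    for token in &tokens {
        match &token.contents {
            TokenContents::Item => println!("item {}..{}", token.span.start, token.span.end),
            TokenContents::Pipe => println!("pipe"),
            TokenContents::Semicolon => println!("semicolon"),
            TokenContents::Eol => println!("eol"),
            TokenContents::Comment => println!("comment"),
        }
    }
}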

src/lib.rs (Normal file, 12 lines added)

@@ -0,0 +1,12 @@
mod lex;
mod lite_parse;
mod parse_error;
mod parser;
mod parser_state;
mod span;
pub use lex::{lex, LexMode, Token, TokenContents};
pub use lite_parse::{lite_parse, LiteBlock, LiteCommand, LiteStatement};
pub use parse_error::ParseError;
pub use parser_state::{ParserState, ParserWorkingSet, VarLocation};
pub use span::Span;

src/lite_parse.rs (Normal file, 207 lines added)

@@ -0,0 +1,207 @@
use crate::{ParseError, Span, Token, TokenContents};
#[derive(Debug)]
pub struct LiteCommand {
pub comments: Vec<Span>,
pub parts: Vec<Span>,
}
impl Default for LiteCommand {
fn default() -> Self {
Self::new()
}
}
impl LiteCommand {
pub fn new() -> Self {
Self {
comments: vec![],
parts: vec![],
}
}
pub fn push(&mut self, span: Span) {
self.parts.push(span);
}
pub fn is_empty(&self) -> bool {
self.parts.is_empty()
}
}
#[derive(Debug)]
pub struct LiteStatement {
pub commands: Vec<LiteCommand>,
}
impl Default for LiteStatement {
fn default() -> Self {
Self::new()
}
}
impl LiteStatement {
pub fn new() -> Self {
Self { commands: vec![] }
}
pub fn push(&mut self, command: LiteCommand) {
self.commands.push(command);
}
pub fn is_empty(&self) -> bool {
self.commands.is_empty()
}
}
#[derive(Debug)]
pub struct LiteBlock {
pub block: Vec<LiteStatement>,
}
impl Default for LiteBlock {
fn default() -> Self {
Self::new()
}
}
impl LiteBlock {
pub fn new() -> Self {
Self { block: vec![] }
}
pub fn push(&mut self, pipeline: LiteStatement) {
self.block.push(pipeline);
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut curr_token = 0;
let mut block = LiteBlock::new();
let mut curr_pipeline = LiteStatement::new();
let mut curr_command = LiteCommand::new();
while let Some(token) = tokens.get(curr_token) {
match &token.contents {
TokenContents::Item => curr_command.push(token.span),
TokenContents::Pipe => {
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
curr_command = LiteCommand::new();
}
}
TokenContents::Eol | TokenContents::Semicolon => {
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
}
curr_command = LiteCommand::new();
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
curr_pipeline = LiteStatement::new();
}
TokenContents::Comment => {
curr_command.comments.push(token.span);
}
}
curr_token += 1;
}
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
(block, None)
}
#[cfg(test)]
mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, 0, crate::LexMode::Normal);
if let Some(err) = err {
return Err(err);
}
let (output, err) = lite_parse(&output);
if let Some(err) = err {
return Err(err);
}
Ok(output)
}
#[test]
fn comment_before() -> Result<(), ParseError> {
let input = b"# this is a comment\ndef foo bar";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn comment_beside() -> Result<(), ParseError> {
let input = b"def foo bar # this is a comment";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn comments_stack() -> Result<(), ParseError> {
let input = b"# this is a comment\n# another comment\ndef foo bar ";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 2);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn separated_comments_dont_stack() -> Result<(), ParseError> {
let input = b"# this is a comment\n\n# another comment\ndef foo bar ";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(
lite_block.block[0].commands[0].comments[0],
Span {
start: 21,
end: 39,
file_id: 0
}
);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
}
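
A minimal sketch (not part of the commit) of the lex-then-lite_parse pipeline that the tests above rely on; as before, the crate path engine_q is assumed from Cargo.toml.

// Sketch only, not part of the commit; assumes the library crate builds as `engine_q`.
use engine_q::{lex, lite_parse, LexMode};

fn main() {
    let source = b"# comment attached to the next command\nls | sort-by size; echo done";
    let (tokens, lex_err) = lex(source, 0, 0, LexMode::Normal);
    assert!(lex_err.is_none());

    let (lite_block, parse_err) = lite_parse(&tokens);
    assert!(parse_err.is_none());

    // A LiteBlock is a list of statements; each statement is a list of piped
    // commands; each command holds its part spans plus any preceding comments.
    for (i, statement) in lite_block.block.iter().enumerate() {
        for command in &statement.commands {
            println!(
                "statement {}: {} parts, {} comments",
                i,
                command.parts.len(),
                command.comments.len()
            );
        }
    }
}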

src/main.rs (Normal file, 25 lines added)

@@ -0,0 +1,25 @@
use engine_q::{lex, lite_parse, LexMode, ParserWorkingSet};
fn main() -> std::io::Result<()> {
if let Some(path) = std::env::args().nth(1) {
let file = std::fs::read(&path)?;
// let (output, err) = lex(&file, 0, 0, LexMode::Normal);
// println!("{:?} tokens, error: {:?}", output, err);
// let (output, err) = lite_parse(&output);
// println!("{:?}, error: {:?}", output, err);
let mut working_set = ParserWorkingSet::new(None);
let (output, err) = working_set.parse_file(&path, &file);
println!("{:?} {:?}", output, err);
Ok(())
} else {
println!("specify file to lex");
Ok(())
}
}

src/parse_error.rs (Normal file, 7 lines added)

@@ -0,0 +1,7 @@
pub use crate::Span;
#[derive(Debug)]
pub enum ParseError {
ExtraTokens(Span),
UnexpectedEof(String, Span),
}

src/parser.rs (Normal file, 102 lines added)

@@ -0,0 +1,102 @@
use std::str::Utf8Error;
use crate::{lex, lite_parse, LiteBlock, LiteStatement, ParseError, ParserWorkingSet, Span};
#[derive(Debug)]
pub enum Expression {}
#[derive(Debug)]
pub enum Import {}
#[derive(Debug)]
pub struct Block {
stmts: Vec<Statement>,
}
impl Block {
pub fn new() -> Self {
Self { stmts: vec![] }
}
}
#[derive(Debug)]
pub struct VarDecl {
name: String,
value: Expression,
}
#[derive(Debug)]
pub enum Statement {
Pipeline(Pipeline),
VarDecl(VarDecl),
Import(Import),
None,
}
#[derive(Debug)]
pub struct Pipeline {}
impl Pipeline {
pub fn new() -> Self {
Self {}
}
}
impl ParserWorkingSet {
fn parse_statement(
&mut self,
block: &mut Block,
lite_pipeline: &LiteStatement,
) -> Option<ParseError> {
match lite_pipeline.commands.len() {
0 => None,
1 => {
let command_name = self.get_span_contents(lite_pipeline.commands[0].parts[0]);
println!("{:?}", command_name);
if command_name == b"let" {
println!("found let")
}
None
}
_ => {
// pipeline
None
}
}
}
pub fn parse_block(&mut self, lite_block: &LiteBlock) -> (Block, Option<ParseError>) {
let mut error = None;
self.enter_scope();
let mut block = Block::new();
for pipeline in &lite_block.block {
let err = self.parse_statement(&mut block, pipeline);
error = error.or(err);
}
self.exit_scope();
(block, error)
}
pub fn parse_file(&mut self, fname: &str, contents: &[u8]) -> (Block, Option<ParseError>) {
let mut error = None;
let file_id = self.add_file(fname.into(), contents.into());
let (output, err) = lex(contents, file_id, 0, crate::LexMode::Normal);
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
println!("{:?}", output);
let (output, err) = self.parse_block(&output);
error = error.or(err);
(output, error)
}
}
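
A minimal sketch (not part of the commit) of driving parse_file on an in-memory buffer rather than on a file read from disk as main.rs does; the crate path engine_q and the file name inline.nu are assumptions.

// Sketch only, not part of the commit; assumes the library crate builds as `engine_q`.
use engine_q::ParserWorkingSet;

fn main() {
    let source = b"let x = 3";
    let mut working_set = ParserWorkingSet::new(None);
    // parse_file registers the buffer as a new file, then runs lex, lite_parse,
    // and parse_block over it, carrying along the first error it hits.
    let (block, err) = working_set.parse_file("inline.nu", source);
    println!("{:?} {:?}", block, err);
}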

src/parser_state.rs (Normal file, 175 lines added)

@@ -0,0 +1,175 @@
use crate::Span;
use std::{collections::HashMap, sync::Arc};
pub struct ParserState {
files: Vec<(String, Vec<u8>)>,
}
pub enum VarLocation {
CurrentScope,
OuterScope,
}
#[derive(Clone, Copy)]
pub enum Type {}
struct ScopeFrame {
vars: HashMap<String, Type>,
}
impl ScopeFrame {
pub fn new() -> Self {
Self {
vars: HashMap::new(),
}
}
}
pub struct ParserWorkingSet {
files: Vec<(String, Vec<u8>)>,
permanent_state: Option<Arc<ParserState>>,
scope: Vec<ScopeFrame>,
}
impl Default for ParserState {
fn default() -> Self {
Self::new()
}
}
impl ParserState {
pub fn new() -> Self {
Self { files: vec![] }
}
pub fn merge_working_set(this: &mut Arc<ParserState>, mut working_set: ParserWorkingSet) {
// Remove the working set's reference to the permanent state so we can safely take a mutable reference
working_set.permanent_state = None;
// Take the mutable reference and extend the permanent state from the working set
if let Some(this) = std::sync::Arc::<ParserState>::get_mut(this) {
this.files.extend(working_set.files);
} else {
panic!("Internal error: merging working set should always succeed");
}
}
pub fn num_files(&self) -> usize {
self.files.len()
}
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
self.files.push((filename, contents));
self.num_files() - 1
}
pub(crate) fn get_file_contents(&self, idx: usize) -> &[u8] {
&self.files[idx].1
}
}
impl ParserWorkingSet {
pub fn new(permanent_state: Option<Arc<ParserState>>) -> Self {
Self {
files: vec![],
permanent_state,
scope: vec![],
}
}
pub fn num_files(&self) -> usize {
let parent_len = if let Some(permanent_state) = &self.permanent_state {
permanent_state.num_files()
} else {
0
};
self.files.len() + parent_len
}
pub fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
self.files.push((filename, contents));
self.num_files() - 1
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
if let Some(permanent_state) = &self.permanent_state {
let num_permanent_files = permanent_state.num_files();
if span.file_id < num_permanent_files {
&permanent_state.get_file_contents(span.file_id)[span.start..span.end]
} else {
&self.files[span.file_id - num_permanent_files].1[span.start..span.end]
}
} else {
&self.files[span.file_id].1[span.start..span.end]
}
}
pub fn enter_scope(&mut self) {
self.scope.push(ScopeFrame::new());
}
pub fn exit_scope(&mut self) {
self.scope.pop();
}
pub fn find_variable(&self, name: &str) -> Option<(VarLocation, Type)> {
for scope in self.scope.iter().rev().enumerate() {
if let Some(result) = scope.1.vars.get(name) {
if scope.0 == 0 {
// Top level
return Some((VarLocation::CurrentScope, result.clone()));
} else {
return Some((VarLocation::OuterScope, result.clone()));
}
}
}
None
}
}
fn main() {}
#[cfg(test)]
mod parser_state_tests {
use super::*;
#[test]
fn add_file_gives_id() {
let mut parser_state = ParserWorkingSet::new(Some(Arc::new(ParserState::new())));
let id = parser_state.add_file("test.nu".into(), vec![]);
assert_eq!(id, 0);
}
#[test]
fn add_file_gives_id_including_parent() {
let mut parser_state = ParserState::new();
let parent_id = parser_state.add_file("test.nu".into(), vec![]);
let mut working_set = ParserWorkingSet::new(Some(Arc::new(parser_state)));
let working_set_id = working_set.add_file("child.nu".into(), vec![]);
assert_eq!(parent_id, 0);
assert_eq!(working_set_id, 1);
}
#[test]
fn merge_states() {
let mut parser_state = ParserState::new();
let parent_id = parser_state.add_file("test.nu".into(), vec![]);
let mut parser_state = Arc::new(parser_state);
let mut working_set = ParserWorkingSet::new(Some(parser_state.clone()));
let working_set_id = working_set.add_file("child.nu".into(), vec![]);
ParserState::merge_working_set(&mut parser_state, working_set);
assert_eq!(parser_state.num_files(), 2);
assert_eq!(&parser_state.files[0].0, "test.nu");
assert_eq!(&parser_state.files[1].0, "child.nu");
}
}
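
A minimal sketch (not part of the commit) of span resolution and variable lookup on a working set, using only the public methods defined above; the crate path engine_q and the file name script.nu are assumptions.

// Sketch only, not part of the commit; assumes the library crate builds as `engine_q`.
use engine_q::{ParserWorkingSet, Span};

fn main() {
    // With no permanent state, every file lives directly in the working set.
    let mut working_set = ParserWorkingSet::new(None);
    let file_id = working_set.add_file("script.nu".into(), b"let x = 100".to_vec());

    // A Span indexes into the bytes of the file it names; byte 4 is the `x`.
    let name = working_set.get_span_contents(Span::new(4, 5, file_id));
    assert_eq!(name, b"x");

    // Nothing has declared a variable yet, so lookup comes back empty.
    working_set.enter_scope();
    assert!(working_set.find_variable("x").is_none());
}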

src/span.rs (Normal file, 16 lines added)

@@ -0,0 +1,16 @@
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
pub start: usize,
pub end: usize,
pub file_id: usize,
}
impl Span {
pub fn new(start: usize, end: usize, file_id: usize) -> Span {
Span {
start,
end,
file_id,
}
}
}