diff --git a/src/parser/parse2.rs b/src/parser/parse2.rs index 510c3e8aaf..9d2e522c98 100644 --- a/src/parser/parse2.rs +++ b/src/parser/parse2.rs @@ -3,6 +3,7 @@ crate mod operator; crate mod parser; crate mod span; crate mod token_tree; +crate mod token_tree_builder; crate mod tokens; crate mod unit; crate mod util; diff --git a/src/parser/parse2/parser.rs b/src/parser/parse2/parser.rs index dc4c7909c0..267cee78ed 100644 --- a/src/parser/parse2/parser.rs +++ b/src/parser/parse2/parser.rs @@ -190,7 +190,7 @@ named!(pub path( NomSpan ) -> TokenNode, >> tag!(".") >> tail: separated_list!(tag!("."), alt!(identifier | string)) >> r: position!() - >> (TokenNode::Path(Spanned::from_nom(PathNode::new(Box::new(head), tail), l, r))) + >> (TokenNode::Path(Spanned::from_nom(PathNode::new(Box::new(head), tail.into_iter().map(TokenNode::Token).collect()), l, r))) ) ); @@ -253,6 +253,8 @@ fn is_id_continue(c: char) -> bool { #[cfg(test)] mod tests { use super::*; + use crate::parser::parse2::token_tree_builder::TokenTreeBuilder as b; + use crate::parser::parse2::token_tree_builder::{CurriedToken, TokenTreeBuilder}; use nom_trace::{print_trace, reset_trace}; use pretty_assertions::assert_eq; @@ -422,52 +424,37 @@ mod tests { #[test] fn test_delimited() { - assert_eq!( - apply(node, "(abc)"), - delimited( - Delimiter::Paren, - vec![TokenNode::Token(token(RawToken::Bare, 1, 4))], - 0, - 5 - ) - ); + assert_eq!(apply(node, "(abc)"), build(b::parens(vec![b::bare("abc")]))); assert_eq!( apply(node, "( abc )"), - delimited( - Delimiter::Paren, - vec![TokenNode::Token(token(RawToken::Bare, 3, 6))], - 0, - 9 - ) + build(b::parens(vec![b::ws(" "), b::bare("abc"), b::ws(" ")])) ); assert_eq!( apply(node, "( abc def )"), - delimited( - Delimiter::Paren, - vec![ - TokenNode::Token(token(RawToken::Bare, 3, 6)), - TokenNode::Token(token(RawToken::Bare, 7, 10)), - ], - 0, - 12 - ) + build(b::parens(vec![ + b::ws(" "), + b::bare("abc"), + b::sp(), + b::bare("def"), + b::sp() + ])) ); assert_eq!( apply(node, "( abc def 123 456GB )"), - delimited( - Delimiter::Paren, - vec![ - TokenNode::Token(token(RawToken::Bare, 3, 6)), - TokenNode::Token(token(RawToken::Bare, 7, 10)), - TokenNode::Token(token(RawToken::Integer(123), 11, 14)), - TokenNode::Token(token(RawToken::Size(456, Unit::GB), 15, 20)), - ], - 0, - 22 - ) + build(b::parens(vec![ + b::ws(" "), + b::bare("abc"), + b::sp(), + b::bare("def"), + b::sp(), + b::int(123), + b::sp(), + b::size(456, "GB"), + b::sp() + ])) ); } @@ -475,55 +462,31 @@ mod tests { fn test_path() { assert_eq!( apply(node, "$it.print"), - path( - TokenNode::Token(token(RawToken::Variable(Span::from((1, 3))), 0, 3)), - vec![token(RawToken::Identifier, 4, 9)], - 0, - 9 - ) + build(b::path(b::var("it"), vec![b::ident("print")])) ); assert_eq!( apply(node, "$head.part1.part2"), - path( - TokenNode::Token(token(RawToken::Variable(Span::from((1, 5))), 0, 5)), - vec![ - token(RawToken::Identifier, 6, 11), - token(RawToken::Identifier, 12, 17) - ], - 0, - 17 - ) + build(b::path( + b::var("head"), + vec![b::ident("part1"), b::ident("part2")] + )) ); assert_eq!( apply(node, "( hello ).world"), - path( - delimited( - Delimiter::Paren, - vec![TokenNode::Token(token(RawToken::Bare, 2, 7))], - 0, - 9 - ), - vec![token(RawToken::Identifier, 10, 15)], - 0, - 15 - ) + build(b::path( + b::parens(vec![b::sp(), b::bare("hello"), b::sp()]), + vec![b::ident("world")] + )) ); assert_eq!( apply(node, "( hello ).\"world\""), - path( - delimited( - Delimiter::Paren, - vec![TokenNode::Token(token(RawToken::Bare, 2, 7))], - 0, - 9 - ), - vec![token(RawToken::String(Span::from((11, 16))), 10, 17)], - 0, - 17 - ) + build(b::path( + b::parens(vec![b::sp(), b::bare("hello"), b::sp()],), + vec![b::string("world")] + )) ); } @@ -531,31 +494,22 @@ mod tests { fn test_nested_path() { assert_eq!( apply(node, "( $it.is.\"great news\".right yep $yep ).\"world\""), - path( - delimited( - Delimiter::Paren, - vec![ - path( - TokenNode::Token(token(RawToken::Variable(Span::from((3, 5))), 2, 5)), - vec![ - token(RawToken::Identifier, 6, 8), - token(RawToken::String(Span::from((10, 20))), 9, 21), - token(RawToken::Identifier, 22, 27) - ], - 2, - 27 - ), - leaf_token(RawToken::Bare, 28, 31), - leaf_token(RawToken::Variable(Span::from((33, 36))), 32, 36) - ], - 0, - 38 - ), - vec![token(RawToken::String(Span::from((40, 45))), 39, 46)], - 0, - 46 - ) - ); + build(b::path( + b::parens(vec![ + b::sp(), + b::path( + b::var("it"), + vec![b::ident("is"), b::string("great news"), b::ident("right")] + ), + b::sp(), + b::bare("yep"), + b::sp(), + b::var("yep"), + b::sp() + ]), + vec![b::string("world")] + )) + ) } fn apply(f: impl Fn(NomSpan) -> Result<(NomSpan, T), nom::Err>, string: &str) -> T { @@ -584,7 +538,10 @@ mod tests { } fn path(head: TokenNode, tail: Vec, left: usize, right: usize) -> TokenNode { - let node = PathNode::new(Box::new(head), tail); + let node = PathNode::new( + Box::new(head), + tail.into_iter().map(TokenNode::Token).collect(), + ); let spanned = Spanned::from_item(node, (left, right)); TokenNode::Path(spanned) } @@ -596,4 +553,9 @@ mod tests { fn token(token: RawToken, left: usize, right: usize) -> Token { Spanned::from_item(token, (left, right)) } + + fn build(block: CurriedToken) -> TokenNode { + let mut builder = TokenTreeBuilder::new(); + block(&mut builder).expect("Expected to build into a token") + } } diff --git a/src/parser/parse2/token_tree.rs b/src/parser/parse2/token_tree.rs index f79dd20554..6ef1e78289 100644 --- a/src/parser/parse2/token_tree.rs +++ b/src/parser/parse2/token_tree.rs @@ -25,5 +25,5 @@ pub enum Delimiter { #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, new)] pub struct PathNode { head: Box, - tail: Vec, + tail: Vec, } diff --git a/src/parser/parse2/token_tree_builder.rs b/src/parser/parse2/token_tree_builder.rs new file mode 100644 index 0000000000..5c7df7de77 --- /dev/null +++ b/src/parser/parse2/token_tree_builder.rs @@ -0,0 +1,221 @@ +use crate::parser::parse2::operator::Operator; +use crate::parser::parse2::span::{Span, Spanned}; +use crate::parser::parse2::token_tree::{DelimitedNode, Delimiter, PathNode, TokenNode}; +use crate::parser::parse2::tokens::{RawToken, Token}; +use crate::parser::parse2::unit::Unit; +use derive_new::new; + +#[derive(new)] +pub struct TokenTreeBuilder { + #[new(default)] + pos: usize, +} + +pub type CurriedToken = Box Option + 'static>; + +#[allow(unused)] +impl TokenTreeBuilder { + pub fn build(block: impl FnOnce(&mut Self) -> TokenNode) -> TokenNode { + let mut builder = TokenTreeBuilder::new(); + block(&mut builder) + } + + pub fn op(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(input.as_str()); + + b.pos = end; + + Some(TokenTreeBuilder::spanned_op(input, (start, end))) + }) + } + + pub fn spanned_op(input: impl Into, span: impl Into) -> TokenNode { + TokenNode::Token(Spanned::from_item( + RawToken::Operator(input.into()), + span.into(), + )) + } + + pub fn string(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, _) = b.consume("\""); + let (inner_start, inner_end) = b.consume(&input); + let (_, end) = b.consume("\""); + b.pos = end; + + Some(TokenTreeBuilder::spanned_string( + (inner_start, inner_end), + (start, end), + )) + }) + } + + pub fn spanned_string(input: impl Into, span: impl Into) -> TokenNode { + TokenNode::Token(Spanned::from_item( + RawToken::String(input.into()), + span.into(), + )) + } + + pub fn bare(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + b.pos = end; + + Some(TokenTreeBuilder::spanned_bare((start, end))) + }) + } + + pub fn spanned_bare(input: impl Into) -> TokenNode { + TokenNode::Token(Spanned::from_item(RawToken::Bare, input.into())) + } + + pub fn int(input: impl Into) -> CurriedToken { + let int = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&int.to_string()); + b.pos = end; + + Some(TokenTreeBuilder::spanned_int(int, (start, end))) + }) + } + + pub fn spanned_int(input: impl Into, span: impl Into) -> TokenNode { + TokenNode::Token(Token::from_item(RawToken::Integer(input.into()), span)) + } + + pub fn size(int: impl Into, unit: impl Into) -> CurriedToken { + let int = int.into(); + let unit = unit.into(); + + Box::new(move |b| { + let (start, _) = b.consume(&int.to_string()); + let (_, end) = b.consume(unit.as_str()); + b.pos = end; + + Some(TokenTreeBuilder::spanned_size((int, unit), (start, end))) + }) + } + + pub fn spanned_size( + input: (impl Into, impl Into), + span: impl Into, + ) -> TokenNode { + let (int, unit) = (input.0.into(), input.1.into()); + + TokenNode::Token(Spanned::from_item(RawToken::Size(int, unit), span)) + } + + pub fn path(head: CurriedToken, tail: Vec) -> CurriedToken { + Box::new(move |b| { + let start = b.pos; + let head = head(b).expect("The head of a path must not be whitespace"); + + let mut output = vec![]; + + for item in tail { + b.consume("."); + + match item(b) { + None => {} + Some(v) => output.push(v), + }; + } + + let end = b.pos; + + Some(TokenTreeBuilder::spanned_path((head, output), (start, end))) + }) + } + + pub fn spanned_path(input: (TokenNode, Vec), span: impl Into) -> TokenNode { + TokenNode::Path(Spanned::from_item( + PathNode::new(Box::new(input.0), input.1), + span, + )) + } + + pub fn var(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, _) = b.consume("$"); + let (inner_start, end) = b.consume(&input); + + Some(TokenTreeBuilder::spanned_var( + (inner_start, end), + (start, end), + )) + }) + } + + pub fn spanned_var(input: impl Into, span: impl Into) -> TokenNode { + TokenNode::Token(Spanned::from_item( + RawToken::Variable(input.into()), + span.into(), + )) + } + + pub fn ident(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + Some(TokenTreeBuilder::spanned_ident((start, end))) + }) + } + + pub fn spanned_ident(span: impl Into) -> TokenNode { + TokenNode::Token(Spanned::from_item(RawToken::Identifier, span.into())) + } + + pub fn parens(input: Vec) -> CurriedToken { + Box::new(move |b| { + let (start, _) = b.consume("("); + let mut output = vec![]; + for item in input { + match item(b) { + None => {} + Some(v) => output.push(v), + }; + } + + let (_, end) = b.consume(")"); + + Some(TokenNode::Delimited(Spanned::from_item( + DelimitedNode::new(Delimiter::Paren, output), + (start, end), + ))) + }) + } + + pub fn sp() -> CurriedToken { + Box::new(|b| { + b.consume(" "); + None + }) + } + + pub fn ws(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + b.consume(&input); + None + }) + } + + fn consume(&mut self, input: &str) -> (usize, usize) { + let start = self.pos; + self.pos += input.len(); + (start, self.pos) + } +}