From 04a6a4f860c5c9314a00ba99d7629d10eb9aaaf0 Mon Sep 17 00:00:00 2001 From: JT Date: Tue, 6 Jul 2021 10:58:56 +1200 Subject: [PATCH] Add list parsing --- src/lex.rs | 34 ++++++++++++++------- src/main.rs | 8 ++--- src/parser.rs | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 15 deletions(-) diff --git a/src/lex.rs b/src/lex.rs index b10f67055f..8f7fb841f7 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -38,20 +38,32 @@ impl BlockKind { } } -#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Eq, Debug, Clone, Copy)] pub enum LexMode { Normal, + CommaIsSpace, + NewlineIsSpace, } // A baseline token is terminated if it's not nested inside of a paired // delimiter and the next character is one of: `|`, `;`, `#` or any // whitespace. -fn is_item_terminator(block_level: &[BlockKind], c: u8) -> bool { +fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool { block_level.is_empty() - && (c == b' ' || c == b'\t' || c == b'\n' || c == b'|' || c == b';' || c == b'#') + && (c == b' ' + || c == b'\t' + || c == b'\n' + || c == b'|' + || c == b';' + || c == b'#' + || (c == b',' && lex_mode == LexMode::CommaIsSpace)) } -pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option) { +pub fn lex_item( + input: &[u8], + curr_offset: &mut usize, + lex_mode: LexMode, +) -> (Span, Option) { // This variable tracks the starting character of a string literal, so that // we remain inside the string literal lexer mode until we encounter the // closing quote. @@ -85,17 +97,17 @@ pub fn lex_item(input: &[u8], curr_offset: &mut usize) -> (Span, Option (Span, Option std::io::Result<()> { let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); working_set.add_decl((b"foo").to_vec(), sig); - let file = std::fs::read(&path)?; - let (output, err) = working_set.parse_file(&path, file); - //let (output, err) = working_set.parse_source(path.as_bytes()); - println!("{}", output.len()); + //let file = std::fs::read(&path)?; + //let (output, err) = working_set.parse_file(&path, file); + let (output, err) = working_set.parse_source(path.as_bytes()); + println!("{:#?}", output); println!("error: {:?}", err); // println!("{}", size_of::()); diff --git a/src/parser.rs b/src/parser.rs index 7baa5a2d78..627e7616ea 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -103,6 +103,7 @@ pub enum Expr { BinaryOp(Box, Box, Box), //lhs, op, rhs Subexpression(Box), Block(Box), + List(Vec), Garbage, } @@ -562,6 +563,78 @@ impl ParserWorkingSet { ) } + pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option) { + let bytes = self.get_span_contents(span); + let mut error = None; + + let mut start = span.start; + let mut end = span.end; + + if bytes.starts_with(b"[") { + start += 1; + } + if bytes.ends_with(b"]") { + end -= 1; + } else { + error = error.or_else(|| { + Some(ParseError::Unclosed( + "]".into(), + Span { + start: end, + end: end + 1, + }, + )) + }); + } + + let span = Span { start, end }; + + let source = &self.file_contents[..end]; + + let (output, err) = lex(&source, start, crate::LexMode::CommaIsSpace); + error = error.or(err); + + let (output, err) = lite_parse(&output); + error = error.or(err); + + println!("{:?}", output.block); + + match output.block.len() { + 0 => ( + Expression { + expr: Expr::List(vec![]), + span, + }, + None, + ), + 1 => { + // List + + let mut args = vec![]; + for arg in &output.block[0].commands { + for part in &arg.parts { + let (arg, err) = self.parse_arg(*part, SyntaxShape::Any); + error = error.or(err); + + args.push(arg); + } + } + + ( + Expression { + expr: Expr::List(args), + span, + }, + error, + ) + } + _ => ( + garbage(span), + Some(ParseError::Mismatch("table".into(), span)), + ), + } + } + pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option) { let bytes = self.get_span_contents(span); let mut error = None; @@ -629,6 +702,15 @@ impl ParserWorkingSet { ); } return self.parse_block_expression(span); + } else if bytes.starts_with(b"[") { + if shape != SyntaxShape::Table && shape != SyntaxShape::Any { + // FIXME: need better errors + return ( + garbage(span), + Some(ParseError::Mismatch("not a table".into(), span)), + ); + } + return self.parse_table_expression(span); } match shape {