From 7922bb40201bacbff7aedf4cf39a62022ce40114 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 16 Jul 2021 18:24:46 +1200 Subject: [PATCH] More decl parsing --- src/main.rs | 41 ++++++--- src/parse_error.rs | 1 + src/parser.rs | 201 ++++++++++++++++++++++++++++++++++++++++---- src/parser_state.rs | 49 +++++++++-- 4 files changed, 252 insertions(+), 40 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1adbb4037..2b6c83abc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,17 +4,17 @@ fn main() -> std::io::Result<()> { if let Some(path) = std::env::args().nth(1) { let mut working_set = ParserWorkingSet::new(None); - let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + // let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); + // working_set.add_decl(sig.into()); - let sig = Signature::build("bar") - .named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')) - .switch("--rock", "rock!!", Some('r')); - working_set.add_decl((b"bar").to_vec(), sig.into()); + // let sig = Signature::build("bar") + // .named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')) + // .switch("--rock", "rock!!", Some('r')); + // working_set.add_decl(sig.into()); let sig = Signature::build("where").required("cond", SyntaxShape::RowCondition, "condition"); - working_set.add_decl((b"where").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let sig = Signature::build("if") .required("cond", SyntaxShape::RowCondition, "condition") @@ -25,7 +25,7 @@ fn main() -> std::io::Result<()> { "else keyword", ) .required("else_block", SyntaxShape::Block, "else block"); - working_set.add_decl((b"if").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let sig = Signature::build("let") .required("var_name", SyntaxShape::Variable, "variable name") @@ -35,7 +35,7 @@ fn main() -> std::io::Result<()> { SyntaxShape::Expression, "the value to set the variable to", ); - working_set.add_decl((b"let").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let sig = Signature::build("alias") .required("var_name", SyntaxShape::Variable, "variable name") @@ -45,25 +45,38 @@ fn main() -> std::io::Result<()> { SyntaxShape::Expression, "the value to set the variable to", ); - working_set.add_decl((b"alias").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let sig = Signature::build("sum").required( "arg", SyntaxShape::List(Box::new(SyntaxShape::Number)), "list of numbers", ); - working_set.add_decl((b"sum").to_vec(), sig.into()); + working_set.add_decl(sig.into()); + + let sig = Signature::build("def") + .required("def_name", SyntaxShape::String, "definition name") + .required( + "params", + SyntaxShape::List(Box::new(SyntaxShape::VarWithOptType)), + "parameters", + ) + .required("block", SyntaxShape::Block, "body of the definition"); + working_set.add_decl(sig.into()); //let file = std::fs::read(&path)?; //let (output, err) = working_set.parse_file(&path, file); let (output, err) = working_set.parse_source(path.as_bytes()); println!("{:#?}", output); println!("error: {:?}", err); + + println!("working set: {:#?}", working_set); + // println!("{}", size_of::()); - let engine = Engine::new(); - let result = engine.eval_block(&output); - println!("{:?}", result); + // let engine = Engine::new(); + // let result = engine.eval_block(&output); + // println!("{:?}", result); // let mut buffer = String::new(); // let stdin = std::io::stdin(); diff --git a/src/parse_error.rs b/src/parse_error.rs index 39fa14fa6..a41870711 100644 --- a/src/parse_error.rs +++ b/src/parse_error.rs @@ -15,6 +15,7 @@ pub enum ParseError { MissingFlagParam(Span), ShortFlagBatchCantTakeArg(Span), MissingPositional(String, Span), + MissingType(Span), MissingRequiredFlag(String, Span), IncompleteMathExpression(Span), UnknownState(String, Span), diff --git a/src/parser.rs b/src/parser.rs index 3d7e1889d..2b2e06c43 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,7 +4,7 @@ use crate::{ lex, lite_parse, parser_state::{Type, VarId}, signature::Flag, - DeclId, LiteBlock, ParseError, ParserWorkingSet, Signature, Span, + DeclId, Declaration, LiteBlock, ParseError, ParserWorkingSet, Signature, Span, }; /// The syntactic shapes that values must match to be passed into a command. You can think of this as the type-checking that occurs when you call a function. @@ -68,6 +68,9 @@ pub enum SyntaxShape { /// A variable name Variable, + /// A variable with optional type, `x` or `x: int` + VarWithOptType, + /// A general expression, eg `1 + 2` or `foo --bar` Expression, } @@ -174,6 +177,34 @@ impl Expression { _ => 0, } } + + pub fn as_block(self) -> Option> { + match self.expr { + Expr::Block(block) => Some(block), + _ => None, + } + } + + pub fn as_list(self) -> Option> { + match self.expr { + Expr::List(list) => Some(list), + _ => None, + } + } + + pub fn as_var(self) -> Option { + match self.expr { + Expr::Var(var_id) => Some(var_id), + _ => None, + } + } + + pub fn as_string(self) -> Option { + match self.expr { + Expr::String(string) => Some(string), + _ => None, + } + } } #[derive(Debug, Clone)] @@ -230,6 +261,7 @@ pub struct VarDecl { pub enum Statement { Pipeline(Pipeline), VarDecl(VarDecl), + Declaration(DeclId), Import(Import), Expression(Expression), None, @@ -450,6 +482,12 @@ impl ParserWorkingSet { let arg_span = spans[*spans_idx]; match shape { + SyntaxShape::VarWithOptType => { + let (arg, err) = self.parse_var_with_opt_type(spans, spans_idx); + error = error.or(err); + + (arg, error) + } SyntaxShape::RowCondition => { let (arg, err) = self.parse_row_condition(spans); error = error.or(err); @@ -786,6 +824,63 @@ impl ParserWorkingSet { } } + pub fn parse_type(&self, bytes: &[u8]) -> Type { + if bytes == b"int" { + Type::Int + } else { + Type::Unknown + } + } + + pub fn parse_var_with_opt_type( + &mut self, + spans: &[Span], + spans_idx: &mut usize, + ) -> (Expression, Option) { + let bytes = self.get_span_contents(spans[*spans_idx]).to_vec(); + + if bytes.ends_with(b":") { + // We end with colon, so the next span should be the type + if *spans_idx + 1 < spans.len() { + *spans_idx += 1; + let type_bytes = self.get_span_contents(spans[*spans_idx]); + + let ty = self.parse_type(type_bytes); + *spans_idx += 1; + + let id = self.add_variable(bytes[0..(bytes.len() - 1)].to_vec(), ty); + + ( + Expression { + expr: Expr::Var(id), + span: span(&spans[*spans_idx - 2..*spans_idx]), + }, + None, + ) + } else { + let id = self.add_variable(bytes[0..(bytes.len() - 1)].to_vec(), Type::Unknown); + *spans_idx += 1; + ( + Expression { + expr: Expr::Var(id), + span: spans[*spans_idx], + }, + Some(ParseError::MissingType(spans[*spans_idx])), + ) + } + } else { + let id = self.add_variable(bytes, Type::Unknown); + *spans_idx += 1; + + ( + Expression { + expr: Expr::Var(id), + span: span(&spans[*spans_idx - 1..*spans_idx]), + }, + None, + ) + } + } pub fn parse_row_condition(&mut self, spans: &[Span]) -> (Expression, Option) { self.parse_math_expression(spans) } @@ -829,17 +924,23 @@ impl ParserWorkingSet { error = error.or(err); let mut args = vec![]; - for arg in &output.block[0].commands { - let mut spans_idx = 0; - while spans_idx < arg.parts.len() { - let (arg, err) = - self.parse_multispan_value(&arg.parts, &mut spans_idx, element_shape.clone()); - error = error.or(err); + if !output.block.is_empty() { + for arg in &output.block[0].commands { + let mut spans_idx = 0; - args.push(arg); + while spans_idx < arg.parts.len() { + let (arg, err) = self.parse_multispan_value( + &arg.parts, + &mut spans_idx, + element_shape.clone(), + ); + error = error.or(err); - spans_idx += 1; + args.push(arg); + + spans_idx += 1; + } } } @@ -1292,6 +1393,67 @@ impl ParserWorkingSet { } } + pub fn parse_def(&mut self, spans: &[Span]) -> (Statement, Option) { + let name = self.get_span_contents(spans[0]); + + if name == b"def" { + if let Some(decl_id) = self.find_decl(b"def") { + let (mut call, call_span, err) = self.parse_internal_call(spans, decl_id); + + if err.is_some() { + return ( + Statement::Expression(Expression { + expr: Expr::Call(call), + span: call_span, + }), + err, + ); + } else { + println!("{:?}", call); + let name = call + .positional + .remove(0) + .as_string() + .expect("internal error: expected def name"); + let args = call + .positional + .remove(0) + .as_list() + .expect("internal error: expected param list") + .into_iter() + .map(|x| x.as_var().expect("internal error: expected parameter")) + .collect::>(); + let block = call + .positional + .remove(0) + .as_block() + .expect("internal error: expected block"); + + let block_id = self.add_block(block); + + let decl = Declaration { + signature: Signature::new(name), + body: Some(block_id), + }; + + let decl_id = self.add_decl(decl); + + return (Statement::Declaration(decl_id), None); + } + } + } + ( + Statement::Expression(Expression { + expr: Expr::Garbage, + span: span(spans), + }), + Some(ParseError::UnknownState( + "internal error: let statement unparseable".into(), + span(spans), + )), + ) + } + pub fn parse_let(&mut self, spans: &[Span]) -> (Statement, Option) { let name = self.get_span_contents(spans[0]); @@ -1330,7 +1492,10 @@ impl ParserWorkingSet { } pub fn parse_statement(&mut self, spans: &[Span]) -> (Statement, Option) { - if let (stmt, None) = self.parse_let(spans) { + // FIXME: improve errors by checking keyword first + if let (decl, None) = self.parse_def(spans) { + (decl, None) + } else if let (stmt, None) = self.parse_let(spans) { (stmt, None) } else { let (expr, err) = self.parse_expression(spans); @@ -1419,7 +1584,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (block, err) = working_set.parse_source(b"foo"); @@ -1442,7 +1607,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo --jazz"); assert!(matches!(err, Some(ParseError::MissingFlagParam(..)))); @@ -1453,7 +1618,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo -j"); assert!(matches!(err, Some(ParseError::MissingFlagParam(..)))); @@ -1466,7 +1631,7 @@ mod tests { let sig = Signature::build("foo") .named("--jazz", SyntaxShape::Int, "jazz!!", Some('j')) .named("--math", SyntaxShape::Int, "math!!", Some('m')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo -mj"); assert!(matches!( err, @@ -1479,7 +1644,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").switch("--jazz", "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo -mj"); assert!(matches!(err, Some(ParseError::UnknownFlag(..)))); } @@ -1489,7 +1654,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").switch("--jazz", "jazz!!", Some('j')); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo -j 100"); assert!(matches!(err, Some(ParseError::ExtraPositional(..)))); } @@ -1499,7 +1664,7 @@ mod tests { let mut working_set = ParserWorkingSet::new(None); let sig = Signature::build("foo").required("jazz", SyntaxShape::Int, "jazz!!"); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo"); assert!(matches!(err, Some(ParseError::MissingPositional(..)))); } @@ -1510,7 +1675,7 @@ mod tests { let sig = Signature::build("foo").required_named("--jazz", SyntaxShape::Int, "jazz!!", None); - working_set.add_decl((b"foo").to_vec(), sig.into()); + working_set.add_decl(sig.into()); let (_, err) = working_set.parse_source(b"foo"); assert!(matches!(err, Some(ParseError::MissingRequiredFlag(..)))); } diff --git a/src/parser_state.rs b/src/parser_state.rs index 861dda523..d864903a3 100644 --- a/src/parser_state.rs +++ b/src/parser_state.rs @@ -1,12 +1,13 @@ use crate::{parser::Block, Declaration, Signature, Span}; use std::{collections::HashMap, sync::Arc}; +#[derive(Debug)] pub struct ParserState { files: Vec<(String, usize, usize)>, file_contents: Vec, vars: Vec, decls: Vec, - blocks: Vec, + blocks: Vec>, } #[derive(Clone, Copy, Debug)] @@ -81,6 +82,10 @@ impl ParserState { self.decls.len() } + pub fn num_blocks(&self) -> usize { + self.blocks.len() + } + pub fn get_var(&self, var_id: VarId) -> Option<&Type> { self.vars.get(var_id) } @@ -107,12 +112,13 @@ impl ParserState { } } +#[derive(Debug)] pub struct ParserWorkingSet { files: Vec<(String, usize, usize)>, pub(crate) file_contents: Vec, vars: Vec, // indexed by VarId decls: Vec, // indexed by DeclId - blocks: Vec, // indexed by BlockId + blocks: Vec>, // indexed by BlockId permanent_state: Option>, scope: Vec, } @@ -140,20 +146,47 @@ impl ParserWorkingSet { self.files.len() + parent_len } - pub fn add_decl(&mut self, name: Vec, decl: Declaration) -> DeclId { + pub fn num_decls(&self) -> usize { + let parent_len = if let Some(permanent_state) = &self.permanent_state { + permanent_state.num_decls() + } else { + 0 + }; + + self.decls.len() + parent_len + } + + pub fn num_blocks(&self) -> usize { + let parent_len = if let Some(permanent_state) = &self.permanent_state { + permanent_state.num_blocks() + } else { + 0 + }; + + self.blocks.len() + parent_len + } + + pub fn add_decl(&mut self, decl: Declaration) -> DeclId { + let name = decl.signature.name.as_bytes().to_vec(); + + self.decls.push(decl); + let decl_id = self.num_decls() - 1; + let scope_frame = self .scope .last_mut() .expect("internal error: missing required scope frame"); - - self.decls.push(decl); - let decl_id = self.decls.len() - 1; - scope_frame.decls.insert(name, decl_id); decl_id } + pub fn add_block(&mut self, block: Box) -> BlockId { + self.blocks.push(block); + + self.num_blocks() - 1 + } + pub fn next_span_start(&self) -> usize { if let Some(permanent_state) = &self.permanent_state { permanent_state.next_span_start() + self.file_contents.len() @@ -192,7 +225,7 @@ impl ParserWorkingSet { } pub fn exit_scope(&mut self) { - self.scope.push(ScopeFrame::new()); + self.scope.pop(); } pub fn find_decl(&self, name: &[u8]) -> Option {