refactor to subcrates

Author: JT
Date: 2021-08-11 06:51:08 +12:00
parent f62e3119c4
commit 1355a5dd33
24 changed files with 296 additions and 257 deletions

View File

@@ -0,0 +1,7 @@
use crate::{BlockId, Signature};
#[derive(Clone, Debug)]
pub struct Declaration {
pub signature: Box<Signature>,
pub body: Option<BlockId>,
}

View File

@@ -0,0 +1,95 @@
use std::ops::Range;
use crate::ParserWorkingSet;
impl<'a> codespan_reporting::files::Files<'a> for ParserWorkingSet<'a> {
type FileId = usize;
type Name = String;
type Source = String;
fn name(&'a self, id: Self::FileId) -> Result<Self::Name, codespan_reporting::files::Error> {
Ok(self.get_filename(id))
}
fn source(
&'a self,
id: Self::FileId,
) -> Result<Self::Source, codespan_reporting::files::Error> {
Ok(self.get_file_source(id))
}
fn line_index(
&'a self,
id: Self::FileId,
byte_index: usize,
) -> Result<usize, codespan_reporting::files::Error> {
let source = self.get_file_source(id);
let mut count = 0;
for byte in source.bytes().enumerate() {
if byte.0 == byte_index {
// println!("count: {} for file: {} index: {}", count, id, byte_index);
return Ok(count);
}
if byte.1 == b'\n' {
count += 1;
}
}
// println!("count: {} for file: {} index: {}", count, id, byte_index);
Ok(count)
}
fn line_range(
&'a self,
id: Self::FileId,
line_index: usize,
) -> Result<Range<usize>, codespan_reporting::files::Error> {
let source = self.get_file_source(id);
let mut count = 0;
let mut start = Some(0);
let mut end = None;
for byte in source.bytes().enumerate() {
#[allow(clippy::comparison_chain)]
if count > line_index {
let start = start.expect("internal error: couldn't find line");
let end = end.expect("internal error: couldn't find line");
// println!(
// "Span: {}..{} for fileid: {} index: {}",
// start, end, id, line_index
// );
return Ok(start..end);
} else if count == line_index {
end = Some(byte.0 + 1);
}
#[allow(clippy::comparison_chain)]
if byte.1 == b'\n' {
count += 1;
if count > line_index {
break;
} else if count == line_index {
start = Some(byte.0 + 1);
}
}
}
match (start, end) {
(Some(start), Some(end)) => {
// println!(
// "Span: {}..{} for fileid: {} index: {}",
// start, end, id, line_index
// );
Ok(start..end)
}
_ => Err(codespan_reporting::files::Error::FileMissing),
}
}
}
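
The Files impl above is what lets parser spans drive rich terminal diagnostics. A minimal sketch of how it could be wired up, assuming codespan-reporting 0.11 and that the span has already been converted to a file-local byte range (e.g. with Span::offset); the function, message, and variable names are illustrative only, not part of this commit:

use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::term;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};

// `file_id` and `local_range` are hypothetical: the id of the offending file
// and the file-local byte range of the error.
fn report(
    working_set: &nu_parser::ParserWorkingSet<'_>,
    file_id: usize,
    local_range: std::ops::Range<usize>,
) {
    let diagnostic = Diagnostic::error()
        .with_message("parse error")
        .with_labels(vec![Label::primary(file_id, local_range)]);

    let writer = StandardStream::stderr(ColorChoice::Always);
    let config = term::Config::default();
    // The working set itself acts as the `Files` database.
    let _ = term::emit(&mut writer.lock(), &config, working_set, &diagnostic);
}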

View File

@@ -0,0 +1,115 @@
use crate::{Block, Expr, Expression, ParserWorkingSet, Pipeline, Span, Statement};
#[derive(Debug)]
pub enum FlatShape {
Garbage,
Bool,
Int,
Float,
InternalCall,
External,
Literal,
Operator,
Signature,
String,
Variable,
}
impl<'a> ParserWorkingSet<'a> {
pub fn flatten_block(&self, block: &Block) -> Vec<(Span, FlatShape)> {
let mut output = vec![];
for stmt in &block.stmts {
output.extend(self.flatten_statement(stmt));
}
output
}
pub fn flatten_statement(&self, stmt: &Statement) -> Vec<(Span, FlatShape)> {
match stmt {
Statement::Expression(expr) => self.flatten_expression(expr),
Statement::Pipeline(pipeline) => self.flatten_pipeline(pipeline),
_ => vec![],
}
}
pub fn flatten_expression(&self, expr: &Expression) -> Vec<(Span, FlatShape)> {
match &expr.expr {
Expr::BinaryOp(lhs, op, rhs) => {
let mut output = vec![];
output.extend(self.flatten_expression(lhs));
output.extend(self.flatten_expression(op));
output.extend(self.flatten_expression(rhs));
output
}
Expr::Block(block_id) => self.flatten_block(self.get_block(*block_id)),
Expr::Call(call) => {
let mut output = vec![(call.head, FlatShape::InternalCall)];
for positional in &call.positional {
output.extend(self.flatten_expression(positional));
}
output
}
Expr::ExternalCall(..) => {
vec![(expr.span, FlatShape::External)]
}
Expr::Garbage => {
vec![(expr.span, FlatShape::Garbage)]
}
Expr::Int(_) => {
vec![(expr.span, FlatShape::Int)]
}
Expr::Float(_) => {
vec![(expr.span, FlatShape::Float)]
}
Expr::Bool(_) => {
vec![(expr.span, FlatShape::Bool)]
}
Expr::List(list) => {
let mut output = vec![];
for l in list {
output.extend(self.flatten_expression(l));
}
output
}
Expr::Keyword(_, span, expr) => {
let mut output = vec![(*span, FlatShape::Operator)];
output.extend(self.flatten_expression(expr));
output
}
Expr::Operator(_) => {
vec![(expr.span, FlatShape::Operator)]
}
Expr::Signature(_) => {
vec![(expr.span, FlatShape::Signature)]
}
Expr::String(_) => {
vec![(expr.span, FlatShape::String)]
}
Expr::Subexpression(block_id) => self.flatten_block(self.get_block(*block_id)),
Expr::Table(headers, cells) => {
let mut output = vec![];
for e in headers {
output.extend(self.flatten_expression(e));
}
for row in cells {
for expr in row {
output.extend(self.flatten_expression(expr));
}
}
output
}
Expr::Var(_) => {
vec![(expr.span, FlatShape::Variable)]
}
}
}
pub fn flatten_pipeline(&self, pipeline: &Pipeline) -> Vec<(Span, FlatShape)> {
let mut output = vec![];
for expr in &pipeline.expressions {
output.extend(self.flatten_expression(expr))
}
output
}
}

crates/nu-parser/src/lex.rs (new file)
View File

@@ -0,0 +1,347 @@
use crate::{ParseError, Span};
#[derive(Debug, PartialEq, Eq)]
pub enum TokenContents {
Item,
Comment,
Pipe,
Semicolon,
Eol,
}
#[derive(Debug, PartialEq, Eq)]
pub struct Token {
pub contents: TokenContents,
pub span: Span,
}
impl Token {
pub fn new(contents: TokenContents, span: Span) -> Token {
Token { contents, span }
}
}
#[derive(Clone, Copy, Debug)]
pub enum BlockKind {
Paren,
CurlyBracket,
SquareBracket,
}
impl BlockKind {
fn closing(self) -> u8 {
match self {
BlockKind::Paren => b')',
BlockKind::SquareBracket => b']',
BlockKind::CurlyBracket => b'}',
}
}
}
// A baseline token is terminated if it's not nested inside of a paired
// delimiter and the next character is one of: `|`, `;`, `#`, whitespace, or
// any caller-provided additional whitespace or special token byte.
fn is_item_terminator(
block_level: &[BlockKind],
c: u8,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> bool {
block_level.is_empty()
&& (c == b' '
|| c == b'\t'
|| c == b'\n'
|| c == b'\r'
|| c == b'|'
|| c == b';'
|| c == b'#'
|| additional_whitespace.contains(&c)
|| special_tokens.contains(&c))
}
// A special token is a byte that stands alone as its own token. For example,
// when parsing a signature you may want `:` to both separate tokens and be
// emitted as its own token, signaling that a type follows, as in `foo:bar`.
fn is_special_item(block_level: &[BlockKind], c: u8, special_tokens: &[u8]) -> bool {
block_level.is_empty() && special_tokens.contains(&c)
}
pub fn lex_item(
input: &[u8],
curr_offset: &mut usize,
span_offset: usize,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> (Span, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
// closing quote.
let mut quote_start: Option<u8> = None;
let mut in_comment = false;
let token_start = *curr_offset;
// This Vec tracks paired delimiters
let mut block_level: Vec<BlockKind> = vec![];
// The process of slurping up a baseline token repeats:
//
// - String literal, which begins with `'` or `"`, and continues until
// the same character is encountered again.
// - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
// the matching closing delimiter is found, skipping comments and string
// literals.
// - When not nested inside of a delimiter pair, when a terminating
// character (whitespace, `|`, `;` or `#`) is encountered, the baseline
// token is done.
// - Otherwise, accumulate the character into the current baseline token.
while let Some(c) = input.get(*curr_offset) {
let c = *c;
if quote_start.is_some() {
// If we encountered the closing quote character for the current
// string, we're done with the current string.
if Some(c) == quote_start {
quote_start = None;
}
} else if c == b'#' {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
in_comment = true;
} else if c == b'\n' {
in_comment = false;
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
} else if in_comment {
if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
} else if is_special_item(&block_level, c, special_tokens) && token_start == *curr_offset {
*curr_offset += 1;
break;
} else if c == b'\'' || c == b'"' {
// We encountered the opening quote of a string literal.
quote_start = Some(c);
} else if c == b'[' {
// We encountered an opening `[` delimiter.
block_level.push(BlockKind::SquareBracket);
} else if c == b']' {
// We encountered a closing `]` delimiter. Pop off the opening `[`
// delimiter.
if let Some(BlockKind::SquareBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == b'{' {
// We encountered an opening `{` delimiter.
block_level.push(BlockKind::CurlyBracket);
} else if c == b'}' {
// We encountered a closing `}` delimiter. Pop off the opening `{`.
if let Some(BlockKind::CurlyBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == b'(' {
// We encountered an opening `(` delimiter.
block_level.push(BlockKind::Paren);
} else if c == b')' {
// We encountered a closing `)` delimiter. Pop off the opening `(`.
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
break;
}
*curr_offset += 1;
}
let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
// If there are still unclosed opening delimiters, report an unexpected
// end-of-file for the innermost one.
if let Some(block) = block_level.last() {
let delim = block.closing();
let cause = ParseError::UnexpectedEof(
(delim as char).to_string(),
Span {
start: span.end - 1,
end: span.end,
},
);
return (span, Some(cause));
}
if let Some(delim) = quote_start {
// We reached the end of input while still inside a string literal; report an
// unexpected end-of-file so that consumers of this partial parse (e.g.,
// completions) know the closing quote is missing.
return (
span,
Some(ParseError::UnexpectedEof((delim as char).to_string(), span)),
);
}
// If we didn't accumulate any characters, report an unexpected end of input.
if *curr_offset - token_start == 0 {
return (
span,
Some(ParseError::UnexpectedEof("command".to_string(), span)),
);
}
(span, None)
}
pub fn lex(
input: &[u8],
span_offset: usize,
additional_whitespace: &[u8],
special_tokens: &[u8],
) -> (Vec<Token>, Option<ParseError>) {
let mut error = None;
let mut curr_offset = 0;
let mut output = vec![];
let mut is_complete = true;
while let Some(c) = input.get(curr_offset) {
let c = *c;
if c == b'|' {
// A `|` is either a single pipe or the start of a `||`.
let idx = curr_offset;
let prev_idx = idx;
curr_offset += 1;
// If the next character is `|`, we're looking at a `||`.
if let Some(c) = input.get(curr_offset) {
if *c == b'|' {
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + prev_idx, span_offset + idx + 1),
));
continue;
}
}
// Otherwise, it's just a regular `|` token.
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1),
));
is_complete = false;
} else if c == b';' {
// If the next character is a `;`, we're looking at a semicolon token.
if !is_complete && error.is_none() {
error = Some(ParseError::ExtraTokens(Span::new(
curr_offset,
curr_offset + 1,
)));
}
let idx = curr_offset;
curr_offset += 1;
output.push(Token::new(
TokenContents::Semicolon,
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if c == b'\n' || c == b'\r' {
// If the next character is a newline, we're looking at an EOL (end of line) token.
let idx = curr_offset;
curr_offset += 1;
if !additional_whitespace.contains(&c) {
output.push(Token::new(
TokenContents::Eol,
Span::new(span_offset + idx, span_offset + idx + 1),
));
}
} else if c == b'#' {
// If the next character is `#`, we're at the beginning of a line
// comment. The comment continues until the next newline.
let mut start = curr_offset;
while let Some(input) = input.get(curr_offset) {
curr_offset += 1;
if *input == b'\n' || *input == b'\r' {
output.push(Token::new(
TokenContents::Comment,
Span::new(span_offset + start, span_offset + curr_offset),
));
start = curr_offset;
break;
}
}
if start != curr_offset {
output.push(Token::new(
TokenContents::Comment,
Span::new(span_offset + start, span_offset + curr_offset),
));
}
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
// If the next character is non-newline whitespace, skip it.
curr_offset += 1;
} else {
// Otherwise, try to consume an unclassified token.
let (span, err) = lex_item(
input,
&mut curr_offset,
span_offset,
additional_whitespace,
special_tokens,
);
if error.is_none() {
error = err;
}
is_complete = true;
output.push(Token::new(TokenContents::Item, span));
}
}
(output, error)
}
#[cfg(test)]
mod lex_tests {
use super::*;
#[test]
fn lex_basic() {
let file = b"let x = 4";
let output = lex(file, 0, &[], &[]);
assert!(output.1.is_none());
}
#[test]
fn lex_newline() {
let file = b"let x = 300\nlet y = 500;";
let output = lex(file, 0, &[], &[]);
println!("{:#?}", output.0);
assert!(output.0.contains(&Token {
contents: TokenContents::Eol,
span: Span { start: 11, end: 12 }
}));
}
#[test]
fn lex_empty() {
let file = b"";
let output = lex(file, 0, &[], &[]);
assert!(output.0.is_empty());
assert!(output.1.is_none());
}
}
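
A small usage sketch for the lexer above (not part of this commit; the crate is assumed to be consumed as nu_parser):

use nu_parser::{lex, TokenContents};

fn main() {
    // Items, a pipe, and a semicolon.
    let (tokens, err) = lex(b"ls | sort-by name; pwd", 0, &[], &[]);
    assert!(err.is_none());
    let kinds: Vec<_> = tokens.iter().map(|t| &t.contents).collect();
    println!("{:?}", kinds); // [Item, Pipe, Item, Item, Semicolon, Item]

    // Passing `:` as a special token splits `foo:bar` into three items,
    // the way signature parsing wants it.
    let (tokens, _) = lex(b"foo:bar", 0, &[], &[b':']);
    assert_eq!(tokens.len(), 3);
    assert!(tokens.iter().all(|t| t.contents == TokenContents::Item));
}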

View File

@@ -0,0 +1,24 @@
mod declaration;
mod errors;
mod flatten;
mod lex;
mod lite_parse;
mod parse_error;
mod parser;
mod parser_state;
mod signature;
mod span;
mod type_check;
pub use declaration::Declaration;
pub use flatten::FlatShape;
pub use lex::{lex, Token, TokenContents};
pub use lite_parse::{lite_parse, LiteBlock};
pub use parse_error::ParseError;
pub use parser::{
span, Block, Call, Expr, Expression, Import, Operator, Pipeline, Statement, SyntaxShape,
VarDecl,
};
pub use parser_state::{BlockId, DeclId, ParserDelta, ParserState, ParserWorkingSet, Type, VarId};
pub use signature::{Flag, PositionalArg, Signature};
pub use span::Span;

View File

@@ -0,0 +1,203 @@
use crate::{ParseError, Span, Token, TokenContents};
#[derive(Debug)]
pub struct LiteCommand {
pub comments: Vec<Span>,
pub parts: Vec<Span>,
}
impl Default for LiteCommand {
fn default() -> Self {
Self::new()
}
}
impl LiteCommand {
pub fn new() -> Self {
Self {
comments: vec![],
parts: vec![],
}
}
pub fn push(&mut self, span: Span) {
self.parts.push(span);
}
pub fn is_empty(&self) -> bool {
self.parts.is_empty()
}
}
#[derive(Debug)]
pub struct LiteStatement {
pub commands: Vec<LiteCommand>,
}
impl Default for LiteStatement {
fn default() -> Self {
Self::new()
}
}
impl LiteStatement {
pub fn new() -> Self {
Self { commands: vec![] }
}
pub fn push(&mut self, command: LiteCommand) {
self.commands.push(command);
}
pub fn is_empty(&self) -> bool {
self.commands.is_empty()
}
}
#[derive(Debug)]
pub struct LiteBlock {
pub block: Vec<LiteStatement>,
}
impl Default for LiteBlock {
fn default() -> Self {
Self::new()
}
}
impl LiteBlock {
pub fn new() -> Self {
Self { block: vec![] }
}
pub fn push(&mut self, pipeline: LiteStatement) {
self.block.push(pipeline);
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut curr_token = 0;
let mut block = LiteBlock::new();
let mut curr_pipeline = LiteStatement::new();
let mut curr_command = LiteCommand::new();
while let Some(token) = tokens.get(curr_token) {
match &token.contents {
TokenContents::Item => curr_command.push(token.span),
TokenContents::Pipe => {
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
curr_command = LiteCommand::new();
}
}
TokenContents::Eol | TokenContents::Semicolon => {
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
}
curr_command = LiteCommand::new();
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
curr_pipeline = LiteStatement::new();
}
TokenContents::Comment => {
curr_command.comments.push(token.span);
}
}
curr_token += 1;
}
if !curr_command.is_empty() {
curr_pipeline.push(curr_command);
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
(block, None)
}
#[cfg(test)]
mod tests {
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
let (output, err) = lex(input, 0, &[], &[]);
if let Some(err) = err {
return Err(err);
}
let (output, err) = lite_parse(&output);
if let Some(err) = err {
return Err(err);
}
Ok(output)
}
#[test]
fn comment_before() -> Result<(), ParseError> {
let input = b"# this is a comment\ndef foo bar";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn comment_beside() -> Result<(), ParseError> {
let input = b"def foo bar # this is a comment";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn comments_stack() -> Result<(), ParseError> {
let input = b"# this is a comment\n# another comment\ndef foo bar ";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 2);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
#[test]
fn separated_comments_dont_stack() -> Result<(), ParseError> {
let input = b"# this is a comment\n\n# another comment\ndef foo bar ";
let lite_block = lite_parse_helper(input)?;
assert_eq!(lite_block.block.len(), 1);
assert_eq!(lite_block.block[0].commands.len(), 1);
assert_eq!(lite_block.block[0].commands[0].comments.len(), 1);
assert_eq!(
lite_block.block[0].commands[0].comments[0],
Span { start: 21, end: 39 }
);
assert_eq!(lite_block.block[0].commands[0].parts.len(), 3);
Ok(())
}
}
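
A sketch of the full lex plus lite_parse pass, recovering the text of each part through its span (illustrative, not part of this commit; crate assumed to be consumed as nu_parser):

use nu_parser::{lex, lite_parse};

fn main() {
    let source = b"ls | sort-by name; pwd";

    let (tokens, err) = lex(source, 0, &[], &[]);
    assert!(err.is_none());
    let (block, err) = lite_parse(&tokens);
    assert!(err.is_none());

    // Two statements: `ls | sort-by name` and `pwd`; the first is a
    // two-command pipeline.
    assert_eq!(block.block.len(), 2);
    assert_eq!(block.block[0].commands.len(), 2);

    for statement in &block.block {
        for command in &statement.commands {
            let words: Vec<_> = command
                .parts
                .iter()
                .map(|span| String::from_utf8_lossy(&source[span.start..span.end]))
                .collect();
            println!("{:?}", words); // ["ls"], ["sort-by", "name"], ["pwd"]
        }
    }
}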

View File

@@ -0,0 +1,26 @@
use crate::parser_state::Type;
pub use crate::Span;
#[derive(Debug)]
pub enum ParseError {
ExtraTokens(Span),
ExtraPositional(Span),
UnexpectedEof(String, Span),
Unclosed(String, Span),
UnknownStatement(Span),
Mismatch(String, Span),
MultipleRestParams(Span),
VariableNotFound(Span),
UnknownCommand(Span),
NonUtf8(Span),
UnknownFlag(Span),
UnknownType(Span),
MissingFlagParam(Span),
ShortFlagBatchCantTakeArg(Span),
MissingPositional(String, Span),
MissingType(Span),
TypeMismatch(Type, Span),
MissingRequiredFlag(String, Span),
IncompleteMathExpression(Span),
UnknownState(String, Span),
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,591 @@
use crate::{parser::Block, Declaration, Span};
use core::panic;
use std::{collections::HashMap, fmt::Display, slice::Iter};
#[derive(Debug)]
pub struct ParserState {
files: Vec<(String, usize, usize)>,
file_contents: Vec<u8>,
vars: Vec<Type>,
decls: Vec<Declaration>,
blocks: Vec<Block>,
scope: Vec<ScopeFrame>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type {
Int,
Float,
Bool,
String,
Block,
ColumnPath,
Duration,
FilePath,
Filesize,
List(Box<Type>),
Number,
Nothing,
Table,
Unknown,
}
impl Display for Type {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Type::Block => write!(f, "block"),
Type::Bool => write!(f, "bool"),
Type::ColumnPath => write!(f, "column path"),
Type::Duration => write!(f, "duration"),
Type::FilePath => write!(f, "filepath"),
Type::Filesize => write!(f, "filesize"),
Type::Float => write!(f, "float"),
Type::Int => write!(f, "int"),
Type::List(l) => write!(f, "list<{}>", l),
Type::Nothing => write!(f, "nothing"),
Type::Number => write!(f, "number"),
Type::String => write!(f, "string"),
Type::Table => write!(f, "table"),
Type::Unknown => write!(f, "unknown"),
}
}
}
pub type VarId = usize;
pub type DeclId = usize;
pub type BlockId = usize;
#[derive(Debug)]
struct ScopeFrame {
vars: HashMap<Vec<u8>, VarId>,
decls: HashMap<Vec<u8>, DeclId>,
aliases: HashMap<Vec<u8>, Vec<Span>>,
}
impl ScopeFrame {
pub fn new() -> Self {
Self {
vars: HashMap::new(),
decls: HashMap::new(),
aliases: HashMap::new(),
}
}
}
impl Default for ParserState {
fn default() -> Self {
Self::new()
}
}
impl ParserState {
pub fn new() -> Self {
Self {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
blocks: vec![],
scope: vec![ScopeFrame::new()],
}
}
pub fn merge_delta(this: &mut ParserState, mut delta: ParserDelta) {
// Take the mutable reference and extend the permanent state from the working set
this.files.extend(delta.files);
this.file_contents.extend(delta.file_contents);
this.decls.extend(delta.decls);
this.vars.extend(delta.vars);
this.blocks.extend(delta.blocks);
if let Some(last) = this.scope.last_mut() {
let first = delta.scope.remove(0);
for item in first.decls.into_iter() {
last.decls.insert(item.0, item.1);
}
for item in first.vars.into_iter() {
last.vars.insert(item.0, item.1);
}
for item in first.aliases.into_iter() {
last.aliases.insert(item.0, item.1);
}
}
}
pub fn num_files(&self) -> usize {
self.files.len()
}
pub fn num_vars(&self) -> usize {
self.vars.len()
}
pub fn num_decls(&self) -> usize {
self.decls.len()
}
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
pub fn print_vars(&self) {
for var in self.vars.iter().enumerate() {
println!("var{}: {:?}", var.0, var.1);
}
}
pub fn print_decls(&self) {
for decl in self.decls.iter().enumerate() {
println!("decl{}: {:?}", decl.0, decl.1);
}
}
pub fn print_blocks(&self) {
for block in self.blocks.iter().enumerate() {
println!("block{}: {:?}", block.0, block.1);
}
}
pub fn find_decl(&self, name: &[u8]) -> Option<DeclId> {
for scope in self.scope.iter().rev() {
if let Some(decl_id) = scope.decls.get(name) {
return Some(*decl_id);
}
}
None
}
pub fn get_var(&self, var_id: VarId) -> &Type {
self.vars
.get(var_id)
.expect("internal error: missing variable")
}
pub fn get_decl(&self, decl_id: DeclId) -> &Declaration {
self.decls
.get(decl_id)
.expect("internal error: missing declaration")
}
pub fn get_block(&self, block_id: BlockId) -> &Block {
self.blocks
.get(block_id)
.expect("internal error: missing block")
}
pub fn next_span_start(&self) -> usize {
self.file_contents.len()
}
pub fn files(&self) -> Iter<(String, usize, usize)> {
self.files.iter()
}
pub fn get_filename(&self, file_id: usize) -> String {
for file in self.files.iter().enumerate() {
if file.0 == file_id {
return file.1 .0.clone();
}
}
"<unknown>".into()
}
pub fn get_file_source(&self, file_id: usize) -> String {
for file in self.files.iter().enumerate() {
if file.0 == file_id {
let output =
String::from_utf8_lossy(&self.file_contents[file.1 .1..file.1 .2]).to_string();
return output;
}
}
"<unknown>".into()
}
#[allow(unused)]
pub(crate) fn add_file(&mut self, filename: String, contents: Vec<u8>) -> usize {
let next_span_start = self.next_span_start();
self.file_contents.extend(&contents);
let next_span_end = self.next_span_start();
self.files.push((filename, next_span_start, next_span_end));
self.num_files() - 1
}
}
#[derive(Debug)]
pub struct ParserWorkingSet<'a> {
permanent_state: &'a ParserState,
pub delta: ParserDelta,
}
#[derive(Debug)]
pub struct ParserDelta {
files: Vec<(String, usize, usize)>,
pub(crate) file_contents: Vec<u8>,
vars: Vec<Type>, // indexed by VarId
decls: Vec<Declaration>, // indexed by DeclId
blocks: Vec<Block>, // indexed by BlockId
scope: Vec<ScopeFrame>,
}
impl ParserDelta {
pub fn num_files(&self) -> usize {
self.files.len()
}
pub fn num_decls(&self) -> usize {
self.decls.len()
}
pub fn num_blocks(&self) -> usize {
self.blocks.len()
}
pub fn enter_scope(&mut self) {
self.scope.push(ScopeFrame::new());
}
pub fn exit_scope(&mut self) {
self.scope.pop();
}
}
impl<'a> ParserWorkingSet<'a> {
pub fn new(permanent_state: &'a ParserState) -> Self {
Self {
delta: ParserDelta {
files: vec![],
file_contents: vec![],
vars: vec![],
decls: vec![],
blocks: vec![],
scope: vec![ScopeFrame::new()],
},
permanent_state,
}
}
pub fn num_files(&self) -> usize {
self.delta.num_files() + self.permanent_state.num_files()
}
pub fn num_decls(&self) -> usize {
self.delta.num_decls() + self.permanent_state.num_decls()
}
pub fn num_blocks(&self) -> usize {
self.delta.num_blocks() + self.permanent_state.num_blocks()
}
pub fn add_decl(&mut self, decl: Declaration) -> DeclId {
let name = decl.signature.name.as_bytes().to_vec();
self.delta.decls.push(decl);
let decl_id = self.num_decls() - 1;
let scope_frame = self
.delta
.scope
.last_mut()
.expect("internal error: missing required scope frame");
scope_frame.decls.insert(name, decl_id);
decl_id
}
pub fn add_block(&mut self, block: Block) -> BlockId {
self.delta.blocks.push(block);
self.num_blocks() - 1
}
pub fn next_span_start(&self) -> usize {
self.permanent_state.next_span_start() + self.delta.file_contents.len()
}
pub fn global_span_offset(&self) -> usize {
self.permanent_state.next_span_start()
}
pub fn files(&'a self) -> impl Iterator<Item = &(String, usize, usize)> {
self.permanent_state.files().chain(self.delta.files.iter())
}
pub fn get_filename(&self, file_id: usize) -> String {
for file in self.files().enumerate() {
if file.0 == file_id {
return file.1 .0.clone();
}
}
"<unknown>".into()
}
pub fn get_file_source(&self, file_id: usize) -> String {
for file in self.files().enumerate() {
if file.0 == file_id {
let output = String::from_utf8_lossy(self.get_span_contents(Span {
start: file.1 .1,
end: file.1 .2,
}))
.to_string();
return output;
}
}
"<unknown>".into()
}
pub fn add_file(&mut self, filename: String, contents: &[u8]) -> usize {
let next_span_start = self.next_span_start();
self.delta.file_contents.extend(contents);
let next_span_end = self.next_span_start();
self.delta
.files
.push((filename, next_span_start, next_span_end));
self.num_files() - 1
}
pub fn get_span_contents(&self, span: Span) -> &[u8] {
let permanent_end = self.permanent_state.next_span_start();
if permanent_end <= span.start {
&self.delta.file_contents[(span.start - permanent_end)..(span.end - permanent_end)]
} else {
&self.permanent_state.file_contents[span.start..span.end]
}
}
pub fn enter_scope(&mut self) {
self.delta.enter_scope();
}
pub fn exit_scope(&mut self) {
self.delta.exit_scope();
}
pub fn find_decl(&self, name: &[u8]) -> Option<DeclId> {
for scope in self.delta.scope.iter().rev() {
if let Some(decl_id) = scope.decls.get(name) {
return Some(*decl_id);
}
}
for scope in self.permanent_state.scope.iter().rev() {
if let Some(decl_id) = scope.decls.get(name) {
return Some(*decl_id);
}
}
None
}
pub fn update_decl(&mut self, decl_id: usize, block: Option<BlockId>) {
let decl = self.get_decl_mut(decl_id);
decl.body = block;
}
pub fn contains_decl_partial_match(&self, name: &[u8]) -> bool {
for scope in self.delta.scope.iter().rev() {
for decl in &scope.decls {
if decl.0.starts_with(name) {
return true;
}
}
}
for scope in self.permanent_state.scope.iter().rev() {
for decl in &scope.decls {
if decl.0.starts_with(name) {
return true;
}
}
}
false
}
pub fn next_var_id(&self) -> VarId {
let num_permanent_vars = self.permanent_state.num_vars();
num_permanent_vars + self.delta.vars.len()
}
pub fn find_variable(&self, name: &[u8]) -> Option<VarId> {
for scope in self.delta.scope.iter().rev() {
if let Some(var_id) = scope.vars.get(name) {
return Some(*var_id);
}
}
for scope in self.permanent_state.scope.iter().rev() {
if let Some(var_id) = scope.vars.get(name) {
return Some(*var_id);
}
}
None
}
pub fn find_alias(&self, name: &[u8]) -> Option<&[Span]> {
for scope in self.delta.scope.iter().rev() {
if let Some(spans) = scope.aliases.get(name) {
return Some(spans);
}
}
for scope in self.permanent_state.scope.iter().rev() {
if let Some(spans) = scope.aliases.get(name) {
return Some(spans);
}
}
None
}
pub fn add_variable(&mut self, mut name: Vec<u8>, ty: Type) -> VarId {
let next_id = self.next_var_id();
// correct name if necessary
if !name.starts_with(b"$") {
name.insert(0, b'$');
}
let last = self
.delta
.scope
.last_mut()
.expect("internal error: missing stack frame");
last.vars.insert(name, next_id);
self.delta.vars.push(ty);
next_id
}
pub fn add_alias(&mut self, name: Vec<u8>, replacement: Vec<Span>) {
let last = self
.delta
.scope
.last_mut()
.expect("internal error: missing stack frame");
last.aliases.insert(name, replacement);
}
pub fn set_variable_type(&mut self, var_id: VarId, ty: Type) {
let num_permanent_vars = self.permanent_state.num_vars();
if var_id < num_permanent_vars {
panic!("Internal error: attempted to set into permanent state from working set")
} else {
self.delta.vars[var_id - num_permanent_vars] = ty;
}
}
pub fn get_variable(&self, var_id: VarId) -> &Type {
let num_permanent_vars = self.permanent_state.num_vars();
if var_id < num_permanent_vars {
self.permanent_state.get_var(var_id)
} else {
self.delta
.vars
.get(var_id - num_permanent_vars)
.expect("internal error: missing variable")
}
}
pub fn get_decl(&self, decl_id: DeclId) -> &Declaration {
let num_permanent_decls = self.permanent_state.num_decls();
if decl_id < num_permanent_decls {
self.permanent_state.get_decl(decl_id)
} else {
self.delta
.decls
.get(decl_id - num_permanent_decls)
.expect("internal error: missing declaration")
}
}
pub fn get_decl_mut(&mut self, decl_id: DeclId) -> &mut Declaration {
let num_permanent_decls = self.permanent_state.num_decls();
if decl_id < num_permanent_decls {
panic!("internal error: can only mutate declarations in working set")
} else {
self.delta
.decls
.get_mut(decl_id - num_permanent_decls)
.expect("internal error: missing declaration")
}
}
pub fn get_block(&self, block_id: BlockId) -> &Block {
let num_permanent_blocks = self.permanent_state.num_blocks();
if block_id < num_permanent_blocks {
self.permanent_state.get_block(block_id)
} else {
self.delta
.blocks
.get(block_id - num_permanent_blocks)
.expect("internal error: missing block")
}
}
pub fn render(self) -> ParserDelta {
self.delta
}
}
#[cfg(test)]
mod parser_state_tests {
use super::*;
#[test]
fn add_file_gives_id() {
let parser_state = ParserState::new();
let mut parser_state = ParserWorkingSet::new(&parser_state);
let id = parser_state.add_file("test.nu".into(), &[]);
assert_eq!(id, 0);
}
#[test]
fn add_file_gives_id_including_parent() {
let mut parser_state = ParserState::new();
let parent_id = parser_state.add_file("test.nu".into(), vec![]);
let mut working_set = ParserWorkingSet::new(&parser_state);
let working_set_id = working_set.add_file("child.nu".into(), &[]);
assert_eq!(parent_id, 0);
assert_eq!(working_set_id, 1);
}
#[test]
fn merge_states() {
let mut parser_state = ParserState::new();
parser_state.add_file("test.nu".into(), vec![]);
let delta = {
let mut working_set = ParserWorkingSet::new(&parser_state);
working_set.add_file("child.nu".into(), &[]);
working_set.render()
};
ParserState::merge_delta(&mut parser_state, delta);
assert_eq!(parser_state.num_files(), 2);
assert_eq!(&parser_state.files[0].0, "test.nu");
assert_eq!(&parser_state.files[1].0, "child.nu");
}
}
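
A sketch of the permanent-state / delta flow this refactor introduces: parsing works against an immutable borrow of ParserState, accumulates changes into a ParserDelta, and merges them back afterwards (example only, not part of this commit; crate assumed to be consumed as nu_parser):

use nu_parser::{ParserState, ParserWorkingSet, Signature};

fn main() {
    let mut state = ParserState::new();

    // Work against an immutable borrow of the permanent state...
    let delta = {
        let mut working_set = ParserWorkingSet::new(&state);
        let decl_id = working_set.add_decl(Signature::build("my-command").into());
        assert_eq!(decl_id, 0);
        working_set.render()
    };

    // ...then commit the accumulated delta back into the permanent state.
    ParserState::merge_delta(&mut state, delta);
    assert!(state.find_decl(b"my-command").is_some());
}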

View File

@@ -0,0 +1,309 @@
use crate::{parser::SyntaxShape, Declaration, VarId};
#[derive(Debug, Clone)]
pub struct Flag {
pub long: String,
pub short: Option<char>,
pub arg: Option<SyntaxShape>,
pub required: bool,
pub desc: String,
// For custom commands
pub var_id: Option<VarId>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PositionalArg {
pub name: String,
pub desc: String,
pub shape: SyntaxShape,
// For custom commands
pub var_id: Option<VarId>,
}
#[derive(Clone, Debug)]
pub struct Signature {
pub name: String,
pub usage: String,
pub extra_usage: String,
pub required_positional: Vec<PositionalArg>,
pub optional_positional: Vec<PositionalArg>,
pub rest_positional: Option<PositionalArg>,
pub named: Vec<Flag>,
pub is_filter: bool,
}
impl PartialEq for Signature {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.usage == other.usage
&& self.required_positional == other.required_positional
&& self.optional_positional == other.optional_positional
&& self.rest_positional == other.rest_positional
&& self.is_filter == other.is_filter
}
}
impl Eq for Signature {}
impl Signature {
pub fn new(name: impl Into<String>) -> Signature {
Signature {
name: name.into(),
usage: String::new(),
extra_usage: String::new(),
required_positional: vec![],
optional_positional: vec![],
rest_positional: None,
named: vec![],
is_filter: false,
}
}
pub fn build(name: impl Into<String>) -> Signature {
Signature::new(name.into())
}
/// Add a description to the signature
pub fn desc(mut self, usage: impl Into<String>) -> Signature {
self.usage = usage.into();
self
}
/// Add a required positional argument to the signature
pub fn required(
mut self,
name: impl Into<String>,
shape: impl Into<SyntaxShape>,
desc: impl Into<String>,
) -> Signature {
self.required_positional.push(PositionalArg {
name: name.into(),
desc: desc.into(),
shape: shape.into(),
var_id: None,
});
self
}
/// Add an optional positional argument to the signature
pub fn optional(
mut self,
name: impl Into<String>,
shape: impl Into<SyntaxShape>,
desc: impl Into<String>,
) -> Signature {
self.optional_positional.push(PositionalArg {
name: name.into(),
desc: desc.into(),
shape: shape.into(),
var_id: None,
});
self
}
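/// Add a rest positional argument to the signature, collecting any remaining positionals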
pub fn rest(mut self, shape: impl Into<SyntaxShape>, desc: impl Into<String>) -> Signature {
self.rest_positional = Some(PositionalArg {
name: "rest".into(),
desc: desc.into(),
shape: shape.into(),
var_id: None,
});
self
}
/// Add an optional named flag argument to the signature
pub fn named(
mut self,
name: impl Into<String>,
shape: impl Into<SyntaxShape>,
desc: impl Into<String>,
short: Option<char>,
) -> Signature {
let s = short.map(|c| {
debug_assert!(!self.get_shorts().contains(&c));
c
});
self.named.push(Flag {
long: name.into(),
short: s,
arg: Some(shape.into()),
required: false,
desc: desc.into(),
var_id: None,
});
self
}
/// Add a required named flag argument to the signature
pub fn required_named(
mut self,
name: impl Into<String>,
shape: impl Into<SyntaxShape>,
desc: impl Into<String>,
short: Option<char>,
) -> Signature {
let s = short.map(|c| {
debug_assert!(!self.get_shorts().contains(&c));
c
});
self.named.push(Flag {
long: name.into(),
short: s,
arg: Some(shape.into()),
required: true,
desc: desc.into(),
var_id: None,
});
self
}
/// Add a switch to the signature
pub fn switch(
mut self,
name: impl Into<String>,
desc: impl Into<String>,
short: Option<char>,
) -> Signature {
let s = short.map(|c| {
debug_assert!(
!self.get_shorts().contains(&c),
"There may be duplicate short flags, such as -h"
);
c
});
self.named.push(Flag {
long: name.into(),
short: s,
arg: None,
required: false,
desc: desc.into(),
var_id: None,
});
self
}
/// Get list of the short-hand flags
pub fn get_shorts(&self) -> Vec<char> {
let mut shorts = Vec::new();
for Flag { short, .. } in &self.named {
if let Some(c) = short {
shorts.push(*c);
}
}
shorts
}
pub fn get_positional(&self, position: usize) -> Option<PositionalArg> {
if position < self.required_positional.len() {
self.required_positional.get(position).cloned()
} else if position < (self.required_positional.len() + self.optional_positional.len()) {
self.optional_positional
.get(position - self.required_positional.len())
.cloned()
} else {
self.rest_positional.clone()
}
}
pub fn num_positionals(&self) -> usize {
let mut total = self.required_positional.len() + self.optional_positional.len();
for positional in &self.required_positional {
if let SyntaxShape::Keyword(..) = positional.shape {
// Keywords have a required argument, so account for that
total += 1;
}
}
for positional in &self.optional_positional {
if let SyntaxShape::Keyword(..) = positional.shape {
// Keywords have a required argument, so account for that
total += 1;
}
}
total
}
pub fn num_positionals_after(&self, idx: usize) -> usize {
let mut total = 0;
let mut curr = 0;
for positional in &self.required_positional {
match positional.shape {
SyntaxShape::Keyword(..) => {
// Keywords have a required argument, so account for that
if curr > idx {
total += 2;
}
}
_ => {
if curr > idx {
total += 1;
}
}
}
curr += 1;
}
for positional in &self.optional_positional {
match positional.shape {
SyntaxShape::Keyword(..) => {
// Keywords have a required argument, so account for that
if curr > idx {
total += 2;
}
}
_ => {
if curr > idx {
total += 1;
}
}
}
curr += 1;
}
total
}
/// Find the matching long flag
pub fn get_long_flag(&self, name: &str) -> Option<Flag> {
for flag in &self.named {
if flag.long == name {
return Some(flag.clone());
}
}
None
}
/// Find the matching short flag
pub fn get_short_flag(&self, short: char) -> Option<Flag> {
for flag in &self.named {
if let Some(short_flag) = &flag.short {
if *short_flag == short {
return Some(flag.clone());
}
}
}
None
}
}
impl From<Box<Signature>> for Declaration {
fn from(val: Box<Signature>) -> Self {
Declaration {
signature: val,
body: None,
}
}
}
impl From<Signature> for Declaration {
fn from(val: Signature) -> Self {
Declaration {
signature: Box::new(val),
body: None,
}
}
}
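
A sketch of the builder above in use; the SyntaxShape variant names are assumptions, since parser.rs is suppressed in this view (example only, not part of this commit):

use nu_parser::{Declaration, Signature, SyntaxShape};

fn main() {
    // Hypothetical command signature; SyntaxShape::String and SyntaxShape::Int
    // are assumed variant names.
    let sig = Signature::build("open")
        .desc("Open a file")
        .required("path", SyntaxShape::String, "the path to open")
        .optional("limit", SyntaxShape::Int, "maximum number of bytes to read")
        .switch("raw", "read the file as raw bytes", Some('r'))
        .named("encoding", SyntaxShape::String, "text encoding to assume", Some('e'));

    assert_eq!(sig.num_positionals(), 2);
    assert!(sig.get_long_flag("raw").is_some());
    assert_eq!(sig.get_short_flag('e').map(|f| f.long), Some("encoding".to_string()));

    // A Signature converts straight into a (body-less) Declaration.
    let decl: Declaration = sig.into();
    assert!(decl.body.is_none());
}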

View File

@@ -0,0 +1,22 @@
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
pub start: usize,
pub end: usize,
}
impl Span {
pub fn new(start: usize, end: usize) -> Span {
Span { start, end }
}
pub fn unknown() -> Span {
Span { start: 0, end: 0 }
}
pub fn offset(&self, offset: usize) -> Span {
Span {
start: self.start - offset,
end: self.end - offset,
}
}
}
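
Spans index into the concatenated file contents held by the parser state; a quick sketch of how offset() turns a global span into the file-local range that the codespan Files impl expects (example only, not part of this commit):

use nu_parser::Span;

fn main() {
    // A span is a pair of byte offsets into the concatenated file contents.
    let global = Span::new(105, 110);
    // If the containing file starts at global offset 100, this yields the
    // file-local range.
    assert_eq!(global.offset(100), Span::new(5, 10));
}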

View File

@@ -0,0 +1,62 @@
use crate::{parser::Operator, parser_state::Type, Expr, Expression, ParseError, ParserWorkingSet};
impl<'a> ParserWorkingSet<'a> {
pub fn math_result_type(
&self,
lhs: &mut Expression,
op: &mut Expression,
rhs: &mut Expression,
) -> (Type, Option<ParseError>) {
match &op.expr {
Expr::Operator(operator) => match operator {
Operator::Equal => (Type::Bool, None),
Operator::Multiply => match (&lhs.ty, &rhs.ty) {
(Type::Int, Type::Int) => (Type::Int, None),
(Type::Unknown, _) => (Type::Unknown, None),
(_, Type::Unknown) => (Type::Unknown, None),
_ => {
*op = Expression::garbage(op.span);
(
Type::Unknown,
Some(ParseError::Mismatch("math".into(), op.span)),
)
}
},
Operator::Plus => match (&lhs.ty, &rhs.ty) {
(Type::Int, Type::Int) => (Type::Int, None),
(Type::String, Type::String) => (Type::String, None),
(Type::Unknown, _) => (Type::Unknown, None),
(_, Type::Unknown) => (Type::Unknown, None),
(Type::Int, _) => {
*rhs = Expression::garbage(rhs.span);
(
Type::Unknown,
Some(ParseError::Mismatch("int".into(), rhs.span)),
)
}
_ => {
*op = Expression::garbage(op.span);
(
Type::Unknown,
Some(ParseError::Mismatch("math".into(), op.span)),
)
}
},
_ => {
*op = Expression::garbage(op.span);
(
Type::Unknown,
Some(ParseError::Mismatch("math".into(), op.span)),
)
}
},
_ => {
*op = Expression::garbage(op.span);
(
Type::Unknown,
Some(ParseError::Mismatch("operator".into(), op.span)),
)
}
}
}
}