Better errors and more fleshed out semantics

This commit is contained in:
Yehuda Katz
2019-05-29 21:19:46 -07:00
parent 8f5d959692
commit b7d15c2afd
14 changed files with 1844 additions and 1793 deletions

View File

@ -1,7 +1,8 @@
use crate::parser::lexer::SpannedToken;
use derive_new::new;
use getset::Getters;
use std::str::FromStr;
use serde_derive::{Deserialize, Serialize};
use std::str::FromStr;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
pub enum Operator {
@ -65,9 +66,8 @@ impl Expression {
/// Returns the text of this expression when it is a string leaf or a bare
/// leaf, and `None` for every other kind of expression.
///
/// The returned `String` is always a fresh copy produced by `to_string()`.
crate fn as_string(&self) -> Option<String> {
match self {
Expression::Leaf(Leaf::String(s)) | Expression::Leaf(Leaf::Bare(s)) => {
Some(s.to_string())
}
Expression::Leaf(Leaf::String(s)) => Some(s.to_string()),
Expression::Leaf(Leaf::Bare(path)) => Some(path.to_string()),
// Anything that is not a string/bare leaf has no string form here.
_ => None,
}
}
@ -135,21 +135,58 @@ impl Variable {
}
}
/// Parses a variable name into a `Variable`.
///
/// The names `it`, `true` and `false` map to their dedicated variants; any
/// other name is stored verbatim in `Variable::Other`. Every input is
/// accepted, so the `Err` case is never produced.
impl FromStr for Variable {
    type Err = ();

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let variable = match input {
            "it" => Variable::It,
            "true" => Variable::True,
            "false" => Variable::False,
            name => Variable::Other(name.to_string()),
        };

        Ok(variable)
    }
}
/// A bare word made of a leading segment plus zero or more trailing segments.
///
/// Its string form is the segments joined with `.` (see `bare_string`).
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct BarePath {
// First segment of the path.
head: String,
// Remaining segments, in order; may be empty.
tail: Vec<String>,
}
impl BarePath {
    /// Builds a path from lexer tokens, copying the text of `head` and of
    /// every token in `tail` (order preserved) into owned strings.
    crate fn from_tokens(head: SpannedToken, tail: Vec<SpannedToken>) -> BarePath {
        let head = head.to_string();
        let tail = tail.iter().map(|segment| segment.to_string()).collect();

        BarePath { head, tail }
    }

    /// Renders the path as its segments joined with `.`.
    crate fn to_string(&self) -> String {
        bare_string(&self.head, &self.tail)
    }
}
/// A terminal value carried by `Expression::Leaf`: a quoted string, a bare
/// word/path, a boolean, or an integer.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum Leaf {
String(String),
Bare(String),
Bare(BarePath),
#[allow(unused)]
Boolean(bool),
Int(i64),
}
/// Joins `head` and every segment of `tail` with `.` into a single string.
///
/// With an empty `tail` the result is just `head`. Segments are copied as-is;
/// no escaping or validation is performed.
///
/// The parameters are borrowed slices, so existing call sites that pass
/// `&String` / `&Vec<String>` keep working through deref coercion, and the
/// output is built in one pre-sized buffer instead of cloning every segment
/// into a temporary vector first.
pub(crate) fn bare_string(head: &str, tail: &[String]) -> String {
    // One extra byte per tail segment for its leading '.'.
    let capacity = head.len() + tail.iter().map(|segment| segment.len() + 1).sum::<usize>();

    let mut out = String::with_capacity(capacity);
    out.push_str(head);
    for segment in tail {
        out.push('.');
        out.push_str(segment);
    }
    out
}
impl Leaf {
fn print(&self) -> String {
match self {
Leaf::String(s) => format!("{:?}", s),
Leaf::Bare(s) => format!("{}", s),
Leaf::Bare(path) => format!("{}", path.to_string()),
Leaf::Boolean(b) => format!("{}", b),
Leaf::Int(i) => format!("{}", i),
}

View File

@ -1,5 +1,6 @@
use crate::errors::ShellError;
use derive_new::new;
use log::debug;
use logos_derive::Logos;
use std::ops::Range;
@ -68,17 +69,57 @@ crate enum TopToken {
#[token = "!="]
OpNeq,
#[token = "--"]
DashDash,
#[token = "-"]
Dash,
#[regex = r"\s+"]
Whitespace,
}
impl TopToken {
fn to_token(&self) -> Option<Token> {
use TopToken::*;
let result = match self {
END => return None,
Num => Token::Num,
SQString => Token::SQString,
DQString => Token::DQString,
Size => Token::Size,
Dollar => Token::Dollar,
Bare => Token::Bare,
Pipe => Token::Pipe,
Dot => Token::Dot,
OpenBrace => Token::OpenBrace,
CloseBrace => Token::CloseBrace,
OpenParen => Token::OpenParen,
CloseParen => Token::CloseParen,
OpGt => Token::OpGt,
OpLt => Token::OpLt,
OpGte => Token::OpGte,
OpLte => Token::OpLte,
OpEq => Token::OpEq,
OpNeq => Token::OpNeq,
DashDash => Token::DashDash,
Dash => Token::Dash,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
/// Logs the lexer's extras and switches the lexer state to
/// `LexerStateName::Var`.
// NOTE(review): presumably registered as a logos callback on the `$` token — confirm at the `TopToken` definition.
fn start_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
println!("start_variable EXTRAS={:?}", lex.extras);
debug!("start_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::Var;
}
/// Logs the lexer's extras and switches the lexer state to
/// `LexerStateName::AfterVariableToken`.
// NOTE(review): the log label says "end_variable", not "end_bare_variable" — looks copy-pasted; confirm before relying on it in logs.
fn end_bare_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
println!("end_variable EXTRAS={:?}", lex.extras);
debug!("end_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -96,8 +137,22 @@ crate enum VariableToken {
Variable,
}
impl VariableToken {
fn to_token(&self) -> Option<Token> {
use VariableToken::*;
let result = match self {
END => return None,
Variable => Token::Variable,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
/// Logs the lexer's extras and switches the lexer state to
/// `LexerStateName::AfterVariableToken`.
fn end_variable<S>(lex: &mut logos::Lexer<VariableToken, S>) {
println!("end_variable EXTRAS={:?}", lex.extras);
debug!("end_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -119,13 +174,28 @@ crate enum AfterVariableToken {
Whitespace,
}
impl AfterVariableToken {
fn to_token(&self) -> Option<Token> {
use AfterVariableToken::*;
let result = match self {
END => return None,
Dot => Token::Dot,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
/// Logs the lexer's extras and switches the lexer state to
/// `LexerStateName::AfterMemberDot`.
// NOTE(review): the log label says "start_variable", not "start_member" — looks copy-pasted; confirm before relying on it in logs.
fn start_member<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
println!("start_variable EXTRAS={:?}", lex.extras);
debug!("start_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterMemberDot;
}
/// Logs the lexer's extras and returns the lexer state to
/// `LexerStateName::Top`.
fn terminate_variable<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
println!("terminate_variable EXTRAS={:?}", lex.extras);
debug!("terminate_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::Top;
}
@ -146,8 +216,23 @@ crate enum AfterMemberDot {
Whitespace,
}
impl AfterMemberDot {
fn to_token(&self) -> Option<Token> {
use AfterMemberDot::*;
let result = match self {
END => return None,
Member => Token::Member,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
/// Logs the lexer's extras and switches the lexer state to
/// `LexerStateName::AfterVariableToken`.
fn finish_member<S>(lex: &mut logos::Lexer<AfterMemberDot, S>) {
println!("finish_member EXTRAS={:?}", lex.extras);
debug!("finish_member EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -175,43 +260,117 @@ impl logos::Extras for LexerState {
fn on_whitespace(&mut self, _byte: u8) {}
}
/// A pair of `usize` offsets (`start`, `end`) identifying a region of source
/// text.
// NOTE(review): offsets look like byte offsets with an exclusive `end` (they are built from lexer ranges) — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
pub struct Span {
start: usize,
end: usize,
// source: &'source str,
}
/// Builds a `Span` from a `(start, end)` pair.
impl From<(usize, usize)> for Span {
    fn from(input: (usize, usize)) -> Span {
        let (start, end) = input;
        Span { start, end }
    }
}
/// Builds a `Span` by copying the bounds of a borrowed `Range<usize>`.
impl From<&std::ops::Range<usize>> for Span {
    fn from(input: &std::ops::Range<usize>) -> Span {
        let &std::ops::Range { start, end } = input;
        Span { start, end }
    }
}
impl Span {
    /// Creates a `Span` with the same `start` and `end` as `range`.
    fn new(range: &Range<usize>) -> Span {
        let (start, end) = (range.start, range.end);
        Span { start, end }
    }
}
/// Lets `Span` be used as the span type of `language_reporting` diagnostics.
impl language_reporting::ReportingSpan for Span {
    /// Copy of this span with `start` replaced.
    fn with_start(&self, start: usize) -> Self {
        Span { start, ..*self }
    }

    /// Copy of this span with `end` replaced.
    fn with_end(&self, end: usize) -> Self {
        Span { end, ..*self }
    }

    /// Start offset of the span.
    fn start(&self) -> usize {
        self.start
    }

    /// End offset of the span.
    fn end(&self) -> usize {
        self.end
    }
}
#[derive(new, Debug, Clone, Eq, PartialEq)]
crate struct SpannedToken<'source, T> {
span: std::ops::Range<usize>,
slice: &'source str,
token: T,
pub struct SpannedToken<'source> {
crate span: Span,
crate slice: &'source str,
crate token: Token,
}
impl SpannedToken<'source> {
    /// Owned copy of the source text this token was lexed from.
    crate fn to_string(&self) -> String {
        String::from(self.slice)
    }

    /// Borrowed view of the source text this token was lexed from.
    crate fn as_slice(&self) -> &str {
        self.slice
    }
}
#[derive(Debug, Clone, Eq, PartialEq)]
crate enum Token<'source> {
Top(SpannedToken<'source, TopToken>),
Var(SpannedToken<'source, VariableToken>),
Dot(SpannedToken<'source, &'source str>),
Member(SpannedToken<'source, &'source str>),
Whitespace(SpannedToken<'source, &'source str>),
pub enum Token {
Variable,
Dot,
Member,
Num,
SQString,
DQString,
Size,
Dollar,
Bare,
Pipe,
OpenBrace,
CloseBrace,
OpenParen,
CloseParen,
OpGt,
OpLt,
OpGte,
OpLte,
OpEq,
OpNeq,
Dash,
DashDash,
Whitespace,
}
impl Token<'source> {
crate fn range(&self) -> &Range<usize> {
match self {
Token::Top(spanned) => &spanned.span,
Token::Var(spanned) => &spanned.span,
Token::Dot(spanned) => &spanned.span,
Token::Member(spanned) => &spanned.span,
Token::Whitespace(spanned) => &spanned.span,
}
}
crate fn slice(&self) -> &str {
match self {
Token::Top(spanned) => spanned.slice,
Token::Var(spanned) => spanned.slice,
Token::Dot(spanned) => spanned.slice,
Token::Member(spanned) => spanned.slice,
Token::Whitespace(spanned) => spanned.slice,
}
}
}
// #[derive(Debug, Clone, Eq, PartialEq)]
// crate enum Token<'source> {
// Top(SpannedToken<'source, TopToken>),
// Var(SpannedToken<'source, VariableToken>),
// Dot(SpannedToken<'source, &'source str>),
// Member(SpannedToken<'source, &'source str>),
// Whitespace(SpannedToken<'source, &'source str>),
// }
crate struct Lexer<'source> {
lexer: logos::Lexer<TopToken, &'source str>,
@ -230,30 +389,24 @@ impl Lexer<'source> {
}
impl Iterator for Lexer<'source> {
type Item = Result<Token<'source>, ShellError>;
type Item = Result<(usize, SpannedToken<'source>, usize), ShellError>;
// type Item = Result<Token<'source>, ShellError>;
fn next(&mut self) -> Option<Self::Item> {
if self.first {
self.first = false;
match self.lexer.token {
TopToken::END => None,
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
self.lexer.range(),
self.lexer.slice(),
self.lexer.slice(),
)))),
_ => {
let token = Token::Top(SpannedToken::new(
self.lexer.range(),
self.lexer.slice(),
self.lexer.token,
));
Some(Ok(token))
TopToken::Error => {
return Some(Err(lex_error(&self.lexer.range(), self.lexer.source)))
}
TopToken::Whitespace => return self.next(),
other => {
return spanned(other.to_token()?, self.lexer.slice(), &self.lexer.range())
}
}
} else {
println!("STATE={:?}", self.lexer.extras);
debug!("STATE={:?}", self.lexer.extras);
match self.lexer.extras.current {
LexerStateName::Top => {
@ -261,14 +414,9 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
TopToken::END => None,
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
range, slice, slice,
)))),
other => {
let token = Token::Top(SpannedToken::new(range, slice, other));
Some(Ok(token))
}
TopToken::Error => return Some(Err(lex_error(&range, self.lexer.source))),
TopToken::Whitespace => return self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -278,16 +426,11 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
AfterMemberDot::END => None,
AfterMemberDot::Error => {
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
}
AfterMemberDot::Whitespace => Some(Ok(Token::Whitespace(
SpannedToken::new(range, slice, slice),
))),
AfterMemberDot::Member => {
Some(Ok(Token::Member(SpannedToken::new(range, slice, slice))))
return Some(Err(lex_error(&range, self.lexer.source)))
}
AfterMemberDot::Whitespace => self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -297,16 +440,12 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
AfterVariableToken::END => None,
AfterVariableToken::Error => {
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
}
AfterVariableToken::Whitespace => Some(Ok(Token::Whitespace(
SpannedToken::new(range, slice, slice),
))),
AfterVariableToken::Dot => {
Some(Ok(Token::Dot(SpannedToken::new(range, slice, slice))))
return Some(Err(lex_error(&range, self.lexer.source)))
}
AfterVariableToken::Whitespace => self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -315,11 +454,10 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
VariableToken::END => None,
other => {
let token = Token::Var(SpannedToken::new(range, slice, other));
Some(Ok(token))
VariableToken::Error => {
return Some(Err(lex_error(&range, self.lexer.source)))
}
other => return spanned(other.to_token()?, slice, &range),
}
}
}
@ -327,6 +465,25 @@ impl Iterator for Lexer<'source> {
}
}
fn lex_error(range: &Range<usize>, source: &str) -> ShellError {
use language_reporting::*;
ShellError::diagnostic(
Diagnostic::new(Severity::Error, "Lex error")
.with_label(Label::new_primary(Span::new(range))),
source.to_string(),
)
}
/// Wraps a token kind, its source text and its range into the
/// `(start, token, end)` triple yielded by the lexer iterator.
///
/// Always returns `Some(Ok(..))`; the wrapper types only exist so call sites
/// can return the value directly from `Iterator::next`.
fn spanned<'source>(
    token: Token,
    slice: &'source str,
    range: &Range<usize>,
) -> Option<Result<(usize, SpannedToken<'source>, usize), ShellError>> {
    let (start, end) = (range.start, range.end);
    let spanned_token = SpannedToken::new(Span::new(range), slice, token);

    Some(Ok((start, spanned_token, end)))
}
fn advance<T>(
lexer: logos::Lexer<TopToken, &'source str>,
) -> (
@ -348,34 +505,37 @@ where
#[cfg(test)]
mod tests {
use super::*;
use logos::Logos;
use pretty_assertions::assert_eq;
fn assert_lex(source: &str, tokens: &[TestToken<'_>]) {
let lex = Lexer::new(source);
let mut current = 0;
let expected_tokens: Vec<Token> = tokens
let expected_tokens: Vec<SpannedToken> = tokens
.iter()
.map(|token_desc| {
println!("{:?}", token_desc);
.filter_map(|token_desc| {
debug!("{:?}", token_desc);
let len = token_desc.source.len();
let range = current..(current + len);
let token = token_desc.to_token(range);
let token = token_desc.to_token(&range);
current = current + len;
token
if let SpannedToken {
token: Token::Whitespace,
..
} = token
{
None
} else {
Some(token)
}
})
.collect();
let actual_tokens: Result<Vec<Token>, _> = lex
.map(|i| {
println!("{:?}", i);
i
})
.collect();
let actual_tokens: Result<Vec<SpannedToken>, _> =
lex.map(|result| result.map(|(_, i, _)| i)).collect();
let actual_tokens = actual_tokens.unwrap();
@ -397,18 +557,23 @@ mod tests {
}
impl TestToken<'source> {
fn to_token(&self, span: std::ops::Range<usize>) -> Token {
fn to_token(&self, range: &std::ops::Range<usize>) -> SpannedToken<'source> {
match self.desc {
TokenDesc::Top(TopToken::Dot) => {
Token::Dot(SpannedToken::new(span, self.source, "."))
SpannedToken::new(Span::new(range), self.source, Token::Dot)
}
TokenDesc::Top(tok) => {
SpannedToken::new(Span::new(range), self.source, tok.to_token().unwrap())
}
TokenDesc::Var(tok) => {
SpannedToken::new(Span::new(range), self.source, tok.to_token().unwrap())
}
TokenDesc::Top(tok) => Token::Top(SpannedToken::new(span, self.source, tok)),
TokenDesc::Var(tok) => Token::Var(SpannedToken::new(span, self.source, tok)),
TokenDesc::Member => {
Token::Member(SpannedToken::new(span, self.source, self.source))
SpannedToken::new(Span::new(range), self.source, Token::Member)
}
TokenDesc::Ws => {
Token::Whitespace(SpannedToken::new(span, self.source, self.source))
SpannedToken::new(Span::new(range), self.source, Token::Whitespace)
}
}
}

View File

@ -1,8 +1,11 @@
use std::str::FromStr;
use byte_unit::Byte;
use crate::parser::ast::*;
#![allow(unused)]
grammar;
use std::str::FromStr;
use crate::parser::ast::*;
use crate::prelude::*;
use crate::parser::lexer::{SpannedToken, Token};
grammar<'input>;
pub Pipeline: Pipeline = {
<first:Command> => Pipeline::new(vec![first]),
@ -10,19 +13,20 @@ pub Pipeline: Pipeline = {
}
Command: ParsedCommand = {
<command:BareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
<command:BareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
<command:BarePath> => ParsedCommand::new(command.to_string(), vec![]),
<command:BarePath> <expr:Expr+> => ParsedCommand::new(command.to_string(), expr),
<command:BarePath> <expr:BinaryExpression> => ParsedCommand::new(command.to_string(), vec![expr]),
}
Leaf: Expression = {
<String> => Expression::Leaf(Leaf::String(<>)),
<Num> => Expression::Leaf(Leaf::Int(<>)),
<Size> => Expression::Leaf(Leaf::Int(<>)),
<Int> => Expression::Leaf(Leaf::Int(<>)),
//<Size> => Expression::Leaf(Leaf::Int(<>)),
<Variable> => Expression::VariableReference(<>),
}
BinaryExpression: Expression = {
<left:Expr> <op:Operator> <right:Leaf> => Expression::Binary(Box::new(Binary::new(left, op, right))),
<left:Expr> <op:Operator> <right:Expr> => Expression::Binary(Box::new(Binary::new(left, op, right))),
}
Parenthesized: Expression = {
@ -45,25 +49,26 @@ WholeExpression: Expression = {
<Block>,
}
PathHead: Expression = {
<WholeExpression>,
<BarePath> => Expression::Leaf(Leaf::Bare(<>)),
}
PathExpression: Expression = {
<head:WholeExpression> <tail: ( "." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
}
Expr: Expression = {
<BareWord> => Expression::Leaf(Leaf::Bare(<>)),
<WholeExpression> => <>,
<PathExpression>
<PathExpression>,
<PathHead>
}
Variable: Variable = {
"$true" => Variable::True,
"$false" => Variable::False,
"$it" => Variable::It,
<VariableBody> => Variable::Other(<>[1..].to_string()),
"$" <"variable"> => Variable::from_str(<>.as_slice()).unwrap(),
}
Member: String = {
<BareWord>,
<"member"> => <>.to_string(),
<String>
}
@ -77,22 +82,49 @@ Operator: Operator = {
}
Flag: Flag = {
"-" <BareWord> => Flag::Shorthand(<>.to_string()),
"--" <BareWord> => Flag::Longhand(<>.to_string()),
"-" <BarePath> => Flag::Shorthand(<>.to_string()),
"--" <BarePath> => Flag::Longhand(<>.to_string()),
}
String: String = {
SQString,
DQString,
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string(),
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string()
}
BareWord = {
<RawBareWord>,
BarePath: BarePath = {
<head: "bare"> <tail: ( "." <"member"> )*> => BarePath::from_tokens(head, tail)
}
VariableBody: &'input str = <s:r"\$\p{XID_Start}(\p{XID_Continue}|[\-?!])*"> => <>;
RawBareWord: String = <s:r#"[^0-9"'$\-\.][^\s"']*"#> => <>.to_string();
DQString: String = <s:r#""([^"]|\\")*""#> => s[1..s.len() - 1].to_string();
SQString: String = <s:r#"'([^']|\\')*'"#> => s[1..s.len() - 1].to_string();
Num: i64 = <s:r"-?[0-9]+"> => i64::from_str(s).unwrap();
Size: i64 = <s:r"-?[0-9]+[A-Za-z]+"> => Byte::from_string(s).unwrap().get_bytes() as i64;
Int: i64 = {
<"num"> => i64::from_str(<>.as_slice()).unwrap()
}
// Binds the grammar to an external lexer: locations are `usize`, lexer
// failures are `ShellError`, and every terminal used in the rules above is
// matched against a `SpannedToken` by its `token` kind (the remaining fields
// are ignored by the pattern).
extern {
type Location = usize;
type Error = ShellError;
enum SpannedToken<'input> {
// Punctuation and operators.
"." => SpannedToken { token: Token::Dot, .. },
"|" => SpannedToken { token: Token::Pipe, .. },
"(" => SpannedToken { token: Token::OpenParen, .. },
")" => SpannedToken { token: Token::CloseParen, .. },
"{" => SpannedToken { token: Token::OpenBrace, .. },
"}" => SpannedToken { token: Token::CloseBrace, .. },
"==" => SpannedToken { token: Token::OpEq, .. },
"!=" => SpannedToken { token: Token::OpNeq, .. },
"<" => SpannedToken { token: Token::OpLt, .. },
"<=" => SpannedToken { token: Token::OpLte, .. },
">" => SpannedToken { token: Token::OpGt, .. },
">=" => SpannedToken { token: Token::OpGte, .. },
"-" => SpannedToken { token: Token::Dash, .. },
"--" => SpannedToken { token: Token::DashDash, .. },
"$" => SpannedToken { token: Token::Dollar, .. },
// Terminals whose text is read from the token's slice by the rules.
"num" => SpannedToken { token: Token::Num, .. },
"member" => SpannedToken { token: Token::Member, .. },
"variable" => SpannedToken { token: Token::Variable, .. },
"bare" => SpannedToken { token: Token::Bare, .. },
"dqstring" => SpannedToken { token: Token::DQString, .. },
"sqstring" => SpannedToken { token: Token::SQString, .. },
"size" => SpannedToken { token: Token::Size, .. },
}
}

File diff suppressed because one or more lines are too long

80
src/parser/span.rs Normal file
View File

@ -0,0 +1,80 @@
#[allow(unused)]
use crate::prelude::*;
use crate::parser::lexer::Span;
use derive_new::new;
use language_reporting::{FileName, Location, ReportingSpan};
/// Single-snippet source store used when rendering diagnostics.
///
/// It holds one string of source text; `derive(new)` generates the
/// `Files::new(snippet)` constructor.
#[derive(new, Debug, Clone)]
pub struct Files {
// The complete source text that spans and locations refer to.
snippet: String,
}
impl language_reporting::ReportingFiles for Files {
type Span = Span;
type FileId = usize;
fn byte_span(
&self,
_file: Self::FileId,
from_index: usize,
to_index: usize,
) -> Option<Self::Span> {
Some(Span::from((from_index, to_index)))
}
fn file_id(&self, _span: Self::Span) -> Self::FileId {
0
}
fn file_name(&self, _file: Self::FileId) -> FileName {
FileName::Verbatim(format!("<eval>"))
}
fn byte_index(&self, _file: Self::FileId, _line: usize, _column: usize) -> Option<usize> {
unimplemented!("byte_index")
}
fn location(&self, _file: Self::FileId, byte_index: usize) -> Option<Location> {
let source = &self.snippet;
let mut seen_lines = 0;
let mut seen_bytes = 0;
for (pos, _) in source.match_indices('\n') {
if pos > byte_index {
return Some(language_reporting::Location::new(
seen_lines,
byte_index - seen_bytes,
));
} else {
seen_lines += 1;
seen_bytes = pos;
}
}
if seen_lines == 0 {
Some(language_reporting::Location::new(0, byte_index))
} else {
None
}
}
fn line_span(&self, _file: Self::FileId, lineno: usize) -> Option<Self::Span> {
let source = &self.snippet;
let mut seen_lines = 0;
let mut seen_bytes = 0;
for (pos, _) in source.match_indices('\n') {
if seen_lines == lineno {
return Some(Span::from((seen_bytes, pos)));
} else {
seen_lines += 1;
seen_bytes = pos + 1;
}
}
if seen_lines == 0 {
Some(Span::from((0, self.snippet.len() - 1)))
} else {
None
}
}
fn source(&self, span: Self::Span) -> Option<String> {
Some(self.snippet[span.start()..span.end()].to_string())
}
}