Better errors and more fleshed out semantics

This commit is contained in:
Yehuda Katz 2019-05-29 21:19:46 -07:00
parent 8f5d959692
commit b7d15c2afd
14 changed files with 1844 additions and 1793 deletions

34
Cargo.lock generated
View File

@ -842,6 +842,14 @@ dependencies = [
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itertools"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itertools"
version = "0.8.0"
@ -869,6 +877,18 @@ name = "lalrpop-util"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "language-reporting"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"derive-new 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"render-tree 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.3.0"
@ -1049,6 +1069,7 @@ dependencies = [
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
"language-reporting 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"logos 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
"logos-derive 0.10.0-rc2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1504,6 +1525,16 @@ dependencies = [
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "render-tree"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"termcolor 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc-demangle"
version = "0.1.14"
@ -2104,10 +2135,12 @@ dependencies = [
"checksum ident_case 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
"checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08"
"checksum itertools 0.7.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0d47946d458e94a1b7bcabbf6521ea7c037062c81f534615abcad76e84d4970d"
"checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358"
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lalrpop-util 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9768f55211206d3c17181108d8facb80bdffc1f1e674a67b1dddb2743529ca19"
"checksum language-reporting 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "61e5d4e5c7a76724d544bb5652a8a3ded29475a1b260a263b5d6743f5871ac83"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f"
"checksum lexical-core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3e82e023e062f1d25f807ad182008fba1b46538e999f908a08cc0c29e084462e"
@ -2174,6 +2207,7 @@ dependencies = [
"checksum redox_users 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3fe5204c3a17e97dde73f285d49be585df59ed84b50a872baf416e73b62c3828"
"checksum regex 1.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8f0a0bcab2fd7d1d7c54fa9eae6f43eddeb9ce2e7352f8518a814a4f65d60c58"
"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96"
"checksum render-tree 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "68ed587df09cfb7ce1bc6fe8f77e24db219f222c049326ccbfb948ec67e31664"
"checksum rustc-demangle 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "ccc78bfd5acd7bf3e89cffcf899e5cb1a52d6fafa8dec2739ad70c9577a57288"
"checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"

View File

@ -43,6 +43,7 @@ serde_derive = "1.0.91"
getset = "0.0.7"
logos = "0.10.0-rc2"
logos-derive = "0.10.0-rc2"
language-reporting = "0.3.0"
[dependencies.pancurses]
version = "0.16"

View File

@ -3,6 +3,9 @@ install_crate = { crate_name = "lalrpop", binary = "lalrpop", test_arg = "--help
command = "lalrpop"
args = ["src/parser/parser.lalrpop"]
[tasks.baseline]
dependencies = ["lalrpop"]
[tasks.build]
command = "cargo"
args = ["build"]
@ -10,5 +13,16 @@ dependencies = ["lalrpop"]
[tasks.run]
command = "cargo"
args = ["run"]
dependencies = ["build"]
args = ["run", "--release"]
dependencies = ["baseline"]
[tasks.test]
command = "cargo"
args = ["test"]
dependencies = ["baseline"]
[tasks.check]
command = "cargo"
args = ["check"]
dependencies = ["baseline"]

View File

@ -87,10 +87,24 @@ pub async fn cli() -> Result<(), Box<Error>> {
rl.add_history_entry(line.clone());
}
LineResult::Error(err) => {
context.host.lock().unwrap().stdout(&err);
LineResult::Error(err) => match err {
ShellError::Diagnostic(diag, source) => {
let host = context.host.lock().unwrap();
let writer = host.err_termcolor();
let files = crate::parser::span::Files::new(source);
language_reporting::emit(
&mut writer.lock(),
&files,
&diag.diagnostic,
&language_reporting::DefaultConfig,
)
.unwrap();
}
ShellError::String(s) => context.host.lock().unwrap().stdout(&format!("{:?}", s)),
},
LineResult::Break => {
break;
}
@ -111,7 +125,7 @@ pub async fn cli() -> Result<(), Box<Error>> {
enum LineResult {
Success(String),
Error(String),
Error(ShellError),
Break,
#[allow(unused)]
@ -125,13 +139,13 @@ impl std::ops::Try for LineResult {
fn into_result(self) -> Result<Option<String>, ShellError> {
match self {
LineResult::Success(s) => Ok(Some(s)),
LineResult::Error(s) => Err(ShellError::string(s)),
LineResult::Error(s) => Err(s),
LineResult::Break => Ok(None),
LineResult::FatalError(err) => Err(err),
}
}
fn from_error(v: ShellError) -> Self {
LineResult::Error(v.to_string())
LineResult::Error(v)
}
fn from_ok(v: Option<String>) -> Self {
@ -151,7 +165,7 @@ async fn process_line(readline: Result<String, ReadlineError>, ctx: &mut Context
Ok(line) => {
let result = match crate::parser::parse(&line, &ctx.registry()) {
Err(err) => {
return LineResult::Error(format!("{:?}", err));
return LineResult::Error(err);
}
Ok(val) => val,
@ -178,13 +192,13 @@ async fn process_line(readline: Result<String, ReadlineError>, ctx: &mut Context
Some(ClassifiedCommand::Internal(_)),
) => match left.run(ctx, input).await {
Ok(val) => ClassifiedInputStream::from_input_stream(val),
Err(err) => return LineResult::Error(format!("{}", err.description())),
Err(err) => return LineResult::Error(err),
},
(Some(ClassifiedCommand::Internal(left)), None) => {
match left.run(ctx, input).await {
Ok(val) => ClassifiedInputStream::from_input_stream(val),
Err(err) => return LineResult::Error(format!("{}", err.description())),
Err(err) => return LineResult::Error(err),
}
}
@ -193,18 +207,18 @@ async fn process_line(readline: Result<String, ReadlineError>, ctx: &mut Context
Some(ClassifiedCommand::External(_)),
) => match left.run(ctx, input, StreamNext::External).await {
Ok(val) => val,
Err(err) => return LineResult::Error(format!("{}", err.description())),
Err(err) => return LineResult::Error(err),
},
(
Some(ClassifiedCommand::Internal(ref i)),
Some(ClassifiedCommand::External(ref e)),
) => {
return LineResult::Error(format!(
return LineResult::Error(ShellError::string(&format!(
"Unimplemented Internal({}) -> External({})",
i.name(),
e.name()
))
)))
}
(
@ -212,13 +226,13 @@ async fn process_line(readline: Result<String, ReadlineError>, ctx: &mut Context
Some(ClassifiedCommand::Internal(_)),
) => match left.run(ctx, input, StreamNext::Internal).await {
Ok(val) => val,
Err(err) => return LineResult::Error(format!("{}", err.description())),
Err(err) => return LineResult::Error(err),
},
(Some(ClassifiedCommand::External(left)), None) => {
match left.run(ctx, input, StreamNext::Last).await {
Ok(val) => val,
Err(err) => return LineResult::Error(format!("{}", err.description())),
Err(err) => return LineResult::Error(err),
}
}
}

19
src/env/host.rs vendored
View File

@ -1,9 +1,13 @@
use crate::prelude::*;
use language_reporting::termcolor;
/// Interface through which the shell obtains terminal handles and emits text.
///
/// Two families of handles are exposed: the `term`-crate terminals and the
/// `termcolor` standard streams (the latter re-exported by `language_reporting`).
pub trait Host {
/// Returns a boxed `term` terminal for standard output.
fn out_terminal(&self) -> Box<term::StdoutTerminal>;
/// Returns a boxed `term` terminal for standard error.
fn err_terminal(&self) -> Box<term::StderrTerminal>;
/// Returns a `termcolor` stream for output.
/// NOTE(review): presumably bound to stdout by implementors; confirm per impl.
fn out_termcolor(&self) -> termcolor::StandardStream;
/// Returns a `termcolor` stream for errors.
/// NOTE(review): presumably bound to stderr by implementors; confirm per impl.
fn err_termcolor(&self) -> termcolor::StandardStream;
/// Emits `out` as regular output; how it is rendered is up to the implementor.
fn stdout(&mut self, out: &str);
/// Emits `out` as error output; how it is rendered is up to the implementor.
fn stderr(&mut self, out: &str);
}
@ -24,6 +28,14 @@ impl Host for Box<dyn Host> {
fn stderr(&mut self, out: &str) {
(**self).stderr(out)
}
fn out_termcolor(&self) -> termcolor::StandardStream {
(**self).out_termcolor()
}
fn err_termcolor(&self) -> termcolor::StandardStream {
(**self).err_termcolor()
}
}
crate struct BasicHost;
@ -50,6 +62,13 @@ impl Host for BasicHost {
other => eprintln!("{}", other),
}
}
fn out_termcolor(&self) -> termcolor::StandardStream {
termcolor::StandardStream::stdout(termcolor::ColorChoice::Auto)
}
fn err_termcolor(&self) -> termcolor::StandardStream {
termcolor::StandardStream::stderr(termcolor::ColorChoice::Auto)
}
}
crate fn handle_unexpected<T>(

View File

@ -1,34 +1,102 @@
use crate::parser::lexer::{Span, SpannedToken};
#[allow(unused)]
use crate::prelude::*;
use serde_derive::Serialize;
use derive_new::new;
use language_reporting::Diagnostic;
use serde::{Serialize, Serializer};
use serde_derive::Serialize;
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Serialize)]
pub enum ShellError {
String(StringError),
Diagnostic(ShellDiagnostic, String),
}
impl ShellError {
/// Converts a LALRPOP parse failure into a `ShellError`.
///
/// `source` is the text that was being parsed; it is stored alongside the
/// diagnostic so the error can later be rendered against it.
///
/// Only `UnrecognizedToken` gets a structured diagnostic (error severity, with
/// a primary label covering `start..end` of the offending token). Every other
/// parse-error variant falls back to a plain string error holding its `Debug`
/// rendering.
crate fn parse_error(
error: lalrpop_util::ParseError<usize, SpannedToken, ShellError>,
source: String,
) -> ShellError {
use lalrpop_util::ParseError;
use language_reporting::*;
match error {
ParseError::UnrecognizedToken {
token: (start, SpannedToken { token, .. }, end),
expected,
} => {
let diagnostic = Diagnostic::new(
Severity::Error,
format!("Unexpected {:?}, expected {:?}", token, expected),
)
.with_label(Label::new_primary(Span::from((start, end))));
ShellError::diagnostic(diagnostic, source)
}
// No span information is extracted for the remaining variants.
other => ShellError::string(format!("{:?}", other)),
}
}
/// Wraps a ready-made diagnostic together with the source text it refers to.
crate fn diagnostic(diagnostic: Diagnostic<Span>, source: String) -> ShellError {
ShellError::Diagnostic(ShellDiagnostic { diagnostic }, source)
}
/// Builds a plain string error whose title is `title` and whose attached
/// value is `Value::nothing()`.
crate fn string(title: impl Into<String>) -> ShellError {
ShellError::String(StringError::new(title.into(), Value::nothing()))
}
/// Returns an independent copy of this error (a plain `clone`).
crate fn copy_error(&self) -> ShellError {
self.clone()
}
}
#[derive(Debug, Clone)]
pub struct ShellDiagnostic {
crate diagnostic: Diagnostic<Span>,
}
/// Equality for diagnostics, defined through [`Ord::cmp`] so that the two can
/// never disagree.
///
/// The wrapped `Diagnostic` offers no comparison traits of its own, so the
/// comparison is performed on its `Debug` rendering. This keeps the `Eq`
/// contract intact (`a == a` always holds), unlike a hard-coded `false`.
impl PartialEq for ShellDiagnostic {
    fn eq(&self, other: &ShellDiagnostic) -> bool {
        self.cmp(other) == std::cmp::Ordering::Equal
    }
}

impl Eq for ShellDiagnostic {}

/// Partial ordering; always `Some`, delegating to the total order below.
impl std::cmp::PartialOrd for ShellDiagnostic {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

/// Total order over diagnostics based on their `Debug` rendering.
///
/// The order itself carries no meaning; it only has to be deterministic,
/// antisymmetric and transitive so that types embedding a `ShellDiagnostic`
/// can derive `Ord` and be sorted or used as ordered-map keys safely.
impl std::cmp::Ord for ShellDiagnostic {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let mine = format!("{:?}", self.diagnostic);
        let theirs = format!("{:?}", other.diagnostic);
        mine.cmp(&theirs)
    }
}
impl Serialize for ShellDiagnostic {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
"<diagnostic>".serialize(serializer)
}
}
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, new, Clone, Serialize)]
pub struct ShellError {
pub struct StringError {
title: String,
error: Value,
}
impl ShellError {
crate fn string(title: impl Into<String>) -> ShellError {
ShellError::new(title.into(), Value::nothing())
}
crate fn copy_error(&self) -> ShellError {
ShellError {
title: self.title.clone(),
error: self.error.copy(),
}
}
crate fn description(&self) -> String {
self.title.clone()
}
}
impl std::fmt::Display for ShellError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", &self.title)
match self {
ShellError::String(s) => write!(f, "{}", &s.title),
ShellError::Diagnostic(_, _) => write!(f, "<diagnostic>"),
}
}
}
@ -36,36 +104,36 @@ impl std::error::Error for ShellError {}
impl std::convert::From<std::io::Error> for ShellError {
fn from(input: std::io::Error) -> ShellError {
ShellError {
ShellError::String(StringError {
title: format!("{}", input),
error: Value::nothing(),
}
})
}
}
impl std::convert::From<futures_sink::VecSinkError> for ShellError {
fn from(_input: futures_sink::VecSinkError) -> ShellError {
ShellError {
ShellError::String(StringError {
title: format!("Unexpected Vec Sink Error"),
error: Value::nothing(),
}
})
}
}
impl std::convert::From<subprocess::PopenError> for ShellError {
fn from(input: subprocess::PopenError) -> ShellError {
ShellError {
ShellError::String(StringError {
title: format!("{}", input),
error: Value::nothing(),
}
})
}
}
impl std::convert::From<nom::Err<(&str, nom::error::ErrorKind)>> for ShellError {
fn from(input: nom::Err<(&str, nom::error::ErrorKind)>) -> ShellError {
ShellError {
ShellError::String(StringError {
title: format!("{:?}", input),
error: Value::nothing(),
}
})
}
}

View File

@ -32,7 +32,7 @@ fn evaluate_leaf(leaf: &ast::Leaf) -> Value {
match leaf {
Leaf::String(s) => Value::string(s),
Leaf::Bare(s) => Value::string(s),
Leaf::Bare(path) => Value::string(path.to_string()),
Leaf::Boolean(b) => Value::boolean(*b),
Leaf::Int(i) => Value::int(*i),
}

View File

@ -183,7 +183,6 @@ impl Value {
crate fn compare(&self, operator: ast::Operator, other: &Value) -> Option<bool> {
match operator {
ast::Operator::Equal | ast::Operator::NotEqual => unimplemented!(),
_ => {
let coerced = coerce_compare(self, other)?;
let ordering = coerced.compare();
@ -192,6 +191,8 @@ impl Value {
let result = match (operator, ordering) {
(Operator::Equal, Ordering::Equal) => true,
(Operator::NotEqual, Ordering::Less)
| (Operator::NotEqual, Ordering::Greater) => true,
(Operator::LessThan, Ordering::Less) => true,
(Operator::GreaterThan, Ordering::Greater) => true,
(Operator::GreaterThanOrEqual, Ordering::Greater)

View File

@ -3,17 +3,21 @@ crate mod completer;
crate mod lexer;
crate mod parser;
crate mod registry;
crate mod span;
crate use ast::{ParsedCommand, Pipeline};
crate use registry::{CommandConfig, CommandRegistry};
use crate::errors::ShellError;
use lexer::Lexer;
use parser::PipelineParser;
pub fn parse(input: &str, _registry: &dyn CommandRegistry) -> Result<Pipeline, ShellError> {
let parser = PipelineParser::new();
let tokens = Lexer::new(input);
parser
.parse(input)
.map_err(|e| ShellError::string(format!("{:?}", e)))
match parser.parse(tokens) {
Ok(val) => Ok(val),
Err(err) => Err(ShellError::parse_error(err, input.to_string())),
}
}

View File

@ -1,7 +1,8 @@
use crate::parser::lexer::SpannedToken;
use derive_new::new;
use getset::Getters;
use std::str::FromStr;
use serde_derive::{Deserialize, Serialize};
use std::str::FromStr;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
pub enum Operator {
@ -65,9 +66,8 @@ impl Expression {
crate fn as_string(&self) -> Option<String> {
match self {
Expression::Leaf(Leaf::String(s)) | Expression::Leaf(Leaf::Bare(s)) => {
Some(s.to_string())
}
Expression::Leaf(Leaf::String(s)) => Some(s.to_string()),
Expression::Leaf(Leaf::Bare(path)) => Some(path.to_string()),
_ => None,
}
}
@ -135,21 +135,58 @@ impl Variable {
}
}
/// Parses a variable name (without any sigil) into a [`Variable`].
///
/// The names `it`, `true` and `false` map to their dedicated variants; any
/// other name becomes `Variable::Other`. Parsing never fails.
impl FromStr for Variable {
    type Err = ();

    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let variable = match input {
            "it" => Variable::It,
            "true" => Variable::True,
            "false" => Variable::False,
            name => Variable::Other(name.to_string()),
        };

        Ok(variable)
    }
}
/// A bare (unquoted) word, optionally followed by further segments.
///
/// `to_string` renders the head and tail segments back into a single string
/// via `bare_string`.
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct BarePath {
    head: String,
    tail: Vec<String>,
}

impl BarePath {
    /// Builds a path from lexer tokens, keeping only the text of each token.
    crate fn from_tokens(head: SpannedToken, tail: Vec<SpannedToken>) -> BarePath {
        let head = head.to_string();
        let tail = tail.iter().map(|token| token.to_string()).collect();

        BarePath { head, tail }
    }

    /// Renders the path as a single string.
    crate fn to_string(&self) -> String {
        let BarePath { head, tail } = self;
        bare_string(head, tail)
    }
}
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum Leaf {
String(String),
Bare(String),
Bare(BarePath),
#[allow(unused)]
Boolean(bool),
Int(i64),
}
/// Joins `head` and every segment of `tail` with `.` separators.
///
/// With an empty `tail` the result is just `head`. The string is built in a
/// single pre-sized buffer; neither input is cloned.
///
/// Callers holding `&String` / `&Vec<String>` keep working unchanged through
/// deref coercion.
pub(crate) fn bare_string(head: &str, tail: &[String]) -> String {
    // One extra byte per tail segment for its leading '.'.
    let capacity = head.len() + tail.iter().map(|segment| segment.len() + 1).sum::<usize>();

    let mut out = String::with_capacity(capacity);
    out.push_str(head);
    for segment in tail {
        out.push('.');
        out.push_str(segment);
    }
    out
}
impl Leaf {
fn print(&self) -> String {
match self {
Leaf::String(s) => format!("{:?}", s),
Leaf::Bare(s) => format!("{}", s),
Leaf::Bare(path) => format!("{}", path.to_string()),
Leaf::Boolean(b) => format!("{}", b),
Leaf::Int(i) => format!("{}", i),
}

View File

@ -1,5 +1,6 @@
use crate::errors::ShellError;
use derive_new::new;
use log::debug;
use logos_derive::Logos;
use std::ops::Range;
@ -68,17 +69,57 @@ crate enum TopToken {
#[token = "!="]
OpNeq,
#[token = "--"]
DashDash,
#[token = "-"]
Dash,
#[regex = r"\s+"]
Whitespace,
}
impl TopToken {
fn to_token(&self) -> Option<Token> {
use TopToken::*;
let result = match self {
END => return None,
Num => Token::Num,
SQString => Token::SQString,
DQString => Token::DQString,
Size => Token::Size,
Dollar => Token::Dollar,
Bare => Token::Bare,
Pipe => Token::Pipe,
Dot => Token::Dot,
OpenBrace => Token::OpenBrace,
CloseBrace => Token::CloseBrace,
OpenParen => Token::OpenParen,
CloseParen => Token::CloseParen,
OpGt => Token::OpGt,
OpLt => Token::OpLt,
OpGte => Token::OpGte,
OpLte => Token::OpLte,
OpEq => Token::OpEq,
OpNeq => Token::OpNeq,
DashDash => Token::DashDash,
Dash => Token::Dash,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
fn start_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
println!("start_variable EXTRAS={:?}", lex.extras);
debug!("start_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::Var;
}
fn end_bare_variable<S>(lex: &mut logos::Lexer<TopToken, S>) {
println!("end_variable EXTRAS={:?}", lex.extras);
debug!("end_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -96,8 +137,22 @@ crate enum VariableToken {
Variable,
}
impl VariableToken {
fn to_token(&self) -> Option<Token> {
use VariableToken::*;
let result = match self {
END => return None,
Variable => Token::Variable,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
fn end_variable<S>(lex: &mut logos::Lexer<VariableToken, S>) {
println!("end_variable EXTRAS={:?}", lex.extras);
debug!("end_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -119,13 +174,28 @@ crate enum AfterVariableToken {
Whitespace,
}
impl AfterVariableToken {
fn to_token(&self) -> Option<Token> {
use AfterVariableToken::*;
let result = match self {
END => return None,
Dot => Token::Dot,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
fn start_member<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
println!("start_variable EXTRAS={:?}", lex.extras);
debug!("start_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterMemberDot;
}
fn terminate_variable<S>(lex: &mut logos::Lexer<AfterVariableToken, S>) {
println!("terminate_variable EXTRAS={:?}", lex.extras);
debug!("terminate_variable EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::Top;
}
@ -146,8 +216,23 @@ crate enum AfterMemberDot {
Whitespace,
}
impl AfterMemberDot {
fn to_token(&self) -> Option<Token> {
use AfterMemberDot::*;
let result = match self {
END => return None,
Member => Token::Member,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
Some(result)
}
}
fn finish_member<S>(lex: &mut logos::Lexer<AfterMemberDot, S>) {
println!("finish_member EXTRAS={:?}", lex.extras);
debug!("finish_member EXTRAS={:?}", lex.extras);
lex.extras.current = LexerStateName::AfterVariableToken;
}
@ -175,43 +260,117 @@ impl logos::Extras for LexerState {
fn on_whitespace(&mut self, _byte: u8) {}
}
/// A pair of byte offsets, `start` and `end`, locating a region of source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
pub struct Span {
    start: usize,
    end: usize,
}

/// Builds a span from a `(start, end)` pair.
impl From<(usize, usize)> for Span {
    fn from((start, end): (usize, usize)) -> Span {
        Span { start, end }
    }
}

/// Builds a span from the bounds of a borrowed range.
impl From<&std::ops::Range<usize>> for Span {
    fn from(input: &std::ops::Range<usize>) -> Span {
        let (start, end) = (input.start, input.end);
        Span { start, end }
    }
}

impl Span {
    /// Builds a span covering the same offsets as `range`.
    fn new(range: &Range<usize>) -> Span {
        let (start, end) = (range.start, range.end);
        Span { start, end }
    }
}
/// Lets `language_reporting` read and adjust the bounds of a [`Span`].
impl language_reporting::ReportingSpan for Span {
    /// First offset of the span.
    fn start(&self) -> usize {
        self.start
    }

    /// End offset of the span.
    fn end(&self) -> usize {
        self.end
    }

    /// Copy of this span with `start` replaced and the end kept.
    fn with_start(&self, start: usize) -> Self {
        Span { start, ..*self }
    }

    /// Copy of this span with `end` replaced and the start kept.
    fn with_end(&self, end: usize) -> Self {
        Span { end, ..*self }
    }
}
#[derive(new, Debug, Clone, Eq, PartialEq)]
crate struct SpannedToken<'source, T> {
span: std::ops::Range<usize>,
slice: &'source str,
token: T,
pub struct SpannedToken<'source> {
crate span: Span,
crate slice: &'source str,
crate token: Token,
}
impl SpannedToken<'source> {
crate fn to_string(&self) -> String {
self.slice.to_string()
}
crate fn as_slice(&self) -> &str {
self.slice
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
crate enum Token<'source> {
Top(SpannedToken<'source, TopToken>),
Var(SpannedToken<'source, VariableToken>),
Dot(SpannedToken<'source, &'source str>),
Member(SpannedToken<'source, &'source str>),
Whitespace(SpannedToken<'source, &'source str>),
pub enum Token {
Variable,
Dot,
Member,
Num,
SQString,
DQString,
Size,
Dollar,
Bare,
Pipe,
OpenBrace,
CloseBrace,
OpenParen,
CloseParen,
OpGt,
OpLt,
OpGte,
OpLte,
OpEq,
OpNeq,
Dash,
DashDash,
Whitespace,
}
impl Token<'source> {
crate fn range(&self) -> &Range<usize> {
match self {
Token::Top(spanned) => &spanned.span,
Token::Var(spanned) => &spanned.span,
Token::Dot(spanned) => &spanned.span,
Token::Member(spanned) => &spanned.span,
Token::Whitespace(spanned) => &spanned.span,
}
}
crate fn slice(&self) -> &str {
match self {
Token::Top(spanned) => spanned.slice,
Token::Var(spanned) => spanned.slice,
Token::Dot(spanned) => spanned.slice,
Token::Member(spanned) => spanned.slice,
Token::Whitespace(spanned) => spanned.slice,
}
}
}
// #[derive(Debug, Clone, Eq, PartialEq)]
// crate enum Token<'source> {
// Top(SpannedToken<'source, TopToken>),
// Var(SpannedToken<'source, VariableToken>),
// Dot(SpannedToken<'source, &'source str>),
// Member(SpannedToken<'source, &'source str>),
// Whitespace(SpannedToken<'source, &'source str>),
// }
crate struct Lexer<'source> {
lexer: logos::Lexer<TopToken, &'source str>,
@ -230,30 +389,24 @@ impl Lexer<'source> {
}
impl Iterator for Lexer<'source> {
type Item = Result<Token<'source>, ShellError>;
type Item = Result<(usize, SpannedToken<'source>, usize), ShellError>;
// type Item = Result<Token<'source>, ShellError>;
fn next(&mut self) -> Option<Self::Item> {
if self.first {
self.first = false;
match self.lexer.token {
TopToken::END => None,
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
self.lexer.range(),
self.lexer.slice(),
self.lexer.slice(),
)))),
_ => {
let token = Token::Top(SpannedToken::new(
self.lexer.range(),
self.lexer.slice(),
self.lexer.token,
));
Some(Ok(token))
TopToken::Error => {
return Some(Err(lex_error(&self.lexer.range(), self.lexer.source)))
}
TopToken::Whitespace => return self.next(),
other => {
return spanned(other.to_token()?, self.lexer.slice(), &self.lexer.range())
}
}
} else {
println!("STATE={:?}", self.lexer.extras);
debug!("STATE={:?}", self.lexer.extras);
match self.lexer.extras.current {
LexerStateName::Top => {
@ -261,14 +414,9 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
TopToken::END => None,
TopToken::Whitespace => Some(Ok(Token::Whitespace(SpannedToken::new(
range, slice, slice,
)))),
other => {
let token = Token::Top(SpannedToken::new(range, slice, other));
Some(Ok(token))
}
TopToken::Error => return Some(Err(lex_error(&range, self.lexer.source))),
TopToken::Whitespace => return self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -278,16 +426,11 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
AfterMemberDot::END => None,
AfterMemberDot::Error => {
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
}
AfterMemberDot::Whitespace => Some(Ok(Token::Whitespace(
SpannedToken::new(range, slice, slice),
))),
AfterMemberDot::Member => {
Some(Ok(Token::Member(SpannedToken::new(range, slice, slice))))
return Some(Err(lex_error(&range, self.lexer.source)))
}
AfterMemberDot::Whitespace => self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -297,16 +440,12 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
AfterVariableToken::END => None,
AfterVariableToken::Error => {
Some(Err(ShellError::string(&format!("Lex error at {}", slice))))
}
AfterVariableToken::Whitespace => Some(Ok(Token::Whitespace(
SpannedToken::new(range, slice, slice),
))),
AfterVariableToken::Dot => {
Some(Ok(Token::Dot(SpannedToken::new(range, slice, slice))))
return Some(Err(lex_error(&range, self.lexer.source)))
}
AfterVariableToken::Whitespace => self.next(),
other => return spanned(other.to_token()?, slice, &range),
}
}
@ -315,11 +454,10 @@ impl Iterator for Lexer<'source> {
self.lexer = lexer;
match token {
VariableToken::END => None,
other => {
let token = Token::Var(SpannedToken::new(range, slice, other));
Some(Ok(token))
}
VariableToken::Error => {
return Some(Err(lex_error(&range, self.lexer.source)))
}
other => return spanned(other.to_token()?, slice, &range),
}
}
}
@ -327,6 +465,25 @@ impl Iterator for Lexer<'source> {
}
}
/// Builds the diagnostic error reported when the lexer cannot tokenize the
/// text at `range`; `source` is copied into the error for later rendering.
fn lex_error(range: &Range<usize>, source: &str) -> ShellError {
    use language_reporting::*;

    let label = Label::new_primary(Span::new(range));
    let diagnostic = Diagnostic::new(Severity::Error, "Lex error").with_label(label);

    ShellError::diagnostic(diagnostic, source.to_string())
}
/// Packages a token as the `(start, token, end)` triple yielded by the lexer
/// iterator. Always returns `Some(Ok(..))`; the wrapping only exists to match
/// the iterator's item type.
fn spanned<'source>(
    token: Token,
    slice: &'source str,
    range: &Range<usize>,
) -> Option<Result<(usize, SpannedToken<'source>, usize), ShellError>> {
    let (start, end) = (range.start, range.end);
    let spanned_token = SpannedToken::new(Span::new(range), slice, token);

    Some(Ok((start, spanned_token, end)))
}
fn advance<T>(
lexer: logos::Lexer<TopToken, &'source str>,
) -> (
@ -348,34 +505,37 @@ where
#[cfg(test)]
mod tests {
use super::*;
use logos::Logos;
use pretty_assertions::assert_eq;
fn assert_lex(source: &str, tokens: &[TestToken<'_>]) {
let lex = Lexer::new(source);
let mut current = 0;
let expected_tokens: Vec<Token> = tokens
let expected_tokens: Vec<SpannedToken> = tokens
.iter()
.map(|token_desc| {
println!("{:?}", token_desc);
.filter_map(|token_desc| {
debug!("{:?}", token_desc);
let len = token_desc.source.len();
let range = current..(current + len);
let token = token_desc.to_token(range);
let token = token_desc.to_token(&range);
current = current + len;
token
if let SpannedToken {
token: Token::Whitespace,
..
} = token
{
None
} else {
Some(token)
}
})
.collect();
let actual_tokens: Result<Vec<Token>, _> = lex
.map(|i| {
println!("{:?}", i);
i
})
.collect();
let actual_tokens: Result<Vec<SpannedToken>, _> =
lex.map(|result| result.map(|(_, i, _)| i)).collect();
let actual_tokens = actual_tokens.unwrap();
@ -397,18 +557,23 @@ mod tests {
}
impl TestToken<'source> {
fn to_token(&self, span: std::ops::Range<usize>) -> Token {
fn to_token(&self, range: &std::ops::Range<usize>) -> SpannedToken<'source> {
match self.desc {
TokenDesc::Top(TopToken::Dot) => {
Token::Dot(SpannedToken::new(span, self.source, "."))
SpannedToken::new(Span::new(range), self.source, Token::Dot)
}
TokenDesc::Top(tok) => {
SpannedToken::new(Span::new(range), self.source, tok.to_token().unwrap())
}
TokenDesc::Var(tok) => {
SpannedToken::new(Span::new(range), self.source, tok.to_token().unwrap())
}
TokenDesc::Top(tok) => Token::Top(SpannedToken::new(span, self.source, tok)),
TokenDesc::Var(tok) => Token::Var(SpannedToken::new(span, self.source, tok)),
TokenDesc::Member => {
Token::Member(SpannedToken::new(span, self.source, self.source))
SpannedToken::new(Span::new(range), self.source, Token::Member)
}
TokenDesc::Ws => {
Token::Whitespace(SpannedToken::new(span, self.source, self.source))
SpannedToken::new(Span::new(range), self.source, Token::Whitespace)
}
}
}

View File

@ -1,8 +1,11 @@
use std::str::FromStr;
use byte_unit::Byte;
use crate::parser::ast::*;
#![allow(unused)]
grammar;
use std::str::FromStr;
use crate::parser::ast::*;
use crate::prelude::*;
use crate::parser::lexer::{SpannedToken, Token};
grammar<'input>;
pub Pipeline: Pipeline = {
<first:Command> => Pipeline::new(vec![first]),
@ -10,19 +13,20 @@ pub Pipeline: Pipeline = {
}
Command: ParsedCommand = {
<command:BareWord> <expr:Expr*> => ParsedCommand::new(command, expr),
<command:BareWord> <expr:BinaryExpression> => ParsedCommand::new(command, vec![expr]),
<command:BarePath> => ParsedCommand::new(command.to_string(), vec![]),
<command:BarePath> <expr:Expr+> => ParsedCommand::new(command.to_string(), expr),
<command:BarePath> <expr:BinaryExpression> => ParsedCommand::new(command.to_string(), vec![expr]),
}
Leaf: Expression = {
<String> => Expression::Leaf(Leaf::String(<>)),
<Num> => Expression::Leaf(Leaf::Int(<>)),
<Size> => Expression::Leaf(Leaf::Int(<>)),
<Int> => Expression::Leaf(Leaf::Int(<>)),
//<Size> => Expression::Leaf(Leaf::Int(<>)),
<Variable> => Expression::VariableReference(<>),
}
BinaryExpression: Expression = {
<left:Expr> <op:Operator> <right:Leaf> => Expression::Binary(Box::new(Binary::new(left, op, right))),
<left:Expr> <op:Operator> <right:Expr> => Expression::Binary(Box::new(Binary::new(left, op, right))),
}
Parenthesized: Expression = {
@ -45,25 +49,26 @@ WholeExpression: Expression = {
<Block>,
}
PathHead: Expression = {
<WholeExpression>,
<BarePath> => Expression::Leaf(Leaf::Bare(<>)),
}
PathExpression: Expression = {
<head:WholeExpression> <tail: ( "." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
}
Expr: Expression = {
<BareWord> => Expression::Leaf(Leaf::Bare(<>)),
<WholeExpression> => <>,
<PathExpression>
<PathExpression>,
<PathHead>
}
Variable: Variable = {
"$true" => Variable::True,
"$false" => Variable::False,
"$it" => Variable::It,
<VariableBody> => Variable::Other(<>[1..].to_string()),
"$" <"variable"> => Variable::from_str(<>.as_slice()).unwrap(),
}
Member: String = {
<BareWord>,
<"member"> => <>.to_string(),
<String>
}
@ -77,22 +82,49 @@ Operator: Operator = {
}
Flag: Flag = {
"-" <BareWord> => Flag::Shorthand(<>.to_string()),
"--" <BareWord> => Flag::Longhand(<>.to_string()),
"-" <BarePath> => Flag::Shorthand(<>.to_string()),
"--" <BarePath> => Flag::Longhand(<>.to_string()),
}
String: String = {
SQString,
DQString,
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string(),
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string()
}
BareWord = {
<RawBareWord>,
BarePath: BarePath = {
<head: "bare"> <tail: ( "." <"member"> )*> => BarePath::from_tokens(head, tail)
}
VariableBody: &'input str = <s:r"\$\p{XID_Start}(\p{XID_Continue}|[\-?!])*"> => <>;
RawBareWord: String = <s:r#"[^0-9"'$\-\.][^\s"']*"#> => <>.to_string();
DQString: String = <s:r#""([^"]|\\")*""#> => s[1..s.len() - 1].to_string();
SQString: String = <s:r#"'([^']|\\')*'"#> => s[1..s.len() - 1].to_string();
Num: i64 = <s:r"-?[0-9]+"> => i64::from_str(s).unwrap();
Size: i64 = <s:r"-?[0-9]+[A-Za-z]+"> => Byte::from_string(s).unwrap().get_bytes() as i64;
Int: i64 = {
<"num"> => i64::from_str(<>.as_slice()).unwrap()
}
extern {
type Location = usize;
type Error = ShellError;
enum SpannedToken<'input> {
"." => SpannedToken { token: Token::Dot, .. },
"|" => SpannedToken { token: Token::Pipe, .. },
"(" => SpannedToken { token: Token::OpenParen, .. },
")" => SpannedToken { token: Token::CloseParen, .. },
"{" => SpannedToken { token: Token::OpenBrace, .. },
"}" => SpannedToken { token: Token::CloseBrace, .. },
"==" => SpannedToken { token: Token::OpEq, .. },
"!=" => SpannedToken { token: Token::OpNeq, .. },
"<" => SpannedToken { token: Token::OpLt, .. },
"<=" => SpannedToken { token: Token::OpLte, .. },
">" => SpannedToken { token: Token::OpGt, .. },
">=" => SpannedToken { token: Token::OpGte, .. },
"-" => SpannedToken { token: Token::Dash, .. },
"--" => SpannedToken { token: Token::DashDash, .. },
"$" => SpannedToken { token: Token::Dollar, .. },
"num" => SpannedToken { token: Token::Num, .. },
"member" => SpannedToken { token: Token::Member, .. },
"variable" => SpannedToken { token: Token::Variable, .. },
"bare" => SpannedToken { token: Token::Bare, .. },
"dqstring" => SpannedToken { token: Token::DQString, .. },
"sqstring" => SpannedToken { token: Token::SQString, .. },
"size" => SpannedToken { token: Token::Size, .. },
}
}

File diff suppressed because one or more lines are too long

80
src/parser/span.rs Normal file
View File

@ -0,0 +1,80 @@
#[allow(unused)]
use crate::prelude::*;
use crate::parser::lexer::Span;
use derive_new::new;
use language_reporting::{FileName, Location, ReportingSpan};
/// A single in-memory piece of source text, used as the "file set" handed to
/// `language_reporting` when rendering diagnostics.
#[derive(new, Debug, Clone)]
pub struct Files {
// The complete source text that diagnostics refer to.
snippet: String,
}
/// Exposes the single in-memory snippet to `language_reporting`.
///
/// There is exactly one "file": every span belongs to file id `0`, which is
/// displayed as `<eval>`. Line and column numbers are zero-based, and columns
/// are byte offsets from the start of the line.
impl language_reporting::ReportingFiles for Files {
    type Span = Span;
    type FileId = usize;

    /// Builds a span from two byte offsets; the file id is ignored.
    fn byte_span(
        &self,
        _file: Self::FileId,
        from_index: usize,
        to_index: usize,
    ) -> Option<Self::Span> {
        Some(Span::from((from_index, to_index)))
    }

    /// Every span belongs to the single file `0`.
    fn file_id(&self, _span: Self::Span) -> Self::FileId {
        0
    }

    /// The snippet has no real path; it is always shown as `<eval>`.
    fn file_name(&self, _file: Self::FileId) -> FileName {
        FileName::Verbatim(format!("<eval>"))
    }

    /// Not supported yet.
    ///
    /// # Panics
    ///
    /// Always panics with `unimplemented!`.
    fn byte_index(&self, _file: Self::FileId, _line: usize, _column: usize) -> Option<usize> {
        unimplemented!("byte_index")
    }

    /// Translates a byte offset into a zero-based `(line, column)` location.
    ///
    /// A newline byte is attributed to the line it terminates. Offsets on the
    /// last line, including those in multi-line snippets, resolve normally.
    fn location(&self, _file: Self::FileId, byte_index: usize) -> Option<Location> {
        let mut line = 0;
        let mut line_start = 0;

        for (pos, _) in self.snippet.match_indices('\n') {
            if pos >= byte_index {
                break;
            }
            line += 1;
            // The next line begins just past the newline itself.
            line_start = pos + 1;
        }

        Some(language_reporting::Location::new(
            line,
            byte_index - line_start,
        ))
    }

    /// Returns the span of zero-based line `lineno`, excluding its newline.
    ///
    /// The final line runs to the end of the snippet whether or not it is
    /// newline-terminated. Returns `None` when `lineno` is past the last line.
    fn line_span(&self, _file: Self::FileId, lineno: usize) -> Option<Self::Span> {
        let mut current_line = 0;
        let mut line_start = 0;

        for (pos, _) in self.snippet.match_indices('\n') {
            if current_line == lineno {
                return Some(Span::from((line_start, pos)));
            }
            current_line += 1;
            line_start = pos + 1;
        }

        if current_line == lineno {
            Some(Span::from((line_start, self.snippet.len())))
        } else {
            None
        }
    }

    /// Returns the text covered by `span`, or `None` when the span is inverted,
    /// out of bounds, or does not fall on character boundaries.
    fn source(&self, span: Self::Span) -> Option<String> {
        self.snippet
            .get(span.start()..span.end())
            .map(|text| text.to_string())
    }
}