Improve parser

This commit is contained in:
Yehuda Katz
2019-06-04 14:42:31 -07:00
parent 45c76be4d1
commit f3bb4a03c2
13 changed files with 3689 additions and 1178 deletions

View File

@ -59,6 +59,7 @@ pub enum Expression {
Block(Box<Block>),
Binary(Box<Binary>),
Path(Box<Path>),
Call(Box<ParsedCommand>),
VariableReference(Variable),
}
@ -68,6 +69,12 @@ impl From<&str> for Expression {
}
}
/// Converts an owned `String` into a string-literal leaf expression.
impl From<String> for Expression {
    fn from(input: String) -> Expression {
        // `into()` adapts the `String` to whatever payload type `Leaf::String` carries.
        let literal = Leaf::String(input.into());
        Expression::Leaf(literal)
    }
}
impl From<i64> for Expression {
fn from(input: i64) -> Expression {
Expression::Leaf(Leaf::Int(input.into()))
@ -99,8 +106,41 @@ impl From<Binary> for Expression {
}
impl Expression {
/// Wraps any value convertible into a `Leaf` as an `Expression::Leaf`.
crate fn leaf(leaf: impl Into<Leaf>) -> Expression {
    let converted = leaf.into();
    Expression::Leaf(converted)
}
/// Wraps any value convertible into a `Flag` as an `Expression::Flag`.
crate fn flag(flag: impl Into<Flag>) -> Expression {
    let converted = flag.into();
    Expression::Flag(converted)
}
/// Builds an `Expression::Call` from the expression being called (`head`) and its
/// arguments (`tail`).
///
/// An empty `tail` is recorded as `None` rather than `Some(vec![])`, so a call
/// without arguments always has exactly one representation.
crate fn call(head: Expression, tail: Vec<Expression>) -> Expression {
    let args = if tail.is_empty() { None } else { Some(tail) };
    Expression::Call(Box::new(ParsedCommand::new(head.into(), args)))
}
/// Builds a boxed binary expression, converting both operands and the operator
/// into their AST types first.
crate fn binary(
    left: impl Into<Expression>,
    operator: impl Into<Operator>,
    right: impl Into<Expression>,
) -> Expression {
    let node = Binary {
        left: left.into(),
        operator: operator.into(),
        right: right.into(),
    };
    Expression::Binary(Box::new(node))
}
/// Wraps the given expression in a boxed `Block` and returns it as `Expression::Block`.
crate fn block(expr: impl Into<Expression>) -> Expression {
    let body = expr.into();
    let block = Block::new(body);
    Expression::Block(Box::new(block))
}
crate fn print(&self) -> String {
match self {
Expression::Call(c) => c.print(),
Expression::Leaf(l) => l.print(),
Expression::Flag(f) => f.print(),
Expression::Parenthesized(p) => p.print(),
@ -113,6 +153,7 @@ impl Expression {
crate fn as_external_arg(&self) -> String {
match self {
Expression::Call(c) => c.as_external_arg(),
Expression::Leaf(l) => l.as_external_arg(),
Expression::Flag(f) => f.as_external_arg(),
Expression::Parenthesized(p) => p.as_external_arg(),
@ -123,6 +164,10 @@ impl Expression {
}
}
/// Builds a bare-word leaf expression from anything convertible into a `BarePath`.
crate fn bare(path: impl Into<BarePath>) -> Expression {
    let bare_path = path.into();
    Expression::Leaf(Leaf::Bare(bare_path))
}
crate fn as_string(&self) -> Option<String> {
match self {
Expression::Leaf(Leaf::String(s)) => Some(s.to_string()),
@ -131,6 +176,13 @@ impl Expression {
}
}
/// Returns the textual form of this expression if it is a bare-word leaf,
/// and `None` for every other kind of expression.
crate fn as_bare(&self) -> Option<String> {
    if let Expression::Leaf(Leaf::Bare(path)) = self {
        Some(path.to_string())
    } else {
        None
    }
}
crate fn is_flag(&self, value: &str) -> bool {
match self {
Expression::Flag(Flag::Longhand(f)) if value == f => true,
@ -218,8 +270,8 @@ impl Variable {
crate fn from_str(input: &str) -> Expression {
match input {
"it" => Expression::VariableReference(Variable::It),
"true" => Expression::Leaf(Leaf::Boolean(true)),
"false" => Expression::Leaf(Leaf::Boolean(false)),
"yes" => Expression::Leaf(Leaf::Boolean(true)),
"no" => Expression::Leaf(Leaf::Boolean(false)),
other => Expression::VariableReference(Variable::Other(other.to_string())),
}
}
@ -236,8 +288,7 @@ impl Variable {
}
}
#[cfg(test)]
pub fn bare(s: &str) -> BarePath {
pub fn bare(s: impl Into<String>) -> BarePath {
BarePath {
head: s.into(),
tail: vec![],
@ -250,7 +301,23 @@ pub struct BarePath {
tail: Vec<String>,
}
/// Any string-like value converts into a `BarePath` that has only a head
/// segment and an empty tail.
impl<T> From<T> for BarePath
where
    T: Into<String>,
{
    fn from(input: T) -> BarePath {
        let head = input.into();
        BarePath {
            head,
            tail: Vec::new(),
        }
    }
}
impl BarePath {
/// Builds a `BarePath` from a single token: the token's string form becomes the
/// head and the tail is left empty.
crate fn from_token(head: SpannedToken) -> BarePath {
    let head = head.to_string();
    BarePath {
        head,
        tail: Vec::new(),
    }
}
crate fn from_tokens(head: SpannedToken, tail: Vec<SpannedToken>) -> BarePath {
BarePath {
head: head.to_string(),
@ -363,19 +430,6 @@ impl Binary {
}
}
#[cfg(test)]
crate fn binary(
left: impl Into<Expression>,
operator: impl Into<Operator>,
right: impl Into<Expression>,
) -> Binary {
Binary {
left: left.into(),
operator: operator.into(),
right: right.into(),
}
}
impl Binary {
fn print(&self) -> String {
format!(
@ -427,21 +481,36 @@ impl Flag {
}
}
#[derive(new, Debug, Clone, Eq, PartialEq)]
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, new)]
pub struct ParsedCommand {
crate name: String,
crate args: Vec<Expression>,
crate name: Expression,
crate args: Option<Vec<Expression>>,
}
impl ParsedCommand {
#[allow(unused)]
fn as_external_arg(&self) -> String {
let mut out = vec![];
write!(out, "{}", self.name.as_external_arg()).unwrap();
if let Some(args) = &self.args {
for arg in args.iter() {
write!(out, " {}", arg.as_external_arg()).unwrap();
}
}
String::from_utf8_lossy(&out).into_owned()
}
fn print(&self) -> String {
let mut out = vec![];
write!(out, "{}", self.name).unwrap();
write!(out, "{}", self.name.print()).unwrap();
for arg in self.args.iter() {
write!(out, " {}", arg.print()).unwrap();
if let Some(args) = &self.args {
for arg in args.iter() {
write!(out, " {}", arg.print()).unwrap();
}
}
String::from_utf8_lossy(&out).into_owned()
@ -451,8 +520,8 @@ impl ParsedCommand {
impl From<&str> for ParsedCommand {
fn from(input: &str) -> ParsedCommand {
ParsedCommand {
name: input.to_string(),
args: vec![],
name: Expression::Leaf(Leaf::Bare(bare(input))),
args: None,
}
}
}
@ -460,19 +529,19 @@ impl From<&str> for ParsedCommand {
impl From<(&str, Vec<Expression>)> for ParsedCommand {
fn from(input: (&str, Vec<Expression>)) -> ParsedCommand {
ParsedCommand {
name: input.0.to_string(),
args: input.1,
name: Expression::bare(input.0),
args: Some(input.1),
}
}
}
#[derive(new, Debug, Eq, PartialEq)]
pub struct Pipeline {
crate commands: Vec<ParsedCommand>,
crate commands: Vec<Expression>,
}
impl Pipeline {
crate fn from_parts(command: ParsedCommand, rest: Vec<ParsedCommand>) -> Pipeline {
crate fn from_parts(command: Expression, rest: Vec<Expression>) -> Pipeline {
let mut commands = vec![command];
commands.extend(rest);

View File

@ -23,7 +23,7 @@ crate enum TopToken {
#[regex = r#""([^"]|\\")*""#]
DQString,
#[regex = r"\$"]
#[token = "$"]
#[callback = "start_variable"]
Dollar,
@ -257,6 +257,12 @@ crate enum AfterMemberDot {
#[callback = "finish_member"]
Member,
#[regex = r#"'([^']|\\')*'"#]
SQString,
#[regex = r#""([^"]|\\")*""#]
DQString,
#[regex = r"\s"]
Whitespace,
}
@ -268,6 +274,9 @@ impl AfterMemberDot {
let result = match self {
END => return None,
Member => Token::Member,
SQString => Token::SQMember,
DQString => Token::DQMember,
Whitespace => Token::Whitespace,
Error => unreachable!("Don't call to_token with the error variant"),
};
@ -387,6 +396,8 @@ pub enum Token {
Variable,
PathDot,
Member,
SQMember,
DQMember,
Num,
SQString,
DQString,
@ -418,6 +429,7 @@ pub enum Token {
// Whitespace(SpannedToken<'source, &'source str>),
// }
#[derive(Clone)]
crate struct Lexer<'source> {
lexer: logos::Lexer<TopToken, &'source str>,
first: bool,

View File

@ -6,72 +6,117 @@ use crate::prelude::*;
use crate::parser::lexer::{SpannedToken, Token};
use byte_unit::Byte;
// nu's grammar is a little bit different from a lot of other languages, to better match
// the idioms and constraints of a shell environment. A lot of the constraints are
// the same as PowerShell, but mostly derived from the same first principles.
//
// - Other than at the beginning of a command, bare words are virtually always parsed as
// strings. This means that, in general, bare words cannot be used as keywords or
// variables.
// - Variable names begin with `$`, and so do keywords
// - Functions are invoked without `()` and without comma separation
// - In general, because of the lack of comma-separation, expressions must be grouped:
// - a single token
// - a path ($variable followed by any number of `"." member`)
// - parenthesized expression
// - This means that more elaborate expressions, like binary expressions, must usually
// be parenthesized
// - There is a special case for a command that takes a single expression, which can
// omit the parens
grammar<'input>;
pub Pipeline: Pipeline = {
<first:Command> => Pipeline::new(vec![first]),
<first:Command> <rest: ( "|" <Command> )+> => Pipeline::from_parts(first, rest),
<first:PipelineElement> <rest: ( "|" <PipelineElement> )*> => Pipeline::from_parts(first, rest),
}
Command: ParsedCommand = {
<command:BarePath> => ParsedCommand::new(command.to_string(), vec![]),
<command:BarePath> <expr:Expr+> => ParsedCommand::new(command.to_string(), expr),
<command:BarePath> <expr:BinaryExpression> => ParsedCommand::new(command.to_string(), vec![expr]),
// One element of a pipeline. Produces an `Expression`.
PipelineElement: Expression = {
// A lone bare word is turned into a call with no arguments.
<Bare> => Expression::call(Expression::bare(<>), vec![]),
// Any other single expression is passed through unchanged.
<SingleExpression> => <>,
}
Leaf: Expression = {
<String> => Expression::Leaf(Leaf::String(<>)),
<Int> => Expression::Leaf(Leaf::Int(<>)),
<UnitsNum> => Expression::Leaf(<>),
// A leaf expression is a single logical token that directly represents an expression
LeafExpression: Expression = {
// `String` already yields an `Expression`, so it is passed through.
<String> => <>,
// `Int` yields a raw `i64`, which is wrapped in an integer leaf here.
<Int> => Expression::leaf(Leaf::Int(<>)),
// `UnitsNum` and `Var` already yield an `Expression` as well.
<UnitsNum> => <>,
<Var> => <>,
}
BinaryExpression: Expression = {
<left:Expr> <op:Operator> <right:Expr> => Expression::Binary(Box::new(Binary::new(left, op, right))),
// A call is a head followed by at least one argument. The head is either a general
// `Expression` or a bare word (which is converted into a bare-word expression).
//
// With exactly one argument, that argument may be a `SingleCallArgument`; with two
// or more, every argument must be a plain `CallArgument`.
pub Call: Expression = {
    <head:Expression> <arg:SingleCallArgument> => Expression::call(head, vec![arg]),
    <head:Expression> <first:CallArgument> <rest:( <CallArgument> )+> => Expression::call(head, { let mut args = vec![first]; args.extend(rest); args }),
    <name:Bare> <arg:SingleCallArgument> => Expression::call(Expression::bare(name), vec![arg]),
    <name:Bare> <first:CallArgument> <rest:( <CallArgument> )+> => Expression::call(Expression::bare(name), { let mut args = vec![first]; args.extend(rest); args }),
}
Parenthesized: Expression = {
"(" <Leaf> ")" => Expression::Parenthesized(Box::new(Parenthesized::new(<>))),
"(" <BinaryExpression> ")" => Expression::Parenthesized(Box::new(Parenthesized::new(<>))),
}
AtomicExpression: Expression = {
<Parenthesized>,
<Leaf>,
// A binary expression: two argument-level operands separated by an operator.
// Operands are `ArgumentExpression`s, so a bare word on either side is accepted.
Binary: Expression = {
<left:ArgumentExpression> <op:Operator> <right:ArgumentExpression> => Expression::binary(left, op, right),
}
// In a block, a single bare word is interpreted as a call:
//
// foreach { ls }
Block: Expression = {
"{" <AtomicExpression> "}" => Expression::Block(Box::new(Block::new(<>))),
"{" <BinaryExpression> "}" => Expression::Block(Box::new(Block::new(<>))),
"{" <SingleExpression> "}" => Expression::block(<>),
"{" <Bare> "}" => Expression::block(Expression::call(Expression::bare(<>), vec![])),
}
WholeExpression: Expression = {
<AtomicExpression>,
<Block>,
// An `Expression` is the most general kind of expression. It can go anywhere, even right next to another expression, and
// even as the first part of a call.
Expression: Expression = {
// Leaves and blocks are passed through as-is.
<LeafExpression> => <>,
<Block> => <>,
// Calls and binary expressions must be parenthesized to count as an `Expression`;
// the inner expression is returned without an extra wrapper node.
"(" <Call> ")" => <>,
// A parenthesized bare word is a call with no arguments.
"(" <Bare> ")" => Expression::call(Expression::bare(<>), vec![]),
"(" <Binary> ")" => <>,
}
PathHead: Expression = {
<WholeExpression>,
<BarePath> => Expression::Leaf(Leaf::Bare(<>)),
<Flag> => Expression::Flag(<>),
// An `ArgumentExpression` is an expression that appears in an argument list. It includes all of `Expression`, and
// bare words are interpreted as strings.
ArgumentExpression: Expression = {
<Expression>,
// Unlike in head position, a bare word here becomes a bare-word leaf, not a call.
<Bare> => Expression::bare(<>),
}
PathExpression: Expression = {
<head:WholeExpression> <tail: ( "???." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
// A single argument in a call's argument list: either an argument expression
// or a flag.
CallArgument: Expression = {
<ArgumentExpression> => <>,
// `Flag` yields a `Flag` value, which is wrapped into an expression here.
<Flag> => Expression::flag(<>),
}
Expr: Expression = {
<PathExpression>,
<PathHead>
// The argument of a call that has exactly one argument. In addition to a regular
// `CallArgument`, an unparenthesized binary expression is accepted.
SingleCallArgument: Expression = {
<CallArgument>,
<Binary>,
}
Var: Expression = {
"$" <"variable"> => Variable::from_str(<>.as_slice()),
// A `SingleExpression` is a special case of `Expression` for situations where expressions do not appear side-by-side.
// Because expression lists in nu are not comma-separated, composite expressions (like binary expressions) must be
// parenthesized in lists. If only a single expression appears alone, the parentheses may be left out.
//
// `SingleExpression` does not include `Bare`, because expressions that include `SingleExpression` must decide how
// to interpret a single bare word (`foreach { ls }` vs `cd ls`).
SingleExpression: Expression = {
<Expression>,
// Calls and binary expressions are accepted here without parentheses.
<Call>,
<Binary>,
}
// === LOGICAL TOKENS === //
// A logical token may be composed of more than one raw token, but the tokens must be emitted
// from the stream in exactly one sequence. This allows us to use parser infrastructure to
// compose tokens without the risk that these logical tokens will introduce ambiguities.
// A bare word: a single "bare" token converted into a `BarePath` via
// `BarePath::from_token`.
Bare: BarePath = {
<head: "bare"> => BarePath::from_token(head)
}
// A member is a special token that represents a bare word or string literal immediately
// following a dot.
Member: String = {
<"member"> => <>.to_string(),
<String>
<"dqmember"> => <>.to_string(),
<"sqmember"> => <>.to_string(),
}
Operator: Operator = {
@ -83,26 +128,26 @@ Operator: Operator = {
">=" => Operator::GreaterThanOrEqual
}
Flag: Flag = {
"-" <BarePath> => Flag::Shorthand(<>.to_string()),
"--" <BarePath> => Flag::Longhand(<>.to_string()),
}
String: String = {
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string(),
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string()
}
BarePath: BarePath = {
<head: "bare"> <tail: ( "???." <"member"> )*> => BarePath::from_tokens(head, tail)
}
// An integer literal: the text of a "num" token parsed as an `i64`.
Int: i64 = {
// NOTE(review): `unwrap` panics if the token text does not parse as an `i64`.
// Presumably the lexer only emits digit sequences for "num", but a literal that
// does not fit in an `i64` would still fail to parse — confirm against the lexer.
<"num"> => i64::from_str(<>.as_slice()).unwrap()
}
UnitsNum: Leaf = {
<num: Int> <unit: "unit"> => Leaf::Unit(num, Unit::from_str(unit.as_slice()).unwrap())
UnitsNum: Expression = {
<num: Int> <unit: "unit"> => Expression::leaf(Leaf::Unit(num, Unit::from_str(unit.as_slice()).unwrap()))
}
// A string literal, single- or double-quoted, converted into an `Expression`.
// The slice `[1..len - 1]` drops the first and last byte of the token text (the
// surrounding quote characters); the remaining text is used verbatim, with no
// escape processing in this rule.
String: Expression = {
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string().into(),
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string().into()
}
// A flag: a bare word preceded by `-` (shorthand) or `--` (longhand). The flag
// stores the bare word's string form.
Flag: Flag = {
"-" <Bare> => Flag::Shorthand(<>.to_string()),
"--" <Bare> => Flag::Longhand(<>.to_string()),
}
// A variable reference: `$` followed by a "variable" token. The token text is
// handed to `Variable::from_str`, and the result is converted into an `Expression`.
Var: Expression = {
"$" <"variable"> => Variable::from_str(<>.as_slice()).into(),
}
extern {
@ -127,6 +172,8 @@ extern {
"???." => SpannedToken { token: Token::PathDot, .. },
"num" => SpannedToken { token: Token::Num, .. },
"member" => SpannedToken { token: Token::Member, .. },
"sqmember" => SpannedToken { token: Token::SQMember, .. },
"dqmember" => SpannedToken { token: Token::SQMember, .. },
"variable" => SpannedToken { token: Token::Variable, .. },
"bare" => SpannedToken { token: Token::Bare, .. },
"dqstring" => SpannedToken { token: Token::DQString, .. },

File diff suppressed because it is too large Load Diff

View File

@ -73,6 +73,7 @@ pub struct CommandConfig {
crate named: IndexMap<String, NamedType>,
}
#[derive(Debug, Default)]
pub struct Args {
pub positional: Vec<Value>,
pub named: IndexMap<String, Value>,