mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 22:07:57 +02:00
Improve parser
This commit is contained in:
@ -59,6 +59,7 @@ pub enum Expression {
|
||||
Block(Box<Block>),
|
||||
Binary(Box<Binary>),
|
||||
Path(Box<Path>),
|
||||
Call(Box<ParsedCommand>),
|
||||
VariableReference(Variable),
|
||||
}
|
||||
|
||||
@ -68,6 +69,12 @@ impl From<&str> for Expression {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Expression {
|
||||
fn from(input: String) -> Expression {
|
||||
Expression::Leaf(Leaf::String(input.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for Expression {
|
||||
fn from(input: i64) -> Expression {
|
||||
Expression::Leaf(Leaf::Int(input.into()))
|
||||
@ -99,8 +106,41 @@ impl From<Binary> for Expression {
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
/// Builds an `Expression::Leaf` from any value convertible into a `Leaf`.
crate fn leaf(leaf: impl Into<Leaf>) -> Expression {
    let converted: Leaf = leaf.into();
    Expression::Leaf(converted)
}
|
||||
|
||||
/// Builds an `Expression::Flag` from any value convertible into a `Flag`.
crate fn flag(flag: impl Into<Flag>) -> Expression {
    let converted: Flag = flag.into();
    Expression::Flag(converted)
}
|
||||
|
||||
/// Builds an `Expression::Call` whose command name is `head` and whose
/// arguments are `tail`.
///
/// An empty `tail` is stored as `None` rather than `Some(vec![])`, so a call
/// without arguments has a single representation.
crate fn call(head: Expression, tail: Vec<Expression>) -> Expression {
    let args = if tail.is_empty() { None } else { Some(tail) };

    // `head` is already an `Expression`, so it is passed through unchanged.
    Expression::Call(Box::new(ParsedCommand::new(head, args)))
}
|
||||
|
||||
/// Builds a boxed `Expression::Binary` from a left operand, an operator and a
/// right operand, converting each argument into its target type first.
crate fn binary(
    left: impl Into<Expression>,
    operator: impl Into<Operator>,
    right: impl Into<Expression>,
) -> Expression {
    // Conversions run in the same order as the struct fields: left, operator, right.
    let left = left.into();
    let operator = operator.into();
    let right = right.into();

    let node = Binary {
        left,
        operator,
        right,
    };
    Expression::Binary(Box::new(node))
}
|
||||
|
||||
/// Wraps `expr` in a new `Block` and returns it as a boxed `Expression::Block`.
crate fn block(expr: impl Into<Expression>) -> Expression {
    let body = Block::new(expr.into());
    Expression::Block(Box::new(body))
}
|
||||
|
||||
crate fn print(&self) -> String {
|
||||
match self {
|
||||
Expression::Call(c) => c.print(),
|
||||
Expression::Leaf(l) => l.print(),
|
||||
Expression::Flag(f) => f.print(),
|
||||
Expression::Parenthesized(p) => p.print(),
|
||||
@ -113,6 +153,7 @@ impl Expression {
|
||||
|
||||
crate fn as_external_arg(&self) -> String {
|
||||
match self {
|
||||
Expression::Call(c) => c.as_external_arg(),
|
||||
Expression::Leaf(l) => l.as_external_arg(),
|
||||
Expression::Flag(f) => f.as_external_arg(),
|
||||
Expression::Parenthesized(p) => p.as_external_arg(),
|
||||
@ -123,6 +164,10 @@ impl Expression {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a bare-word leaf expression from any value convertible into a `BarePath`.
crate fn bare(path: impl Into<BarePath>) -> Expression {
    let bare_path: BarePath = path.into();
    Expression::Leaf(Leaf::Bare(bare_path))
}
|
||||
|
||||
crate fn as_string(&self) -> Option<String> {
|
||||
match self {
|
||||
Expression::Leaf(Leaf::String(s)) => Some(s.to_string()),
|
||||
@ -131,6 +176,13 @@ impl Expression {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the string form of this expression when it is a bare-word leaf,
/// and `None` for every other kind of expression.
crate fn as_bare(&self) -> Option<String> {
    if let Expression::Leaf(Leaf::Bare(path)) = self {
        Some(path.to_string())
    } else {
        None
    }
}
|
||||
|
||||
crate fn is_flag(&self, value: &str) -> bool {
|
||||
match self {
|
||||
Expression::Flag(Flag::Longhand(f)) if value == f => true,
|
||||
@ -218,8 +270,8 @@ impl Variable {
|
||||
crate fn from_str(input: &str) -> Expression {
|
||||
match input {
|
||||
"it" => Expression::VariableReference(Variable::It),
|
||||
"true" => Expression::Leaf(Leaf::Boolean(true)),
|
||||
"false" => Expression::Leaf(Leaf::Boolean(false)),
|
||||
"yes" => Expression::Leaf(Leaf::Boolean(true)),
|
||||
"no" => Expression::Leaf(Leaf::Boolean(false)),
|
||||
other => Expression::VariableReference(Variable::Other(other.to_string())),
|
||||
}
|
||||
}
|
||||
@ -236,8 +288,7 @@ impl Variable {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn bare(s: &str) -> BarePath {
|
||||
pub fn bare(s: impl Into<String>) -> BarePath {
|
||||
BarePath {
|
||||
head: s.into(),
|
||||
tail: vec![],
|
||||
@ -250,7 +301,23 @@ pub struct BarePath {
|
||||
tail: Vec<String>,
|
||||
}
|
||||
|
||||
impl<T: Into<String>> From<T> for BarePath {
|
||||
fn from(input: T) -> BarePath {
|
||||
BarePath {
|
||||
head: input.into(),
|
||||
tail: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BarePath {
|
||||
/// Builds a `BarePath` whose head is the string form of `head` and whose tail
/// is empty.
crate fn from_token(head: SpannedToken) -> BarePath {
    let head = head.to_string();
    BarePath {
        head,
        tail: Vec::new(),
    }
}
|
||||
|
||||
crate fn from_tokens(head: SpannedToken, tail: Vec<SpannedToken>) -> BarePath {
|
||||
BarePath {
|
||||
head: head.to_string(),
|
||||
@ -363,19 +430,6 @@ impl Binary {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
crate fn binary(
|
||||
left: impl Into<Expression>,
|
||||
operator: impl Into<Operator>,
|
||||
right: impl Into<Expression>,
|
||||
) -> Binary {
|
||||
Binary {
|
||||
left: left.into(),
|
||||
operator: operator.into(),
|
||||
right: right.into(),
|
||||
}
|
||||
}
|
||||
|
||||
impl Binary {
|
||||
fn print(&self) -> String {
|
||||
format!(
|
||||
@ -427,21 +481,36 @@ impl Flag {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(new, Debug, Clone, Eq, PartialEq)]
|
||||
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, new)]
|
||||
pub struct ParsedCommand {
|
||||
crate name: String,
|
||||
crate args: Vec<Expression>,
|
||||
crate name: Expression,
|
||||
crate args: Option<Vec<Expression>>,
|
||||
}
|
||||
|
||||
impl ParsedCommand {
|
||||
#[allow(unused)]
|
||||
fn as_external_arg(&self) -> String {
|
||||
let mut out = vec![];
|
||||
|
||||
write!(out, "{}", self.name.as_external_arg()).unwrap();
|
||||
|
||||
if let Some(args) = &self.args {
|
||||
for arg in args.iter() {
|
||||
write!(out, " {}", arg.as_external_arg()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
String::from_utf8_lossy(&out).into_owned()
|
||||
}
|
||||
|
||||
fn print(&self) -> String {
|
||||
let mut out = vec![];
|
||||
|
||||
write!(out, "{}", self.name).unwrap();
|
||||
write!(out, "{}", self.name.print()).unwrap();
|
||||
|
||||
for arg in self.args.iter() {
|
||||
write!(out, " {}", arg.print()).unwrap();
|
||||
if let Some(args) = &self.args {
|
||||
for arg in args.iter() {
|
||||
write!(out, " {}", arg.print()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
String::from_utf8_lossy(&out).into_owned()
|
||||
@ -451,8 +520,8 @@ impl ParsedCommand {
|
||||
impl From<&str> for ParsedCommand {
|
||||
fn from(input: &str) -> ParsedCommand {
|
||||
ParsedCommand {
|
||||
name: input.to_string(),
|
||||
args: vec![],
|
||||
name: Expression::Leaf(Leaf::Bare(bare(input))),
|
||||
args: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -460,19 +529,19 @@ impl From<&str> for ParsedCommand {
|
||||
impl From<(&str, Vec<Expression>)> for ParsedCommand {
|
||||
fn from(input: (&str, Vec<Expression>)) -> ParsedCommand {
|
||||
ParsedCommand {
|
||||
name: input.0.to_string(),
|
||||
args: input.1,
|
||||
name: Expression::bare(input.0),
|
||||
args: Some(input.1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(new, Debug, Eq, PartialEq)]
|
||||
pub struct Pipeline {
|
||||
crate commands: Vec<ParsedCommand>,
|
||||
crate commands: Vec<Expression>,
|
||||
}
|
||||
|
||||
impl Pipeline {
|
||||
crate fn from_parts(command: ParsedCommand, rest: Vec<ParsedCommand>) -> Pipeline {
|
||||
crate fn from_parts(command: Expression, rest: Vec<Expression>) -> Pipeline {
|
||||
let mut commands = vec![command];
|
||||
commands.extend(rest);
|
||||
|
||||
|
@ -23,7 +23,7 @@ crate enum TopToken {
|
||||
#[regex = r#""([^"]|\\")*""#]
|
||||
DQString,
|
||||
|
||||
#[regex = r"\$"]
|
||||
#[token = "$"]
|
||||
#[callback = "start_variable"]
|
||||
Dollar,
|
||||
|
||||
@ -257,6 +257,12 @@ crate enum AfterMemberDot {
|
||||
#[callback = "finish_member"]
|
||||
Member,
|
||||
|
||||
#[regex = r#"'([^']|\\')*'"#]
|
||||
SQString,
|
||||
|
||||
#[regex = r#""([^"]|\\")*""#]
|
||||
DQString,
|
||||
|
||||
#[regex = r"\s"]
|
||||
Whitespace,
|
||||
}
|
||||
@ -268,6 +274,9 @@ impl AfterMemberDot {
|
||||
let result = match self {
|
||||
END => return None,
|
||||
Member => Token::Member,
|
||||
SQString => Token::SQMember,
|
||||
DQString => Token::DQMember,
|
||||
|
||||
Whitespace => Token::Whitespace,
|
||||
Error => unreachable!("Don't call to_token with the error variant"),
|
||||
};
|
||||
@ -387,6 +396,8 @@ pub enum Token {
|
||||
Variable,
|
||||
PathDot,
|
||||
Member,
|
||||
SQMember,
|
||||
DQMember,
|
||||
Num,
|
||||
SQString,
|
||||
DQString,
|
||||
@ -418,6 +429,7 @@ pub enum Token {
|
||||
// Whitespace(SpannedToken<'source, &'source str>),
|
||||
// }
|
||||
|
||||
#[derive(Clone)]
|
||||
crate struct Lexer<'source> {
|
||||
lexer: logos::Lexer<TopToken, &'source str>,
|
||||
first: bool,
|
||||
|
@ -6,72 +6,117 @@ use crate::prelude::*;
|
||||
use crate::parser::lexer::{SpannedToken, Token};
|
||||
use byte_unit::Byte;
|
||||
|
||||
// nu's grammar is a little bit different from a lot of other languages, to better match
|
||||
// the idioms and constraints of a shell environment. A lot of the constraints are
|
||||
// the same as PowerShell, but mostly derived from the same first principles.
|
||||
//
|
||||
// - Other than at the beginning of a command, bare words are virtually always parsed as
|
||||
// strings. This means that, in general, bare words cannot be used as keywords or
|
||||
// variables.
|
||||
// - Variable names begin with `$`, and so do keywords
|
||||
// - Functions are invoked without `()` and without comma separation
|
||||
// - In general, because of the lack of comma-separation, expressions must be grouped:
|
||||
// - a single token
|
||||
// - a path ($variable followed by any number of `"." member`)
|
||||
// - parenthesized expression
|
||||
// - This means that more elaborate expressions, like binary expressions, must usually
|
||||
// be parenthesized
|
||||
// - There is a special case for a command that takes a single expression, which can
|
||||
// omit the parens
|
||||
|
||||
grammar<'input>;
|
||||
|
||||
pub Pipeline: Pipeline = {
|
||||
<first:Command> => Pipeline::new(vec![first]),
|
||||
<first:Command> <rest: ( "|" <Command> )+> => Pipeline::from_parts(first, rest),
|
||||
<first:PipelineElement> <rest: ( "|" <PipelineElement> )*> => Pipeline::from_parts(first, rest),
|
||||
}
|
||||
|
||||
Command: ParsedCommand = {
|
||||
<command:BarePath> => ParsedCommand::new(command.to_string(), vec![]),
|
||||
<command:BarePath> <expr:Expr+> => ParsedCommand::new(command.to_string(), expr),
|
||||
<command:BarePath> <expr:BinaryExpression> => ParsedCommand::new(command.to_string(), vec![expr]),
|
||||
PipelineElement: Expression = {
|
||||
<Bare> => Expression::call(Expression::bare(<>), vec![]),
|
||||
<SingleExpression> => <>,
|
||||
}
|
||||
|
||||
Leaf: Expression = {
|
||||
<String> => Expression::Leaf(Leaf::String(<>)),
|
||||
<Int> => Expression::Leaf(Leaf::Int(<>)),
|
||||
<UnitsNum> => Expression::Leaf(<>),
|
||||
// A leaf expression is a single logical token that directly represents an expression
|
||||
LeafExpression: Expression = {
|
||||
<String> => <>,
|
||||
<Int> => Expression::leaf(Leaf::Int(<>)),
|
||||
<UnitsNum> => <>,
|
||||
<Var> => <>,
|
||||
}
|
||||
|
||||
BinaryExpression: Expression = {
|
||||
<left:Expr> <op:Operator> <right:Expr> => Expression::Binary(Box::new(Binary::new(left, op, right))),
|
||||
pub Call: Expression = {
|
||||
<expr:Expression> <rest:SingleCallArgument> => Expression::call(expr, vec![rest]),
|
||||
<expr:Expression> <first:CallArgument> <rest:( <CallArgument> )+> => Expression::call(expr, { let mut rest = rest; let mut v = vec![first]; v.append(&mut rest); v }),
|
||||
<expr:Bare> <rest:SingleCallArgument> => Expression::call(Expression::bare(expr), vec![rest]),
|
||||
<expr:Bare> <first:CallArgument> <rest:( <CallArgument> )+> => Expression::call(Expression::bare(expr), { let mut v = vec![first]; let mut rest = rest; v.append(&mut rest); v }),
|
||||
}
|
||||
|
||||
Parenthesized: Expression = {
|
||||
"(" <Leaf> ")" => Expression::Parenthesized(Box::new(Parenthesized::new(<>))),
|
||||
"(" <BinaryExpression> ")" => Expression::Parenthesized(Box::new(Parenthesized::new(<>))),
|
||||
}
|
||||
|
||||
AtomicExpression: Expression = {
|
||||
<Parenthesized>,
|
||||
<Leaf>,
|
||||
Binary: Expression = {
|
||||
<left:ArgumentExpression> <op:Operator> <right:ArgumentExpression> => Expression::binary(left, op, right),
|
||||
}
|
||||
|
||||
// In a block, a single bare word is interpreted as a call:
|
||||
//
|
||||
// foreach { ls }
|
||||
Block: Expression = {
|
||||
"{" <AtomicExpression> "}" => Expression::Block(Box::new(Block::new(<>))),
|
||||
"{" <BinaryExpression> "}" => Expression::Block(Box::new(Block::new(<>))),
|
||||
"{" <SingleExpression> "}" => Expression::block(<>),
|
||||
"{" <Bare> "}" => Expression::block(Expression::call(Expression::bare(<>), vec![])),
|
||||
}
|
||||
|
||||
WholeExpression: Expression = {
|
||||
<AtomicExpression>,
|
||||
<Block>,
|
||||
// An `Expression` is the most general kind of expression. It can go anywhere, even right next to another expression, and
|
||||
// even as the first part of a call.
|
||||
Expression: Expression = {
|
||||
<LeafExpression> => <>,
|
||||
<Block> => <>,
|
||||
"(" <Call> ")" => <>,
|
||||
"(" <Bare> ")" => Expression::call(Expression::bare(<>), vec![]),
|
||||
"(" <Binary> ")" => <>,
|
||||
}
|
||||
|
||||
PathHead: Expression = {
|
||||
<WholeExpression>,
|
||||
<BarePath> => Expression::Leaf(Leaf::Bare(<>)),
|
||||
<Flag> => Expression::Flag(<>),
|
||||
// An `ArgumentExpression` is an expression that appears in an argument list. It includes all of `Expression`, and
|
||||
// bare words are interpreted as strings.
|
||||
ArgumentExpression: Expression = {
|
||||
<Expression>,
|
||||
<Bare> => Expression::bare(<>),
|
||||
}
|
||||
|
||||
PathExpression: Expression = {
|
||||
<head:WholeExpression> <tail: ( "???." <Member> )+> => Expression::Path(Box::new(Path::new(head, tail)))
|
||||
CallArgument: Expression = {
|
||||
<ArgumentExpression> => <>,
|
||||
<Flag> => Expression::flag(<>),
|
||||
}
|
||||
|
||||
Expr: Expression = {
|
||||
<PathExpression>,
|
||||
<PathHead>
|
||||
SingleCallArgument: Expression = {
|
||||
<CallArgument>,
|
||||
<Binary>,
|
||||
}
|
||||
|
||||
Var: Expression = {
|
||||
"$" <"variable"> => Variable::from_str(<>.as_slice()),
|
||||
// A `SingleExpression` is a special-case of `Expression` for situations where expressions do not appear side-by-side.
|
||||
// Because expression lists in nu are not comma-separated, composite expressions (like binary expressions) must be
|
||||
// parenthesized in lists. If only a single expression appears alone, the parentheses may be left out.
|
||||
//
|
||||
// `SingleExpression` does not include `Bare`, because expressions that include `SingleExpression` must decide how
|
||||
// to interpret a single bare word (`foreach { ls }` vs `cd ls`).
|
||||
SingleExpression: Expression = {
|
||||
<Expression>,
|
||||
<Call>,
|
||||
<Binary>,
|
||||
}
|
||||
|
||||
// === LOGICAL TOKENS === //
|
||||
|
||||
// A logical token may be composed of more than one raw token, but the tokens must be emitted
|
||||
// from the stream in exactly one sequence. This allows us to use parser infrastructure to
|
||||
// compose tokens without the risk that these logical tokens will introduce ambiguities.
|
||||
|
||||
Bare: BarePath = {
|
||||
<head: "bare"> => BarePath::from_token(head)
|
||||
}
|
||||
|
||||
// A member is a special token that represents bare words or string literals immediately
|
||||
// following a dot.
|
||||
Member: String = {
|
||||
<"member"> => <>.to_string(),
|
||||
<String>
|
||||
<"dqmember"> => <>.to_string(),
|
||||
<"sqmember"> => <>.to_string(),
|
||||
}
|
||||
|
||||
Operator: Operator = {
|
||||
@ -83,26 +128,26 @@ Operator: Operator = {
|
||||
">=" => Operator::GreaterThanOrEqual
|
||||
}
|
||||
|
||||
Flag: Flag = {
|
||||
"-" <BarePath> => Flag::Shorthand(<>.to_string()),
|
||||
"--" <BarePath> => Flag::Longhand(<>.to_string()),
|
||||
}
|
||||
|
||||
String: String = {
|
||||
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string(),
|
||||
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string()
|
||||
}
|
||||
|
||||
BarePath: BarePath = {
|
||||
<head: "bare"> <tail: ( "???." <"member"> )*> => BarePath::from_tokens(head, tail)
|
||||
}
|
||||
|
||||
Int: i64 = {
|
||||
<"num"> => i64::from_str(<>.as_slice()).unwrap()
|
||||
}
|
||||
|
||||
UnitsNum: Leaf = {
|
||||
<num: Int> <unit: "unit"> => Leaf::Unit(num, Unit::from_str(unit.as_slice()).unwrap())
|
||||
UnitsNum: Expression = {
|
||||
<num: Int> <unit: "unit"> => Expression::leaf(Leaf::Unit(num, Unit::from_str(unit.as_slice()).unwrap()))
|
||||
}
|
||||
|
||||
String: Expression = {
|
||||
<"sqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string().into(),
|
||||
<"dqstring"> => <>.as_slice()[1..(<>.as_slice().len() - 1)].to_string().into()
|
||||
}
|
||||
|
||||
Flag: Flag = {
|
||||
"-" <Bare> => Flag::Shorthand(<>.to_string()),
|
||||
"--" <Bare> => Flag::Longhand(<>.to_string()),
|
||||
}
|
||||
|
||||
Var: Expression = {
|
||||
"$" <"variable"> => Variable::from_str(<>.as_slice()).into(),
|
||||
}
|
||||
|
||||
extern {
|
||||
@ -127,6 +172,8 @@ extern {
|
||||
"???." => SpannedToken { token: Token::PathDot, .. },
|
||||
"num" => SpannedToken { token: Token::Num, .. },
|
||||
"member" => SpannedToken { token: Token::Member, .. },
|
||||
"sqmember" => SpannedToken { token: Token::SQMember, .. },
|
||||
// Double-quoted members are lexed as `Token::DQMember`, distinct from `Token::SQMember`.
"dqmember" => SpannedToken { token: Token::DQMember, .. },
|
||||
"variable" => SpannedToken { token: Token::Variable, .. },
|
||||
"bare" => SpannedToken { token: Token::Bare, .. },
|
||||
"dqstring" => SpannedToken { token: Token::DQString, .. },
|
||||
|
4370
src/parser/parser.rs
4370
src/parser/parser.rs
File diff suppressed because it is too large
Load Diff
@ -73,6 +73,7 @@ pub struct CommandConfig {
|
||||
crate named: IndexMap<String, NamedType>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Args {
|
||||
pub positional: Vec<Value>,
|
||||
pub named: IndexMap<String, Value>,
|
||||
|
Reference in New Issue
Block a user