nushell/crates/nu-parser/src/parser.rs

2732 lines
92 KiB
Rust
Raw Normal View History

2021-08-17 01:00:00 +02:00
use std::{
fmt::Display,
ops::{Index, IndexMut},
};
2021-07-02 03:42:25 +02:00
2021-07-01 02:01:04 +02:00
use crate::{
lex, lite_parse,
parser_state::{Type, VarId},
2021-07-16 23:55:12 +02:00
signature::{Flag, PositionalArg},
BlockId, DeclId, Declaration, LiteBlock, ParseError, ParserWorkingSet, Signature, Span, Token,
2021-08-25 21:29:36 +02:00
TokenContents,
2021-07-01 02:01:04 +02:00
};
/// The syntactic shapes that values must match to be passed into a command. You can think of this as the type-checking that occurs when you call a function.
///
/// Each shape can be mapped to the type of the value it produces via `SyntaxShape::to_type`.
2021-07-02 00:40:08 +02:00
#[derive(Debug, Clone, PartialEq, Eq)]
2021-07-01 02:01:04 +02:00
pub enum SyntaxShape {
2021-07-01 08:09:55 +02:00
/// A specific match to a word or symbol (stored as raw bytes), followed by a value of the
/// boxed shape
2021-07-24 07:57:17 +02:00
Keyword(Vec<u8>, Box<SyntaxShape>),
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// Any syntactic form is allowed
Any,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// Strings and string-like bare words are allowed
String,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A dotted path to navigate the table
ColumnPath,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A dotted path to navigate the table (including variable)
FullColumnPath,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// Only a numeric (integer or decimal) value is allowed
Number,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A range is allowed (eg, `1..3`)
Range,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// Only an integer value is allowed
Int,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A filepath is allowed
FilePath,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A glob pattern is allowed, eg `foo*`
GlobPattern,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A block is allowed, eg `{start this thing}`
Block,
2021-07-08 00:55:46 +02:00
2021-07-09 08:23:20 +02:00
/// A table is allowed, eg `[[first, second]; [1, 2]]`
2021-07-01 02:01:04 +02:00
Table,
2021-07-08 00:55:46 +02:00
2021-07-08 09:49:17 +02:00
/// A list is allowed, eg `[first second]`; the boxed shape is the shape of each element
List(Box<SyntaxShape>),
2021-07-01 02:01:04 +02:00
/// A filesize value is allowed, eg `10kb`
Filesize,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A duration value is allowed, eg `19day`
Duration,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// An operator
Operator,
2021-07-08 00:55:46 +02:00
2021-07-01 02:01:04 +02:00
/// A math expression which expands shorthand forms on the lefthand side, eg `foo > 1`
/// The shorthand allows us to more easily reach columns inside of the row being passed in
RowCondition,
2021-07-08 00:55:46 +02:00
2021-07-02 00:54:04 +02:00
/// A general math expression, eg `1 + 2`
2021-07-01 02:01:04 +02:00
MathExpression,
2021-07-08 00:55:46 +02:00
2021-07-08 08:19:38 +02:00
/// A variable name
Variable,
2021-07-16 08:24:46 +02:00
/// A variable with optional type, `x` or `x: int`
VarWithOptType,
2021-07-16 23:55:12 +02:00
/// A signature for a definition, `[x:int, --foo]`
Signature,
2021-07-02 08:44:37 +02:00
/// A general expression, eg `1 + 2` or `foo --bar`
Expression,
}
2021-07-23 23:19:30 +02:00
impl SyntaxShape {
    /// Returns the [`Type`] expected of a value parsed with this shape.
    ///
    /// Shapes that do not correspond to one specific type map to `Type::Unknown`.
    /// `Keyword` defers to the shape that follows the keyword, and `List` wraps the
    /// type of its element shape.
    pub fn to_type(&self) -> Type {
        match self {
            // Shapes with no single corresponding type.
            SyntaxShape::Any
            | SyntaxShape::ColumnPath
            | SyntaxShape::Expression
            | SyntaxShape::FullColumnPath
            | SyntaxShape::MathExpression
            | SyntaxShape::Operator
            | SyntaxShape::Range
            | SyntaxShape::Signature
            | SyntaxShape::VarWithOptType
            | SyntaxShape::Variable => Type::Unknown,

            // Shapes whose values are strings.
            SyntaxShape::GlobPattern | SyntaxShape::String => Type::String,

            // One-to-one mappings.
            SyntaxShape::Block => Type::Block,
            SyntaxShape::Duration => Type::Duration,
            SyntaxShape::FilePath => Type::FilePath,
            SyntaxShape::Filesize => Type::Filesize,
            SyntaxShape::Int => Type::Int,
            SyntaxShape::Number => Type::Number,
            SyntaxShape::RowCondition => Type::Bool,
            SyntaxShape::Table => Type::Table,

            // Composite shapes recurse into their inner shape.
            SyntaxShape::Keyword(_, inner) => inner.to_type(),
            SyntaxShape::List(element) => Type::List(Box::new(element.to_type())),
        }
    }
}
2021-07-02 08:44:37 +02:00
/// The operators the parser recognises inside math expressions.
///
/// The spelling noted on each variant is the one produced by this type's `Display` impl.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operator {
/// `==`
Equal,
/// `!=`
NotEqual,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `<=`
LessThanOrEqual,
/// `>=`
GreaterThanOrEqual,
/// `=~`
Contains,
/// `!~`
NotContains,
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Multiply,
/// `/`
Divide,
/// `in`
In,
/// `not-in`
NotIn,
/// `mod`
Modulo,
/// `&&`
And,
/// `||`
Or,
/// `**`
Pow,
2021-07-01 02:01:04 +02:00
}
2021-06-30 03:42:56 +02:00
2021-08-17 01:00:00 +02:00
impl Display for Operator {
    /// Writes the operator's textual spelling (for example `==` or `not-in`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            // Comparison
            Operator::Equal => "==",
            Operator::NotEqual => "!=",
            Operator::LessThan => "<",
            Operator::LessThanOrEqual => "<=",
            Operator::GreaterThan => ">",
            Operator::GreaterThanOrEqual => ">=",
            // Matching and membership
            Operator::Contains => "=~",
            Operator::NotContains => "!~",
            Operator::In => "in",
            Operator::NotIn => "not-in",
            // Arithmetic
            Operator::Plus => "+",
            Operator::Minus => "-",
            Operator::Multiply => "*",
            Operator::Divide => "/",
            Operator::Modulo => "mod",
            Operator::Pow => "**",
            // Boolean
            Operator::And => "&&",
            Operator::Or => "||",
        };
        f.write_str(symbol)
    }
}
2021-07-02 00:40:08 +02:00
#[derive(Debug, Clone)]
pub struct Call {
/// identifier of the declaration to call
pub decl_id: DeclId,
2021-07-22 21:50:59 +02:00
pub head: Span,
2021-07-02 00:40:08 +02:00
pub positional: Vec<Expression>,
pub named: Vec<(String, Option<Expression>)>,
}
impl Default for Call {
fn default() -> Self {
Self::new()
}
}
impl Call {
pub fn new() -> Call {
Self {
decl_id: 0,
2021-07-22 21:50:59 +02:00
head: Span::unknown(),
2021-07-02 00:40:08 +02:00
positional: vec![],
named: vec![],
}
}
}
/// The kinds of expression the parser can produce.
#[derive(Debug, Clone)]
2021-07-01 02:01:04 +02:00
pub enum Expr {
2021-07-24 07:57:17 +02:00
/// A boolean value
Bool(bool),
2021-07-01 02:01:04 +02:00
/// An integer value
Int(i64),
2021-08-08 22:21:21 +02:00
/// A floating-point value
Float(f64),
2021-07-01 02:01:04 +02:00
/// A variable, identified by its id
Var(VarId),
2021-07-02 21:30:03 +02:00
/// A call to an internal command
Call(Box<Call>),
2021-07-08 09:20:01 +02:00
/// A call to an external command: the raw bytes of the command name, then the raw bytes
/// of each argument
ExternalCall(Vec<u8>, Vec<Vec<u8>>),
2021-07-02 08:44:37 +02:00
/// An operator on its own
Operator(Operator),
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
2021-07-16 22:26:40 +02:00
/// A subexpression, identified by the id of its block
Subexpression(BlockId),
/// A block, identified by its id
Block(BlockId),
2021-07-06 00:58:56 +02:00
/// A list of expressions
List(Vec<Expression>),
2021-07-06 03:48:45 +02:00
// NOTE(review): presumably (header cells, rows of cells) — confirm against the table parser
Table(Vec<Expression>, Vec<Vec<Expression>>),
2021-07-30 06:38:41 +02:00
/// A keyword (raw bytes), the span of the keyword, and the expression that follows it
Keyword(Vec<u8>, Span, Box<Expression>),
2021-07-06 03:48:45 +02:00
String(String), // FIXME: improve this in the future?
2021-07-30 00:56:51 +02:00
/// A command signature
Signature(Box<Signature>),
2021-07-01 02:01:04 +02:00
/// Placeholder produced when parsing failed
Garbage,
}
2021-07-02 00:40:08 +02:00
/// A parsed expression together with its source span and type.
#[derive(Debug, Clone)]
2021-07-01 02:01:04 +02:00
pub struct Expression {
/// What kind of expression this is, including any payload
pub expr: Expr,
/// The span of source this expression was parsed from
pub span: Span,
2021-07-23 23:19:30 +02:00
/// The type assigned to the expression by the parser (`Type::Unknown` when not determined)
pub ty: Type,
2021-07-01 02:01:04 +02:00
}
impl Expression {
pub fn garbage(span: Span) -> Expression {
Expression {
expr: Expr::Garbage,
span,
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-01 02:01:04 +02:00
}
}
2021-07-02 08:44:37 +02:00
pub fn precedence(&self) -> usize {
match &self.expr {
Expr::Operator(operator) => {
// Higher precedence binds tighter
match operator {
Operator::Pow => 100,
Operator::Multiply | Operator::Divide | Operator::Modulo => 95,
Operator::Plus | Operator::Minus => 90,
Operator::NotContains
| Operator::Contains
| Operator::LessThan
| Operator::LessThanOrEqual
| Operator::GreaterThan
| Operator::GreaterThanOrEqual
| Operator::Equal
| Operator::NotEqual
| Operator::In
| Operator::NotIn => 80,
Operator::And => 50,
Operator::Or => 40, // TODO: should we have And and Or be different precedence?
}
}
_ => 0,
}
}
2021-07-16 08:24:46 +02:00
2021-07-17 07:28:25 +02:00
pub fn as_block(&self) -> Option<BlockId> {
2021-07-16 08:24:46 +02:00
match self.expr {
2021-07-16 22:26:40 +02:00
Expr::Block(block_id) => Some(block_id),
2021-07-16 08:24:46 +02:00
_ => None,
}
}
2021-07-30 00:56:51 +02:00
pub fn as_signature(&self) -> Option<Box<Signature>> {
2021-07-17 07:28:25 +02:00
match &self.expr {
Expr::Signature(sig) => Some(sig.clone()),
2021-07-16 23:55:12 +02:00
_ => None,
}
}
2021-07-17 07:28:25 +02:00
pub fn as_list(&self) -> Option<Vec<Expression>> {
match &self.expr {
Expr::List(list) => Some(list.clone()),
2021-07-16 08:24:46 +02:00
_ => None,
}
}
2021-07-24 07:57:17 +02:00
pub fn as_keyword(&self) -> Option<&Expression> {
match &self.expr {
2021-07-30 06:38:41 +02:00
Expr::Keyword(_, _, expr) => Some(expr),
2021-07-24 07:57:17 +02:00
_ => None,
}
}
2021-07-17 07:28:25 +02:00
pub fn as_var(&self) -> Option<VarId> {
2021-07-16 08:24:46 +02:00
match self.expr {
Expr::Var(var_id) => Some(var_id),
_ => None,
}
}
2021-07-17 07:28:25 +02:00
pub fn as_string(&self) -> Option<String> {
match &self.expr {
Expr::String(string) => Some(string.clone()),
2021-07-16 08:24:46 +02:00
_ => None,
}
}
2021-07-01 02:01:04 +02:00
}
2021-06-30 03:42:56 +02:00
2021-07-02 09:15:30 +02:00
/// Placeholder type with no variants; it cannot be instantiated.
// NOTE(review): presumably reserved for future import support — nothing in view uses it.
#[derive(Debug, Clone)]
2021-06-30 03:42:56 +02:00
pub enum Import {}
2021-07-02 09:15:30 +02:00
#[derive(Debug, Clone)]
2021-06-30 03:42:56 +02:00
pub struct Block {
2021-07-02 03:42:25 +02:00
pub stmts: Vec<Statement>,
}
impl Block {
pub fn len(&self) -> usize {
self.stmts.len()
}
pub fn is_empty(&self) -> bool {
self.stmts.is_empty()
}
}
impl Index<usize> for Block {
type Output = Statement;
fn index(&self, index: usize) -> &Self::Output {
&self.stmts[index]
}
}
impl IndexMut<usize> for Block {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.stmts[index]
}
2021-06-30 03:42:56 +02:00
}
2021-07-01 02:01:04 +02:00
impl Default for Block {
fn default() -> Self {
Self::new()
}
}
2021-06-30 03:42:56 +02:00
impl Block {
pub fn new() -> Self {
Self { stmts: vec![] }
}
}
2021-07-02 09:15:30 +02:00
/// Pairs a variable id with an expression.
// NOTE(review): presumably the variable being declared and its initial value — no code in
// view constructs or reads this type, so confirm before relying on that.
#[derive(Debug, Clone)]
2021-06-30 03:42:56 +02:00
pub struct VarDecl {
2021-07-01 02:01:04 +02:00
var_id: VarId,
expression: Expression,
2021-06-30 03:42:56 +02:00
}
2021-07-02 09:15:30 +02:00
/// A single statement inside a `Block`.
#[derive(Debug, Clone)]
2021-06-30 03:42:56 +02:00
pub enum Statement {
2021-07-23 23:19:30 +02:00
/// A declaration, identified by its id
Declaration(DeclId),
2021-06-30 03:42:56 +02:00
/// A pipeline of expressions
Pipeline(Pipeline),
2021-07-01 02:01:04 +02:00
/// A standalone expression
Expression(Expression),
2021-06-30 03:42:56 +02:00
}
2021-07-02 09:15:30 +02:00
#[derive(Debug, Clone)]
2021-07-17 05:42:08 +02:00
pub struct Pipeline {
pub expressions: Vec<Expression>,
}
2021-06-30 03:42:56 +02:00
2021-07-01 02:01:04 +02:00
impl Default for Pipeline {
fn default() -> Self {
Self::new()
}
}
2021-06-30 03:42:56 +02:00
impl Pipeline {
pub fn new() -> Self {
2021-07-17 05:42:08 +02:00
Self {
expressions: vec![],
}
2021-06-30 03:42:56 +02:00
}
}
2021-07-01 02:01:04 +02:00
fn garbage(span: Span) -> Expression {
Expression::garbage(span)
}
2021-07-01 03:31:02 +02:00
/// Returns `true` unless `b` is one of the bytes that end an identifier: `.`, `[`, `(` or `{`.
fn is_identifier_byte(b: u8) -> bool {
    !matches!(b, b'.' | b'[' | b'(' | b'{')
}
/// Returns `true` when every byte of `bytes` is an identifier byte.
///
/// An empty slice counts as an identifier.
fn is_identifier(bytes: &[u8]) -> bool {
    !bytes.iter().any(|byte| !is_identifier_byte(*byte))
}
/// Returns `true` when `bytes` is an identifier, optionally prefixed with `$`.
///
/// The `$` is only stripped when at least one byte follows it; a lone `$` is checked as-is.
fn is_variable(bytes: &[u8]) -> bool {
    match bytes {
        [b'$', rest @ ..] if !rest.is_empty() => is_identifier(rest),
        _ => is_identifier(bytes),
    }
}
2021-07-02 04:22:54 +02:00
/// Checks that `call` supplies everything `sig` requires.
///
/// Returns `MissingPositional` naming the first required positional that was not supplied,
/// otherwise `MissingRequiredFlag` for the first required flag absent from the call's named
/// arguments, otherwise `None`. Both errors point at `command`.
fn check_call(command: Span, sig: &Signature, call: &Call) -> Option<ParseError> {
    // The first required positional without a matching argument sits at index
    // `call.positional.len()`, if that index exists at all.
    if let Some(missing) = sig.required_positional.get(call.positional.len()) {
        return Some(ParseError::MissingPositional(missing.name.clone(), command));
    }

    sig.named
        .iter()
        .filter(|flag| flag.required)
        .find(|flag| !call.named.iter().any(|(name, _)| name == &flag.long))
        .map(|flag| ParseError::MissingRequiredFlag(flag.long.clone(), command))
}
2021-08-10 20:51:08 +02:00
pub fn span(spans: &[Span]) -> Span {
2021-07-01 02:01:04 +02:00
let length = spans.len();
if length == 0 {
Span::unknown()
} else if length == 1 {
2021-07-01 02:01:04 +02:00
spans[0]
} else {
Span {
start: spans[0].start,
end: spans[length - 1].end,
}
}
}
impl<'a> ParserWorkingSet<'a> {
2021-07-01 08:09:55 +02:00
pub fn parse_external_call(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
// TODO: add external parsing
2021-07-08 09:20:01 +02:00
let mut args = vec![];
let name = self.get_span_contents(spans[0]).to_vec();
for span in &spans[1..] {
args.push(self.get_span_contents(*span).to_vec());
}
(
Expression {
expr: Expr::ExternalCall(name, args),
span: span(spans),
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-08 09:20:01 +02:00
},
None,
)
2021-07-01 02:01:04 +02:00
}
2021-07-08 22:29:00 +02:00
fn parse_long_flag(
&mut self,
spans: &[Span],
2021-07-08 23:16:25 +02:00
spans_idx: &mut usize,
2021-07-08 22:29:00 +02:00
sig: &Signature,
) -> (Option<String>, Option<Expression>, Option<ParseError>) {
2021-07-08 23:16:25 +02:00
let arg_span = spans[*spans_idx];
2021-07-08 22:29:00 +02:00
let arg_contents = self.get_span_contents(arg_span);
2021-07-17 05:42:08 +02:00
if arg_contents.starts_with(b"--") {
2021-07-08 22:29:00 +02:00
// FIXME: only use the first you find
let split: Vec<_> = arg_contents.split(|x| *x == b'=').collect();
let long_name = String::from_utf8(split[0].into());
if let Ok(long_name) = long_name {
if let Some(flag) = sig.get_long_flag(&long_name) {
if let Some(arg_shape) = &flag.arg {
if split.len() > 1 {
// and we also have the argument
let mut span = arg_span;
span.start += long_name.len() + 1; //offset by long flag and '='
2021-07-24 07:57:17 +02:00
let (arg, err) = self.parse_value(span, arg_shape);
2021-07-08 22:29:00 +02:00
(Some(long_name), Some(arg), err)
2021-07-08 23:16:25 +02:00
} else if let Some(arg) = spans.get(*spans_idx + 1) {
2021-07-24 07:57:17 +02:00
let (arg, err) = self.parse_value(*arg, arg_shape);
2021-07-08 22:29:00 +02:00
2021-07-08 23:16:25 +02:00
*spans_idx += 1;
2021-07-08 22:29:00 +02:00
(Some(long_name), Some(arg), err)
} else {
(
Some(long_name),
None,
Some(ParseError::MissingFlagParam(arg_span)),
)
}
} else {
// A flag with no argument
(Some(long_name), None, None)
}
} else {
(
Some(long_name),
None,
Some(ParseError::UnknownFlag(arg_span)),
)
}
} else {
(Some("--".into()), None, Some(ParseError::NonUtf8(arg_span)))
}
} else {
(None, None, None)
}
}
fn parse_short_flags(
&mut self,
spans: &[Span],
2021-07-08 23:16:25 +02:00
spans_idx: &mut usize,
2021-07-08 22:29:00 +02:00
positional_idx: usize,
sig: &Signature,
) -> (Option<Vec<Flag>>, Option<ParseError>) {
let mut error = None;
2021-07-08 23:16:25 +02:00
let arg_span = spans[*spans_idx];
2021-07-08 22:29:00 +02:00
let arg_contents = self.get_span_contents(arg_span);
2021-07-17 05:42:08 +02:00
if arg_contents.starts_with(b"-") && arg_contents.len() > 1 {
2021-07-08 22:29:00 +02:00
let short_flags = &arg_contents[1..];
let mut found_short_flags = vec![];
let mut unmatched_short_flags = vec![];
for short_flag in short_flags.iter().enumerate() {
let short_flag_char = char::from(*short_flag.1);
let orig = arg_span;
let short_flag_span = Span {
start: orig.start + 1 + short_flag.0,
end: orig.start + 1 + short_flag.0 + 1,
};
if let Some(flag) = sig.get_short_flag(short_flag_char) {
// If we require an arg and are in a batch of short flags, error
if !found_short_flags.is_empty() && flag.arg.is_some() {
error =
error.or(Some(ParseError::ShortFlagBatchCantTakeArg(short_flag_span)))
}
found_short_flags.push(flag);
} else {
unmatched_short_flags.push(short_flag_span);
}
}
if found_short_flags.is_empty() {
// check to see if we have a negative number
if let Some(positional) = sig.get_positional(positional_idx) {
if positional.shape == SyntaxShape::Int
|| positional.shape == SyntaxShape::Number
{
2021-07-30 00:56:51 +02:00
if String::from_utf8_lossy(arg_contents).parse::<f64>().is_ok() {
2021-07-08 22:29:00 +02:00
return (None, None);
} else if let Some(first) = unmatched_short_flags.first() {
error = error.or(Some(ParseError::UnknownFlag(*first)));
}
} else if let Some(first) = unmatched_short_flags.first() {
error = error.or(Some(ParseError::UnknownFlag(*first)));
}
} else if let Some(first) = unmatched_short_flags.first() {
error = error.or(Some(ParseError::UnknownFlag(*first)));
}
} else if !unmatched_short_flags.is_empty() {
if let Some(first) = unmatched_short_flags.first() {
error = error.or(Some(ParseError::UnknownFlag(*first)));
}
}
(Some(found_short_flags), error)
} else {
(None, None)
}
}
2021-07-24 07:57:17 +02:00
fn calculate_end_span(
&self,
decl: &Declaration,
spans: &[Span],
spans_idx: usize,
positional_idx: usize,
) -> usize {
if decl.signature.rest_positional.is_some() {
spans.len()
} else {
// println!("num_positionals: {}", decl.signature.num_positionals());
// println!("positional_idx: {}", positional_idx);
// println!("spans.len(): {}", spans.len());
// println!("spans_idx: {}", spans_idx);
// check to see if a keyword follows the current position.
let mut next_keyword_idx = spans.len();
for idx in (positional_idx + 1)..decl.signature.num_positionals() {
2021-07-30 00:56:51 +02:00
if let Some(PositionalArg {
shape: SyntaxShape::Keyword(kw, ..),
..
}) = decl.signature.get_positional(idx)
{
#[allow(clippy::needless_range_loop)]
for span_idx in spans_idx..spans.len() {
let contents = self.get_span_contents(spans[span_idx]);
2021-07-24 07:57:17 +02:00
2021-07-30 00:56:51 +02:00
if contents == kw {
next_keyword_idx = span_idx - (idx - (positional_idx + 1));
break;
2021-07-24 07:57:17 +02:00
}
}
}
}
let remainder = decl.signature.num_positionals_after(positional_idx);
let remainder_idx = if remainder < spans.len() {
spans.len() - remainder + 1
} else {
spans_idx + 1
};
let end = [next_keyword_idx, remainder_idx, spans.len()]
.iter()
.min()
2021-07-30 00:56:51 +02:00
.copied()
.expect("internal error: can't find min");
2021-07-24 07:57:17 +02:00
// println!(
// "{:?}",
// [
// next_keyword_idx,
// remainder_idx,
// spans.len(),
// spans_idx,
// remainder,
// positional_idx,
// ]
// );
end
}
}
2021-07-08 23:16:25 +02:00
fn parse_multispan_value(
&mut self,
spans: &[Span],
spans_idx: &mut usize,
2021-07-24 07:57:17 +02:00
shape: &SyntaxShape,
2021-07-08 23:16:25 +02:00
) -> (Expression, Option<ParseError>) {
let mut error = None;
match shape {
2021-07-16 08:24:46 +02:00
SyntaxShape::VarWithOptType => {
let (arg, err) = self.parse_var_with_opt_type(spans, spans_idx);
error = error.or(err);
(arg, error)
}
2021-07-08 23:16:25 +02:00
SyntaxShape::RowCondition => {
2021-07-17 07:28:25 +02:00
let (arg, err) = self.parse_row_condition(&spans[*spans_idx..]);
2021-07-08 23:16:25 +02:00
error = error.or(err);
2021-07-24 07:57:17 +02:00
*spans_idx = spans.len() - 1;
2021-07-08 23:16:25 +02:00
(arg, error)
}
SyntaxShape::Expression => {
2021-07-17 07:28:25 +02:00
let (arg, err) = self.parse_expression(&spans[*spans_idx..]);
2021-07-08 23:16:25 +02:00
error = error.or(err);
2021-07-24 07:57:17 +02:00
*spans_idx = spans.len() - 1;
2021-07-08 23:16:25 +02:00
(arg, error)
}
2021-07-24 07:57:17 +02:00
SyntaxShape::Keyword(keyword, arg) => {
2021-07-17 07:28:25 +02:00
let arg_span = spans[*spans_idx];
2021-07-08 23:16:25 +02:00
let arg_contents = self.get_span_contents(arg_span);
2021-07-17 07:28:25 +02:00
2021-07-24 07:57:17 +02:00
if arg_contents != keyword {
2021-07-08 23:16:25 +02:00
// When keywords mismatch, this is a strong indicator of something going wrong.
// We won't often override the current error, but as this is a strong indicator
// go ahead and override the current error and tell the user about the missing
// keyword/literal.
2021-08-17 01:00:00 +02:00
error = Some(ParseError::ExpectedKeyword(
2021-07-30 00:56:51 +02:00
String::from_utf8_lossy(keyword).into(),
2021-07-08 23:16:25 +02:00
arg_span,
))
}
2021-07-24 07:57:17 +02:00
*spans_idx += 1;
if *spans_idx >= spans.len() {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
Some(ParseError::MissingPositional(
String::from_utf8_lossy(keyword).into(),
spans[*spans_idx - 1],
))
});
2021-07-24 07:57:17 +02:00
return (
Expression {
expr: Expr::Keyword(
keyword.clone(),
2021-07-30 06:38:41 +02:00
spans[*spans_idx - 1],
2021-07-24 07:57:17 +02:00
Box::new(Expression::garbage(arg_span)),
),
span: arg_span,
ty: Type::Unknown,
},
error,
);
}
2021-07-30 06:43:31 +02:00
let keyword_span = spans[*spans_idx - 1];
2021-07-30 00:56:51 +02:00
let (expr, err) = self.parse_multispan_value(spans, spans_idx, arg);
2021-07-24 07:57:17 +02:00
error = error.or(err);
let ty = expr.ty.clone();
2021-07-08 23:16:25 +02:00
(
Expression {
2021-07-30 06:43:31 +02:00
expr: Expr::Keyword(keyword.clone(), keyword_span, Box::new(expr)),
2021-07-08 23:16:25 +02:00
span: arg_span,
2021-07-24 07:57:17 +02:00
ty,
2021-07-08 23:16:25 +02:00
},
error,
)
}
2021-07-30 00:56:51 +02:00
_ => {
2021-07-08 23:31:08 +02:00
// All other cases are single-span values
2021-07-17 07:28:25 +02:00
let arg_span = spans[*spans_idx];
2021-07-30 00:56:51 +02:00
let (arg, err) = self.parse_value(arg_span, shape);
2021-07-08 23:16:25 +02:00
error = error.or(err);
(arg, error)
}
}
}
2021-07-08 08:19:38 +02:00
pub fn parse_internal_call(
&mut self,
command_span: Span,
2021-07-08 08:19:38 +02:00
spans: &[Span],
decl_id: usize,
) -> (Box<Call>, Span, Option<ParseError>) {
2021-07-02 00:40:08 +02:00
let mut error = None;
2021-07-08 08:19:38 +02:00
let mut call = Call::new();
call.decl_id = decl_id;
2021-07-22 21:50:59 +02:00
call.head = command_span;
2021-07-08 08:19:38 +02:00
2021-07-23 07:14:49 +02:00
let decl = self.get_decl(decl_id).clone();
2021-07-08 08:19:38 +02:00
2021-07-08 23:16:25 +02:00
// The index into the positional parameter in the definition
2021-07-08 08:19:38 +02:00
let mut positional_idx = 0;
2021-07-08 23:16:25 +02:00
// The index into the spans of argument data given to parse
// Starting at the first argument
let mut spans_idx = 0;
2021-07-02 00:40:08 +02:00
2021-07-08 23:16:25 +02:00
while spans_idx < spans.len() {
let arg_span = spans[spans_idx];
2021-07-09 08:23:20 +02:00
// Check if we're on a long flag, if so, parse
let (long_name, arg, err) =
self.parse_long_flag(spans, &mut spans_idx, &decl.signature);
2021-07-08 22:29:00 +02:00
if let Some(long_name) = long_name {
// We found a long flag, like --bar
error = error.or(err);
call.named.push((long_name, arg));
2021-07-08 23:16:25 +02:00
spans_idx += 1;
2021-07-08 22:29:00 +02:00
continue;
}
2021-07-02 00:40:08 +02:00
2021-07-09 08:23:20 +02:00
// Check if we're on a short flag or group of short flags, if so, parse
2021-07-08 22:29:00 +02:00
let (short_flags, err) =
self.parse_short_flags(spans, &mut spans_idx, positional_idx, &decl.signature);
2021-07-08 22:29:00 +02:00
if let Some(short_flags) = short_flags {
error = error.or(err);
for flag in short_flags {
2021-07-08 08:19:38 +02:00
if let Some(arg_shape) = flag.arg {
2021-07-08 23:16:25 +02:00
if let Some(arg) = spans.get(spans_idx + 1) {
2021-07-24 07:57:17 +02:00
let (arg, err) = self.parse_value(*arg, &arg_shape);
2021-07-08 08:19:38 +02:00
error = error.or(err);
2021-07-02 00:40:08 +02:00
2021-07-08 08:19:38 +02:00
call.named.push((flag.long.clone(), Some(arg)));
2021-07-08 23:16:25 +02:00
spans_idx += 1;
2021-07-02 00:40:08 +02:00
} else {
2021-07-08 08:19:38 +02:00
error = error.or(Some(ParseError::MissingFlagParam(arg_span)))
2021-07-02 00:40:08 +02:00
}
2021-07-08 08:19:38 +02:00
} else {
call.named.push((flag.long.clone(), None));
2021-07-02 00:40:08 +02:00
}
2021-07-08 08:19:38 +02:00
}
2021-07-08 23:16:25 +02:00
spans_idx += 1;
2021-07-08 22:29:00 +02:00
continue;
}
2021-07-09 08:23:20 +02:00
// Parse a positional arg if there is one
if let Some(positional) = decl.signature.get_positional(positional_idx) {
2021-07-08 23:16:25 +02:00
//Make sure we leave enough spans for the remaining positionals
2021-07-24 07:57:17 +02:00
let end = self.calculate_end_span(&decl, spans, spans_idx, positional_idx);
2021-07-17 01:22:01 +02:00
2021-07-23 23:46:55 +02:00
let orig_idx = spans_idx;
2021-07-24 07:57:17 +02:00
let (arg, err) =
self.parse_multispan_value(&spans[..end], &mut spans_idx, &positional.shape);
2021-07-08 23:16:25 +02:00
error = error.or(err);
2021-07-23 23:46:55 +02:00
2021-08-17 01:00:00 +02:00
let arg = if !Self::type_compatible(&positional.shape.to_type(), &arg.ty) {
2021-07-24 07:57:17 +02:00
let span = span(&spans[orig_idx..spans_idx]);
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::TypeMismatch(
positional.shape.to_type(),
arg.ty,
arg.span,
))
2021-07-30 00:56:51 +02:00
});
2021-07-23 23:46:55 +02:00
Expression::garbage(span)
} else {
arg
};
2021-07-08 23:16:25 +02:00
call.positional.push(arg);
2021-07-08 08:19:38 +02:00
positional_idx += 1;
} else {
2021-07-23 23:46:55 +02:00
call.positional.push(Expression::garbage(arg_span));
2021-07-08 08:19:38 +02:00
error = error.or(Some(ParseError::ExtraPositional(arg_span)))
2021-07-02 00:40:08 +02:00
}
2021-07-08 23:16:25 +02:00
error = error.or(err);
spans_idx += 1;
2021-07-08 08:19:38 +02:00
}
2021-07-02 00:40:08 +02:00
let err = check_call(command_span, &decl.signature, &call);
2021-07-08 08:19:38 +02:00
error = error.or(err);
2021-07-02 04:22:54 +02:00
2021-07-08 08:19:38 +02:00
// FIXME: type unknown
(Box::new(call), span(spans), error)
}
2021-08-09 02:19:07 +02:00
pub fn parse_call(
&mut self,
spans: &[Span],
expand_aliases: bool,
) -> (Expression, Option<ParseError>) {
2021-07-08 08:19:38 +02:00
// assume spans.len() > 0?
2021-07-31 07:20:40 +02:00
let mut pos = 0;
let mut shorthand = vec![];
while pos < spans.len() {
2021-08-09 02:19:07 +02:00
// Check if there is any environment shorthand
2021-07-31 07:20:40 +02:00
let name = self.get_span_contents(spans[pos]);
let split: Vec<_> = name.splitn(2, |x| *x == b'=').collect();
if split.len() == 2 {
shorthand.push(split);
pos += 1;
} else {
break;
}
}
if pos == spans.len() {
return (
Expression::garbage(span(spans)),
Some(ParseError::UnknownCommand(spans[0])),
);
}
2021-08-08 23:55:18 +02:00
2021-07-31 07:20:40 +02:00
let name = self.get_span_contents(spans[pos]);
2021-08-09 02:19:07 +02:00
2021-08-17 01:00:00 +02:00
let cmd_start = pos;
2021-08-09 02:19:07 +02:00
if expand_aliases {
2021-08-17 01:00:00 +02:00
if let Some(expansion) = self.find_alias(&name) {
2021-08-09 10:00:16 +02:00
let orig_span = spans[pos];
2021-08-09 02:19:07 +02:00
//let mut spans = spans.to_vec();
let mut new_spans: Vec<Span> = vec![];
new_spans.extend(&spans[0..pos]);
new_spans.extend(expansion);
if spans.len() > pos {
new_spans.extend(&spans[(pos + 1)..]);
}
2021-08-09 10:00:16 +02:00
let (result, err) = self.parse_call(&new_spans, false);
let expression = match result {
Expression {
expr: Expr::Call(mut call),
span,
ty,
} => {
call.head = orig_span;
Expression {
expr: Expr::Call(call),
span,
ty,
}
}
x => x,
};
return (expression, err);
2021-08-09 02:19:07 +02:00
}
}
2021-07-31 07:20:40 +02:00
pos += 1;
2021-07-08 08:19:38 +02:00
2021-07-17 21:40:39 +02:00
if let Some(mut decl_id) = self.find_decl(name) {
2021-07-17 21:34:43 +02:00
let mut name = name.to_vec();
while pos < spans.len() {
2021-07-17 21:40:39 +02:00
// look to see if it's a subcommand
2021-07-17 21:34:43 +02:00
let mut new_name = name.to_vec();
new_name.push(b' ');
new_name.extend(self.get_span_contents(spans[pos]));
2021-08-17 01:00:00 +02:00
if expand_aliases {
if let Some(expansion) = self.find_alias(&new_name) {
2021-08-17 01:04:45 +02:00
let orig_span = span(&spans[cmd_start..pos + 1]);
2021-08-17 01:00:00 +02:00
//let mut spans = spans.to_vec();
let mut new_spans: Vec<Span> = vec![];
new_spans.extend(&spans[0..cmd_start]);
new_spans.extend(expansion);
if spans.len() > pos {
new_spans.extend(&spans[(pos + 1)..]);
}
let (result, err) = self.parse_call(&new_spans, false);
let expression = match result {
Expression {
expr: Expr::Call(mut call),
span,
ty,
} => {
call.head = orig_span;
Expression {
expr: Expr::Call(call),
span,
ty,
}
}
x => x,
};
return (expression, err);
}
}
2021-07-17 21:34:43 +02:00
if let Some(did) = self.find_decl(&new_name) {
2021-07-17 21:40:39 +02:00
decl_id = did;
2021-07-17 21:34:43 +02:00
} else {
break;
}
name = new_name;
pos += 1;
}
// parse internal command
let (call, _, err) =
self.parse_internal_call(span(&spans[0..pos]), &spans[pos..], decl_id);
2021-07-17 21:40:39 +02:00
(
Expression {
expr: Expr::Call(call),
span: span(spans),
2021-07-23 23:19:30 +02:00
ty: Type::Unknown, // FIXME
2021-07-17 21:40:39 +02:00
},
err,
)
2021-07-01 08:09:55 +02:00
} else {
self.parse_external_call(spans)
2021-06-30 03:42:56 +02:00
}
}
2021-07-01 02:01:04 +02:00
pub fn parse_int(&mut self, token: &str, span: Span) -> (Expression, Option<ParseError>) {
if let Some(token) = token.strip_prefix("0x") {
if let Ok(v) = i64::from_str_radix(token, 16) {
(
Expression {
expr: Expr::Int(v),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Int,
2021-07-01 02:01:04 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Mismatch(
"int".into(),
"incompatible int".into(),
span,
)),
2021-07-01 02:01:04 +02:00
)
}
} else if let Some(token) = token.strip_prefix("0b") {
if let Ok(v) = i64::from_str_radix(token, 2) {
(
Expression {
expr: Expr::Int(v),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Int,
2021-07-01 02:01:04 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Mismatch(
"int".into(),
"incompatible int".into(),
span,
)),
2021-07-01 02:01:04 +02:00
)
}
} else if let Some(token) = token.strip_prefix("0o") {
if let Ok(v) = i64::from_str_radix(token, 8) {
(
Expression {
expr: Expr::Int(v),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Int,
2021-07-01 02:01:04 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Mismatch(
"int".into(),
"incompatible int".into(),
span,
)),
2021-07-01 02:01:04 +02:00
)
}
} else if let Ok(x) = token.parse::<i64>() {
(
Expression {
expr: Expr::Int(x),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Int,
2021-07-01 02:01:04 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("int".into(), span)),
2021-07-01 02:01:04 +02:00
)
}
}
2021-08-08 22:21:21 +02:00
pub fn parse_float(&mut self, token: &str, span: Span) -> (Expression, Option<ParseError>) {
if let Ok(x) = token.parse::<f64>() {
(
Expression {
expr: Expr::Float(x),
span,
2021-08-25 21:29:36 +02:00
ty: Type::Float,
2021-08-08 22:21:21 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("float".into(), span)),
2021-08-08 22:21:21 +02:00
)
}
}
2021-07-01 02:01:04 +02:00
pub fn parse_number(&mut self, token: &str, span: Span) -> (Expression, Option<ParseError>) {
if let (x, None) = self.parse_int(token, span) {
(x, None)
2021-08-08 22:21:21 +02:00
} else if let (x, None) = self.parse_float(token, span) {
(x, None)
2021-07-01 02:01:04 +02:00
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("number".into(), span)),
2021-07-01 02:01:04 +02:00
)
}
}
2021-07-08 08:57:24 +02:00
/// Parses an expression that begins with `$`: a string interpolation when the text starts
/// with `$"`, a variable expression otherwise.
pub(crate) fn parse_dollar_expr(&mut self, span: Span) -> (Expression, Option<ParseError>) {
    let is_interpolation = self.get_span_contents(span).starts_with(b"$\"");

    if is_interpolation {
        self.parse_string_interpolation(span)
    } else {
        self.parse_variable_expr(span)
    }
}
/// Parses an interpolated string such as `$"hello (expr) world"`.
///
/// The text between the quotes is split into literal string pieces and parenthesised
/// pieces. Each parenthesised piece is handed to `parse_full_column_path`. All pieces become
/// the positional arguments of a call to the `build-string` declaration; if that declaration
/// cannot be found, a garbage expression and `ParseError::UnknownCommand` are returned.
pub fn parse_string_interpolation(&mut self, span: Span) -> (Expression, Option<ParseError>) {
    #[derive(PartialEq, Eq, Debug)]
    enum InterpolationMode {
        String,
        Expression,
    }

    let mut error = None;

    // Trim the leading `$"` and the trailing `"` (when present) off the span.
    let raw = self.get_span_contents(span);
    let start = span.start + if raw.starts_with(b"$\"") { 2 } else { 0 };
    let end = span.end - if raw.ends_with(b"\"") && raw.len() > 2 { 1 } else { 0 };

    let contents = self.get_span_contents(Span { start, end }).to_vec();

    let mut output = vec![];
    let mut mode = InterpolationMode::String;
    let mut token_start = start;
    // Nesting depth of parentheses while inside an expression piece.
    let mut depth = 0;

    for (offset, &byte) in contents.iter().enumerate() {
        let pos = start + offset;
        let in_expression = mode == InterpolationMode::Expression;

        match (byte, in_expression) {
            (b'(', false) => {
                // An expression piece begins: flush the literal text gathered so far.
                depth = 1;
                mode = InterpolationMode::Expression;
                if token_start < pos {
                    let str_span = Span {
                        start: token_start,
                        end: pos,
                    };
                    let str_contents = self.get_span_contents(str_span);
                    output.push(Expression {
                        expr: Expr::String(String::from_utf8_lossy(str_contents).into_owned()),
                        span: str_span,
                        ty: Type::String,
                    });
                }
                token_start = pos;
            }
            (b'(', true) => depth += 1,
            (b')', true) => match depth {
                0 => {}
                1 => {
                    // The outermost paren closes: parse the piece including both parens.
                    mode = InterpolationMode::String;
                    if token_start < pos {
                        let expr_span = Span {
                            start: token_start,
                            end: pos + 1,
                        };
                        let (expr, err) = self.parse_full_column_path(expr_span);
                        error = error.or(err);
                        output.push(expr);
                    }
                    token_start = pos + 1;
                }
                _ => depth -= 1,
            },
            _ => {}
        }
    }

    // Whatever remains after the scan is either trailing literal text or an expression
    // piece whose closing paren never arrived.
    if token_start < end {
        let tail_span = Span {
            start: token_start,
            end,
        };
        match mode {
            InterpolationMode::String => {
                let str_contents = self.get_span_contents(tail_span);
                output.push(Expression {
                    expr: Expr::String(String::from_utf8_lossy(str_contents).into_owned()),
                    span: tail_span,
                    ty: Type::String,
                });
            }
            InterpolationMode::Expression => {
                let (expr, err) = self.parse_full_column_path(tail_span);
                error = error.or(err);
                output.push(expr);
            }
        }
    }

    match self.find_decl(b"build-string") {
        Some(decl_id) => (
            Expression {
                expr: Expr::Call(Box::new(Call {
                    // The call head covers the first two bytes of the original span.
                    head: Span {
                        start: span.start,
                        end: span.start + 2,
                    },
                    named: vec![],
                    positional: output,
                    decl_id,
                })),
                span,
                ty: Type::String,
            },
            error,
        ),
        None => (
            Expression::garbage(span),
            Some(ParseError::UnknownCommand(span)),
        ),
    }
}
2021-07-02 09:15:30 +02:00
2021-07-24 07:57:17 +02:00
pub fn parse_variable_expr(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let contents = self.get_span_contents(span);
if contents == b"$true" {
return (
2021-07-02 09:15:30 +02:00
Expression {
2021-07-24 07:57:17 +02:00
expr: Expr::Bool(true),
2021-07-02 09:15:30 +02:00
span,
2021-07-24 07:57:17 +02:00
ty: Type::Bool,
2021-07-02 09:15:30 +02:00
},
None,
2021-07-24 07:57:17 +02:00
);
} else if contents == b"$false" {
return (
Expression {
expr: Expr::Bool(false),
span,
ty: Type::Bool,
},
None,
);
2021-07-02 09:15:30 +02:00
}
2021-07-08 08:19:38 +02:00
let (id, err) = self.parse_variable(span);
if err.is_none() {
if let Some(id) = id {
(
Expression {
expr: Expr::Var(id),
span,
2021-07-23 23:19:30 +02:00
ty: self.get_variable(id).clone(),
2021-07-08 08:19:38 +02:00
},
None,
)
} else {
2021-07-24 08:44:38 +02:00
let name = self.get_span_contents(span).to_vec();
// this seems okay to set it to unknown here, but we should double-check
let id = self.add_variable(name, Type::Unknown);
2021-07-08 08:19:38 +02:00
(
2021-07-30 00:56:51 +02:00
Expression {
expr: Expr::Var(id),
span,
ty: Type::Unknown,
},
None,
2021-07-08 08:19:38 +02:00
)
}
} else {
(garbage(span), err)
}
}
2021-07-02 09:15:30 +02:00
pub fn parse_full_column_path(&mut self, span: Span) -> (Expression, Option<ParseError>) {
// FIXME: assume for now a paren expr, but needs more
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"(") {
start += 1;
}
if bytes.ends_with(b")") {
end -= 1;
2021-07-02 09:32:30 +02:00
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
")".into(),
Span {
start: end,
end: end + 1,
},
))
});
2021-07-02 09:15:30 +02:00
}
let span = Span { start, end };
2021-07-02 09:15:30 +02:00
let source = self.get_span_contents(span);
2021-07-02 09:15:30 +02:00
2021-07-30 00:56:51 +02:00
let (output, err) = lex(source, start, &[], &[]);
2021-07-02 09:15:30 +02:00
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
2021-07-17 08:31:34 +02:00
let (output, err) = self.parse_block(&output, true);
2021-07-02 09:15:30 +02:00
error = error.or(err);
2021-07-16 22:26:40 +02:00
let block_id = self.add_block(output);
2021-07-02 09:15:30 +02:00
(
Expression {
2021-07-16 22:26:40 +02:00
expr: Expr::Subexpression(block_id),
2021-07-02 09:15:30 +02:00
span,
2021-07-23 23:19:30 +02:00
ty: Type::Unknown, // FIXME
2021-07-02 09:15:30 +02:00
},
error,
)
}
2021-07-06 03:48:45 +02:00
pub fn parse_string(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
2021-07-30 00:56:51 +02:00
let bytes = if (bytes.starts_with(b"\"") && bytes.ends_with(b"\"") && bytes.len() > 1)
|| (bytes.starts_with(b"\'") && bytes.ends_with(b"\'") && bytes.len() > 1)
{
&bytes[1..(bytes.len() - 1)]
} else {
bytes
};
2021-07-06 03:48:45 +02:00
if let Ok(token) = String::from_utf8(bytes.into()) {
(
Expression {
expr: Expr::String(token),
span,
2021-07-23 23:19:30 +02:00
ty: Type::String,
2021-07-06 03:48:45 +02:00
},
None,
)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("string".into(), span)),
2021-07-06 03:48:45 +02:00
)
}
}
2021-07-16 23:55:12 +02:00
//TODO: Handle error case
2021-07-23 23:19:30 +02:00
pub fn parse_shape_name(&self, bytes: &[u8], span: Span) -> (SyntaxShape, Option<ParseError>) {
let result = match bytes {
2021-07-16 23:55:12 +02:00
b"any" => SyntaxShape::Any,
b"string" => SyntaxShape::String,
b"column-path" => SyntaxShape::ColumnPath,
b"number" => SyntaxShape::Number,
b"range" => SyntaxShape::Range,
b"int" => SyntaxShape::Int,
b"path" => SyntaxShape::FilePath,
b"glob" => SyntaxShape::GlobPattern,
b"block" => SyntaxShape::Block,
b"cond" => SyntaxShape::RowCondition,
b"operator" => SyntaxShape::Operator,
b"math" => SyntaxShape::MathExpression,
b"variable" => SyntaxShape::Variable,
b"signature" => SyntaxShape::Signature,
b"expr" => SyntaxShape::Expression,
2021-07-23 23:19:30 +02:00
_ => return (SyntaxShape::Any, Some(ParseError::UnknownType(span))),
};
(result, None)
2021-07-16 23:55:12 +02:00
}
2021-07-16 08:24:46 +02:00
/// Maps a type name to a `Type`. Only `int` is recognised; every other name is
/// `Type::Unknown`.
pub fn parse_type(&self, bytes: &[u8]) -> Type {
    match bytes {
        b"int" => Type::Int,
        _ => Type::Unknown,
    }
}
pub fn parse_var_with_opt_type(
&mut self,
spans: &[Span],
spans_idx: &mut usize,
) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(spans[*spans_idx]).to_vec();
if bytes.ends_with(b":") {
// We end with colon, so the next span should be the type
if *spans_idx + 1 < spans.len() {
*spans_idx += 1;
let type_bytes = self.get_span_contents(spans[*spans_idx]);
let ty = self.parse_type(type_bytes);
2021-07-23 23:19:30 +02:00
let id = self.add_variable(bytes[0..(bytes.len() - 1)].to_vec(), ty.clone());
2021-07-16 08:24:46 +02:00
(
Expression {
expr: Expr::Var(id),
2021-07-30 09:30:11 +02:00
span: span(&spans[*spans_idx - 1..*spans_idx + 1]),
2021-07-23 23:19:30 +02:00
ty,
2021-07-16 08:24:46 +02:00
},
None,
)
} else {
let id = self.add_variable(bytes[0..(bytes.len() - 1)].to_vec(), Type::Unknown);
(
Expression {
expr: Expr::Var(id),
span: spans[*spans_idx],
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-16 08:24:46 +02:00
},
Some(ParseError::MissingType(spans[*spans_idx])),
)
}
} else {
let id = self.add_variable(bytes, Type::Unknown);
(
Expression {
expr: Expr::Var(id),
2021-07-30 09:30:11 +02:00
span: span(&spans[*spans_idx..*spans_idx + 1]),
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-16 08:24:46 +02:00
},
None,
)
}
}
2021-07-08 00:55:46 +02:00
pub fn parse_row_condition(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
self.parse_math_expression(spans)
}
2021-07-16 23:55:12 +02:00
pub fn parse_signature(&mut self, span: Span) -> (Expression, Option<ParseError>) {
enum ParseMode {
ArgMode,
TypeMode,
}
enum Arg {
2021-07-17 00:53:45 +02:00
Positional(PositionalArg, bool), // bool - required
2021-07-16 23:55:12 +02:00
Flag(Flag),
}
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"]".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
2021-07-22 22:45:23 +02:00
let source = self.get_span_contents(span);
2021-07-16 23:55:12 +02:00
2021-07-30 00:56:51 +02:00
let (output, err) = lex(source, span.start, &[b'\n', b','], &[b':']);
2021-07-16 23:55:12 +02:00
error = error.or(err);
let mut args: Vec<Arg> = vec![];
2021-08-25 21:29:36 +02:00
let mut rest: Option<Arg> = None;
2021-07-16 23:55:12 +02:00
let mut parse_mode = ParseMode::ArgMode;
for token in &output {
match token {
Token {
contents: crate::TokenContents::Item,
span,
} => {
2021-07-22 22:45:23 +02:00
let span = *span;
let contents = self.get_span_contents(span);
2021-07-16 23:55:12 +02:00
if contents == b":" {
match parse_mode {
ParseMode::ArgMode => {
parse_mode = ParseMode::TypeMode;
}
ParseMode::TypeMode => {
// We're seeing two types for the same thing for some reason, error
2021-07-30 00:56:51 +02:00
error = error
2021-08-17 01:00:00 +02:00
.or_else(|| Some(ParseError::Expected("type".into(), span)));
2021-07-16 23:55:12 +02:00
}
}
} else {
match parse_mode {
ParseMode::ArgMode => {
2021-07-17 20:52:50 +02:00
if contents.starts_with(b"--") && contents.len() > 2 {
2021-07-16 23:55:12 +02:00
// Long flag
2021-07-23 23:19:30 +02:00
let flags: Vec<_> = contents
.split(|x| x == &b'(')
.map(|x| x.to_vec())
.collect();
let long = String::from_utf8_lossy(&flags[0]).to_string();
let variable_name = flags[0][2..].to_vec();
let var_id = self.add_variable(variable_name, Type::Unknown);
2021-07-17 20:52:50 +02:00
if flags.len() == 1 {
args.push(Arg::Flag(Flag {
arg: None,
desc: String::new(),
2021-07-23 23:19:30 +02:00
long,
2021-07-17 20:52:50 +02:00
short: None,
required: false,
2021-07-23 23:19:30 +02:00
var_id: Some(var_id),
2021-07-17 20:52:50 +02:00
}));
} else {
2021-07-23 23:19:30 +02:00
let short_flag = &flags[1];
2021-07-17 20:52:50 +02:00
let short_flag = if !short_flag.starts_with(b"-")
|| !short_flag.ends_with(b")")
{
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected(
2021-07-30 00:56:51 +02:00
"short flag".into(),
span,
))
});
2021-07-17 20:52:50 +02:00
short_flag
} else {
&short_flag[1..(short_flag.len() - 1)]
};
let short_flag =
2021-07-30 00:56:51 +02:00
String::from_utf8_lossy(short_flag).to_string();
2021-07-17 20:52:50 +02:00
let chars: Vec<char> = short_flag.chars().collect();
2021-07-23 23:19:30 +02:00
let long = String::from_utf8_lossy(&flags[0]).to_string();
let variable_name = flags[0][2..].to_vec();
let var_id =
self.add_variable(variable_name, Type::Unknown);
2021-07-17 20:52:50 +02:00
if chars.len() == 1 {
args.push(Arg::Flag(Flag {
arg: None,
desc: String::new(),
2021-07-23 23:19:30 +02:00
long,
2021-07-17 20:52:50 +02:00
short: Some(chars[0]),
required: false,
2021-07-23 23:19:30 +02:00
var_id: Some(var_id),
2021-07-17 20:52:50 +02:00
}));
} else {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected(
2021-07-30 00:56:51 +02:00
"short flag".into(),
span,
))
});
2021-07-17 20:52:50 +02:00
}
}
} else if contents.starts_with(b"-") && contents.len() > 1 {
2021-07-17 00:39:30 +02:00
// Short flag
let short_flag = &contents[1..];
let short_flag =
String::from_utf8_lossy(short_flag).to_string();
let chars: Vec<char> = short_flag.chars().collect();
if chars.len() > 1 {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("short flag".into(), span))
2021-07-30 00:56:51 +02:00
});
2021-07-17 00:39:30 +02:00
args.push(Arg::Flag(Flag {
arg: None,
desc: String::new(),
long: String::new(),
short: None,
required: false,
2021-07-23 23:19:30 +02:00
var_id: None,
2021-07-17 00:39:30 +02:00
}));
} else {
2021-07-23 23:19:30 +02:00
let mut encoded_var_name = vec![0u8; 4];
let len = chars[0].encode_utf8(&mut encoded_var_name).len();
let variable_name = encoded_var_name[0..len].to_vec();
let var_id =
self.add_variable(variable_name, Type::Unknown);
2021-07-17 00:39:30 +02:00
args.push(Arg::Flag(Flag {
arg: None,
desc: String::new(),
long: String::new(),
short: Some(chars[0]),
required: false,
2021-07-23 23:19:30 +02:00
var_id: Some(var_id),
2021-07-17 00:39:30 +02:00
}));
}
2021-07-17 20:52:50 +02:00
} else if contents.starts_with(b"(-") {
let short_flag = &contents[2..];
let short_flag = if !short_flag.ends_with(b")") {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("short flag".into(), span))
2021-07-30 00:56:51 +02:00
});
2021-07-17 20:52:50 +02:00
short_flag
} else {
&short_flag[..(short_flag.len() - 1)]
};
let short_flag =
String::from_utf8_lossy(short_flag).to_string();
let chars: Vec<char> = short_flag.chars().collect();
if chars.len() == 1 {
match args.last_mut() {
Some(Arg::Flag(flag)) => {
if flag.short.is_some() {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected(
2021-07-30 00:56:51 +02:00
"one short flag".into(),
span,
))
});
2021-07-17 20:52:50 +02:00
} else {
flag.short = Some(chars[0]);
}
}
_ => {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected(
2021-07-30 00:56:51 +02:00
"unknown flag".into(),
span,
))
});
2021-07-17 20:52:50 +02:00
}
}
} else {
2021-07-30 00:56:51 +02:00
error = error.or_else(|| {
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("short flag".into(), span))
2021-07-30 00:56:51 +02:00
});
2021-07-17 20:52:50 +02:00
}
2021-07-30 00:56:51 +02:00
} else if contents.ends_with(b"?") {
let contents: Vec<_> = contents[..(contents.len() - 1)].into();
let name = String::from_utf8_lossy(&contents).to_string();
2021-07-23 23:19:30 +02:00
2021-07-30 00:56:51 +02:00
let var_id = self.add_variable(contents, Type::Unknown);
2021-07-17 00:53:45 +02:00
2021-07-30 00:56:51 +02:00
// Positional arg, optional
args.push(Arg::Positional(
PositionalArg {
desc: String::new(),
name,
shape: SyntaxShape::Any,
var_id: Some(var_id),
},
false,
))
} else {
let name = String::from_utf8_lossy(contents).to_string();
let contents_vec = contents.to_vec();
2021-07-23 23:19:30 +02:00
2021-07-30 00:56:51 +02:00
let var_id = self.add_variable(contents_vec, Type::Unknown);
// Positional arg, required
args.push(Arg::Positional(
PositionalArg {
desc: String::new(),
name,
shape: SyntaxShape::Any,
var_id: Some(var_id),
},
true,
))
2021-07-16 23:55:12 +02:00
}
}
ParseMode::TypeMode => {
if let Some(last) = args.last_mut() {
2021-07-23 23:19:30 +02:00
let (syntax_shape, err) = self.parse_shape_name(contents, span);
error = error.or(err);
2021-07-16 23:55:12 +02:00
//TODO check if we're replacing one already
match last {
2021-07-23 23:19:30 +02:00
Arg::Positional(
PositionalArg { shape, var_id, .. },
..,
) => {
self.set_variable_type(var_id.expect("internal error: all custom parameters must have var_ids"), syntax_shape.to_type());
2021-07-16 23:55:12 +02:00
*shape = syntax_shape;
}
2021-07-23 23:19:30 +02:00
Arg::Flag(Flag { arg, var_id, .. }) => {
self.set_variable_type(var_id.expect("internal error: all custom parameters must have var_ids"), syntax_shape.to_type());
*arg = Some(syntax_shape)
}
2021-07-16 23:55:12 +02:00
}
}
parse_mode = ParseMode::ArgMode;
}
}
}
}
2021-07-17 00:31:36 +02:00
Token {
contents: crate::TokenContents::Comment,
span,
} => {
2021-07-22 22:45:23 +02:00
let contents = self.get_span_contents(Span {
start: span.start + 1,
end: span.end,
});
2021-07-17 00:31:36 +02:00
let mut contents = String::from_utf8_lossy(contents).to_string();
contents = contents.trim().into();
if let Some(last) = args.last_mut() {
match last {
Arg::Flag(flag) => {
if !flag.desc.is_empty() {
2021-07-30 00:56:51 +02:00
flag.desc.push('\n');
2021-07-17 00:31:36 +02:00
}
flag.desc.push_str(&contents);
}
2021-07-17 00:53:45 +02:00
Arg::Positional(positional, ..) => {
2021-07-17 00:31:36 +02:00
if !positional.desc.is_empty() {
2021-07-30 00:56:51 +02:00
positional.desc.push('\n');
2021-07-17 00:31:36 +02:00
}
positional.desc.push_str(&contents);
}
}
}
}
2021-07-16 23:55:12 +02:00
_ => {}
}
}
let mut sig = Signature::new(String::new());
for arg in args {
match arg {
2021-07-17 00:53:45 +02:00
Arg::Positional(positional, required) => {
2021-07-17 01:22:01 +02:00
if positional.name == "...rest" {
if sig.rest_positional.is_none() {
sig.rest_positional = Some(PositionalArg {
name: "rest".into(),
..positional
})
} else {
// Too many rest params
error = error.or(Some(ParseError::MultipleRestParams(span)))
}
} else if required {
2021-07-17 00:53:45 +02:00
sig.required_positional.push(positional)
} else {
sig.optional_positional.push(positional)
}
}
2021-07-16 23:55:12 +02:00
Arg::Flag(flag) => sig.named.push(flag),
}
}
(
Expression {
2021-07-30 00:56:51 +02:00
expr: Expr::Signature(Box::new(sig)),
2021-07-16 23:55:12 +02:00
span,
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-16 23:55:12 +02:00
},
error,
)
}
2021-07-08 09:49:17 +02:00
pub fn parse_list_expression(
&mut self,
span: Span,
element_shape: &SyntaxShape,
) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"]".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
2021-07-22 22:45:23 +02:00
let source = self.get_span_contents(span);
2021-07-08 09:49:17 +02:00
2021-07-30 00:56:51 +02:00
let (output, err) = lex(source, span.start, &[b'\n', b','], &[]);
2021-07-08 09:49:17 +02:00
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
let mut args = vec![];
2021-07-08 23:45:56 +02:00
2021-08-17 02:26:05 +02:00
let mut contained_type: Option<Type> = None;
2021-07-16 08:24:46 +02:00
if !output.block.is_empty() {
for arg in &output.block[0].commands {
let mut spans_idx = 0;
2021-07-08 09:49:17 +02:00
2021-07-16 08:24:46 +02:00
while spans_idx < arg.parts.len() {
2021-07-24 07:57:17 +02:00
let (arg, err) =
self.parse_multispan_value(&arg.parts, &mut spans_idx, element_shape);
2021-07-16 08:24:46 +02:00
error = error.or(err);
2021-07-08 23:45:56 +02:00
2021-08-17 02:26:05 +02:00
if let Some(ref ctype) = contained_type {
if *ctype != arg.ty {
contained_type = Some(Type::Unknown);
}
} else {
contained_type = Some(arg.ty.clone());
}
2021-07-16 08:24:46 +02:00
args.push(arg);
spans_idx += 1;
}
2021-07-08 09:49:17 +02:00
}
}
(
Expression {
expr: Expr::List(args),
span,
2021-08-17 02:26:05 +02:00
ty: Type::List(Box::new(if let Some(ty) = contained_type {
ty.clone()
} else {
Type::Unknown
})),
2021-07-08 09:49:17 +02:00
},
error,
)
}
2021-07-06 00:58:56 +02:00
pub fn parse_table_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"]".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
2021-07-22 22:45:23 +02:00
let source = self.get_span_contents(span);
2021-07-06 00:58:56 +02:00
2021-07-30 00:56:51 +02:00
let (output, err) = lex(source, start, &[b'\n', b','], &[]);
2021-07-06 00:58:56 +02:00
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
match output.block.len() {
0 => (
Expression {
expr: Expr::List(vec![]),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Table,
2021-07-06 00:58:56 +02:00
},
None,
),
1 => {
// List
2021-07-08 09:49:17 +02:00
self.parse_list_expression(span, &SyntaxShape::Any)
2021-07-06 00:58:56 +02:00
}
2021-07-06 03:48:45 +02:00
_ => {
let mut table_headers = vec![];
let (headers, err) =
2021-07-24 07:57:17 +02:00
self.parse_value(output.block[0].commands[0].parts[0], &SyntaxShape::Table);
2021-07-06 03:48:45 +02:00
error = error.or(err);
if let Expression {
expr: Expr::List(headers),
..
} = headers
{
table_headers = headers;
}
let mut rows = vec![];
for part in &output.block[1].commands[0].parts {
2021-07-24 07:57:17 +02:00
let (values, err) = self.parse_value(*part, &SyntaxShape::Table);
2021-07-06 03:48:45 +02:00
error = error.or(err);
if let Expression {
expr: Expr::List(values),
..
} = values
{
rows.push(values);
}
}
(
Expression {
expr: Expr::Table(table_headers, rows),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Table,
2021-07-06 03:48:45 +02:00
},
error,
)
}
2021-07-06 00:58:56 +02:00
}
}
2021-07-02 09:32:30 +02:00
pub fn parse_block_expression(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let bytes = self.get_span_contents(span);
let mut error = None;
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"{") {
start += 1;
2021-07-06 03:48:45 +02:00
} else {
return (
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("block".into(), span)),
2021-07-06 03:48:45 +02:00
);
2021-07-02 09:32:30 +02:00
}
if bytes.ends_with(b"}") {
end -= 1;
} else {
error = error.or_else(|| {
Some(ParseError::Unclosed(
"}".into(),
Span {
start: end,
end: end + 1,
},
))
});
}
let span = Span { start, end };
2021-07-02 09:32:30 +02:00
2021-07-22 22:45:23 +02:00
let source = self.get_span_contents(span);
2021-07-02 09:32:30 +02:00
2021-07-30 00:56:51 +02:00
let (output, err) = lex(source, start, &[], &[]);
2021-07-02 09:32:30 +02:00
error = error.or(err);
2021-08-25 21:29:36 +02:00
// Check to see if we have parameters
let params = if matches!(
output.first(),
Some(Token {
contents: TokenContents::Pipe,
..
})
) {
// We've found a parameter list
let mut param_tokens = vec![];
let mut token_iter = output.iter().skip(1);
for token in &mut token_iter {
if matches!(
token,
Token {
contents: TokenContents::Pipe,
..
}
) {
break;
} else {
param_tokens.push(token);
}
}
};
2021-07-02 09:32:30 +02:00
let (output, err) = lite_parse(&output);
error = error.or(err);
2021-07-17 08:31:34 +02:00
let (output, err) = self.parse_block(&output, true);
2021-07-02 09:32:30 +02:00
error = error.or(err);
2021-07-16 22:26:40 +02:00
let block_id = self.add_block(output);
2021-07-02 09:32:30 +02:00
(
Expression {
2021-07-16 22:26:40 +02:00
expr: Expr::Block(block_id),
2021-07-02 09:32:30 +02:00
span,
2021-07-23 23:19:30 +02:00
ty: Type::Block,
2021-07-02 09:32:30 +02:00
},
error,
)
}
2021-07-08 23:31:08 +02:00
pub fn parse_value(
2021-07-01 02:01:04 +02:00
&mut self,
span: Span,
2021-07-24 07:57:17 +02:00
shape: &SyntaxShape,
2021-07-01 02:01:04 +02:00
) -> (Expression, Option<ParseError>) {
2021-07-01 03:31:02 +02:00
let bytes = self.get_span_contents(span);
2021-07-08 08:57:24 +02:00
// First, check the special-cases. These will likely represent specific values as expressions
// and may fit a variety of shapes.
//
// We check variable first because immediately following we check for variables with column paths
// which might result in a value that fits other shapes (and require the variable to already be
// declared)
2021-07-24 07:57:17 +02:00
if shape == &SyntaxShape::Variable {
2021-07-08 08:19:38 +02:00
return self.parse_variable_expr(span);
} else if bytes.starts_with(b"$") {
2021-07-02 09:15:30 +02:00
return self.parse_dollar_expr(span);
} else if bytes.starts_with(b"(") {
return self.parse_full_column_path(span);
2021-07-08 23:45:56 +02:00
} else if bytes.starts_with(b"[") {
match shape {
2021-07-16 23:55:12 +02:00
SyntaxShape::Any
| SyntaxShape::List(_)
| SyntaxShape::Table
| SyntaxShape::Signature => {}
2021-07-08 23:45:56 +02:00
_ => {
return (
Expression::garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("non-[] value".into(), span)),
2021-07-08 23:45:56 +02:00
);
}
}
2021-07-01 03:31:02 +02:00
}
2021-07-01 02:01:04 +02:00
match shape {
SyntaxShape::Number => {
2021-07-01 03:31:02 +02:00
if let Ok(token) = String::from_utf8(bytes.into()) {
2021-07-01 02:01:04 +02:00
self.parse_number(&token, span)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("number".into(), span)),
2021-07-01 02:01:04 +02:00
)
}
}
2021-07-02 00:40:08 +02:00
SyntaxShape::Int => {
if let Ok(token) = String::from_utf8(bytes.into()) {
self.parse_int(&token, span)
} else {
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("int".into(), span)),
2021-07-08 00:55:46 +02:00
)
}
}
2021-07-08 08:19:38 +02:00
SyntaxShape::String | SyntaxShape::GlobPattern | SyntaxShape::FilePath => {
self.parse_string(span)
}
2021-07-08 09:49:17 +02:00
SyntaxShape::Block => {
if bytes.starts_with(b"{") {
self.parse_block_expression(span)
} else {
(
Expression::garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("block".into(), span)),
2021-07-08 09:49:17 +02:00
)
}
}
2021-07-16 23:55:12 +02:00
SyntaxShape::Signature => {
if bytes.starts_with(b"[") {
self.parse_signature(span)
} else {
(
Expression::garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("signature".into(), span)),
2021-07-16 23:55:12 +02:00
)
}
}
2021-07-09 08:23:20 +02:00
SyntaxShape::List(elem) => {
if bytes.starts_with(b"[") {
2021-07-30 00:56:51 +02:00
self.parse_list_expression(span, elem)
2021-07-09 08:23:20 +02:00
} else {
(
Expression::garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("list".into(), span)),
2021-07-09 08:23:20 +02:00
)
}
}
2021-07-08 09:49:17 +02:00
SyntaxShape::Table => {
if bytes.starts_with(b"[") {
self.parse_table_expression(span)
} else {
(
Expression::garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("table".into(), span)),
2021-07-08 09:49:17 +02:00
)
}
}
2021-07-02 08:44:37 +02:00
SyntaxShape::Any => {
2021-07-09 08:23:20 +02:00
let shapes = [
2021-07-02 08:44:37 +02:00
SyntaxShape::Int,
SyntaxShape::Number,
SyntaxShape::Range,
SyntaxShape::Filesize,
SyntaxShape::Duration,
SyntaxShape::Block,
SyntaxShape::Table,
2021-07-09 08:23:20 +02:00
SyntaxShape::List(Box::new(SyntaxShape::Any)),
2021-07-02 08:44:37 +02:00
SyntaxShape::String,
];
for shape in shapes.iter() {
2021-07-24 07:57:17 +02:00
if let (s, None) = self.parse_value(span, shape) {
2021-07-02 08:44:37 +02:00
return (s, None);
}
}
(
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("any shape".into(), span)),
2021-07-02 08:44:37 +02:00
)
}
2021-08-17 01:00:00 +02:00
_ => (garbage(span), Some(ParseError::IncompleteParser(span))),
2021-07-01 02:01:04 +02:00
}
}
2021-07-02 08:44:37 +02:00
pub fn parse_operator(&mut self, span: Span) -> (Expression, Option<ParseError>) {
let contents = self.get_span_contents(span);
let operator = match contents {
b"==" => Operator::Equal,
b"!=" => Operator::NotEqual,
b"<" => Operator::LessThan,
b"<=" => Operator::LessThanOrEqual,
b">" => Operator::GreaterThan,
b">=" => Operator::GreaterThanOrEqual,
b"=~" => Operator::Contains,
b"!~" => Operator::NotContains,
b"+" => Operator::Plus,
b"-" => Operator::Minus,
b"*" => Operator::Multiply,
b"/" => Operator::Divide,
b"in" => Operator::In,
b"not-in" => Operator::NotIn,
b"mod" => Operator::Modulo,
b"&&" => Operator::And,
b"||" => Operator::Or,
b"**" => Operator::Pow,
_ => {
return (
garbage(span),
2021-08-17 01:00:00 +02:00
Some(ParseError::Expected("operator".into(), span)),
2021-07-02 08:44:37 +02:00
);
}
};
(
Expression {
expr: Expr::Operator(operator),
span,
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-02 08:44:37 +02:00
},
None,
)
}
2021-07-01 02:01:04 +02:00
pub fn parse_math_expression(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
2021-07-02 08:44:37 +02:00
// As the expr_stack grows, we increase the required precedence to grow larger
// If, at any time, the operator we're looking at is the same or lower precedence
// of what is in the expression stack, we collapse the expression stack.
//
// This leads to an expression stack that grows under increasing precedence and collapses
// under decreasing/sustained precedence
//
// The end result is a stack that we can fold into binary operations as right associations
// safely.
let mut expr_stack: Vec<Expression> = vec![];
let mut idx = 0;
let mut last_prec = 1000000;
let mut error = None;
2021-07-24 07:57:17 +02:00
let (lhs, err) = self.parse_value(spans[0], &SyntaxShape::Any);
2021-07-02 08:44:37 +02:00
error = error.or(err);
idx += 1;
expr_stack.push(lhs);
while idx < spans.len() {
let (op, err) = self.parse_operator(spans[idx]);
error = error.or(err);
let op_prec = op.precedence();
idx += 1;
if idx == spans.len() {
// Handle broken math expr `1 +` etc
error = error.or(Some(ParseError::IncompleteMathExpression(spans[idx - 1])));
2021-07-22 21:50:59 +02:00
expr_stack.push(Expression::garbage(spans[idx - 1]));
expr_stack.push(Expression::garbage(spans[idx - 1]));
2021-07-02 08:44:37 +02:00
break;
}
2021-07-24 07:57:17 +02:00
let (rhs, err) = self.parse_value(spans[idx], &SyntaxShape::Any);
2021-07-02 08:44:37 +02:00
error = error.or(err);
if op_prec <= last_prec {
while expr_stack.len() > 1 {
// Collapse the right associated operations first
// so that we can get back to a stack with a lower precedence
2021-07-23 23:19:30 +02:00
let mut rhs = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let mut op = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let mut lhs = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let (result_ty, err) = self.math_result_type(&mut lhs, &mut op, &mut rhs);
error = error.or(err);
2021-07-02 08:44:37 +02:00
let op_span = span(&[lhs.span, rhs.span]);
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
span: op_span,
2021-07-23 23:19:30 +02:00
ty: result_ty,
2021-07-02 08:44:37 +02:00
});
}
}
expr_stack.push(op);
expr_stack.push(rhs);
last_prec = op_prec;
idx += 1;
}
while expr_stack.len() != 1 {
2021-07-23 23:19:30 +02:00
let mut rhs = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let mut op = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let mut lhs = expr_stack
2021-07-02 08:44:37 +02:00
.pop()
.expect("internal error: expression stack empty");
2021-07-23 23:19:30 +02:00
let (result_ty, err) = self.math_result_type(&mut lhs, &mut op, &mut rhs);
error = error.or(err);
2021-07-02 08:44:37 +02:00
let binary_op_span = span(&[lhs.span, rhs.span]);
expr_stack.push(Expression {
expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
span: binary_op_span,
2021-07-23 23:19:30 +02:00
ty: result_ty,
2021-07-02 08:44:37 +02:00
});
}
let output = expr_stack
.pop()
.expect("internal error: expression stack empty");
(output, error)
2021-07-01 02:01:04 +02:00
}
/// Parses `spans` as an expression, choosing the parser from the first byte of the first
/// span: a digit, `(`, `{`, `[`, `$`, `"` or `'` selects a math expression, anything else
/// is parsed as a call.
pub fn parse_expression(&mut self, spans: &[Span]) -> (Expression, Option<ParseError>) {
    let first_byte = self.get_span_contents(spans[0])[0];

    let starts_a_value = matches!(
        first_byte,
        b'0'..=b'9' | b'(' | b'{' | b'[' | b'$' | b'"' | b'\''
    );

    if starts_a_value {
        self.parse_math_expression(spans)
    } else {
        self.parse_call(spans, true)
    }
}
2021-07-01 03:31:02 +02:00
pub fn parse_variable(&mut self, span: Span) -> (Option<VarId>, Option<ParseError>) {
let bytes = self.get_span_contents(span);
2021-07-01 02:01:04 +02:00
2021-07-01 03:31:02 +02:00
if is_variable(bytes) {
2021-07-01 08:09:55 +02:00
if let Some(var_id) = self.find_variable(bytes) {
2021-07-01 03:31:02 +02:00
(Some(var_id), None)
} else {
(None, None)
}
2021-07-01 02:01:04 +02:00
} else {
2021-08-17 01:00:00 +02:00
(None, Some(ParseError::Expected("variable".into(), span)))
2021-07-01 02:01:04 +02:00
}
}
2021-07-31 06:04:42 +02:00
pub fn parse_def_predecl(&mut self, spans: &[Span]) {
let name = self.get_span_contents(spans[0]);
if name == b"def" && spans.len() >= 4 {
let (name_expr, ..) = self.parse_string(spans[1]);
2021-08-17 01:00:00 +02:00
let name = name_expr.as_string();
2021-07-31 06:04:42 +02:00
self.enter_scope();
2021-07-31 06:25:26 +02:00
// FIXME: because parse_signature will update the scope with the variables it sees
// we end up parsing the signature twice per def. The first time is during the predecl
// so that we can see the types that are part of the signature, which we need for parsing.
// The second time is when we actually parse the body itself.
// We can't reuse the first time because the variables that are created during parse_signature
// are lost when we exit the scope below.
2021-07-31 06:04:42 +02:00
let (sig, ..) = self.parse_signature(spans[2]);
2021-08-17 01:00:00 +02:00
let signature = sig.as_signature();
2021-07-31 06:04:42 +02:00
self.exit_scope();
2021-08-17 01:00:00 +02:00
match (name, signature) {
(Some(name), Some(mut signature)) => {
signature.name = name;
let decl = Declaration {
signature,
body: None,
};
2021-07-31 06:04:42 +02:00
2021-08-17 01:00:00 +02:00
self.add_decl(decl);
}
_ => {}
}
2021-07-31 06:04:42 +02:00
}
}
2021-07-16 08:24:46 +02:00
pub fn parse_def(&mut self, spans: &[Span]) -> (Statement, Option<ParseError>) {
2021-07-23 23:19:30 +02:00
let mut error = None;
2021-07-16 08:24:46 +02:00
let name = self.get_span_contents(spans[0]);
2021-07-23 23:19:30 +02:00
if name == b"def" && spans.len() >= 4 {
//FIXME: don't use expect here
let (name_expr, err) = self.parse_string(spans[1]);
error = error.or(err);
2021-07-16 08:24:46 +02:00
2021-07-23 23:19:30 +02:00
self.enter_scope();
let (sig, err) = self.parse_signature(spans[2]);
error = error.or(err);
2021-07-16 08:24:46 +02:00
2021-07-23 23:19:30 +02:00
let (block, err) = self.parse_block_expression(spans[3]);
2021-08-17 01:00:00 +02:00
error = error.or(err);
2021-07-23 23:19:30 +02:00
self.exit_scope();
2021-08-17 01:00:00 +02:00
let name = name_expr.as_string();
2021-07-23 23:19:30 +02:00
2021-08-17 01:00:00 +02:00
let signature = sig.as_signature();
2021-07-23 23:19:30 +02:00
2021-08-17 01:00:00 +02:00
let block_id = block.as_block();
2021-07-23 23:19:30 +02:00
2021-08-17 01:00:00 +02:00
match (name, signature, block_id) {
(Some(name), Some(mut signature), Some(block_id)) => {
let decl_id = self
.find_decl(name.as_bytes())
.expect("internal error: predeclaration failed to add definition");
2021-07-23 23:19:30 +02:00
2021-08-17 01:00:00 +02:00
let declaration = self.get_decl_mut(decl_id);
signature.name = name;
declaration.signature = signature;
declaration.body = Some(block_id);
let def_decl_id = self
.find_decl(b"def")
.expect("internal error: missing def command");
let call = Box::new(Call {
head: spans[0],
decl_id: def_decl_id,
positional: vec![name_expr, sig, block],
named: vec![],
});
(
Statement::Expression(Expression {
expr: Expr::Call(call),
span: span(spans),
ty: Type::Unknown,
}),
error,
)
}
_ => (
Statement::Expression(Expression {
expr: Expr::Garbage,
span: span(spans),
ty: Type::Unknown,
}),
error,
),
}
2021-07-23 23:19:30 +02:00
} else {
(
Statement::Expression(Expression {
expr: Expr::Garbage,
span: span(spans),
ty: Type::Unknown,
}),
Some(ParseError::UnknownState(
2021-08-17 01:00:00 +02:00
"internal error: definition unparseable".into(),
2021-07-23 23:19:30 +02:00
span(spans),
)),
)
2021-07-16 08:24:46 +02:00
}
}
2021-08-09 02:19:07 +02:00
pub fn parse_alias(&mut self, spans: &[Span]) -> (Statement, Option<ParseError>) {
let name = self.get_span_contents(spans[0]);
2021-08-09 09:53:06 +02:00
if name == b"alias" {
if let Some(decl_id) = self.find_decl(b"alias") {
let (call, call_span, _) = self.parse_internal_call(spans[0], &spans[1..], decl_id);
2021-08-09 02:19:07 +02:00
2021-08-09 09:53:06 +02:00
if spans.len() >= 4 {
2021-08-16 00:33:34 +02:00
let alias_name = self.get_span_contents(spans[1]);
let alias_name = if alias_name.starts_with(b"\"")
&& alias_name.ends_with(b"\"")
&& alias_name.len() > 1
{
alias_name[1..(alias_name.len() - 1)].to_vec()
} else {
alias_name.to_vec()
};
2021-08-09 09:53:06 +02:00
let _equals = self.get_span_contents(spans[2]);
2021-08-09 02:19:07 +02:00
2021-08-09 09:53:06 +02:00
let replacement = spans[3..].to_vec();
2021-08-17 01:00:00 +02:00
//println!("{:?} {:?}", alias_name, replacement);
2021-08-16 00:33:34 +02:00
2021-08-09 09:53:06 +02:00
self.add_alias(alias_name, replacement);
}
return (
Statement::Expression(Expression {
expr: Expr::Call(call),
span: call_span,
ty: Type::Unknown,
}),
None,
);
}
2021-08-09 02:19:07 +02:00
}
2021-08-09 09:53:06 +02:00
2021-08-09 02:19:07 +02:00
(
Statement::Expression(Expression {
expr: Expr::Garbage,
span: span(spans),
ty: Type::Unknown,
}),
Some(ParseError::UnknownState(
"internal error: let statement unparseable".into(),
span(spans),
)),
)
}
2021-07-01 02:01:04 +02:00
pub fn parse_let(&mut self, spans: &[Span]) -> (Statement, Option<ParseError>) {
2021-07-08 09:20:01 +02:00
let name = self.get_span_contents(spans[0]);
2021-07-08 08:19:38 +02:00
2021-07-08 09:20:01 +02:00
if name == b"let" {
if let Some(decl_id) = self.find_decl(b"let") {
let (call, call_span, err) =
self.parse_internal_call(spans[0], &spans[1..], decl_id);
2021-07-08 09:20:01 +02:00
2021-08-17 02:26:05 +02:00
// Update the variable to the known type if we can.
if err.is_none() {
let var_id = call.positional[0]
.as_var()
.expect("internal error: expected variable");
let rhs_type = call.positional[1].ty.clone();
self.set_variable_type(var_id, rhs_type);
}
2021-07-17 07:28:25 +02:00
return (
Statement::Expression(Expression {
expr: Expr::Call(call),
span: call_span,
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-17 07:28:25 +02:00
}),
err,
);
2021-07-08 08:19:38 +02:00
}
}
(
Statement::Expression(Expression {
expr: Expr::Garbage,
span: span(spans),
2021-07-23 23:19:30 +02:00
ty: Type::Unknown,
2021-07-08 08:19:38 +02:00
}),
Some(ParseError::UnknownState(
"internal error: let statement unparseable".into(),
span(spans),
)),
)
2021-07-01 02:01:04 +02:00
}
pub fn parse_statement(&mut self, spans: &[Span]) -> (Statement, Option<ParseError>) {
2021-07-16 08:24:46 +02:00
// FIXME: improve errors by checking keyword first
if let (decl, None) = self.parse_def(spans) {
(decl, None)
} else if let (stmt, None) = self.parse_let(spans) {
2021-07-01 02:01:04 +02:00
(stmt, None)
2021-08-09 02:19:07 +02:00
} else if let (stmt, None) = self.parse_alias(spans) {
(stmt, None)
2021-07-01 02:01:04 +02:00
} else {
2021-07-02 00:40:08 +02:00
let (expr, err) = self.parse_expression(spans);
(Statement::Expression(expr), err)
2021-07-01 02:01:04 +02:00
}
}
2021-06-30 03:42:56 +02:00
2021-07-17 08:31:34 +02:00
pub fn parse_block(
&mut self,
lite_block: &LiteBlock,
scoped: bool,
) -> (Block, Option<ParseError>) {
2021-06-30 03:42:56 +02:00
let mut error = None;
2021-07-17 08:31:34 +02:00
if scoped {
self.enter_scope();
}
2021-06-30 03:42:56 +02:00
let mut block = Block::new();
2021-07-31 06:04:42 +02:00
// Pre-declare any definition so that definitions
// that share the same block can see each other
for pipeline in &lite_block.block {
if pipeline.commands.len() == 1 {
self.parse_def_predecl(&pipeline.commands[0].parts);
}
}
2021-06-30 03:42:56 +02:00
for pipeline in &lite_block.block {
2021-07-17 05:42:08 +02:00
if pipeline.commands.len() > 1 {
let mut output = vec![];
for command in &pipeline.commands {
let (expr, err) = self.parse_expression(&command.parts);
error = error.or(err);
2021-07-01 02:01:04 +02:00
2021-07-17 05:42:08 +02:00
output.push(expr);
}
block.stmts.push(Statement::Pipeline(Pipeline {
expressions: output,
}));
} else {
let (stmt, err) = self.parse_statement(&pipeline.commands[0].parts);
error = error.or(err);
block.stmts.push(stmt);
}
2021-06-30 03:42:56 +02:00
}
2021-07-17 08:31:34 +02:00
if scoped {
self.exit_scope();
}
2021-06-30 03:42:56 +02:00
(block, error)
}
2021-07-17 08:31:34 +02:00
pub fn parse_file(
&mut self,
fname: &str,
contents: &[u8],
2021-07-17 08:31:34 +02:00
scoped: bool,
) -> (Block, Option<ParseError>) {
2021-06-30 03:42:56 +02:00
let mut error = None;
let span_offset = self.next_span_start();
2021-07-03 03:29:56 +02:00
self.add_file(fname.into(), contents);
2021-07-30 00:56:51 +02:00
let (output, err) = lex(contents, span_offset, &[], &[]);
error = error.or(err);
2021-06-30 03:42:56 +02:00
let (output, err) = lite_parse(&output);
error = error.or(err);
2021-07-17 08:31:34 +02:00
let (output, err) = self.parse_block(&output, scoped);
2021-06-30 03:42:56 +02:00
error = error.or(err);
(output, error)
}
2021-07-01 02:01:04 +02:00
2021-07-17 08:31:34 +02:00
pub fn parse_source(&mut self, source: &[u8], scoped: bool) -> (Block, Option<ParseError>) {
2021-07-01 02:01:04 +02:00
let mut error = None;
let span_offset = self.next_span_start();
2021-07-30 00:56:51 +02:00
self.add_file("source".into(), source);
2021-07-01 02:01:04 +02:00
let (output, err) = lex(source, span_offset, &[], &[]);
2021-07-01 02:01:04 +02:00
error = error.or(err);
let (output, err) = lite_parse(&output);
error = error.or(err);
2021-07-17 08:31:34 +02:00
let (output, err) = self.parse_block(&output, scoped);
2021-07-01 02:01:04 +02:00
error = error.or(err);
(output, error)
}
2021-06-30 03:42:56 +02:00
}
2021-07-02 03:42:25 +02:00
#[cfg(test)]
mod tests {
    use crate::{ParseError, ParserState, Signature};

    use super::*;

    /// Parses `source` with `parse_source(.., true)` in a fresh working set, registering
    /// `decl` first when one is given.
    fn parse_with(decl: Option<Signature>, source: &[u8]) -> (Block, Option<ParseError>) {
        let parser_state = ParserState::new();
        let mut working_set = ParserWorkingSet::new(&parser_state);

        if let Some(sig) = decl {
            working_set.add_decl(sig.into());
        }

        working_set.parse_source(source, true)
    }

    /// Signature for `foo` with a single `--jazz`/`-j` flag that takes an int.
    fn foo_with_jazz_flag() -> Signature {
        Signature::build("foo").named("--jazz", SyntaxShape::Int, "jazz!!", Some('j'))
    }

    /// Signature for `foo` with a single `--jazz`/`-j` switch.
    fn foo_with_jazz_switch() -> Signature {
        Signature::build("foo").switch("--jazz", "jazz!!", Some('j'))
    }

    #[test]
    pub fn parse_int() {
        let (block, err) = parse_with(None, b"3");

        assert!(err.is_none());
        assert!(block.len() == 1);
        assert!(matches!(
            block[0],
            Statement::Expression(Expression {
                expr: Expr::Int(3),
                ..
            })
        ));
    }

    #[test]
    pub fn parse_call() {
        let (block, err) = parse_with(Some(foo_with_jazz_flag()), b"foo");

        assert!(err.is_none());
        assert!(block.len() == 1);

        // The only registered declaration is expected to have id 0.
        if let Statement::Expression(Expression {
            expr: Expr::Call(call),
            ..
        }) = &block[0]
        {
            assert_eq!(call.decl_id, 0);
        } else {
            panic!("not a call");
        }
    }

    #[test]
    pub fn parse_call_missing_flag_arg() {
        let (_, err) = parse_with(Some(foo_with_jazz_flag()), b"foo --jazz");
        assert!(matches!(err, Some(ParseError::MissingFlagParam(..))));
    }

    #[test]
    pub fn parse_call_missing_short_flag_arg() {
        let (_, err) = parse_with(Some(foo_with_jazz_flag()), b"foo -j");
        assert!(matches!(err, Some(ParseError::MissingFlagParam(..))));
    }

    #[test]
    pub fn parse_call_too_many_shortflag_args() {
        let sig = foo_with_jazz_flag().named("--math", SyntaxShape::Int, "math!!", Some('m'));

        let (_, err) = parse_with(Some(sig), b"foo -mj");
        assert!(matches!(
            err,
            Some(ParseError::ShortFlagBatchCantTakeArg(..))
        ));
    }

    #[test]
    pub fn parse_call_unknown_shorthand() {
        let (_, err) = parse_with(Some(foo_with_jazz_switch()), b"foo -mj");
        assert!(matches!(err, Some(ParseError::UnknownFlag(..))));
    }

    #[test]
    pub fn parse_call_extra_positional() {
        let (_, err) = parse_with(Some(foo_with_jazz_switch()), b"foo -j 100");
        assert!(matches!(err, Some(ParseError::ExtraPositional(..))));
    }

    #[test]
    pub fn parse_call_missing_req_positional() {
        let sig = Signature::build("foo").required("jazz", SyntaxShape::Int, "jazz!!");

        let (_, err) = parse_with(Some(sig), b"foo");
        assert!(matches!(err, Some(ParseError::MissingPositional(..))));
    }

    #[test]
    pub fn parse_call_missing_req_flag() {
        let sig =
            Signature::build("foo").required_named("--jazz", SyntaxShape::Int, "jazz!!", None);

        let (_, err) = parse_with(Some(sig), b"foo");
        assert!(matches!(err, Some(ParseError::MissingRequiredFlag(..))));
    }
}