mirror of
https://github.com/nushell/nushell.git
synced 2024-11-26 10:23:52 +01:00
basic signature parse
This commit is contained in:
parent
949c6a5932
commit
6aef00ecff
@ -64,6 +64,7 @@ impl Engine {
|
||||
Expr::Table(_, _) => Err(ShellError::Unsupported(expr.span)),
|
||||
Expr::Literal(_) => Err(ShellError::Unsupported(expr.span)),
|
||||
Expr::String(_) => Err(ShellError::Unsupported(expr.span)),
|
||||
Expr::Signature(_) => Err(ShellError::Unsupported(expr.span)),
|
||||
Expr::Garbage => Err(ShellError::Unsupported(expr.span)),
|
||||
}
|
||||
}
|
||||
|
66
src/lex.rs
66
src/lex.rs
@ -38,18 +38,28 @@ impl BlockKind {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||
/// Selects which extra bytes the lexer treats as whitespace and which bytes
/// are emitted as stand-alone tokens.
///
/// Space and tab are always handled by the lexer itself; the mode only adds
/// to that baseline.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum LexMode {
    /// No additional whitespace bytes.
    Normal,
    /// `,` is treated as whitespace.
    CommaIsSpace,
    /// `\n` is treated as whitespace.
    NewlineIsSpace,
    /// Both `,` and `\n` are treated as whitespace.
    CommaAndNewlineIsSpace,
    /// Caller-supplied configuration.
    Custom {
        /// Bytes treated as whitespace in addition to space and tab.
        whitespace: Vec<u8>,
        /// Bytes that form a token on their own (for example `:` when lexing
        /// `foo:bar`).
        special: Vec<u8>,
    },
}

impl LexMode {
    /// Returns `true` when `b` counts as additional whitespace in this mode.
    ///
    /// The lexer consults only this method when deciding whether a comma or a
    /// newline is whitespace, so the fixed variants must answer here as well;
    /// otherwise they would all degrade to `Normal`.
    pub fn whitespace_contains(&self, b: u8) -> bool {
        match self {
            LexMode::Normal => false,
            LexMode::CommaIsSpace => b == b',',
            LexMode::NewlineIsSpace => b == b'\n',
            LexMode::CommaAndNewlineIsSpace => b == b',' || b == b'\n',
            LexMode::Custom { whitespace, .. } => whitespace.contains(&b),
        }
    }
}
|
||||
|
||||
// A baseline token is terminated if it's not nested inside of a paired
|
||||
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
||||
// whitespace.
|
||||
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bool {
|
||||
fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
||||
block_level.is_empty()
|
||||
&& (c == b' '
|
||||
|| c == b'\t'
|
||||
@ -57,14 +67,25 @@ fn is_item_terminator(block_level: &[BlockKind], c: u8, lex_mode: LexMode) -> bo
|
||||
|| c == b'|'
|
||||
|| c == b';'
|
||||
|| c == b'#'
|
||||
|| (c == b',' && lex_mode == LexMode::CommaIsSpace)
|
||||
|| (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace))
|
||||
|| lex_mode.whitespace_contains(c))
|
||||
}
|
||||
|
||||
// A special token is one that is a byte that stands alone as its own token. For example
|
||||
// when parsing a signature you may want to have `:` be able to separate tokens and also
|
||||
// to be handled as its own token to notify you you're about to parse a type in the example
|
||||
// `foo:bar`
|
||||
fn is_special_item(block_level: &[BlockKind], c: u8, lex_mode: &LexMode) -> bool {
|
||||
block_level.is_empty()
|
||||
&& (match lex_mode {
|
||||
LexMode::Custom { special, .. } => special.contains(&c),
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn lex_item(
|
||||
input: &[u8],
|
||||
curr_offset: &mut usize,
|
||||
lex_mode: LexMode,
|
||||
lex_mode: &LexMode,
|
||||
) -> (Span, Option<ParseError>) {
|
||||
// This variable tracks the starting character of a string literal, so that
|
||||
// we remain inside the string literal lexer mode until we encounter the
|
||||
@ -99,19 +120,22 @@ pub fn lex_item(
|
||||
quote_start = None;
|
||||
}
|
||||
} else if c == b'#' {
|
||||
if is_item_terminator(&block_level, c, lex_mode) {
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
break;
|
||||
}
|
||||
in_comment = true;
|
||||
} else if c == b'\n' {
|
||||
in_comment = false;
|
||||
if is_item_terminator(&block_level, c, lex_mode) {
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
break;
|
||||
}
|
||||
} else if in_comment {
|
||||
if is_item_terminator(&block_level, c, lex_mode) {
|
||||
if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
break;
|
||||
}
|
||||
} else if is_special_item(&block_level, c, &lex_mode) && token_start == *curr_offset {
|
||||
*curr_offset += 1;
|
||||
break;
|
||||
} else if c == b'\'' || c == b'"' {
|
||||
// We encountered the opening quote of a string literal.
|
||||
quote_start = Some(c);
|
||||
@ -140,7 +164,7 @@ pub fn lex_item(
|
||||
if let Some(BlockKind::Paren) = block_level.last() {
|
||||
let _ = block_level.pop();
|
||||
}
|
||||
} else if is_item_terminator(&block_level, c, lex_mode) {
|
||||
} else if is_item_terminator(&block_level, c, &lex_mode) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -182,7 +206,7 @@ pub fn lex_item(
|
||||
pub fn lex(
|
||||
input: &[u8],
|
||||
span_offset: usize,
|
||||
lex_mode: LexMode,
|
||||
lex_mode: &LexMode,
|
||||
) -> (Vec<Token>, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
@ -239,7 +263,7 @@ pub fn lex(
|
||||
|
||||
let idx = curr_offset;
|
||||
curr_offset += 1;
|
||||
if lex_mode != LexMode::NewlineIsSpace && lex_mode != LexMode::CommaAndNewlineIsSpace {
|
||||
if !lex_mode.whitespace_contains(c) {
|
||||
output.push(Token::new(TokenContents::Eol, Span::new(idx, idx + 1)));
|
||||
}
|
||||
} else if c == b'#' {
|
||||
@ -265,17 +289,13 @@ pub fn lex(
|
||||
Span::new(start, curr_offset),
|
||||
));
|
||||
}
|
||||
} else if c == b' '
|
||||
|| c == b'\t'
|
||||
|| (c == b',' && lex_mode == LexMode::CommaIsSpace)
|
||||
|| (c == b',' && lex_mode == LexMode::CommaAndNewlineIsSpace)
|
||||
{
|
||||
} else if c == b' ' || c == b'\t' || lex_mode.whitespace_contains(c) {
|
||||
// If the next character is non-newline whitespace, skip it.
|
||||
curr_offset += 1;
|
||||
} else {
|
||||
// Otherwise, try to consume an unclassified token.
|
||||
|
||||
let (span, err) = lex_item(input, &mut curr_offset, lex_mode);
|
||||
let (span, err) = lex_item(input, &mut curr_offset, &lex_mode);
|
||||
if error.is_none() {
|
||||
error = err;
|
||||
}
|
||||
@ -294,7 +314,7 @@ mod lex_tests {
|
||||
fn lex_basic() {
|
||||
let file = b"let x = 4";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
|
||||
assert!(output.1.is_none());
|
||||
}
|
||||
@ -303,7 +323,7 @@ mod lex_tests {
|
||||
fn lex_newline() {
|
||||
let file = b"let x = 300\nlet y = 500;";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
|
||||
println!("{:#?}", output.0);
|
||||
assert!(output.0.contains(&Token {
|
||||
@ -316,7 +336,7 @@ mod lex_tests {
|
||||
fn lex_empty() {
|
||||
let file = b"";
|
||||
|
||||
let output = lex(file, 0, LexMode::Normal);
|
||||
let output = lex(file, 0, &LexMode::Normal);
|
||||
|
||||
assert!(output.0.is_empty());
|
||||
assert!(output.1.is_none());
|
||||
|
@ -128,7 +128,7 @@ mod tests {
|
||||
use crate::{lex, lite_parse, LiteBlock, ParseError, Span};
|
||||
|
||||
fn lite_parse_helper(input: &[u8]) -> Result<LiteBlock, ParseError> {
|
||||
let (output, err) = lex(input, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(input, 0, &crate::LexMode::Normal);
|
||||
if let Some(err) = err {
|
||||
return Err(err);
|
||||
}
|
||||
|
@ -56,11 +56,7 @@ fn main() -> std::io::Result<()> {
|
||||
|
||||
let sig = Signature::build("def")
|
||||
.required("def_name", SyntaxShape::String, "definition name")
|
||||
.required(
|
||||
"params",
|
||||
SyntaxShape::List(Box::new(SyntaxShape::VarWithOptType)),
|
||||
"parameters",
|
||||
)
|
||||
.required("params", SyntaxShape::Signature, "parameters")
|
||||
.required("block", SyntaxShape::Block, "body of the definition");
|
||||
working_set.add_decl(sig.into());
|
||||
|
||||
|
226
src/parser.rs
226
src/parser.rs
@ -3,8 +3,8 @@ use std::ops::{Index, IndexMut};
|
||||
use crate::{
|
||||
lex, lite_parse,
|
||||
parser_state::{Type, VarId},
|
||||
signature::Flag,
|
||||
BlockId, DeclId, Declaration, LiteBlock, ParseError, ParserWorkingSet, Signature, Span,
|
||||
signature::{Flag, PositionalArg},
|
||||
BlockId, DeclId, Declaration, LiteBlock, ParseError, ParserWorkingSet, Signature, Span, Token,
|
||||
};
|
||||
|
||||
/// The syntactic shapes that values must match to be passed into a command. You can think of this as the type-checking that occurs when you call a function.
|
||||
@ -71,6 +71,9 @@ pub enum SyntaxShape {
|
||||
/// A variable with optional type, `x` or `x: int`
|
||||
VarWithOptType,
|
||||
|
||||
/// A signature for a definition, `[x:int, --foo]`
|
||||
Signature,
|
||||
|
||||
/// A general expression, eg `1 + 2` or `foo --bar`
|
||||
Expression,
|
||||
}
|
||||
@ -135,6 +138,7 @@ pub enum Expr {
|
||||
Table(Vec<Expression>, Vec<Vec<Expression>>),
|
||||
Literal(Vec<u8>),
|
||||
String(String), // FIXME: improve this in the future?
|
||||
Signature(Signature),
|
||||
Garbage,
|
||||
}
|
||||
|
||||
@ -185,6 +189,13 @@ impl Expression {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_signature(self) -> Option<Signature> {
|
||||
match self.expr {
|
||||
Expr::Signature(sig) => Some(sig),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_list(self) -> Option<Vec<Expression>> {
|
||||
match self.expr {
|
||||
Expr::List(list) => Some(list),
|
||||
@ -787,7 +798,7 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = self.get_span_contents(span);
|
||||
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -826,6 +837,28 @@ impl ParserWorkingSet {
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: Handle error case
|
||||
pub fn parse_shape_name(&self, bytes: &[u8]) -> SyntaxShape {
|
||||
match bytes {
|
||||
b"any" => SyntaxShape::Any,
|
||||
b"string" => SyntaxShape::String,
|
||||
b"column-path" => SyntaxShape::ColumnPath,
|
||||
b"number" => SyntaxShape::Number,
|
||||
b"range" => SyntaxShape::Range,
|
||||
b"int" => SyntaxShape::Int,
|
||||
b"path" => SyntaxShape::FilePath,
|
||||
b"glob" => SyntaxShape::GlobPattern,
|
||||
b"block" => SyntaxShape::Block,
|
||||
b"cond" => SyntaxShape::RowCondition,
|
||||
b"operator" => SyntaxShape::Operator,
|
||||
b"math" => SyntaxShape::MathExpression,
|
||||
b"variable" => SyntaxShape::Variable,
|
||||
b"signature" => SyntaxShape::Signature,
|
||||
b"expr" => SyntaxShape::Expression,
|
||||
_ => SyntaxShape::Any,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_type(&self, bytes: &[u8]) -> Type {
|
||||
if bytes == b"int" {
|
||||
Type::Int
|
||||
@ -887,6 +920,140 @@ impl ParserWorkingSet {
|
||||
self.parse_math_expression(spans)
|
||||
}
|
||||
|
||||
pub fn parse_signature(&mut self, span: Span) -> (Expression, Option<ParseError>) {
|
||||
enum ParseMode {
|
||||
ArgMode,
|
||||
TypeMode,
|
||||
}
|
||||
|
||||
enum Arg {
|
||||
Positional(PositionalArg),
|
||||
Flag(Flag),
|
||||
}
|
||||
|
||||
println!("parse signature");
|
||||
let bytes = self.get_span_contents(span);
|
||||
|
||||
let mut error = None;
|
||||
let mut start = span.start;
|
||||
let mut end = span.end;
|
||||
|
||||
if bytes.starts_with(b"[") {
|
||||
start += 1;
|
||||
}
|
||||
if bytes.ends_with(b"]") {
|
||||
end -= 1;
|
||||
} else {
|
||||
error = error.or_else(|| {
|
||||
Some(ParseError::Unclosed(
|
||||
"]".into(),
|
||||
Span {
|
||||
start: end,
|
||||
end: end + 1,
|
||||
},
|
||||
))
|
||||
});
|
||||
}
|
||||
|
||||
let span = Span { start, end };
|
||||
let source = &self.file_contents[..span.end];
|
||||
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
span.start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![b':', b'?'],
|
||||
},
|
||||
);
|
||||
error = error.or(err);
|
||||
|
||||
let mut args: Vec<Arg> = vec![];
|
||||
let mut parse_mode = ParseMode::ArgMode;
|
||||
|
||||
for token in &output {
|
||||
match token {
|
||||
Token {
|
||||
contents: crate::TokenContents::Item,
|
||||
span,
|
||||
} => {
|
||||
let contents = &self.file_contents[span.start..span.end];
|
||||
|
||||
if contents == b":" {
|
||||
match parse_mode {
|
||||
ParseMode::ArgMode => {
|
||||
parse_mode = ParseMode::TypeMode;
|
||||
}
|
||||
ParseMode::TypeMode => {
|
||||
// We're seeing two types for the same thing for some reason, error
|
||||
error = error.or(Some(ParseError::Mismatch("type".into(), *span)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match parse_mode {
|
||||
ParseMode::ArgMode => {
|
||||
if contents.starts_with(b"--") {
|
||||
// Long flag
|
||||
args.push(Arg::Flag(Flag {
|
||||
arg: None,
|
||||
desc: String::new(),
|
||||
long: String::from_utf8_lossy(contents).to_string(),
|
||||
short: None,
|
||||
required: true,
|
||||
}));
|
||||
} else {
|
||||
// Positional arg
|
||||
args.push(Arg::Positional(PositionalArg {
|
||||
desc: String::new(),
|
||||
name: String::from_utf8_lossy(contents).to_string(),
|
||||
shape: SyntaxShape::Any,
|
||||
}))
|
||||
}
|
||||
}
|
||||
ParseMode::TypeMode => {
|
||||
if let Some(last) = args.last_mut() {
|
||||
let syntax_shape = self.parse_shape_name(contents);
|
||||
//TODO check if we're replacing one already
|
||||
match last {
|
||||
Arg::Positional(PositionalArg { name, desc, shape }) => {
|
||||
*shape = syntax_shape;
|
||||
}
|
||||
Arg::Flag(Flag {
|
||||
long,
|
||||
short,
|
||||
arg,
|
||||
required,
|
||||
desc,
|
||||
}) => *arg = Some(syntax_shape),
|
||||
}
|
||||
}
|
||||
parse_mode = ParseMode::ArgMode;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let mut sig = Signature::new(String::new());
|
||||
|
||||
for arg in args {
|
||||
match arg {
|
||||
Arg::Positional(positional) => sig.required_positional.push(positional),
|
||||
Arg::Flag(flag) => sig.named.push(flag),
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
Expression {
|
||||
expr: Expr::Signature(sig),
|
||||
span,
|
||||
},
|
||||
error,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_list_expression(
|
||||
&mut self,
|
||||
span: Span,
|
||||
@ -919,7 +1086,14 @@ impl ParserWorkingSet {
|
||||
let span = Span { start, end };
|
||||
let source = &self.file_contents[..span.end];
|
||||
|
||||
let (output, err) = lex(&source, span.start, crate::LexMode::CommaAndNewlineIsSpace);
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
span.start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![],
|
||||
},
|
||||
);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -983,7 +1157,14 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = &self.file_contents[..end];
|
||||
|
||||
let (output, err) = lex(&source, start, crate::LexMode::CommaAndNewlineIsSpace);
|
||||
let (output, err) = lex(
|
||||
&source,
|
||||
start,
|
||||
&crate::LexMode::Custom {
|
||||
whitespace: vec![b'\n', b','],
|
||||
special: vec![],
|
||||
},
|
||||
);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -1073,7 +1254,7 @@ impl ParserWorkingSet {
|
||||
|
||||
let source = &self.file_contents[..end];
|
||||
|
||||
let (output, err) = lex(&source, start, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&source, start, &crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
@ -1116,11 +1297,14 @@ impl ParserWorkingSet {
|
||||
return self.parse_full_column_path(span);
|
||||
} else if bytes.starts_with(b"[") {
|
||||
match shape {
|
||||
SyntaxShape::Any | SyntaxShape::List(_) | SyntaxShape::Table => {}
|
||||
SyntaxShape::Any
|
||||
| SyntaxShape::List(_)
|
||||
| SyntaxShape::Table
|
||||
| SyntaxShape::Signature => {}
|
||||
_ => {
|
||||
return (
|
||||
Expression::garbage(span),
|
||||
Some(ParseError::Mismatch("non-table/non-list".into(), span)),
|
||||
Some(ParseError::Mismatch("non-[] value".into(), span)),
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -1179,6 +1363,16 @@ impl ParserWorkingSet {
|
||||
)
|
||||
}
|
||||
}
|
||||
SyntaxShape::Signature => {
|
||||
if bytes.starts_with(b"[") {
|
||||
self.parse_signature(span)
|
||||
} else {
|
||||
(
|
||||
Expression::garbage(span),
|
||||
Some(ParseError::Mismatch("signature".into(), span)),
|
||||
)
|
||||
}
|
||||
}
|
||||
SyntaxShape::List(elem) => {
|
||||
if bytes.starts_with(b"[") {
|
||||
self.parse_list_expression(span, &elem)
|
||||
@ -1419,22 +1613,20 @@ impl ParserWorkingSet {
|
||||
.remove(0)
|
||||
.as_string()
|
||||
.expect("internal error: expected def name");
|
||||
let args = call
|
||||
let mut signature = call
|
||||
.positional
|
||||
.remove(0)
|
||||
.as_list()
|
||||
.expect("internal error: expected param list")
|
||||
.into_iter()
|
||||
.map(|x| x.as_var().expect("internal error: expected parameter"))
|
||||
.collect::<Vec<_>>();
|
||||
.as_signature()
|
||||
.expect("internal error: expected param list");
|
||||
let block_id = call
|
||||
.positional
|
||||
.remove(0)
|
||||
.as_block()
|
||||
.expect("internal error: expected block");
|
||||
|
||||
signature.name = name;
|
||||
let decl = Declaration {
|
||||
signature: Signature::new(name),
|
||||
signature,
|
||||
body: Some(block_id),
|
||||
};
|
||||
|
||||
@ -1526,7 +1718,7 @@ impl ParserWorkingSet {
|
||||
pub fn parse_file(&mut self, fname: &str, contents: Vec<u8>) -> (Block, Option<ParseError>) {
|
||||
let mut error = None;
|
||||
|
||||
let (output, err) = lex(&contents, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(&contents, 0, &crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
self.add_file(fname.into(), contents);
|
||||
@ -1545,7 +1737,7 @@ impl ParserWorkingSet {
|
||||
|
||||
self.add_file("source".into(), source.into());
|
||||
|
||||
let (output, err) = lex(source, 0, crate::LexMode::Normal);
|
||||
let (output, err) = lex(source, 0, &crate::LexMode::Normal);
|
||||
error = error.or(err);
|
||||
|
||||
let (output, err) = lite_parse(&output);
|
||||
|
Loading…
Reference in New Issue
Block a user