Feature/def signature with comments (#2905)

* Put parse_definition related funcs into own module

* Add failing lexer test

* Implement Parsing of definition signature

This commit changes how the signature of a function is parsed. Previously, the
signature was handled with a bit of quick-and-dirty string matching; now it is
parsed properly.
The grammar of a definition signature understood by these parsing functions is
as follows:
 `[ (parameter | flag | <eol>)* ]`
where
parameter is:
    `name (<:> type)? (<,> | <eol> | (#Comment <eol>))?`
flag is:
    `--name (-shortform)? (<:> type)? (<,> | <eol> | (#Comment <eol>))?`
(Note: no <,> is required after the last item.)
Note: it is now possible to attach comments to flags and parameters.
Example:
```shell
[
  d:int            # The required d parameter
  --x (-x):string  # The all powerful x flag
  --y (-y):int     # The accompanying y flag
]
```

(Sadly, there seems to be a bug in the lexer (or is this expected behaviour?):
`--x(-x)` is treated as a single baseline token and is therefore not recognized as
two tokens. For now a space has to be inserted between the flag and its shortform.)
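
For illustration, a complete definition using the new syntax could look like the
sketch below. The `greet` command, its parameter, and its flags are made up for
this example; note the space before each shortform, per the workaround above:

```shell
def greet [
  name:string       # Who to greet
  --loud (-l)       # A flag without a type (defaults to any)
  --times (-t):int  # A flag with a type
] {
  echo $name
}
```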

During the implementation of the module, a question arose:
Should flag/parameter names be allowed to be type names?
Example case:
```shell
def f [ string ] { echo $string }
```
Currently, an error is thrown.
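
Until this question is settled, the workaround is to pick a parameter name that is
not also a type name (the names `f` and `s` below are arbitrary):

```shell
def f [ s:string ] { echo $s }
```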

* Fix clippy lints

* Remove wrong comment

* Add spacing

* Add Cargo.lock
Authored by Leonhard Kipp on 2021-01-11 18:53:58 +01:00; committed by GitHub
parent 481c6d4511
commit 3e6e3a207c
9 changed files with 1169 additions and 289 deletions

Cargo.lock (generated)

@ -129,8 +129,8 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efd3d156917d94862e779f356c5acae312b08fd3121e792c857d7928c8088423"
dependencies = [
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -255,8 +255,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5444eec77a9ec2bfe4524139e09195862e981400c4358d3b760cae634e4c4ee"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -301,8 +301,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -1064,9 +1064,9 @@ dependencies = [
"matches",
"phf",
"proc-macro2",
"quote",
"quote 1.0.8",
"smallvec 1.6.0",
"syn",
"syn 1.0.57",
]
[[package]]
@ -1075,8 +1075,8 @@ version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
dependencies = [
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -1200,8 +1200,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71f31892cd5c62e414316f2963c5689242c43d8e7bbcaaeca97e5e28c95d91d9"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
name = "derive_is_enum_variant"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ac8859845146979953797f03cc5b282fb4396891807cdb3d04929a88418197"
dependencies = [
"heck",
"quote 0.3.15",
"syn 0.11.11",
]
[[package]]
@ -1211,8 +1222,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -1496,8 +1507,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
"synstructure",
]
@ -1754,8 +1765,8 @@ checksum = "77408a692f1f97bcc61dc001d752e00643408fbc922e4d634c655df50d595556"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -1904,8 +1915,8 @@ checksum = "24b328c01a4d71d2d8173daa93562a73ab0fe85616876f02500f53d82948c504"
dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -1997,6 +2008,15 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heim"
version = "0.1.0-rc.1"
@ -2216,8 +2236,8 @@ dependencies = [
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -3263,11 +3283,14 @@ dependencies = [
"bigdecimal",
"codespan-reporting",
"derive-new",
"derive_is_enum_variant",
"dunce",
"indexmap",
"log 0.4.11",
"nu-errors",
"nu-protocol",
"nu-source",
"nu-test-support",
"num-bigint 0.3.1",
"num-traits 0.2.14",
"serde 1.0.118",
@ -4035,8 +4058,8 @@ dependencies = [
"phf_shared",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -4073,8 +4096,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -4084,8 +4107,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7bcc46b8f73443d15bc1c5fecbb315718491fa9187fa483f0e359323cde8b3a"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -4214,8 +4237,8 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
"version_check",
]
@ -4226,7 +4249,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"quote 1.0.8",
"version_check",
]
@ -4248,7 +4271,7 @@ version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
dependencies = [
"unicode-xid",
"unicode-xid 0.2.1",
]
[[package]]
@ -4329,10 +4352,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "608c156fd8e97febc07dc9c2e2c80bf74cfc6ef26893eae3daf8bc2bc94a4b7f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
name = "quote"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
[[package]]
name = "quote"
version = "1.0.8"
@ -4675,9 +4704,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc6a6c2785d73d8f0157d10a40223bbf0210f18aecb261d39b96802f9ccc69d"
dependencies = [
"proc-macro2",
"quote",
"quote 1.0.8",
"rust-embed-utils",
"syn",
"syn 1.0.57",
"walkdir",
]
@ -4966,8 +4995,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -5250,7 +5279,7 @@ dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"quote 1.0.8",
]
[[package]]
@ -5311,6 +5340,17 @@ dependencies = [
"sxd-document",
]
[[package]]
name = "syn"
version = "0.11.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad"
dependencies = [
"quote 0.3.15",
"synom",
"unicode-xid 0.0.4",
]
[[package]]
name = "syn"
version = "1.0.57"
@ -5318,8 +5358,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4211ce9909eb971f111059df92c45640aad50a619cf55cd76476be803c4c68e6"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
"quote 1.0.8",
"unicode-xid 0.2.1",
]
[[package]]
name = "synom"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6"
dependencies = [
"unicode-xid 0.0.4",
]
[[package]]
@ -5329,9 +5378,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701"
dependencies = [
"proc-macro2",
"quote",
"syn",
"unicode-xid",
"quote 1.0.8",
"syn 1.0.57",
"unicode-xid 0.2.1",
]
[[package]]
@ -5459,8 +5508,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
]
[[package]]
@ -5956,6 +6005,12 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc"
[[package]]
name = "unicode-xid"
version = "0.2.1"
@ -6155,8 +6210,8 @@ dependencies = [
"lazy_static 1.4.0",
"log 0.4.11",
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
"wasm-bindgen-shared",
]
@ -6194,7 +6249,7 @@ version = "0.2.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6ac8995ead1f084a8dea1e65f194d0973800c7f571f6edd70adf06ecf77084"
dependencies = [
"quote",
"quote 1.0.8",
"wasm-bindgen-macro-support",
]
@ -6205,8 +6260,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a48c72f299d80557c7c62e37e7225369ecc0c963964059509fbafe917c7549"
dependencies = [
"proc-macro2",
"quote",
"syn",
"quote 1.0.8",
"syn 1.0.57",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]

crates/nu-parser/Cargo.toml

@ -18,10 +18,13 @@ num-bigint = {version = "0.3.1", features = ["serde"]}
num-traits = "0.2.14"
serde = "1.0.118"
shellexpand = "2.1.0"
derive_is_enum_variant = "0.1.1"
nu-errors = {version = "0.25.1", path = "../nu-errors"}
nu-protocol = {version = "0.25.1", path = "../nu-protocol"}
nu-source = {version = "0.25.1", path = "../nu-source"}
nu-test-support = {version = "0.25.1", path = "../nu-test-support"}
dunce = "1.0.1"
[features]
stable = []

crates/nu-parser/src/lex.rs

@ -1,5 +1,5 @@
use std::iter::Peekable;
use std::str::CharIndices;
use std::{fmt, iter::Peekable};
use nu_source::{Span, Spanned, SpannedItem};
@ -18,7 +18,7 @@ impl Token {
}
}
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, is_enum_variant)]
pub enum TokenContents {
/// A baseline token is an atomic chunk of source code. This means that the
/// token contains the entirety of string literals, as well as the entirety
@ -34,6 +34,28 @@ pub enum TokenContents {
EOL,
}
impl fmt::Display for TokenContents {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TokenContents::Baseline(base) => {
write!(f, "{}", base)
}
TokenContents::Comment(comm) => {
write!(f, "#{}", comm)
}
TokenContents::Pipe => {
write!(f, "|")
}
TokenContents::Semicolon => {
write!(f, ";")
}
TokenContents::EOL => {
write!(f, "\\n")
}
}
}
}
/// A `LiteCommand` is a list of words that will get meaning when processed by
/// the parser.
#[derive(Debug, Clone)]
@ -657,6 +679,19 @@ mod tests {
mod bare {
use super::*;
#[ignore = "result is Token::baseline(\"--flag(-f)\")"]
#[test]
fn lex_flag() {
let input = "--flag(-f)";
let (result, err) = lex(input, 0);
assert_eq!("", format!("{:?}", result));
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 6));
assert_eq!(result[1].span, span(7, 9));
}
#[test]
fn simple_1() {
let input = "foo bar baz";

crates/nu-parser/src/lib.rs

@ -1,3 +1,6 @@
#[macro_use]
extern crate derive_is_enum_variant;
mod errors;
mod lex;
mod parse;

crates/nu-parser/src/parse.rs

@ -5,19 +5,29 @@ use log::trace;
use nu_errors::{ArgumentError, ParseError};
use nu_protocol::hir::{
self, Binary, Block, ClassifiedCommand, Expression, ExternalRedirection, Flag, FlagKind, Group,
InternalCommand, Literal, Member, NamedArguments, Operator, Pipeline, RangeOperator,
SpannedExpression, Unit,
InternalCommand, Member, NamedArguments, Operator, Pipeline, RangeOperator, SpannedExpression,
Unit,
};
use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape, UnspannedPathMember};
use nu_source::{Span, Spanned, SpannedItem};
use num_bigint::BigInt;
//use crate::errors::{ParseError, ParseResult};
use crate::lex::{block, lex, LiteBlock, LiteCommand, LitePipeline};
use crate::path::expand_path;
use crate::scope::ParserScope;
use bigdecimal::BigDecimal;
use self::{
def::{parse_definition, parse_definition_prototype},
util::trim_quotes,
util::verify_and_strip,
};
mod def;
mod util;
pub use self::util::garbage;
/// Parses a simple column path, one without a variable (implied or explicit) at the head
pub fn parse_simple_column_path(
lite_arg: &Spanned<String>,
@ -198,17 +208,6 @@ pub fn parse_full_column_path(
}
}
fn trim_quotes(input: &str) -> String {
let mut chars = input.chars();
match (chars.next(), chars.next_back()) {
(Some('\''), Some('\'')) => chars.collect(),
(Some('"'), Some('"')) => chars.collect(),
(Some('`'), Some('`')) => chars.collect(),
_ => input.to_string(),
}
}
/// Parse a numeric range
fn parse_range(
lite_arg: &Spanned<String>,
@ -674,28 +673,6 @@ fn parse_list(
(output, error)
}
fn verify_and_strip(
contents: &Spanned<String>,
left: char,
right: char,
) -> (String, Option<ParseError>) {
let mut chars = contents.item.chars();
match (chars.next(), chars.next_back()) {
(Some(l), Some(r)) if l == left && r == right => {
let output: String = chars.collect();
(output, None)
}
_ => (
String::new(),
Some(ParseError::mismatch(
format!("value in {} {}", left, right),
contents.clone(),
)),
),
}
}
fn parse_table(
lite_block: &LiteBlock,
scope: &dyn ParserScope,
@ -2034,200 +2011,6 @@ fn parse_alias(call: &LiteCommand, scope: &dyn ParserScope) -> Option<ParseError
None
}
fn parse_signature(
name: &str,
s: &Spanned<String>,
scope: &dyn ParserScope,
) -> (Signature, Option<ParseError>) {
let mut err = None;
let (preparsed_params, error) = parse_arg(SyntaxShape::Table, scope, s);
if err.is_none() {
err = error;
}
let mut signature = Signature::new(name);
if let SpannedExpression {
expr: Expression::List(preparsed_params),
..
} = preparsed_params
{
for preparsed_param in preparsed_params.iter() {
match &preparsed_param.expr {
Expression::Literal(Literal::String(st)) => {
let parts: Vec<_> = st.split(':').collect();
if parts.len() == 1 {
if parts[0].starts_with("--") {
// Flag
let flagname = parts[0][2..].to_string();
signature
.named
.insert(flagname, (NamedType::Switch(None), String::new()));
} else {
// Positional
signature.positional.push((
PositionalType::Mandatory(parts[0].to_string(), SyntaxShape::Any),
String::new(),
));
}
} else if parts.len() == 2 {
if parts[0].starts_with("--") {
// Flag
let flagname = parts[0][2..].to_string();
let shape = match parts[1] {
"int" => SyntaxShape::Int,
"string" => SyntaxShape::String,
"path" => SyntaxShape::FilePath,
"table" => SyntaxShape::Table,
"unit" => SyntaxShape::Unit,
"number" => SyntaxShape::Number,
"pattern" => SyntaxShape::GlobPattern,
"range" => SyntaxShape::Range,
"block" => SyntaxShape::Block,
"any" => SyntaxShape::Any,
_ => {
if err.is_none() {
err = Some(ParseError::mismatch(
"params with known types",
s.clone(),
));
}
SyntaxShape::Any
}
};
signature.named.insert(
flagname,
(NamedType::Optional(None, shape), String::new()),
);
} else {
// Positional
let name = parts[0].to_string();
let shape = match parts[1] {
"int" => SyntaxShape::Int,
"string" => SyntaxShape::String,
"path" => SyntaxShape::FilePath,
"table" => SyntaxShape::Table,
"unit" => SyntaxShape::Unit,
"number" => SyntaxShape::Number,
"pattern" => SyntaxShape::GlobPattern,
"range" => SyntaxShape::Range,
"block" => SyntaxShape::Block,
"any" => SyntaxShape::Any,
_ => {
if err.is_none() {
err = Some(ParseError::mismatch(
"params with known types",
s.clone(),
));
}
SyntaxShape::Any
}
};
signature
.positional
.push((PositionalType::Mandatory(name, shape), String::new()));
}
} else if err.is_none() {
err = Some(ParseError::mismatch("param with type", s.clone()));
}
}
_ => {
if err.is_none() {
err = Some(ParseError::mismatch("parameter", s.clone()));
}
}
}
}
(signature, err)
} else {
(
signature,
Some(ParseError::mismatch("parameters", s.clone())),
)
}
}
fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<ParseError> {
// A this point, we've already handled the prototype and put it into scope
// So our main goal here is to parse the block now that the names and
// prototypes of adjacent commands are also available
if call.parts.len() == 4 {
if call.parts.len() != 4 {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
if call.parts[0].item != "def" {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
let name = trim_quotes(&call.parts[1].item);
let (mut signature, err) = parse_signature(&name, &call.parts[2], scope);
//Add commands comments to signature usage
signature.usage = call.comments_joined();
if err.is_some() {
return err;
};
let mut chars = call.parts[3].chars();
match (chars.next(), chars.next_back()) {
(Some('{'), Some('}')) => {
// We have a literal block
let string: String = chars.collect();
scope.enter_scope();
let (tokens, err) = lex(&string, call.parts[3].span.start() + 1);
if err.is_some() {
return err;
};
let (lite_block, err) = block(tokens);
if err.is_some() {
return err;
};
let (mut block, err) = classify_block(&lite_block, scope);
scope.exit_scope();
block.params = signature;
block.params.name = name;
scope.add_definition(block);
err
}
_ => Some(ParseError::mismatch("body", call.parts[3].clone())),
}
} else {
Some(ParseError::internal_error(
"need a block".to_string().spanned(call.span()),
))
}
}
fn parse_definition_prototype(call: &LiteCommand, scope: &dyn ParserScope) -> Option<ParseError> {
let mut err = None;
if call.parts.len() != 4 {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
if call.parts[0].item != "def" {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
let name = trim_quotes(&call.parts[1].item);
let (signature, error) = parse_signature(&name, &call.parts[2], scope);
if err.is_none() {
err = error;
}
scope.add_definition(Block::new(signature, vec![], IndexMap::new(), call.span()));
err
}
pub fn classify_block(
lite_block: &LiteBlock,
scope: &dyn ParserScope,
@ -2350,11 +2133,6 @@ pub fn parse(
classify_block(&lite_block, scope)
}
/// Easy shorthand function to create a garbage expression at the given span
pub fn garbage(span: Span) -> SpannedExpression {
SpannedExpression::new(Expression::Garbage, span)
}
#[test]
fn unit_parse_byte_units() -> Result<(), ParseError> {
struct TestCase {

crates/nu-parser/src/parse/def.rs (new file)

@ -0,0 +1,100 @@
use crate::parse::{classify_block, util::trim_quotes};
use indexmap::IndexMap;
use nu_errors::ParseError;
use nu_protocol::hir::Block;
use nu_source::SpannedItem;
//use crate::errors::{ParseError, ParseResult};
use crate::lex::{block, lex, LiteCommand};
use crate::ParserScope;
use self::param_flag_list::parse_signature;
mod param_flag_list;
pub(crate) fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<ParseError> {
// At this point, we've already handled the prototype and put it into scope;
// So our main goal here is to parse the block now that the names and
// prototypes of adjacent commands are also available
if call.parts.len() == 4 {
if call.parts.len() != 4 {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
if call.parts[0].item != "def" {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
let name = trim_quotes(&call.parts[1].item);
let (mut signature, err) = parse_signature(&name, &call.parts[2]);
//Add commands comments to signature usage
signature.usage = call.comments_joined();
if err.is_some() {
return err;
};
let mut chars = call.parts[3].chars();
match (chars.next(), chars.next_back()) {
(Some('{'), Some('}')) => {
// We have a literal block
let string: String = chars.collect();
scope.enter_scope();
let (tokens, err) = lex(&string, call.parts[3].span.start() + 1);
if err.is_some() {
return err;
};
let (lite_block, err) = block(tokens);
if err.is_some() {
return err;
};
let (mut block, err) = classify_block(&lite_block, scope);
scope.exit_scope();
block.params = signature;
block.params.name = name;
scope.add_definition(block);
err
}
_ => Some(ParseError::mismatch("body", call.parts[3].clone())),
}
} else {
Some(ParseError::internal_error(
"need a block".to_string().spanned(call.span()),
))
}
}
pub(crate) fn parse_definition_prototype(
call: &LiteCommand,
scope: &dyn ParserScope,
) -> Option<ParseError> {
let mut err = None;
if call.parts.len() != 4 {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
if call.parts[0].item != "def" {
return Some(ParseError::mismatch("definition", call.parts[0].clone()));
}
let name = trim_quotes(&call.parts[1].item);
let (signature, error) = parse_signature(&name, &call.parts[2]);
if err.is_none() {
err = error;
}
scope.add_definition(Block::new(signature, vec![], IndexMap::new(), call.span()));
err
}

crates/nu-parser/src/parse/def/param_flag_list.rs (new file)

@ -0,0 +1,859 @@
///This module contains functions to parse the parameter and flag list (signature) of a
///definition
///Such a signature can be of the following format:
/// [ (parameter | flag | <eol>)* ]
///Where
///parameter is:
/// name (<:> type)? (<,> | <eol> | (#Comment <eol>))?
///flag is:
/// --name (-shortform)? (<:> type)? (<,> | <eol> | (#Comment <eol>))?
///
use log::debug;
use crate::{
lex::{lex, Token, TokenContents},
parse::util::token_to_spanned_string,
};
use nu_errors::ParseError;
use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape};
use nu_source::{Span, Spanned, SpannedItem};
pub fn parse_signature(
name: &str,
signature_vec: &Spanned<String>,
) -> (Signature, Option<ParseError>) {
let mut err = None;
let mut chars = signature_vec.chars();
match (chars.next(), chars.next_back()) {
(Some('['), Some(']')) => {}
_ => {
err = err.or_else(|| {
Some(ParseError::mismatch(
"definition signature",
signature_vec.clone(),
))
});
}
}
let string: String = chars.collect();
debug!(
"signature vec span start: {}",
signature_vec.span.start() + 1
);
let (tokens, error) = lex(&string, signature_vec.span.start() + 1);
err = err.or(error);
//After normal lexing, tokens also need to be split on ',' and ':'
let tokens = lex_split_baseline_tokens_on(tokens, &[',', ':']);
let mut parameters = vec![];
let mut flags = vec![];
let mut i = 0;
while i < tokens.len() {
if tokens[i].contents.is_eol() {
//Skip leading eol
i += 1;
} else if is_flag(&tokens[i]) {
let (flag, advanced_by, error) = parse_flag(&tokens[i..], signature_vec);
err = err.or(error);
i += advanced_by;
flags.push(flag);
} else {
let (parameter, advanced_by, error) = parse_parameter(&tokens[i..], signature_vec);
err = err.or(error);
i += advanced_by;
parameters.push(parameter);
}
}
let signature = to_signature(name, parameters, flags);
debug!("Signature: {:?}", signature);
(signature, err)
}
fn parse_parameter(
tokens: &[Token],
tokens_as_str: &Spanned<String>,
) -> (Parameter, usize, Option<ParseError>) {
if tokens.is_empty() {
//TODO fix span
return (
Parameter::error(),
0,
Some(ParseError::unexpected_eof("parameter", tokens_as_str.span)),
);
}
let mut err: Option<ParseError> = None;
//1 because name = tokens[0]
let mut i = 1;
let (name, error) = parse_param_name(&tokens[0]);
err = err.or(error);
let (type_, advanced_by, error) = parse_optional_type(&tokens[i..]);
let type_ = type_.unwrap_or(SyntaxShape::Any);
err = err.or(error);
i += advanced_by;
let (comment_text, advanced_by, error) = parse_signature_item_end(&tokens[i..]);
i += advanced_by;
err = err.or(error);
let parameter = Parameter::new(
PositionalType::mandatory(&name.item, type_),
comment_text,
name.span,
);
debug!(
"Parsed parameter: {} with shape {:?}",
parameter.pos_type.name(),
parameter.pos_type.syntax_type()
);
(parameter, i, err)
}
fn parse_flag(
tokens: &[Token],
tokens_as_str: &Spanned<String>,
) -> (Flag, usize, Option<ParseError>) {
if tokens.is_empty() {
return (
Flag::error(),
0,
Some(ParseError::unexpected_eof("parameter", tokens_as_str.span)),
);
}
let mut err: Option<ParseError> = None;
//1 because name = tokens[0]
let mut i = 1;
let (name, error) = parse_flag_name(&tokens[0]);
err = err.or(error);
let (shortform, advanced_by, error) = parse_flag_optional_shortform(&tokens[i..]);
i += advanced_by;
err = err.or(error);
let shortform = shortform.map(|c| c.item);
let (type_, advanced_by, error) = parse_optional_type(&tokens[i..]);
let type_ = type_.unwrap_or(SyntaxShape::Any);
err = err.or(error);
i += advanced_by;
let (comment, advanced_by, error) = parse_signature_item_end(&tokens[i..]);
i += advanced_by;
err = err.or(error);
//TODO Fixup span
let flag = Flag::new(
name.item.clone(),
NamedType::Optional(shortform, type_),
comment,
name.span,
);
debug!("Parsed flag: {:?}", flag);
(flag, i, err)
}
fn parse_type(type_: &Spanned<String>) -> (SyntaxShape, Option<ParseError>) {
debug!("Parsing type {:?}", type_);
match type_.item.as_str() {
"int" => (SyntaxShape::Int, None),
"string" => (SyntaxShape::String, None),
"path" => (SyntaxShape::FilePath, None),
"table" => (SyntaxShape::Table, None),
"unit" => (SyntaxShape::Unit, None),
"number" => (SyntaxShape::Number, None),
"pattern" => (SyntaxShape::GlobPattern, None),
"range" => (SyntaxShape::Range, None),
"block" => (SyntaxShape::Block, None),
"any" => (SyntaxShape::Any, None),
_ => (
SyntaxShape::Any,
Some(ParseError::mismatch("type", type_.clone())),
),
}
}
fn parse_type_token(type_: &Token) -> (SyntaxShape, Option<ParseError>) {
match &type_.contents {
TokenContents::Baseline(type_str) => parse_type(&type_str.clone().spanned(type_.span)),
_ => (
SyntaxShape::Any,
Some(ParseError::mismatch(
"type",
type_.contents.to_string().spanned(type_.span),
)),
),
}
}
fn parse_param_name(token: &Token) -> (Spanned<String>, Option<ParseError>) {
match &token.contents {
TokenContents::Baseline(name) => {
//Make sure user didn't enter type
//REVIEW Should type names be allowed to be parameter names:
//Example case:
//def f [ string ] { echo $string }
//Currently an error is thrown
let name = name.clone().spanned(token.span);
let (_, err) = parse_type(&name);
if err.is_some() {
//Okay not a type. Just return name
(name, None)
} else {
(
name,
Some(ParseError::mismatch(
"parameter name",
token_to_spanned_string(token),
)),
)
}
}
_ => (
"Internal Error".to_string().spanned(token.span),
Some(ParseError::mismatch(
"parameter name",
token_to_spanned_string(token),
)),
),
}
}
fn parse_optional_comment(tokens: &[Token]) -> (Option<String>, usize) {
let mut comment_text = None;
let mut i: usize = 0;
if i < tokens.len() {
if let TokenContents::Comment(comment) = &tokens[i].contents {
comment_text = Some(comment.trim().to_string());
i += 1;
}
}
(comment_text, i)
}
fn parse_optional_type(tokens: &[Token]) -> (Option<SyntaxShape>, usize, Option<ParseError>) {
fn is_double_point(token: &Token) -> bool {
match &token.contents {
TokenContents::Baseline(base) => base == ":",
_ => false,
}
}
let mut err = None;
let mut type_ = None;
let mut i: usize = 0;
//Check if a type has to follow
if i < tokens.len() && is_double_point(&tokens[i]) {
//Type has to follow
if i + 1 == tokens.len() {
err = err.or_else(|| Some(ParseError::unexpected_eof("type", tokens[i].span)));
} else {
//Jump over <:>
i += 1;
let (shape, error) = parse_type_token(&tokens[i]);
err = err.or(error);
type_ = Some(shape);
i += 1;
}
}
(type_, i, err)
}
///Parses the end of a flag or a parameter
/// ((<,> | <eol>) | (#Comment <eol>)
fn parse_signature_item_end(tokens: &[Token]) -> (Option<String>, usize, Option<ParseError>) {
if tokens.is_empty() {
//If no more tokens, parameter/flag doesn't need ',' or comment to be properly finished
return (None, 0, None);
}
let mut i = 0;
let err = None;
let (parsed_comma, advanced_by) = parse_comma(&tokens[i..]);
i += advanced_by;
let (comment, advanced_by) = parse_optional_comment(&tokens[i..]);
i += advanced_by;
let (parsed_eol, advanced_by) = parse_eol(&tokens[i..]);
i += advanced_by;
debug!(
"Parsed comma {} and parsed eol {}",
parsed_comma, parsed_eol
);
////Separating flags/parameters is optional.
////If this should change, the below code would raise a warning whenever 2 parameters/flags are
////not delimited by <,> or <eol>
//if there is next item, but it's not comma, then it must be Optional(#Comment) + <eof>
//let parsed_delimiter = parsed_comma || parsed_eol;
//if !parsed_delimiter && i < tokens.len() {
// //If neither ',' nor eol was parsed, but more tokens are coming
// err = err.or(Some(ParseError::mismatch(
// "Newline or ','",
// (token[i-1].to_string() + token[i].to_string()).spanned(token[i-1].span.until(token[i].span))
// )));
//}
(comment, i, err)
}
fn parse_flag_name(token: &Token) -> (Spanned<String>, Option<ParseError>) {
if let TokenContents::Baseline(name) = &token.contents {
if !name.starts_with("--") {
(
name.clone().spanned(token.span),
Some(ParseError::mismatch(
"longform of a flag (Starting with --)",
token_to_spanned_string(token),
)),
)
} else {
//Discard preceding --
let name = name[2..].to_string();
(name.spanned(token.span), None)
}
} else {
(
"".to_string().spanned_unknown(),
Some(ParseError::mismatch(
"longform of a flag (Starting with --)",
token_to_spanned_string(token),
)),
)
}
}
fn parse_flag_optional_shortform(
tokens: &[Token],
) -> (Option<Spanned<char>>, usize, Option<ParseError>) {
if tokens.is_empty() {
return (None, 0, None);
}
let token = &tokens[0];
if let TokenContents::Baseline(shortform) = &token.contents {
let mut chars = shortform.chars();
match (chars.next(), chars.next_back()) {
(Some('('), Some(')')) => {
let mut err = None;
let mut start = token.span.start() + 1; //Skip '('
let end = token.span.end() - 1; // Skip ')'
let mut c: String = chars.collect();
let dash_count = c.chars().take_while(|c| *c == '-').count();
debug!("Dash count {}", dash_count);
match dash_count {
0 => {
//If no starting -
err = err.or_else(|| {
Some(ParseError::mismatch(
"Shortflag starting with '-'",
c.clone().spanned((start, end)),
))
});
}
1 => {
//Skip over '-'
start += 1;
c.remove(0);
}
_ => {
//If --
err = err.or_else(|| {
Some(ParseError::mismatch(
"Shortflag starting with a single '-'",
c.clone().spanned((start, end)),
))
});
//Skip over --
start += dash_count;
c = c
.strip_prefix(&"-".repeat(dash_count))
.unwrap_or("X")
.into();
}
}
let err = err.or_else(|| match c.chars().count() {
0 => Some(ParseError::mismatch(
"Shortflag of exactly 1 character",
shortform.clone().spanned((start, end)),
)),
1 => None,
_ => Some(ParseError::mismatch(
"Shortflag of exactly 1 character",
c.clone().spanned((start, end)),
)),
});
let c = c.chars().next().unwrap_or('X').spanned((start, end));
(Some(*c.spanned((start, end))), 1, err)
}
_ => (None, 0, None),
}
} else {
(None, 0, None)
}
}
fn parse_eol(tokens: &[Token]) -> (bool, usize) {
if !tokens.is_empty() && tokens[0].contents.is_eol() {
(true, 1)
} else {
(false, 0)
}
}
fn parse_comma(tokens: &[Token]) -> (bool, usize) {
fn is_comma(token: &Token) -> bool {
match &token.contents {
TokenContents::Baseline(base) => base == ",",
_ => false,
}
}
if !tokens.is_empty() && is_comma(&tokens[0]) {
(true, 1)
} else {
(false, 0)
}
}
///True for short or longform flags. False otherwise
fn is_flag(token: &Token) -> bool {
match &token.contents {
TokenContents::Baseline(item) => item.starts_with('-'),
_ => false,
}
}
fn to_signature(name: &str, params: Vec<Parameter>, flags: Vec<Flag>) -> Signature {
let mut sign = Signature::new(name);
for param in params.into_iter() {
// pub positional: Vec<(PositionalType, Description)>,
sign.positional
.push((param.pos_type, param.desc.unwrap_or_else(|| "".to_string())));
}
for flag in flags.into_iter() {
sign.named.insert(
flag.long_name,
(flag.named_type, flag.desc.unwrap_or_else(|| "".to_string())),
);
}
sign
}
//Currently the lexer does not split baselines on , and :
//The parameter list requires this. Therefore here is a hacky method doing this.
fn lex_split_baseline_tokens_on(
tokens: Vec<Token>,
extra_baseline_terminal_tokens: &[char],
) -> Vec<Token> {
debug!("Before lex fix up {:?}", tokens);
let make_new_token =
|token_new: String, token_new_end: usize, terminator_char: Option<char>| {
let end = token_new_end;
let start = end - token_new.len();
let mut result = vec![];
//Only add token if its not empty
if !token_new.is_empty() {
result.push(Token::new(
TokenContents::Baseline(token_new),
Span::new(start, end),
));
}
//Insert terminator_char as baseline token
if let Some(ch) = terminator_char {
result.push(Token::new(
TokenContents::Baseline(ch.to_string()),
Span::new(end, end + 1),
));
}
result
};
let mut result = Vec::with_capacity(tokens.len());
for token in tokens {
match token.contents {
TokenContents::Baseline(base) => {
let token_offset = token.span.start();
let mut current = "".to_string();
for (i, c) in base.chars().enumerate() {
if extra_baseline_terminal_tokens.contains(&c) {
result.extend(make_new_token(current, i + token_offset, Some(c)));
current = "".to_string();
} else {
current.push(c);
}
}
result.extend(make_new_token(current, base.len() + token_offset, None));
}
_ => result.push(token),
}
}
result
}
type Description = String;
#[derive(Clone)]
struct Parameter {
pub pos_type: PositionalType,
pub desc: Option<Description>,
pub span: Span,
}
impl Parameter {
pub fn new(pos_type: PositionalType, desc: Option<Description>, span: Span) -> Parameter {
Parameter {
pos_type,
desc,
span,
}
}
pub fn error() -> Parameter {
Parameter::new(
PositionalType::optional("Internal Error", SyntaxShape::Any),
Some(
"Wanted to parse a parameter, but no input present. Please report this error!"
.to_string(),
),
Span::unknown(),
)
}
}
#[derive(Clone, Debug)]
struct Flag {
pub long_name: String,
pub named_type: NamedType,
pub desc: Option<Description>,
pub span: Span,
}
impl Flag {
pub fn new(
long_name: String,
named_type: NamedType,
desc: Option<Description>,
span: Span,
) -> Flag {
Flag {
long_name,
named_type,
desc,
span,
}
}
pub fn error() -> Flag {
Flag::new(
"Internal Error".to_string(),
NamedType::Switch(None),
Some(
"Wanted to parse a flag, but no input present. Please report this error!"
.to_string(),
),
Span::unknown(),
)
}
}
mod tests {
#[allow(unused_imports)]
use super::*;
#[allow(unused_imports)]
use nu_test_support::nu;
#[test]
fn simple_def_with_params() {
let name = "my_func";
let sign = "[param1:int, param2:string]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 27)));
assert!(err.is_none());
assert_eq!(
sign.positional,
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::Int),
"".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::String),
"".into()
),
]
);
}
#[test]
fn simple_def_with_params_with_comment() {
let name = "my_func";
let sign = "[
param1:path # My first param
param2:number # My second param
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 64)));
assert!(err.is_none());
assert_eq!(
sign.positional,
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::FilePath),
"My first param".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::Number),
"My second param".into()
),
]
);
}
#[test]
fn simple_def_with_params_without_type() {
let name = "my_func";
let sign = "[
param1 # My first param
param2:number # My second param
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 0)));
assert!(err.is_none());
assert_eq!(
sign.positional,
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::Any),
"My first param".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::Number),
"My second param".into()
),
]
);
}
#[test]
fn oddly_but_correct_written_params() {
let name = "my_func";
let sign = "[
param1 :int # param1
param2 : number # My second param
param4, param5:path , param6 # param6
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 0)));
assert!(err.is_none());
assert_eq!(
sign.positional,
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::Int),
"param1".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::Number),
"My second param".into()
),
(
PositionalType::Mandatory("param4".into(), SyntaxShape::Any),
"".into()
),
(
PositionalType::Mandatory("param5".into(), SyntaxShape::FilePath),
"".into()
),
(
PositionalType::Mandatory("param6".into(), SyntaxShape::Any),
"param6".into()
),
]
);
}
#[test]
fn err_wrong_type() {
let actual = nu!(
cwd: ".",
"def f [ param1:strig ] { echo hi }"
);
assert!(actual.err.contains("type"));
}
//For whatever reason, this gets reported as not used
#[allow(dead_code)]
fn assert_signature_has_flag(sign: &Signature, name: &str, type_: NamedType, comment: &str) {
assert_eq!(
Some((type_, comment.to_string())),
sign.named.get(name).cloned()
);
}
#[test]
fn simple_def_with_only_flags() {
let name = "my_func";
let sign = "[
--list (-l) : path # First flag
--verbose : number # Second flag
]";
// --all(-a) # My switch
let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
assert!(err.is_none());
assert_signature_has_flag(
&sign,
"list",
NamedType::Optional(Some('l'), SyntaxShape::FilePath),
"First flag",
);
assert_signature_has_flag(
&sign,
"verbose",
NamedType::Optional(None, SyntaxShape::Number),
"Second flag",
);
// assert_signature_has_flag(
// &sign,
// "verbose",
// NamedType::Switch(Some('a')),
// "Second flag",
// );
}
#[test]
fn simple_def_with_params_and_flags() {
let name = "my_func";
let sign = "[
--list (-l) : path # First flag
param1, param2:table # Param2 Doc
--verbose # Second flag
param3 : number,
--flag3 # Third flag
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
assert!(err.is_none());
assert_signature_has_flag(
&sign,
"list",
NamedType::Optional(Some('l'), SyntaxShape::FilePath),
"First flag",
);
assert_signature_has_flag(
&sign,
"verbose",
NamedType::Optional(None, SyntaxShape::Any),
"Second flag",
);
assert_signature_has_flag(
&sign,
"flag3",
NamedType::Optional(None, SyntaxShape::Any),
"Third flag",
);
assert_eq!(
sign.positional,
// --list (-l) : path # First flag
// param1, param2:table # Param2 Doc
// --verbose # Second flag
// param3 : number,
// --flag3 # Third flag
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::Any),
"".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::Table),
"Param2 Doc".into()
),
(
PositionalType::Mandatory("param3".into(), SyntaxShape::Number),
"".into()
),
]
);
}
#[test]
fn simple_def_with_parameters_and_flags_no_delimiter() {
let name = "my_func";
let sign = "[ param1:int param2
--force (-f) param3 # Param3
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
assert!(err.is_none());
assert_signature_has_flag(
&sign,
"force",
NamedType::Optional(Some('f'), SyntaxShape::Any),
"",
);
assert_eq!(
sign.positional,
// --list (-l) : path # First flag
// param1, param2:table # Param2 Doc
// --verbose # Second flag
// param3 : number,
// --flag3 # Third flag
vec![
(
PositionalType::Mandatory("param1".into(), SyntaxShape::Int),
"".into()
),
(
PositionalType::Mandatory("param2".into(), SyntaxShape::Any),
"".into()
),
(
PositionalType::Mandatory("param3".into(), SyntaxShape::Any),
"Param3".into()
),
]
);
}
#[test]
fn simple_example_signature() {
let name = "my_func";
let sign = "[
d:int # The required d parameter
--x (-x):string # The all powerful x flag
--y (-y):int # The accompanying y flag
]";
let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
assert!(err.is_none());
assert_signature_has_flag(
&sign,
"x",
NamedType::Optional(Some('x'), SyntaxShape::String),
"The all powerful x flag",
);
assert_signature_has_flag(
&sign,
"y",
NamedType::Optional(Some('y'), SyntaxShape::Int),
"The accompanying y flag",
);
assert_eq!(
sign.positional,
vec![(
PositionalType::Mandatory("d".into(), SyntaxShape::Int),
"The required d parameter".into()
)]
);
}
}

crates/nu-parser/src/parse/util.rs (new file)

@ -0,0 +1,47 @@
use nu_errors::ParseError;
use nu_protocol::hir::{Expression, SpannedExpression};
use nu_source::{Span, Spanned, SpannedItem};
use crate::lex::Token;
pub(crate) fn token_to_spanned_string(token: &Token) -> Spanned<String> {
token.contents.to_string().spanned(token.span)
}
/// Easy shorthand function to create a garbage expression at the given span
pub fn garbage(span: Span) -> SpannedExpression {
SpannedExpression::new(Expression::Garbage, span)
}
pub(crate) fn trim_quotes(input: &str) -> String {
let mut chars = input.chars();
match (chars.next(), chars.next_back()) {
(Some('\''), Some('\'')) => chars.collect(),
(Some('"'), Some('"')) => chars.collect(),
(Some('`'), Some('`')) => chars.collect(),
_ => input.to_string(),
}
}
pub(crate) fn verify_and_strip(
contents: &Spanned<String>,
left: char,
right: char,
) -> (String, Option<ParseError>) {
let mut chars = contents.item.chars();
match (chars.next(), chars.next_back()) {
(Some(l), Some(r)) if l == left && r == right => {
let output: String = chars.collect();
(output, None)
}
_ => (
String::new(),
Some(ParseError::mismatch(
format!("value in {} {}", left, right),
contents.clone(),
)),
),
}
}

crates/nu-protocol/src/signature.rs

@ -5,7 +5,7 @@ use nu_source::{b, DebugDocBuilder, PrettyDebug, PrettyDebugWithSource};
use serde::{Deserialize, Serialize};
/// The types of named parameter that a command can have
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum NamedType {
/// A flag without any associated argument. eg) `foo --bar, foo -b`
Switch(Option<char>),