engine-q merge

This commit is contained in:
Fernando Herrera
2022-02-07 19:11:34 +00:00
1965 changed files with 119062 additions and 20 deletions

View File

@ -0,0 +1,26 @@
# Crate manifest for nu-parser, Nushell's parser crate.
[package]
authors = ["The Nu Project Contributors"]
description = "Nushell parser"
edition = "2018"
license = "MIT"
name = "nu-parser"
version = "0.43.0"

[dependencies]
# Arbitrary-precision numbers used for numeric literals (serde-enabled).
bigdecimal = { package = "bigdecimal", version = "0.3.0", features = ["serde"] }
derive-new = "0.5.8"
# Insertion-order-preserving maps.
indexmap = { version="1.6.1", features=["serde-1"] }
log = "0.4"
num-bigint = { version="0.4.3", features=["serde"] }
itertools = "0.10.0"
smart-default = "0.6.0"
# Sibling Nushell crates, pinned to the workspace version.
nu-errors = { version = "0.43.0", path="../nu-errors" }
nu-data = { version = "0.43.0", path="../nu-data" }
nu-path = { version = "0.43.0", path="../nu-path" }
nu-protocol = { version = "0.43.0", path="../nu-protocol" }
nu-source = { version = "0.43.0", path="../nu-source" }
nu-test-support = { version = "0.43.0", path="../nu-test-support" }

[features]
stable = []

View File

@ -0,0 +1,18 @@
// use std::fmt::Debug;
// A combination of an informative parse error, and what has been successfully parsed so far
// #[derive(Debug)]
// pub struct ParseError {
// /// An informative cause for this parse error
// pub cause: nu_errors::ParseError,
// // /// What has been successfully parsed, if anything
// // pub partial: Option<T>,
// }
// pub type ParseResult<T> = Result<T, ParseError<T>>;
// impl<T: Debug> From<ParseError<T>> for nu_errors::ShellError {
// fn from(e: ParseError<T>) -> Self {
// e.cause.into()
// }
// }

View File

@ -0,0 +1,138 @@
use nu_errors::{ArgumentError, ParseError};
use nu_protocol::hir::InternalCommand;
use nu_protocol::NamedType;
use nu_source::{Span, Spanned, SpannedItem};
/// Match the available flags in a signature with what the user provided. This will check both long-form flags (--long) and shorthand flags (-l)
/// This also allows users to provide a group of shorthand flags (-la) that correspond to multiple shorthand flags at once.
pub fn get_flag_signature_spec(
    signature: &nu_protocol::Signature,
    cmd: &InternalCommand,
    arg: &Spanned<String>,
) -> (Vec<(String, NamedType)>, Option<ParseError>) {
    if arg.item.starts_with('-') {
        // It's a flag (or set of flags)
        let mut output = vec![];
        let mut error = None;

        // Drop the leading `-`.
        let remainder: String = arg.item.chars().skip(1).collect();

        if remainder.starts_with('-') {
            // Long flag expected (`--name` or `--name=value`).
            let mut remainder: String = remainder.chars().skip(1).collect();

            if remainder.contains('=') {
                // Split `name=value`; only the name participates in the
                // signature lookup below.
                let assignment: Vec<_> = remainder.split('=').collect();
                if assignment.len() != 2 {
                    // NOTE(review): this error can be overwritten by the
                    // UnexpectedFlag error below, because the unmodified
                    // `remainder` (still containing `=`) is then looked up
                    // in the signature — confirm whether that is intended.
                    error = Some(ParseError::argument_error(
                        cmd.name.to_string().spanned(cmd.name_span),
                        ArgumentError::InvalidExternalWord,
                    ));
                } else {
                    remainder = assignment[0].to_string();
                }
            }

            if let Some((named_type, _)) = signature.named.get(&remainder) {
                output.push((remainder.clone(), named_type.clone()));
            } else {
                error = Some(ParseError::argument_error(
                    cmd.name.to_string().spanned(cmd.name_span),
                    ArgumentError::UnexpectedFlag(arg.clone()),
                ));
            }
        } else {
            // Short flag(s) expected: each character maps to one named flag,
            // so `-la` resolves both `-l` and `-a`.
            let mut starting_pos = arg.span.start() + 1;

            for c in remainder.chars() {
                let mut found = false;
                for (full_name, named_arg) in &signature.named {
                    if Some(c) == named_arg.0.get_short() {
                        found = true;
                        output.push((full_name.clone(), named_arg.0.clone()));
                        break;
                    }
                }

                if !found {
                    // NOTE(review): the spanned item is the whole argument
                    // text while the span covers only the single unmatched
                    // character — confirm downstream error rendering expects
                    // this pairing.
                    error = Some(ParseError::argument_error(
                        cmd.name.to_string().spanned(cmd.name_span),
                        ArgumentError::UnexpectedFlag(
                            arg.item
                                .clone()
                                .spanned(Span::new(starting_pos, starting_pos + c.len_utf8())),
                        ),
                    ));
                }

                starting_pos += c.len_utf8();
            }
        }

        (output, error)
    } else {
        // It's not a flag, so don't bother with it
        (vec![], None)
    }
}
#[cfg(test)]
mod tests {
    use super::get_flag_signature_spec;
    use crate::{lex, parse_block};
    use nu_protocol::{hir::InternalCommand, NamedType, Signature, SyntaxShape};
    use nu_source::{HasSpan, Span};

    /// Builds a signature mirroring Ruby's `bundle add`: one switch and one
    /// named flag with short form `-g`, for exercising flag matching.
    fn bundle() -> Signature {
        Signature::build("bundle add")
            .switch("skip-install", "Adds the gem to the Gemfile but does not install it.", None)
            .named("group", SyntaxShape::String, "Specify the group(s) for the added gem. Multiple groups should be separated by commas.", Some('g'))
            .rest("rest", SyntaxShape::Any, "options")
    }

    #[test]
    fn parses_longform_flag_containing_equal_sign() {
        let input = "bundle add rails --group=development";
        let (tokens, _) = lex(input, 0, lex::lexer::NewlineMode::Normal);
        let (root_node, _) = parse_block(tokens);

        // One group, one pipeline, one command with four parts:
        // `bundle`, `add`, `rails`, `--group=development`.
        assert_eq!(root_node.block.len(), 1);
        assert_eq!(root_node.block[0].pipelines.len(), 1);
        assert_eq!(root_node.block[0].pipelines[0].commands.len(), 1);
        assert_eq!(root_node.block[0].pipelines[0].commands[0].parts.len(), 4);

        let command_node = root_node.block[0].pipelines[0].commands[0].clone();

        // The command name is the first two parts joined: `bundle add`.
        let idx = 1;
        let (name, name_span) = (
            command_node.parts[0..(idx + 1)]
                .iter()
                .map(|x| x.item.clone())
                .collect::<Vec<String>>()
                .join(" "),
            Span::new(
                command_node.parts[0].span.start(),
                command_node.parts[idx].span.end(),
            ),
        );

        let mut internal = InternalCommand::new(name, name_span, command_node.span());
        let signature = bundle();
        internal.args.set_initial_flags(&signature);

        // `--group=development` should resolve to the long flag `group`.
        let (flags, err) = get_flag_signature_spec(&signature, &internal, &command_node.parts[3]);
        let (long_name, spec) = flags[0].clone();

        assert!(err.is_none());
        assert_eq!(long_name, "group".to_string());
        assert_eq!(spec.get_short(), Some('g'));

        match spec {
            NamedType::Optional(_, _) => {}
            _ => panic!("optional flag didn't parse successfully"),
        }
    }
}

View File

@ -0,0 +1,537 @@
use smart_default::SmartDefault;
use std::iter::Peekable;
use std::str::CharIndices;
use nu_errors::ParseError;
use nu_source::{HasSpan, Span, Spanned, SpannedItem};
use super::token_group::TokenBuilder;
use super::tokens::{
CommandBuilder, CommentsBuilder, GroupBuilder, LiteBlock, LiteCommand, LiteComment,
PipelineBuilder, TokenContents,
};
type Input<'t> = Peekable<CharIndices<'t>>;
/// A single lexed token: its classified contents plus the source span it
/// was lexed from.
#[derive(Debug, Clone)]
pub struct Token {
    pub contents: TokenContents,
    pub span: Span,
}

impl Token {
    /// Creates a token from its contents and source span.
    pub fn new(contents: TokenContents, span: Span) -> Token {
        Token { contents, span }
    }
}
/// Controls how `lex` treats newline characters.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum NewlineMode {
    /// Treat newlines as a group separator
    Normal,
    /// Treat newlines as just another whitespace
    Whitespace,
}
/// The kinds of paired delimiters tracked while lexing a baseline token.
#[derive(Clone, Copy)]
enum BlockKind {
    Paren,
    CurlyBracket,
    SquareBracket,
}

impl BlockKind {
    /// Returns the closing character that matches this delimiter kind.
    fn closing(self) -> char {
        match self {
            Self::CurlyBracket => '}',
            Self::SquareBracket => ']',
            Self::Paren => ')',
        }
    }
}
/// Finds the extents of a baseline token, returning the string with its
/// associated span, along with any parse error that was discovered along the
/// way.
///
/// Baseline tokens are unparsed content separated by spaces or a command
/// separator (like pipe or semicolon) Baseline tokens may be surrounded by
/// quotes (single, double, or backtick) or braces (square, paren, curly)
///
/// Baseline tokens may be further processed based on the needs of the syntax
/// shape that encounters them. They are still lightly lexed. For example, if a
/// baseline token begins with `{`, the entire token will continue until the
/// closing `}`, taking comments into consideration.
pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
    let mut token_contents = String::new();

    // Byte offset (within this input) at which the token starts.
    let start_offset = if let Some((pos, _)) = src.peek() {
        *pos
    } else {
        0
    };

    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string literal lexer mode until we encounter the
    // closing quote.
    let mut quote_start: Option<char> = None;

    let mut in_comment = false;

    // This Vec tracks paired delimiters
    let mut block_level: Vec<BlockKind> = vec![];

    // A baseline token is terminated if it's not nested inside of a paired
    // delimiter and the next character is one of: `|`, `;`, `#` or any
    // whitespace.
    fn is_termination(block_level: &[BlockKind], c: char) -> bool {
        block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
    }

    // The process of slurping up a baseline token repeats:
    //
    // - String literal, which begins with `'`, `"` or `\``, and continues until
    //   the same character is encountered again.
    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
    //   the matching closing delimiter is found, skipping comments and string
    //   literals.
    // - When not nested inside of a delimiter pair, when a terminating
    //   character (whitespace, `|`, `;` or `#`) is encountered, the baseline
    //   token is done.
    // - Otherwise, accumulate the character into the current baseline token.
    while let Some((_, c)) = src.peek() {
        let c = *c;

        if quote_start.is_some() {
            // If we encountered the closing quote character for the current
            // string, we're done with the current string.
            if Some(c) == quote_start {
                quote_start = None;
            }
        } else if c == '#' {
            if is_termination(&block_level, c) {
                break;
            }
            in_comment = true;
        } else if c == '\n' {
            // A newline always ends a comment.
            in_comment = false;
            if is_termination(&block_level, c) {
                break;
            }
        } else if in_comment {
            // Inside a (nested) comment: accumulate without interpreting
            // quotes or delimiters.
            if is_termination(&block_level, c) {
                break;
            }
        } else if c == '\'' || c == '"' || c == '`' {
            // We encountered the opening quote of a string literal.
            quote_start = Some(c);
        } else if c == '[' {
            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
        } else if c == ']' {
            // We encountered a closing `]` delimiter. Pop off the opening `[`
            // delimiter.
            if let Some(BlockKind::SquareBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == '{' {
            // We encountered an opening `{` delimiter.
            block_level.push(BlockKind::CurlyBracket);
        } else if c == '}' {
            // We encountered a closing `}` delimiter. Pop off the opening `{`.
            if let Some(BlockKind::CurlyBracket) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if c == '(' {
            // We encountered an opening `(` delimiter.
            block_level.push(BlockKind::Paren);
        } else if c == ')' {
            // We encountered a closing `)` delimiter. Pop off the opening `(`.
            if let Some(BlockKind::Paren) = block_level.last() {
                let _ = block_level.pop();
            }
        } else if is_termination(&block_level, c) {
            break;
        }

        // Otherwise, accumulate the character into the current token.
        token_contents.push(c);

        // Consume the character.
        let _ = src.next();
    }

    // The span end is computed from the token's byte length, which matches
    // the byte offsets yielded by `char_indices`.
    let span = Span::new(
        start_offset + span_offset,
        start_offset + span_offset + token_contents.len(),
    );

    // If there is still unclosed opening delimiters, close them and add
    // synthetic closing characters to the accumulated token.
    if let Some(block) = block_level.last() {
        let delim: char = (*block).closing();
        let cause = ParseError::unexpected_eof(delim.to_string(), span);

        // Popping from the end closes the innermost delimiters first.
        while let Some(bk) = block_level.pop() {
            token_contents.push(bk.closing());
        }

        return (token_contents.spanned(span), Some(cause));
    }

    if let Some(delimiter) = quote_start {
        // The non-lite parse trims quotes on both sides, so we add the expected quote so that
        // anyone wanting to consume this partial parse (e.g., completions) will be able to get
        // correct information from the non-lite parse.
        token_contents.push(delimiter);

        return (
            token_contents.spanned(span),
            Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
        );
    }

    // If we didn't accumulate any characters, it's an unexpected error.
    if token_contents.is_empty() {
        return (
            token_contents.spanned(span),
            Some(ParseError::unexpected_eof("command".to_string(), span)),
        );
    }

    (token_contents.spanned(span), None)
}
/// We encountered a `#` character. Keep consuming characters until we encounter
/// a newline character (but don't consume it).
///
/// `hash_offset` is the byte offset of the `#` itself; all spans produced here
/// are computed relative to it.
fn parse_comment(input: &mut Input, hash_offset: usize) -> LiteComment {
    let mut comment = String::new();
    // True while we are still inside the run of whitespace that immediately
    // follows the `#`.
    let mut in_ws = true;
    // Byte length of that leading whitespace run.
    let mut body_start = 0;

    // Consume the `#` itself.
    input.next();

    while let Some((_, c)) = input.peek() {
        if *c == '\n' {
            break;
        }
        if in_ws && c.is_whitespace() {
            body_start += c.len_utf8();
        } else if in_ws && !c.is_whitespace() {
            in_ws = false;
        }
        comment.push(*c);
        input.next();
    }

    if body_start == 0 {
        // No leading whitespace: the whole comment text is the body.
        let len = comment.len();
        LiteComment::new(comment.spanned(Span::new(hash_offset + 1, hash_offset + 1 + len)))
    } else {
        // Split the leading whitespace from the body, each with its own span,
        // so a comment block can later be unindented as a unit.
        let ws = comment[..body_start].to_string();
        let body = comment[body_start..].to_string();
        let body_len = body.len();
        LiteComment::new_with_ws(
            ws.spanned(Span::new(hash_offset + 1, hash_offset + 1 + body_start)),
            body.spanned(Span::new(
                hash_offset + 1 + body_start,
                hash_offset + 1 + body_start + body_len,
            )),
        )
    }
}
/// Incrementally folds a token stream into groups, pipelines and commands.
/// Events (`eol`, `pipe`, `semicolon`, …) close the current level and roll
/// its contents into the level above.
#[derive(SmartDefault)]
struct BlockParser {
    // All completed groups.
    groups: TokenBuilder<GroupBuilder>,
    // The group currently being accumulated.
    group: GroupBuilder,
    // The pipeline currently being accumulated.
    pipeline: PipelineBuilder,
    // The command currently being accumulated.
    command: CommandBuilder,
    // The previously consumed token, used for `|`-continuation and
    // double-newline detection.
    prev_token: Option<Token>,
    // Comments seen since the last command; attached to the next command.
    prev_comments: CommentsBuilder,
    // Whitespace length of the first comment in the current comment block;
    // later comments are unindented relative to it.
    prev_comment_indent: usize,
}
impl BlockParser {
    /// Records the token that was just processed.
    fn consumed(&mut self, token: Token) {
        self.prev_token = Some(token);
    }

    /// Finishes parsing successfully, closing any in-progress group.
    fn success(mut self) -> (LiteBlock, Option<ParseError>) {
        self.close_group();
        (LiteBlock::new(self.groups.map(|g| g.into())), None)
    }

    /// Finishes parsing with an error, returning what was parsed so far.
    fn fail(self, error: ParseError) -> (LiteBlock, Option<ParseError>) {
        (LiteBlock::new(self.groups.map(|g| g.into())), Some(error))
    }

    /// Accumulates a comment for the next command. The first comment of a
    /// block fixes the indent that subsequent comments are unindented by.
    fn comment(&mut self, token: &LiteComment) {
        if self.prev_comments.is_empty() {
            self.prev_comment_indent = token.ws_len();
        }

        self.prev_comments
            .push(token.unindent(self.prev_comment_indent));
    }

    /// Handles a double newline: discards pending comments (they document
    /// nothing) and then behaves like a normal newline.
    fn eoleol(&mut self) {
        self.prev_comment_indent = 0;
        self.prev_comments.take();
        self.eol();
    }

    /// Handles a newline.
    fn eol(&mut self) {
        // If the last token on the current line is a `|`, the group
        // continues on the next line.
        if let Some(prev) = &self.prev_token {
            if let TokenContents::Pipe = prev.contents {
                return;
            }
        }

        self.close_group();
    }

    /// Handles a `|`: rolls the current command into the pipeline. Errors
    /// when there is no current command (a dangling pipe).
    fn pipe(&mut self) -> Result<(), ()> {
        // If the current command has content, accumulate it into
        // the current pipeline and start a new command.
        match self.close_command() {
            None => Err(()),
            Some(command) => {
                self.pipeline.push(command);
                Ok(())
            }
        }
    }

    /// Handles a `;`: terminates the current pipeline.
    fn semicolon(&mut self) {
        self.close_pipeline();
    }

    /// Accumulates one baseline part into the current command.
    fn baseline(&mut self, part: Spanned<String>) {
        // We encountered an unclassified character. Accumulate it into
        // the current command as a string.
        self.command.push(part);
    }

    /// Drains the current command (if any), attaching pending comments to it.
    fn close_command(&mut self) -> Option<LiteCommand> {
        let command = self.command.take()?;
        let command = LiteCommand {
            parts: command.into(),
            comments: self.prev_comments.take().map(|c| c.into()),
        };
        self.prev_comment_indent = 0;

        Some(command)
    }

    /// Drains the current command into the pipeline, then the pipeline into
    /// the current group.
    fn close_pipeline(&mut self) {
        if let Some(command) = self.close_command() {
            self.pipeline.push(command);
        }

        if let Some(pipeline) = self.pipeline.take() {
            self.group.push(pipeline);
        }
    }

    /// Drains the current pipeline into the group, then the group into the
    /// completed groups.
    fn close_group(&mut self) {
        self.close_pipeline();

        if let Some(group) = self.group.take() {
            self.groups.push(group);
        }
    }
}
/// Try to parse a list of tokens into a block.
///
/// Walks the token stream once, folding tokens into commands, commands into
/// pipelines, and pipelines into groups via `BlockParser`. Returns the parsed
/// `LiteBlock` along with the first error encountered, if any.
pub fn parse_block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
    let mut parser = BlockParser::default();
    let mut tokens = tokens.iter().peekable();

    // The parsing process repeats, dispatching on the token kind:
    //
    // - newline (`\n` or `\r`)
    // - pipes (`|`)
    // - semicolon
    // - baseline parts and comments
    while let Some(token) = tokens.next() {
        match &token.contents {
            TokenContents::Eol => {
                // If we encounter two newline characters in a row, use a special eoleol event,
                // which allows the parser to discard comments that shouldn't be treated as
                // documentation for the following item.
                let last_was_comment = matches!(
                    parser.prev_token,
                    Some(Token {
                        contents: TokenContents::Comment(..),
                        ..
                    })
                );
                let next_is_eol = matches!(
                    tokens.peek(),
                    Some(Token {
                        contents: TokenContents::Eol,
                        ..
                    })
                );

                if last_was_comment && next_is_eol {
                    // Consume the second EOL as part of the eoleol event.
                    tokens.next();
                    parser.eoleol();
                } else {
                    // We encountered a newline character. If the last token on the
                    // current line is a `|`, continue the current group on the next
                    // line. Otherwise, close up the current group by rolling up the
                    // current command into the current pipeline, and then roll up
                    // the current pipeline into the group.
                    parser.eol();
                }
            }
            TokenContents::Pipe => {
                // We encountered a pipe (`|`) character, which terminates a
                // command.
                if parser.pipe().is_err() {
                    // If the current command doesn't have content, return an
                    // error that indicates that the `|` was unexpected.
                    return parser.fail(ParseError::extra_tokens(
                        "|".to_string().spanned(token.span),
                    ));
                }
            }
            TokenContents::Semicolon => {
                // We encountered a semicolon (`;`) character, which terminates
                // a pipeline.
                parser.semicolon();
            }
            TokenContents::Baseline(part) => {
                // We encountered an unclassified character. Accumulate it into
                // the current command as a string.
                parser.baseline(part.to_string().spanned(token.span));
            }
            TokenContents::Comment(comment) => parser.comment(comment),
        }

        parser.consumed(token.clone());
    }

    parser.success()
}
/// Breaks the input string into a vector of tokens. This tokenization only tries to classify separators like
/// semicolons, pipes, etc from external bare values (values that haven't been classified further)
/// Takes in a string and an offset, which is used to offset the spans created (for when this function is used to parse inner strings)
pub fn lex(
    input: &str,
    span_offset: usize,
    newline_mode: NewlineMode,
) -> (Vec<Token>, Option<ParseError>) {
    // Break the input slice into an iterator of Unicode characters.
    let mut char_indices = input.char_indices().peekable();
    let mut error = None;

    let mut output = vec![];
    // Tracks whether the previous significant token completed a command; a
    // `;` directly following a `|` is reported as extra tokens.
    let mut is_complete = true;

    // The lexing process repeats. One character of lookahead is sufficient to decide what to do next.
    //
    // - `|`: the token is either `|` token or a `||` token
    // - `;`: the token is a semicolon
    // - `\n` or `\r`: the token is an EOL (end of line) token
    // - other whitespace: ignored
    // - `#` the token starts a line comment, which contains all of the subsequent characters until the next EOL
    // - anything else: a baseline token (see `baseline`)
    while let Some((idx, c)) = char_indices.peek() {
        if *c == '|' {
            // If the next character is `|`, it's either `|` or `||`.
            let idx = *idx;
            let prev_idx = idx;
            let _ = char_indices.next();

            // If the next character is `|`, we're looking at a `||`.
            if let Some((idx, c)) = char_indices.peek() {
                if *c == '|' {
                    let idx = *idx;
                    let _ = char_indices.next();
                    // `||` is kept as a baseline token, not a pipe.
                    output.push(Token::new(
                        TokenContents::Baseline("||".into()),
                        Span::new(span_offset + prev_idx, span_offset + idx + 1),
                    ));
                    continue;
                }
            }

            // Otherwise, it's just a regular `|` token.
            output.push(Token::new(
                TokenContents::Pipe,
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
            is_complete = false;
        } else if *c == ';' {
            // If the next character is a `;`, we're looking at a semicolon token.
            if !is_complete && error.is_none() {
                // BUG FIX: this span previously omitted `span_offset`, unlike
                // every other span built in this function, misplacing the
                // error when lexing inner strings.
                error = Some(ParseError::extra_tokens(
                    ";".to_string()
                        .spanned(Span::new(span_offset + *idx, span_offset + *idx + 1)),
                ));
            }
            let idx = *idx;
            let _ = char_indices.next();
            output.push(Token::new(
                TokenContents::Semicolon,
                Span::new(span_offset + idx, span_offset + idx + 1),
            ));
        } else if *c == '\n' || *c == '\r' {
            // If the next character is a newline, we're looking at an EOL (end of line) token.
            let idx = *idx;
            let _ = char_indices.next();
            // In `Whitespace` mode, newlines are skipped entirely.
            if newline_mode == NewlineMode::Normal {
                output.push(Token::new(
                    TokenContents::Eol,
                    Span::new(span_offset + idx, span_offset + idx + 1),
                ));
            }
        } else if *c == '#' {
            // If the next character is `#`, we're at the beginning of a line
            // comment. The comment continues until the next newline.
            let idx = *idx;
            // BUG FIX: pass the offset-adjusted position so comment spans are
            // consistent with every other token span produced here.
            let comment = parse_comment(&mut char_indices, span_offset + idx);
            let span = comment.span();
            output.push(Token::new(TokenContents::Comment(comment), span));
        } else if c.is_whitespace() {
            // If the next character is non-newline whitespace, skip it.
            let _ = char_indices.next();
        } else {
            // Otherwise, try to consume an unclassified token.
            let (result, err) = baseline(&mut char_indices, span_offset);
            if error.is_none() {
                error = err;
            }
            is_complete = true;
            let Spanned { item, span } = result;
            output.push(Token::new(TokenContents::Baseline(item), span));
        }
    }

    (output, error)
}

View File

@ -0,0 +1,6 @@
pub mod lexer;
mod token_group;
pub mod tokens;
#[cfg(test)]
mod tests;

View File

@ -0,0 +1,508 @@
use nu_source::{Span, SpannedItem};
use super::lexer::*;
use super::tokens::*;
/// Shorthand for building a `Span` in the assertions below.
fn span(left: usize, right: usize) -> Span {
    Span::new(left, right)
}
mod bare {
use super::*;
// Span tests for bare, quoted, and bracketed tokens.
#[test]
fn simple_1() {
    let input = "foo bar baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    assert_eq!(result[0].span, span(0, 3));
}

#[test]
fn simple_2() {
    let input = "'foo bar' baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    // The quoted string lexes as one token, quotes included.
    assert_eq!(result[0].span, span(0, 9));
}

#[test]
fn simple_3() {
    let input = "'foo\" bar' baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    // A double quote inside single quotes is just a character.
    assert_eq!(result[0].span, span(0, 10));
}

#[test]
fn simple_4() {
    let input = "[foo bar] baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    // A bracketed form lexes as one token, brackets included.
    assert_eq!(result[0].span, span(0, 9));
}

#[test]
fn simple_5() {
    let input = "'foo 'bar baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    // `'foo '` and the adjoining `bar` form a single token.
    assert_eq!(result[0].span, span(0, 9));
}

#[test]
fn simple_6() {
    let input = "''foo baz";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    assert_eq!(result[0].span, span(0, 5));
}

#[test]
fn simple_7() {
    let input = "'' foo";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    assert_eq!(result[0].span, span(0, 2));
}

#[test]
fn simple_8() {
    let input = " '' foo";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    // Leading whitespace is skipped, so the span starts at 1.
    assert_eq!(result[0].span, span(1, 3));
}

#[test]
fn simple_9() {
    let input = " 'foo' foo";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    assert_eq!(result[0].span, span(1, 6));
}

#[test]
fn simple_10() {
    let input = "[foo, bar]";
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
    assert_eq!(result[0].span, span(0, 10));
}
#[test]
fn lex_comment() {
    let input = r#"
#A comment
def e [] {echo hi}
"#;
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());

    //result[0] == EOL
    assert_eq!(result[1].span, span(2, 11));
    assert_eq!(
        result[1].contents,
        // The span covers the comment body only, not the `#`.
        TokenContents::Comment(LiteComment::new(
            "A comment".to_string().spanned(Span::new(2, 11))
        ))
    );
}
#[test]
fn lex_multi_comments() {
    // The blank line before `#Another comment` is load-bearing: it accounts
    // for the extra EOL token (result[9]) and shifts the second comment's
    // body to bytes 33..48.
    let input = r#"
#A comment
def e [] {echo hi}

#Another comment
def e2 [] {echo hello}
"#;
    let (result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());

    let span1 = span(2, 11);
    assert_eq!(result[1].span, span1);
    assert_eq!(
        result[1].contents,
        TokenContents::Comment(LiteComment::new("A comment".to_string().spanned(span1)))
    );

    let span2 = span(33, 48);
    assert_eq!(result[9].span, span2);
    assert_eq!(
        result[9].contents,
        TokenContents::Comment(LiteComment::new(
            "Another comment".to_string().spanned(span2)
        ))
    );
}
// Comments inside a `{ … }` block must not open strings or delimiters.
#[test]
fn def_comment_with_single_quote() {
    let input = r#"def f [] {
# shouldn't return error
echo hi
}"#;
    let (_result, err) = lex(input, 0, NewlineMode::Normal);
    // The apostrophe in the comment must not start a string literal.
    assert!(err.is_none());
}

#[test]
fn def_comment_with_double_quote() {
    let input = r#"def f [] {
# should "not return error
echo hi
}"#;
    let (_result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
}

#[test]
fn def_comment_with_bracket() {
    let input = r#"def f [] {
# should not [return error
echo hi
}"#;
    let (_result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
}

#[test]
fn def_comment_with_curly_brace() {
    let input = r#"def f [] {
# should not return {error
echo hi
}"#;
    let (_result, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_none());
}
#[test]
fn ignore_future() {
    let input = "foo 'bar";
    let (result, _) = lex(input, 0, NewlineMode::Normal);
    // Tokens before the unterminated quote are still produced.
    assert_eq!(result[0].span, span(0, 3));
}

// Unterminated quotes report an EOF-style error.
#[test]
fn invalid_1() {
    let input = "'foo bar";
    let (_, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_some());
}

#[test]
fn invalid_2() {
    let input = "'bar";
    let (_, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_some());
}

#[test]
fn invalid_4() {
    let input = " 'bar";
    let (_, err) = lex(input, 0, NewlineMode::Normal);
    assert!(err.is_some());
}
}
mod lite_parse {
use nu_source::HasSpan;
use super::*;
#[test]
fn pipeline() {
    // `cmd1 | cmd2` is one pipeline; `deploy` (after `;`) is a second one.
    let (result, err) = lex("cmd1 | cmd2 ; deploy", 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.span(), span(0, 20));
    assert_eq!(result.block[0].pipelines[0].span(), span(0, 11));
    assert_eq!(result.block[0].pipelines[1].span(), span(14, 20));
}
#[test]
fn simple_1() {
    // A single bare word parses as one group/pipeline/command/part.
    let (result, err) = lex("foo", 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].parts[0].span,
        span(0, 3)
    );
}
#[test]
fn simple_offset() {
    // A non-zero span offset shifts the resulting spans by that amount.
    let (result, err) = lex("foo", 10, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].parts[0].span,
        span(10, 13)
    );
}
#[test]
fn incomplete_result() {
    // An unterminated double quote yields an EOF error, but the partial
    // parse still succeeds; the lexer appends the missing closing quote.
    let (result, err) = lex("my_command \"foo' --test", 10, NewlineMode::Normal);
    assert!(matches!(
        err.unwrap().reason(),
        nu_errors::ParseErrorReason::Eof { .. }
    ));
    let (result, _) = parse_block(result);

    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].parts[0].item,
        "my_command"
    );
    // Note the synthetic closing `"` appended to the unterminated token.
    assert_eq!(
        result.block[0].pipelines[0].commands[0].parts[1].item,
        "\"foo' --test\""
    );
}
#[test]
fn command_with_comment() {
    let code = r#"
# My echo
# * It's much better :)
def my_echo [arg] { echo $arg }
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());

    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4);
    // Both comments attach to the following `def` command.
    assert_eq!(
        result.block[0].pipelines[0].commands[0].comments,
        Some(vec![
            //Leading space is trimmed
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(2, 3)),
                "My echo".to_string().spanned(Span::new(3, 10))
            ),
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(12, 13)),
                "* It's much better :)"
                    .to_string()
                    .spanned(Span::new(13, 34))
            )
        ])
    );
}
#[test]
fn two_commands_with_comments() {
    // The blank line between the first `def` and `# My echo2` is
    // load-bearing: it separates the two groups and places the second
    // comment block's `#` at byte 68 (ws span 69..70, body 70..78).
    let code = r#"
# My echo
# * It's much better :)
def my_echo [arg] { echo $arg }

# My echo2
# * It's even better!
def my_echo2 [arg] { echo $arg }
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());

    assert_eq!(result.block.len(), 2);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].comments,
        Some(vec![
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(2, 3)),
                "My echo".to_string().spanned(Span::new(3, 10))
            ),
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(12, 13)),
                "* It's much better :)"
                    .to_string()
                    .spanned(Span::new(13, 34))
            )
        ])
    );

    assert_eq!(result.block[1].pipelines.len(), 1);
    assert_eq!(result.block[1].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[1].pipelines[0].commands[0].parts.len(), 4);
    assert_eq!(
        result.block[1].pipelines[0].commands[0].comments,
        Some(vec![
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(69, 70)),
                "My echo2".to_string().spanned(Span::new(70, 78))
            ),
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(80, 81)),
                "* It's even better!"
                    .to_string()
                    .spanned(Span::new(81, 100))
            )
        ])
    );
}
#[test]
fn discarded_comment() {
    // The blank line after the comment is essential: a comment followed by
    // two EOLs triggers the parser's `eoleol` event, which discards the
    // pending comment instead of attaching it to `echo`.
    let code = r#"
# This comment gets discarded, because of the following empty line

echo 42
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
    assert_eq!(result.block[0].pipelines[0].commands[0].comments, None);
}
#[test]
fn discarded_comment_multi_newline() {
    // Same as `discarded_comment`, but with multiple empty lines after the
    // comment; any run of two or more newlines discards pending comments.
    let code = r#"
# This comment gets discarded, because of the following empty line


echo 42
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
    assert_eq!(result.block[0].pipelines[0].commands[0].comments, None);
}
}
#[test]
fn no_discarded_white_space_start_of_comment() {
    // The first comment has no leading whitespace, so the comment block's
    // indent is 0 and the second comment's three leading spaces are kept.
    // The three spaces after `#` on the second line are load-bearing: the
    // expected span 61..95 covers exactly 34 characters.
    let code = r#"
#No white_space at first line ==> No white_space discarded
#   Starting space is not discarded
echo 42
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].comments,
        Some(vec![
            LiteComment::new(
                "No white_space at first line ==> No white_space discarded"
                    .to_string()
                    .spanned(Span::new(2, 59))
            ),
            LiteComment::new(
                "   Starting space is not discarded"
                    .to_string()
                    .spanned(Span::new(61, 95))
            ),
        ])
    );
}
#[test]
fn multiple_discarded_white_space_start_of_comment() {
    // The double spaces after `#` on the first and third comment lines are
    // load-bearing: the expected whitespace spans (2,4) and (40,42) each
    // have width 2, matching a two-space `"  "` item.
    let code = r#"
#  Discard 2 spaces
# Discard 1 space
#  Discard 2 spaces
echo 42
"#;
    let (result, err) = lex(code, 0, NewlineMode::Normal);
    assert!(err.is_none());
    let (result, err) = parse_block(result);
    assert!(err.is_none());
    assert_eq!(result.block.len(), 1);
    assert_eq!(result.block[0].pipelines.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
    assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
    assert_eq!(
        result.block[0].pipelines[0].commands[0].comments,
        Some(vec![
            LiteComment::new_with_ws(
                "  ".to_string().spanned(Span::new(2, 4)),
                "Discard 2 spaces".to_string().spanned(Span::new(4, 20))
            ),
            LiteComment::new_with_ws(
                " ".to_string().spanned(Span::new(22, 23)),
                "Discard 1 space".to_string().spanned(Span::new(23, 38))
            ),
            LiteComment::new_with_ws(
                "  ".to_string().spanned(Span::new(40, 42)),
                "Discard 2 spaces".to_string().spanned(Span::new(42, 58))
            ),
        ])
    );
}

View File

@ -0,0 +1,76 @@
use smart_default::SmartDefault;
use std::iter::FromIterator;
use derive_new::new;
use nu_source::{HasSpan, Span};
/// An accumulator for span-carrying items (command parts, comments,
/// pipelines, …). `None` means nothing has been pushed yet; `push` lazily
/// creates the vector and `take` drains it back to `None`.
#[derive(Debug, Clone, SmartDefault, new)]
pub struct TokenBuilder<T: HasSpan> {
    #[default(None)]
    contents: Option<Vec<T>>,
}
/// Converts the builder into a plain `Vec`, yielding an empty vector when
/// nothing was ever pushed.
impl<T> From<TokenBuilder<T>> for Vec<T>
where
    T: HasSpan,
{
    fn from(builder: TokenBuilder<T>) -> Self {
        builder.contents.unwrap_or_default()
    }
}
impl<T> HasSpan for TokenBuilder<T>
where
    T: HasSpan,
{
    /// The span covering everything pushed so far: from the start of the
    /// first item to the end of the last one.
    fn span(&self) -> Span {
        match &self.contents {
            Some(vec) => {
                let mut iter = vec.iter();
                // `last()` on the remaining iterator falls back to the head
                // when exactly one item was pushed.
                let head = iter.next();
                let last = iter.last().or(head);
                match (head, last) {
                    (Some(head), Some(last)) => Span::new(head.span().start(), last.span().end()),
                    // Unreachable in practice: `push` never stores an empty Vec.
                    _ => Span::default(),
                }
            }
            // NOTE(review): assumes `Span::default()` equals `Span::new(0, 0)`
            // so both empty branches agree — confirm in nu_source.
            None => Span::new(0, 0),
        }
    }
}
impl<T> TokenBuilder<T>
where
    T: HasSpan,
{
    /// True when nothing has been pushed (or the builder was drained).
    pub fn is_empty(&self) -> bool {
        self.contents.is_none()
    }

    /// Drains the accumulated items into a fresh builder, or returns `None`
    /// when there is nothing to take.
    pub fn take(&mut self) -> Option<TokenBuilder<T>> {
        let drained = self.contents.take()?;
        Some(TokenBuilder::new(Some(drained)))
    }

    /// Maps each accumulated item through `mapper` and collects the results;
    /// an empty builder collects from an empty iterator.
    pub fn map<I, U>(self, mapper: impl Fn(T) -> U) -> I
    where
        I: FromIterator<U>,
    {
        match self.contents {
            None => I::from_iter(None),
            Some(items) => items.into_iter().map(mapper).collect(),
        }
    }

    /// Appends an item, lazily creating the backing vector on first use.
    pub fn push(&mut self, item: T) {
        self.contents.get_or_insert_with(Vec::new).push(item)
    }
}

View File

@ -0,0 +1,218 @@
use derive_new::new;
use itertools::Itertools;
use std::fmt;
use nu_source::{HasSpan, Span, Spanned, SpannedItem};
use super::token_group::TokenBuilder;
#[derive(Debug, Clone, PartialEq)]
pub enum TokenContents {
    /// A baseline token is an atomic chunk of source code. This means that the
    /// token contains the entirety of string literals, as well as the entirety
    /// of sections delimited by paired delimiters.
    ///
    /// For example, if the token begins with `{`, the baseline token continues
    /// until the closing `}` (after taking comments and string literals into
    /// consideration).
    Baseline(String),
    /// A line comment (`# ...`), including its recorded leading whitespace.
    Comment(LiteComment),
    /// The `|` pipeline separator.
    Pipe,
    /// The `;` group separator.
    Semicolon,
    /// An end-of-line marker.
    Eol,
}
impl fmt::Display for TokenContents {
    /// Renders the token as it would appear in source, except that an
    /// end-of-line is shown as the two literal characters `\n`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            TokenContents::Baseline(base) => base.fmt(f),
            TokenContents::Comment(comm) => comm.fmt(f),
            TokenContents::Pipe => f.write_str("|"),
            TokenContents::Semicolon => f.write_str(";"),
            TokenContents::Eol => f.write_str("\\n"),
        }
    }
}
impl TokenContents {
    /// True only for the `Eol` (end-of-line) token.
    pub fn is_eol(&self) -> bool {
        matches!(self, Self::Eol)
    }
}
/// Builder for the words of a single command.
pub type CommandBuilder = TokenBuilder<Spanned<String>>;
/// Builder for a run of consecutive comments.
pub type CommentsBuilder = TokenBuilder<LiteComment>;
/// Builder for the commands of a pipeline (separated by `|`).
pub type PipelineBuilder = TokenBuilder<LiteCommand>;
/// Builder for the pipelines of a group (separated by `;`).
pub type GroupBuilder = TokenBuilder<PipelineBuilder>;
/// A LiteComment is a line comment. It begins with `#` and continues until (but not including) the
/// next newline.
///
/// It remembers any leading whitespace, which is used in later processing steps to strip off
/// leading whitespace for an entire comment block when it is associated with a definition.
#[derive(Debug, PartialEq, Clone)]
pub struct LiteComment {
    // Whitespace recorded between the `#` and the comment text, if any.
    leading_ws: Option<Spanned<String>>,
    // The comment text itself (without the `#` and the leading whitespace).
    rest: Spanned<String>,
}
impl LiteComment {
    /// Creates a comment with no recorded leading whitespace.
    pub fn new(string: impl Into<Spanned<String>>) -> LiteComment {
        LiteComment {
            leading_ws: None,
            rest: string.into(),
        }
    }

    /// Creates a comment that recorded `ws` between the `#` and the text.
    pub fn new_with_ws(
        ws: impl Into<Spanned<String>>,
        comment: impl Into<Spanned<String>>,
    ) -> LiteComment {
        LiteComment {
            leading_ws: Some(ws.into()),
            rest: comment.into(),
        }
    }

    /// Strips exactly `excluded_spaces` characters of leading whitespace;
    /// any remaining leading whitespace is folded into the comment text.
    /// Used to remove a uniform indent from an entire comment block.
    pub fn unindent(&self, excluded_spaces: usize) -> LiteComment {
        match &self.leading_ws {
            // If there's no leading whitespace, there's no whitespace to exclude
            None => self.clone(),
            Some(Spanned { item, span }) => {
                // If the number of spaces to exclude is larger than the amount of whitespace we
                // have, there's no whitespace to move into the comment body.
                if excluded_spaces > item.len() {
                    self.clone()
                } else {
                    // If there are no spaces to exclude, prepend all of the leading_whitespace to
                    // the comment body.
                    if excluded_spaces == 0 {
                        let rest_span = self.span();
                        let rest = format!("{}{}", item, self.rest.item).spanned(rest_span);
                        return LiteComment {
                            leading_ws: None,
                            rest,
                        };
                    }
                    // Pull off excluded_spaces number of spaces, and create a new Spanned<String>
                    // for that whitespace. Any remaining spaces will be added to the comment.
                    let excluded_ws = item[..excluded_spaces]
                        .to_string()
                        .spanned(Span::new(span.start(), span.start() + excluded_spaces));
                    let included_ws = &item[excluded_spaces..];
                    let rest_start = span.start() + excluded_spaces;
                    // NOTE(review): the new span's length is only
                    // `self.rest.len()` even though the text now also carries
                    // `included_ws` — confirm whether callers rely on this.
                    let rest_span = Span::new(rest_start, rest_start + self.rest.len());
                    let rest = format!("{}{}", included_ws, self.rest.item).spanned(rest_span);
                    LiteComment {
                        leading_ws: Some(excluded_ws),
                        rest,
                    }
                }
            }
        }
    }

    /// Length, in bytes, of the recorded leading whitespace.
    pub fn ws_len(&self) -> usize {
        match &self.leading_ws {
            None => 0,
            Some(ws) => ws.item.len(),
        }
    }

    /// The comment text with surrounding whitespace trimmed; the returned
    /// span keeps the original start and shortens the end to the trimmed
    /// length.
    pub(crate) fn trim(&self) -> Spanned<String> {
        let trimmed = self.rest.trim();
        trimmed.to_string().spanned(Span::new(
            self.rest.span().start(),
            self.rest.span().start() + trimmed.len(),
        ))
    }
}
impl fmt::Display for LiteComment {
    /// Reconstructs the comment source: `#`, then any recorded leading
    /// whitespace, then the comment text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "#")?;
        if let Some(leading) = &self.leading_ws {
            write!(f, "{}", leading.item)?;
        }
        write!(f, "{}", self.rest.item)
    }
}
impl HasSpan for LiteComment {
    /// Span from the start of the leading whitespace (when recorded) through
    /// the end of the comment text.
    fn span(&self) -> Span {
        self.leading_ws
            .as_ref()
            .map(|leading| leading.span().until(self.rest.span()))
            .unwrap_or_else(|| self.rest.span())
    }
}
/// A `LiteCommand` is a list of words that will get meaning when processed by
/// the parser.
#[derive(Debug, Default, Clone)]
pub struct LiteCommand {
    /// The whitespace-separated words making up the command.
    pub parts: Vec<Spanned<String>>,
    /// Preceding comments.
    pub comments: Option<Vec<LiteComment>>,
}
impl HasSpan for LiteCommand {
    /// Span covering all of the command's parts.
    fn span(&self) -> Span {
        Span::from_list(&self.parts)
    }
}
impl LiteCommand {
    /// Joins all preceding comments (trimmed) with newlines; returns the
    /// empty string when the command has no comments.
    pub fn comments_joined(&self) -> String {
        self.comments
            .as_ref()
            .map(|text| text.iter().map(|s| s.trim().item).join("\n"))
            .unwrap_or_default()
    }
}
/// A `LitePipeline` is a series of `LiteCommand`s, separated by `|`.
#[derive(Debug, Clone, new)]
pub struct LitePipeline {
    pub commands: Vec<LiteCommand>,
}
impl HasSpan for LitePipeline {
    /// Span covering every command in the pipeline.
    fn span(&self) -> Span {
        Span::from_list(&self.commands)
    }
}
/// A `LiteGroup` is a series of `LitePipeline`s, separated by `;`.
#[derive(Debug, Clone, new)]
pub struct LiteGroup {
    pub pipelines: Vec<LitePipeline>,
}
impl From<GroupBuilder> for LiteGroup {
    /// Finalizes a group builder by converting each pipeline builder into a
    /// `LitePipeline`.
    fn from(group: GroupBuilder) -> Self {
        LiteGroup::new(group.map(|p| LitePipeline::new(p.into())))
    }
}
impl HasSpan for LiteGroup {
    /// Span covering every pipeline in the group.
    fn span(&self) -> Span {
        Span::from_list(&self.pipelines)
    }
}
/// A `LiteBlock` is a series of `LiteGroup`s, separated by newlines.
#[derive(Debug, Clone, new)]
pub struct LiteBlock {
    pub block: Vec<LiteGroup>,
}
impl HasSpan for LiteBlock {
    /// Span covering every group in the block.
    fn span(&self) -> Span {
        Span::from_list(&self.block)
    }
}

View File

@ -0,0 +1,15 @@
#[macro_use]
extern crate derive_new;
mod errors;
mod flag;
mod lex;
mod parse;
mod scope;
mod shapes;
pub use lex::lexer::{lex, parse_block, NewlineMode};
pub use lex::tokens::{LiteBlock, LiteCommand, LiteGroup, LitePipeline};
pub use parse::{classify_block, garbage, parse, parse_full_column_path, parse_math_expression};
pub use scope::ParserScope;
pub use shapes::shapes;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,157 @@
use std::sync::Arc;
use crate::{
lex::{lexer::NewlineMode, tokens::LiteCommand},
parse::{classify_block, util::trim_quotes},
};
use indexmap::IndexMap;
use nu_errors::ParseError;
use nu_protocol::hir::Block;
use nu_source::{HasSpan, Span, SpannedItem};
//use crate::errors::{ParseError, ParseResult};
use crate::lex::lexer::{lex, parse_block};
use crate::ParserScope;
use self::signature::parse_signature;
pub use self::signature::{lex_split_baseline_tokens_on, parse_parameter};
mod data_structs;
mod primitives;
mod signature;
mod tests;
/// Parses `def name [signature] {body}` and registers the parsed block in
/// `scope`.
///
/// At this point the prototype (name + signature) has already been put into
/// scope by `parse_definition_prototype`, so the body can reference adjacent
/// commands. Returns `Some(ParseError)` when the definition is malformed.
pub(crate) fn parse_definition(call: &LiteCommand, scope: &dyn ParserScope) -> Option<ParseError> {
    match call.parts.len() {
        4 => {
            if call.parts[0].item != "def" {
                return Some(ParseError::mismatch("definition", call.parts[0].clone()));
            }

            let name = trim_quotes(&call.parts[1].item);
            let (mut signature, err) = parse_signature(&name, &call.parts[2]);
            // The command's preceding comments become the definition's usage text.
            signature.usage = call.comments_joined();

            if err.is_some() {
                return err;
            };

            // The body must be a literal block `{ ... }`.
            let mut chars = call.parts[3].chars();
            match (chars.next(), chars.next_back()) {
                (Some('{'), Some('}')) => {
                    // We have a literal block
                    let string: String = chars.collect();

                    scope.enter_scope();

                    // Offset +1 skips the '{' so spans line up with the source.
                    let (tokens, err) =
                        lex(&string, call.parts[3].span.start() + 1, NewlineMode::Normal);
                    if err.is_some() {
                        // Fix: balance the enter_scope() above on this error path.
                        scope.exit_scope();
                        return err;
                    };
                    let (lite_block, err) = parse_block(tokens);
                    if err.is_some() {
                        // Fix: balance the enter_scope() above on this error path.
                        scope.exit_scope();
                        return err;
                    };

                    let (mut block, err) = classify_block(&lite_block, scope);
                    scope.exit_scope();

                    // Attach the parsed signature to the freshly created block.
                    if let Some(block) =
                        std::sync::Arc::<nu_protocol::hir::Block>::get_mut(&mut block)
                    {
                        block.params = signature;
                        block.params.name = name;
                    }

                    scope.add_definition(block);

                    err
                }
                _ => Some(ParseError::mismatch("body", call.parts[3].clone())),
            }
        }
        3 => Some(ParseError::general_error(
            "wrong shape. Expected: def name [signature] {body}",
            "expected definition body".to_string().spanned(Span::new(
                call.parts[2].span.end(),
                call.parts[2].span.end(),
            )),
        )),
        2 => Some(ParseError::general_error(
            "wrong shape. Expected: def name [signature] {body}",
            "expected definition parameters"
                .to_string()
                .spanned(Span::new(
                    call.parts[1].span.end(),
                    call.parts[1].span.end(),
                )),
        )),
        1 => Some(ParseError::general_error(
            "wrong shape. Expected: def name [signature] {body}",
            "expected definition name".to_string().spanned(Span::new(
                call.parts[0].span.end(),
                call.parts[0].span.end(),
            )),
        )),
        0 => Some(ParseError::general_error(
            "wrong shape. Expected: def name [signature] {body}",
            "expected 'def' keyword'".to_string().spanned(call.span()),
        )),
        // Lengths 0..=3 are all matched above (the original also carried an
        // unreachable `x if x < 4` arm here, now removed), so this catch-all
        // only sees more than four parts.
        _ => Some(ParseError::general_error(
            "extra arguments given. Expected: def name [signature] {body}.",
            "extra argument given"
                .to_string()
                .spanned(call.parts[4].span()),
        )),
    }
}
/// Registers a definition's prototype (name + signature) in `scope` with an
/// empty body, so sibling commands can reference it before its body is
/// parsed by `parse_definition`. Returns `Some(ParseError)` on failure.
pub(crate) fn parse_definition_prototype(
    call: &LiteCommand,
    scope: &dyn ParserScope,
) -> Option<ParseError> {
    // Fix: guard against an empty command — indexing parts[0] below would
    // panic otherwise.
    if call.parts.is_empty() {
        return Some(ParseError::general_error(
            "wrong shape. Expected: def name [signature] {body}",
            "expected 'def' keyword".to_string().spanned(call.span()),
        ));
    }
    // Shape check: `def name [signature] {body}` is exactly four parts.
    if call.parts.len() != 4 {
        return Some(ParseError::mismatch("definition", call.parts[0].clone()));
    }
    if call.parts[0].item != "def" {
        return Some(ParseError::mismatch("definition", call.parts[0].clone()));
    }

    let name = trim_quotes(&call.parts[1].item);
    let (signature, err) = parse_signature(&name, &call.parts[2]);

    // Register a placeholder block carrying only the signature; the body is
    // filled in later by `parse_definition`.
    scope.add_definition(Arc::new(Block::new(
        signature,
        vec![],
        IndexMap::new(),
        call.span(),
    )));

    err
}

View File

@ -0,0 +1,45 @@
use nu_protocol::{NamedType, PositionalType, SyntaxShape};
use nu_source::Span;
pub type Description = String;
/// A positional parameter parsed from a definition signature, together with
/// its doc comment and source span.
#[derive(Clone, new)]
pub struct Parameter {
    pub pos_type: PositionalType,
    pub desc: Option<Description>,
    pub span: Span,
}
impl Parameter {
    /// Placeholder parameter returned when parsing fails with no input; its
    /// description asks the user to report the internal error.
    pub fn error() -> Parameter {
        let desc =
            "Wanted to parse a parameter, but no input present. Please report this error!";
        Parameter::new(
            PositionalType::optional("Internal Error", SyntaxShape::Any),
            Some(desc.to_string()),
            Span::unknown(),
        )
    }
}
/// A flag (`--name(-n): type`) parsed from a definition signature, together
/// with its doc comment and source span.
#[derive(Clone, Debug, new)]
pub struct Flag {
    /// The flag's long name, without the leading `--`.
    pub long_name: String,
    /// Switch (no argument) or Optional (typed argument), possibly with a
    /// one-character shorthand.
    pub named_type: NamedType,
    pub desc: Option<Description>,
    pub span: Span,
}
impl Flag {
    /// Placeholder flag returned when parsing fails with no input; its
    /// description asks the user to report the internal error.
    pub fn error() -> Flag {
        let desc = "Wanted to parse a flag, but no input present. Please report this error!";
        Flag::new(
            "Internal Error".to_string(),
            NamedType::Switch(None),
            Some(desc.to_string()),
            Span::unknown(),
        )
    }
}

View File

@ -0,0 +1,207 @@
///All of the functions in this mod parse only 1 Token per invocation.
///Therefore they are primitives
use crate::lex::{lexer::Token, tokens::TokenContents};
use crate::parse::util::token_to_spanned_string;
use nu_errors::ParseError;
use nu_protocol::SyntaxShape;
use nu_source::{Span, Spanned, SpannedItem};
/// Helper: true when `token` is a baseline token whose text equals `string`.
pub(crate) fn is_baseline_token_matching(token: &Token, string: &str) -> bool {
    matches!(&token.contents, TokenContents::Baseline(base) if base == string)
}
/// Consumes a leading `,` if present; returns (matched, tokens consumed).
pub(crate) fn parse_comma(tokens: &[Token]) -> (bool, usize) {
    fn is_comma(token: &Token) -> bool {
        is_baseline_token_matching(token, ",")
    }
    match tokens.first() {
        Some(token) if is_comma(token) => (true, 1),
        _ => (false, 0),
    }
}
/// Consumes a leading end-of-line token if present; returns (matched,
/// tokens consumed).
pub(crate) fn parse_eol(tokens: &[Token]) -> (bool, usize) {
    match tokens.first() {
        Some(token) if token.contents.is_eol() => (true, 1),
        _ => (false, 0),
    }
}
/// Consumes a leading comment token if present; returns its trimmed text
/// and the number of tokens consumed.
pub(crate) fn parse_optional_comment(tokens: &[Token]) -> (Option<String>, usize) {
    if let Some(token) = tokens.first() {
        if let TokenContents::Comment(comment) = &token.contents {
            return (Some(comment.trim().to_string()), 1);
        }
    }
    (None, 0)
}
/// Returns true (and a consumed-count of 1) when `token` is the `?`
/// optional-parameter modifier.
pub(crate) fn parse_optional_parameter_optional_modifier(token: &Token) -> (bool, usize) {
    let is_modifier = is_baseline_token_matching(token, "?");
    (is_modifier, if is_modifier { 1 } else { 0 })
}
/// Parses an optional parenthesized shorthand like `(-f)` following a long
/// flag.
///
/// Returns `(shorthand char, tokens consumed, error)`. When the next token
/// is not a `(...)` baseline token, nothing is consumed and no error is
/// raised. A malformed shorthand (wrong dash count, name longer than one
/// character) still consumes the token but reports an error.
pub(crate) fn parse_flag_optional_shortform(
    tokens: &[Token],
) -> (Option<char>, usize, Option<ParseError>) {
    if tokens.is_empty() {
        return (None, 0, None);
    }
    let token = &tokens[0];
    return if let TokenContents::Baseline(shortform) = &token.contents {
        let mut chars = shortform.chars();
        match (chars.next(), chars.next_back()) {
            (Some('('), Some(')')) => {
                let mut err = None;
                // Span of the content between the parentheses.
                let flag_span = Span::new(
                    token.span.start() + 1, //Skip '('
                    token.span.end() - 1,   // Skip ')'
                );
                let c: String = chars.collect();
                // A well-formed shorthand is exactly one '-' followed by one
                // character; both checks below report the first violation.
                let dash_count = c.chars().take_while(|c| *c == '-').count();
                err = err
                    .or_else(|| err_on_too_many_dashes(dash_count, c.clone().spanned(flag_span)));
                let name = &c[dash_count..];
                err = err.or_else(|| err_on_name_too_long(name, c.clone().spanned(flag_span)));
                let c = name.chars().next();
                (c, 1, err)
            }
            _ => (None, 0, None),
        }
    } else {
        (None, 0, None)
    };
    // Exactly one leading dash is required.
    fn err_on_too_many_dashes(dash_count: usize, actual: Spanned<String>) -> Option<ParseError> {
        match dash_count {
            0 => {
                //If no starting -
                Some(ParseError::mismatch("Shortflag starting with '-'", actual))
            }
            1 => None,
            _ => {
                //If --
                Some(ParseError::mismatch(
                    "Shortflag starting with a single '-'",
                    actual,
                ))
            }
        }
    }
    // The shorthand name must be a single character.
    fn err_on_name_too_long(name: &str, actual: Spanned<String>) -> Option<ParseError> {
        if name.len() != 1 {
            Some(ParseError::mismatch(
                "Shortflag of exactly 1 character",
                actual,
            ))
        } else {
            None
        }
    }
}
/// Parses a longform flag name, stripping the leading `--`. Reports a
/// mismatch error when the token is not a baseline starting with `--`.
pub(crate) fn parse_flag_name(token: &Token) -> (Spanned<String>, Option<ParseError>) {
    match &token.contents {
        //Discard preceding --
        TokenContents::Baseline(name) if name.starts_with("--") => {
            (name[2..].to_string().spanned(token.span), None)
        }
        TokenContents::Baseline(name) => (
            name.clone().spanned(token.span),
            Some(ParseError::mismatch(
                "longform of a flag (Starting with --)",
                token_to_spanned_string(token),
            )),
        ),
        _ => (
            "".to_string().spanned_unknown(),
            Some(ParseError::mismatch(
                "longform of a flag (Starting with --)",
                token_to_spanned_string(token),
            )),
        ),
    }
}
/// Parses a parameter name from a baseline token; any other token kind
/// yields an "InternalError" placeholder name plus a mismatch error.
pub(crate) fn parse_param_name(token: &Token) -> (Spanned<String>, Option<ParseError>) {
    match &token.contents {
        TokenContents::Baseline(name) => (name.clone().spanned(token.span), None),
        _ => (
            "InternalError".to_string().spanned(token.span),
            Some(ParseError::mismatch(
                "parameter name",
                token_to_spanned_string(token),
            )),
        ),
    }
}
/// Maps a type-name token (`int`, `string`, `path`, ...) to its
/// `SyntaxShape`. Unknown names and non-baseline tokens report a mismatch
/// error and fall back to `SyntaxShape::Any`.
pub fn parse_type_token(type_: &Token) -> (SyntaxShape, Option<ParseError>) {
    match &type_.contents {
        TokenContents::Baseline(type_str) => match type_str.as_str() {
            "int" => (SyntaxShape::Int, None),
            "string" => (SyntaxShape::String, None),
            "path" => (SyntaxShape::FilePath, None),
            "table" => (SyntaxShape::Table, None),
            "duration" => (SyntaxShape::Duration, None),
            "filesize" => (SyntaxShape::Filesize, None),
            "number" => (SyntaxShape::Number, None),
            "pattern" => (SyntaxShape::GlobPattern, None),
            "range" => (SyntaxShape::Range, None),
            "block" => (SyntaxShape::Block, None),
            "any" => (SyntaxShape::Any, None),
            _ => (
                SyntaxShape::Any,
                Some(ParseError::mismatch("type", token_to_spanned_string(type_))),
            ),
        },
        _ => (
            SyntaxShape::Any,
            Some(ParseError::mismatch("type", token_to_spanned_string(type_))),
        ),
    }
}
/// Parses a rest-parameter name, stripping the leading `...`. Anything else
/// yields an "InternalError" placeholder plus a mismatch error.
pub(crate) fn parse_rest_name(name_token: &Token) -> (Spanned<String>, Option<ParseError>) {
    fn parse_rest_name_err(token: &Token) -> ParseError {
        ParseError::mismatch("...rest", token_to_spanned_string(token))
    }
    if let TokenContents::Baseline(name) = &name_token.contents {
        if let Some(var_name) = name.strip_prefix("...") {
            return (var_name.to_string().spanned(name_token.span), None);
        }
    }
    (
        "InternalError".to_string().spanned(name_token.span),
        Some(parse_rest_name_err(name_token)),
    )
}

View File

@ -0,0 +1,460 @@
///This module contains functions to parse the parameter and flag list (signature)
///Such a signature can be of the following format:
/// [ (parameter | flag | rest_param | <eol>)* ]
///Where
///parameter is:
/// name (<:> type)? (<?>)? item_end
///flag is:
/// --name (-shortform)? (<:> type)? item_end
///rest is:
/// ...rest (<:> type)? item_end
///item_end:
/// (<,>)? (#Comment)? (<eol>)?
///
use log::debug;
use nu_errors::ParseError;
use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape};
use nu_source::{Span, Spanned};
use crate::lex::{
lexer::{lex, NewlineMode, Token},
tokens::TokenContents,
};
use super::{
data_structs::{Description, Flag, Parameter},
primitives::{
is_baseline_token_matching, parse_comma, parse_eol, parse_flag_name,
parse_flag_optional_shortform, parse_optional_comment,
parse_optional_parameter_optional_modifier, parse_param_name, parse_rest_name,
parse_type_token,
},
};
/// Parses a definition signature of the form
/// `[ (parameter | flag | ...rest | <eol>)* ]` into a `Signature` named
/// `name`.
///
/// `signature_vec` is the raw `[...]` source text; its span offsets all
/// token spans back into the original source. Returns the signature and the
/// first parse error encountered, if any.
pub fn parse_signature(
    name: &str,
    signature_vec: &Spanned<String>,
) -> (Signature, Option<ParseError>) {
    let mut err = None;
    // The signature must be delimited by '[' and ']'.
    let mut chars = signature_vec.chars();
    match (chars.next(), chars.next_back()) {
        (Some('['), Some(']')) => {}
        _ => {
            err = err.or_else(|| {
                Some(ParseError::mismatch(
                    "definition signature",
                    signature_vec.clone(),
                ))
            });
        }
    }
    let string: String = chars.collect();
    debug!(
        "signature vec span start: {}",
        signature_vec.span.start() + 1
    );
    // +1 skips the '[' so token spans line up with the original source.
    let (tokens, error) = lex(
        &string,
        signature_vec.span.start() + 1,
        NewlineMode::Whitespace,
    );
    err = err.or(error);
    //After normal lexing, tokens also need to be split on ',' and ':'
    //TODO this could probably be all done in a specialized lexing function
    let tokens = lex_split_baseline_tokens_on(tokens, &[',', ':', '?']);
    let tokens = lex_split_shortflag_from_longflag(tokens);
    debug!("Tokens are {:?}", tokens);
    let mut parameters = vec![];
    let mut flags = vec![];
    let mut rest = None;
    let mut i = 0;
    // Dispatch each item to the flag / rest / parameter sub-parser based on
    // its leading characters; each sub-parser reports how far it advanced.
    while i < tokens.len() {
        if tokens[i].contents.is_eol() {
            //Skip leading eol
            i += 1;
        } else if is_flag(&tokens[i]) {
            let (flag, advanced_by, error) = parse_flag(&tokens[i..], signature_vec);
            err = err.or(error);
            i += advanced_by;
            flags.push(flag);
        } else if is_rest(&tokens[i]) {
            let (rest_, advanced_by, error) = parse_rest(&tokens[i..], signature_vec);
            err = err.or(error);
            i += advanced_by;
            rest = rest_;
        } else {
            let (parameter, advanced_by, error) = parse_parameter(&tokens[i..], signature_vec.span);
            err = err.or(error);
            i += advanced_by;
            parameters.push(parameter);
        }
    }
    let signature = to_signature(name, parameters, flags, rest);
    debug!("Signature: {:?}", signature);
    (signature, err)
}
/// Parses a single positional parameter:
/// `name (<:> type)? (<?>)? item_end`
///
/// Returns the parameter, the number of tokens consumed, and any error.
/// `span` is only used to report an unexpected end of input.
pub fn parse_parameter(tokens: &[Token], span: Span) -> (Parameter, usize, Option<ParseError>) {
    if tokens.is_empty() {
        //TODO fix span
        return (
            Parameter::error(),
            0,
            Some(ParseError::unexpected_eof("parameter", span)),
        );
    }
    let mut err: Option<ParseError> = None;
    let mut i = 0;
    let mut type_ = SyntaxShape::Any;
    let mut comment = None;
    let mut optional = false;
    let (name, error) = parse_param_name(&tokens[0]);
    i += 1;
    err = err.or(error);
    // Optional `?` marker directly after the name.
    if i < tokens.len() {
        let (parsed_opt_modifier, advanced_by) =
            parse_optional_parameter_optional_modifier(&tokens[i]);
        optional = parsed_opt_modifier;
        i += advanced_by;
    }
    // Optional `: type` annotation; defaults to `any`.
    if i < tokens.len() {
        let (parsed_type_, advanced_by, error) = parse_optional_type(&tokens[i..]);
        type_ = parsed_type_.unwrap_or(SyntaxShape::Any);
        err = err.or(error);
        i += advanced_by;
    }
    // Optional trailing `,`, doc comment and end-of-line.
    if i < tokens.len() {
        let (comment_text, advanced_by, error) = parse_signature_item_end(&tokens[i..]);
        comment = comment_text;
        i += advanced_by;
        err = err.or(error);
    }
    // Parameter names are normalized to start with `$`.
    let pos_type = if optional {
        if name.item.starts_with('$') {
            PositionalType::optional(&name.item, type_)
        } else {
            PositionalType::optional(&format!("${}", name.item), type_)
        }
    } else if name.item.starts_with('$') {
        PositionalType::mandatory(&name.item, type_)
    } else {
        PositionalType::mandatory(&format!("${}", name.item), type_)
    };
    let parameter = Parameter::new(pos_type, comment, name.span);
    debug!(
        "Parsed parameter: {} with shape {:?}",
        parameter.pos_type.name(),
        parameter.pos_type.syntax_type()
    );
    (parameter, i, err)
}
/// Parses a single flag:
/// `--name (-shortform)? (<:> type)? item_end`
///
/// A flag without a type annotation is a switch; with one it is an optional
/// flag. Returns the flag, the number of tokens consumed, and any error.
fn parse_flag(
    tokens: &[Token],
    tokens_as_str: &Spanned<String>,
) -> (Flag, usize, Option<ParseError>) {
    if tokens.is_empty() {
        return (
            Flag::error(),
            0,
            Some(ParseError::unexpected_eof("parameter", tokens_as_str.span)),
        );
    }
    let mut err: Option<ParseError> = None;
    let mut i = 0;
    let mut shortform = None;
    let mut type_ = None;
    let mut comment = None;
    let (name, error) = parse_flag_name(&tokens[0]);
    err = err.or(error);
    i += 1;
    // Optional `(-x)` shorthand.
    if i < tokens.len() {
        let (parsed_shortform, advanced_by, error) = parse_flag_optional_shortform(&tokens[i..]);
        shortform = parsed_shortform;
        i += advanced_by;
        err = err.or(error);
    }
    // Optional `: type` annotation.
    if i < tokens.len() {
        let (parsed_type, advanced_by, error) = parse_optional_type(&tokens[i..]);
        type_ = parsed_type;
        i += advanced_by;
        err = err.or(error);
    }
    // Optional trailing `,`, doc comment and end-of-line.
    if i < tokens.len() {
        let (parsed_comment, advanced_by, error) = parse_signature_item_end(&tokens[i..]);
        comment = parsed_comment;
        i += advanced_by;
        err = err.or(error);
    }
    //If no type is given, the flag is a switch. Otherwise its optional
    //Example:
    //--verbose(-v) # Switch
    //--output(-o): path # Optional flag
    let named_type = if let Some(shape) = type_ {
        NamedType::Optional(shortform, shape)
    } else {
        NamedType::Switch(shortform)
    };
    let flag = Flag::new(name.item.clone(), named_type, comment, name.span);
    debug!("Parsed flag: {:?}", flag);
    (flag, i, err)
}
/// Parses a rest parameter:
/// `...rest (<:> type)? item_end`
///
/// Returns `(name, type, doc comment)` on success, plus the number of
/// tokens consumed and any error.
fn parse_rest(
    tokens: &[Token],
    tokens_as_str: &Spanned<String>,
) -> (
    Option<(String, SyntaxShape, Description)>,
    usize,
    Option<ParseError>,
) {
    if tokens.is_empty() {
        return (
            None,
            0,
            Some(ParseError::unexpected_eof(
                "rest argument",
                tokens_as_str.span,
            )),
        );
    }
    let mut err = None;
    let mut i = 0;
    let mut type_ = SyntaxShape::Any;
    let mut comment = "".to_string();
    let (name, error) = parse_rest_name(&tokens[i]);
    err = err.or(error);
    i += 1;
    // Optional `: type` annotation; defaults to `any`.
    if i < tokens.len() {
        let (parsed_type, advanced_by, error) = parse_optional_type(&tokens[i..]);
        err = err.or(error);
        i += advanced_by;
        type_ = parsed_type.unwrap_or(SyntaxShape::Any);
    }
    // Optional doc comment.
    if i < tokens.len() {
        let (parsed_comment, advanced_by) = parse_optional_comment(&tokens[i..]);
        i += advanced_by;
        comment = parsed_comment.unwrap_or_else(|| "".to_string());
    }
    (Some((name.item, type_, comment)), i, err)
}
/// Parses an optional `: type` annotation at the start of `tokens`.
///
/// Consumes nothing when no `:` is present. When a `:` is present, a type
/// token must follow or an unexpected-eof error is reported.
fn parse_optional_type(tokens: &[Token]) -> (Option<SyntaxShape>, usize, Option<ParseError>) {
    fn is_double_point(token: &Token) -> bool {
        is_baseline_token_matching(token, ":")
    }
    let mut err = None;
    let mut type_ = None;
    let mut i: usize = 0;
    //Check if a type has to follow
    if i < tokens.len() && is_double_point(&tokens[i]) {
        //Type has to follow
        if i + 1 == tokens.len() {
            err = err.or_else(|| Some(ParseError::unexpected_eof("type", tokens[i].span)));
        } else {
            //Jump over <:>
            i += 1;
            let (shape, error) = parse_type_token(&tokens[i]);
            err = err.or(error);
            type_ = Some(shape);
            i += 1;
        }
    }
    (type_, i, err)
}
///Parses the end of a flag or a parameter
/// (<,>)? (#Comment)? (<eol>)?
///
/// Returns the item's doc comment (if any), the number of tokens consumed,
/// and any error (currently always `None`, since separators are optional).
fn parse_signature_item_end(tokens: &[Token]) -> (Option<String>, usize, Option<ParseError>) {
    if tokens.is_empty() {
        //If no more tokens, parameter/flag doesn't need ',' or comment to be properly finished
        return (None, 0, None);
    }
    let mut i = 0;
    let err = None;
    // Each sub-parser consumes at most one token and reports whether it
    // matched; all three parts are independently optional.
    let (parsed_comma, advanced_by) = parse_comma(&tokens[i..]);
    i += advanced_by;
    let (comment, advanced_by) = parse_optional_comment(&tokens[i..]);
    i += advanced_by;
    let (parsed_eol, advanced_by) = parse_eol(&tokens[i..]);
    i += advanced_by;
    debug!(
        "Parsed comma {} and parsed eol {}",
        parsed_comma, parsed_eol
    );
    ////Separating flags/parameters is optional.
    ////If this should change, the below code would raise a warning whenever 2 parameters/flags are
    ////not delimited by <,> or <eol>
    //if there is next item, but it's not comma, then it must be Optional(#Comment) + <eof>
    //let parsed_delimiter = parsed_comma || parsed_eol;
    //if !parsed_delimiter && i < tokens.len() {
    //    //If not parsed , or eol but more tokens are coming
    //    err = err.or(Some(ParseError::mismatch(
    //        "Newline or ','",
    //        (token[i-1].to_string() + token[i].to_string()).spanned(token[i-1].span.until(token[i].span))
    //    )));
    //}
    (comment, i, err)
}
///Returns true if token potentially represents rest argument
fn is_rest(token: &Token) -> bool {
    matches!(&token.contents, TokenContents::Baseline(item) if item.starts_with("..."))
}
///True for short or longform flags. False otherwise
fn is_flag(token: &Token) -> bool {
    matches!(&token.contents, TokenContents::Baseline(item) if item.starts_with('-'))
}
/// Assembles the parsed parameters, flags and rest argument into a
/// `Signature` named `name`.
fn to_signature(
    name: &str,
    params: Vec<Parameter>,
    flags: Vec<Flag>,
    rest: Option<(String, SyntaxShape, Description)>,
) -> Signature {
    let mut sign = Signature::new(name);

    // Positional parameters keep their declaration order; a missing doc
    // comment becomes the empty string.
    sign.positional.extend(
        params
            .into_iter()
            .map(|param| (param.pos_type, param.desc.unwrap_or_default())),
    );

    // Flags are keyed by their long name.
    for flag in flags {
        sign.named.insert(
            flag.long_name,
            (flag.named_type, flag.desc.unwrap_or_default()),
        );
    }

    sign.rest_positional = rest;
    sign
}
//Currently the lexer does not split off baselines after existing text
//Example --flag(-f) is lexed as one baseline token.
//To properly parse the input, it is required that --flag and (-f) are 2 tokens.
/// Splits each baseline token at its first non-leading `(`, producing two
/// baseline tokens with correspondingly split spans. All other tokens pass
/// through unchanged.
fn lex_split_shortflag_from_longflag(tokens: Vec<Token>) -> Vec<Token> {
    let mut result = Vec::with_capacity(tokens.capacity());
    for token in tokens {
        let mut processed = false;
        if let TokenContents::Baseline(base) = &token.contents {
            if let Some(paren_start) = base.find('(') {
                if paren_start != 0 {
                    processed = true;
                    //If token contains '(' and '(' is not the first char,
                    //we split on '('
                    //Example: Baseline(--flag(-f)) results in: [Baseline(--flag), Baseline((-f))]
                    let paren_span_i = token.span.start() + paren_start;
                    result.push(Token::new(
                        TokenContents::Baseline(base[..paren_start].to_string()),
                        Span::new(token.span.start(), paren_span_i),
                    ));
                    result.push(Token::new(
                        TokenContents::Baseline(base[paren_start..].to_string()),
                        Span::new(paren_span_i, token.span.end()),
                    ));
                }
            }
        }
        // Tokens that were not split are passed through unchanged.
        if !processed {
            result.push(token);
        }
    }
    result
}
//Currently the lexer does not split baselines on ',' ':' '?'
//The parameter list requires this. Therefore here is a hacky method doing this.
/// Splits every baseline token on the given terminal characters, emitting
/// each terminal as its own one-character baseline token. Non-baseline
/// tokens pass through unchanged.
// NOTE(review): the loop indexes by char position while spans and
// `base.len()` are byte-based — equivalent for ASCII signatures; confirm
// before relying on this with multi-byte input.
pub fn lex_split_baseline_tokens_on(
    tokens: Vec<Token>,
    extra_baseline_terminal_tokens: &[char],
) -> Vec<Token> {
    debug!("Before lex fix up {:?}", tokens);
    // Builds up to two tokens: the accumulated text (if non-empty) and the
    // terminator character that ended it (if any).
    let make_new_token =
        |token_new: String, token_new_end: usize, terminator_char: Option<char>| {
            let end = token_new_end;
            let start = end - token_new.len();
            let mut result = vec![];
            //Only add token if its not empty
            if !token_new.is_empty() {
                result.push(Token::new(
                    TokenContents::Baseline(token_new),
                    Span::new(start, end),
                ));
            }
            //Insert terminator_char as baseline token
            if let Some(ch) = terminator_char {
                result.push(Token::new(
                    TokenContents::Baseline(ch.to_string()),
                    Span::new(end, end + 1),
                ));
            }
            result
        };
    let mut result = Vec::with_capacity(tokens.len());
    for token in tokens {
        match token.contents {
            TokenContents::Baseline(base) => {
                let token_offset = token.span.start();
                let mut current = "".to_string();
                for (i, c) in base.chars().enumerate() {
                    if extra_baseline_terminal_tokens.contains(&c) {
                        result.extend(make_new_token(current, i + token_offset, Some(c)));
                        current = "".to_string();
                    } else {
                        current.push(c);
                    }
                }
                // Flush whatever remains after the last terminator.
                result.extend(make_new_token(current, base.len() + token_offset, None));
            }
            _ => result.push(token),
        }
    }
    result
}

View File

@ -0,0 +1,409 @@
#[allow(unused_imports)]
use super::parse_signature;
#[allow(unused_imports)]
use nu_errors::ParseError;
#[allow(unused_imports)]
use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape};
#[allow(unused_imports)]
use nu_source::{Span, Spanned, SpannedItem};
#[allow(unused_imports)]
use nu_test_support::nu;
// Verifies `?` marks a parameter optional and `: type` assigns its shape.
#[test]
fn simple_def_with_params() {
    let name = "my_func";
    let sign = "[param1?: int, param2: string]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 27)));
    assert!(err.is_none());
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Optional("$param1".into(), SyntaxShape::Int),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::String),
                "".into()
            ),
        ]
    );
}
// `?` works with or without a preceding space and without a type annotation
// (the type then defaults to `any`).
#[test]
fn simple_def_with_optional_param_without_type() {
    let name = "my_func";
    let sign = "[param1 ?, param2?]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 27)));
    assert!(err.is_none());
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Optional("$param1".into(), SyntaxShape::Any),
                "".into()
            ),
            (
                PositionalType::Optional("$param2".into(), SyntaxShape::Any),
                "".into()
            ),
        ]
    );
}
// Per-parameter `# comment`s become the parameters' descriptions.
#[test]
fn simple_def_with_params_with_comment() {
    let name = "my_func";
    let sign = "[
    param1:path # My first param
    param2:number # My second param
    ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 64)));
    assert!(err.is_none());
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Mandatory("$param1".into(), SyntaxShape::FilePath),
                "My first param".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::Number),
                "My second param".into()
            ),
        ]
    );
}
// A parameter without a type defaults to `any`, even alongside typed ones.
#[test]
fn simple_def_with_params_without_type() {
    let name = "my_func";
    let sign = "[
    param1 # My first param
    param2:number # My second param
    ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 0)));
    assert!(err.is_none());
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Mandatory("$param1".into(), SyntaxShape::Any),
                "My first param".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::Number),
                "My second param".into()
            ),
        ]
    );
}
// Irregular-but-legal whitespace around `:` and `,` parses the same as the
// canonical spelling.
#[test]
fn oddly_but_correct_written_params() {
    let name = "my_func";
    let sign = "[
    param1 :int # param1
    param2 : number # My second param
    param4, param5:path , param6 # param6
    ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned(Span::new(0, 0)));
    assert!(err.is_none());
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Mandatory("$param1".into(), SyntaxShape::Int),
                "param1".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::Number),
                "My second param".into()
            ),
            (
                PositionalType::Mandatory("$param4".into(), SyntaxShape::Any),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param5".into(), SyntaxShape::FilePath),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param6".into(), SyntaxShape::Any),
                "param6".into()
            ),
        ]
    );
}
// A shortflag written with two dashes (`--f`) must be rejected.
#[test]
fn err_wrong_dash_count() {
    let actual = nu!(
        cwd: ".",
        "def f [ --flag(--f)] { echo hi }"
    );
    assert!(actual.err.contains("single '-'"));
}
// A shortflag written with no dash (`f`) must be rejected.
#[test]
fn err_wrong_dash_count2() {
    let actual = nu!(
        cwd: ".",
        "def f [ --flag(f)] { echo hi }"
    );
    assert!(actual.err.contains("'-'"));
}
// An unknown type name (`strig`) must produce a type mismatch error.
#[test]
fn err_wrong_type() {
    let actual = nu!(
        cwd: ".",
        "def f [ param1:strig ] { echo hi }"
    );
    assert!(actual.err.contains("type"));
}
/// Asserts that `sign` has a named flag `name` with the given type and doc
/// comment.
//For whatever reason, this gets reported as not used
#[allow(dead_code)]
fn assert_signature_has_flag(sign: &Signature, name: &str, type_: NamedType, comment: &str) {
    assert_eq!(
        Some((type_, comment.to_string())),
        sign.named.get(name).cloned()
    );
}
// Typed flags become Optional, untyped flags become Switch; shorthands are
// captured from the `(-x)` group.
#[test]
fn simple_def_with_only_flags() {
    let name = "my_func";
    let sign = "[
    --list (-l) : path # First flag
    --verbose : number # Second flag
    --all(-a) # My switch
    ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(
        &sign,
        "list",
        NamedType::Optional(Some('l'), SyntaxShape::FilePath),
        "First flag",
    );
    assert_signature_has_flag(
        &sign,
        "verbose",
        NamedType::Optional(None, SyntaxShape::Number),
        "Second flag",
    );
    assert_signature_has_flag(&sign, "all", NamedType::Switch(Some('a')), "My switch");
}
// Parameters and flags may be freely interleaved; parameters keep their
// declaration order while flags are collected by name.
#[test]
fn simple_def_with_params_and_flags() {
    let name = "my_func";
    let sign = "[
    --list (-l) : path # First flag
    param1, param2:table # Param2 Doc
    --verbose # Second flag
    param3 : number,
    --flag3 # Third flag
    param4 ?: table # Optional Param
    ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(
        &sign,
        "list",
        NamedType::Optional(Some('l'), SyntaxShape::FilePath),
        "First flag",
    );
    assert_signature_has_flag(&sign, "verbose", NamedType::Switch(None), "Second flag");
    assert_signature_has_flag(&sign, "flag3", NamedType::Switch(None), "Third flag");
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Mandatory("$param1".into(), SyntaxShape::Any),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::Table),
                "Param2 Doc".into()
            ),
            (
                PositionalType::Mandatory("$param3".into(), SyntaxShape::Number),
                "".into()
            ),
            (
                PositionalType::Optional("$param4".into(), SyntaxShape::Table),
                "Optional Param".into()
            ),
        ]
    );
}
#[test]
fn simple_def_with_parameters_and_flags_no_delimiter() {
    // Parameters and flags may be separated by whitespace alone (no commas),
    // even on the same line.
    let name = "my_func";
    let sign = "[ param1:int param2
        --force (-f) param3 # Param3
        ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(&sign, "force", NamedType::Switch(Some('f')), "");
    // The trailing comment attaches to param3, the last item on its line.
    assert_eq!(
        sign.positional,
        vec![
            (
                PositionalType::Mandatory("$param1".into(), SyntaxShape::Int),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param2".into(), SyntaxShape::Any),
                "".into()
            ),
            (
                PositionalType::Mandatory("$param3".into(), SyntaxShape::Any),
                "Param3".into()
            ),
        ]
    );
}
#[test]
fn simple_example_signature() {
    // A representative signature: one mandatory typed positional plus two typed
    // flags, all with doc comments.
    let name = "my_func";
    let sign = "[
        d:int # The required d parameter
        --x (-x):string # The all powerful x flag
        --y (-y):int # The accompanying y flag
        ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(
        &sign,
        "x",
        NamedType::Optional(Some('x'), SyntaxShape::String),
        "The all powerful x flag",
    );
    assert_signature_has_flag(
        &sign,
        "y",
        NamedType::Optional(Some('y'), SyntaxShape::Int),
        "The accompanying y flag",
    );
    // Positional names are recorded with a leading `$`.
    assert_eq!(
        sign.positional,
        vec![(
            PositionalType::Mandatory("$d".into(), SyntaxShape::Int),
            "The required d parameter".into()
        )]
    );
}
#[test]
fn flag_withouth_space_between_longname_shortname() {
    // The shorthand may directly follow the long name, with no space before "(-x)".
    let signature_text = "[
        --xxx(-x):string # The all powerful x flag
        ]";
    let (sign, err) = parse_signature("my_func", &signature_text.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(
        &sign,
        "xxx",
        NamedType::Optional(Some('x'), SyntaxShape::String),
        "The all powerful x flag",
    );
}
#[test]
fn simple_def_with_rest_arg() {
    // A bare rest parameter defaults to the `Any` shape with an empty description.
    let (sign, err) = parse_signature("my_func", &"[ ...rest]".to_string().spanned_unknown());
    assert!(err.is_none());
    let expected = Some(("rest".to_string(), SyntaxShape::Any, "".to_string()));
    assert_eq!(sign.rest_positional, expected);
}
#[test]
fn simple_def_with_rest_arg_other_name() {
    // A rest parameter may have any name and still carry a type and doc comment.
    let signature_text = "[ ...paths:path # A pathological test]";
    let (sign, err) = parse_signature("my_func", &signature_text.to_string().spanned_unknown());
    assert!(err.is_none());
    let expected = Some((
        "paths".to_string(),
        SyntaxShape::FilePath,
        "A pathological test".to_string(),
    ));
    assert_eq!(sign.rest_positional, expected);
}
#[test]
fn simple_def_with_rest_arg_with_type_and_comment() {
    // Both the type annotation and the doc comment of a rest arg are recorded.
    let signature_text = "[ ...rest:path # My super cool rest arg]";
    let (sign, err) = parse_signature("my_func", &signature_text.to_string().spanned_unknown());
    assert!(err.is_none());
    let expected = Some((
        "rest".to_string(),
        SyntaxShape::FilePath,
        "My super cool rest arg".to_string(),
    ));
    assert_eq!(sign.rest_positional, expected);
}
#[test]
fn simple_def_with_param_flag_and_rest() {
    // Combines every parameter kind: a typed positional, a switch, a typed flag,
    // and a typed rest argument, each with a doc comment.
    let name = "my_func";
    let sign = "[
        d:string # The required d parameter
        --xxx(-x) # The all powerful x flag
        --yyy (-y):int # The accompanying y flag
        ...rest:table # Another rest
        ]";
    let (sign, err) = parse_signature(name, &sign.to_string().spanned_unknown());
    assert!(err.is_none());
    assert_signature_has_flag(
        &sign,
        "xxx",
        NamedType::Switch(Some('x')),
        "The all powerful x flag",
    );
    assert_signature_has_flag(
        &sign,
        "yyy",
        NamedType::Optional(Some('y'), SyntaxShape::Int),
        "The accompanying y flag",
    );
    assert_eq!(
        sign.positional,
        vec![(
            PositionalType::Mandatory("$d".into(), SyntaxShape::String),
            "The required d parameter".into()
        )]
    );
    // The rest argument keeps its declared name, shape, and comment.
    assert_eq!(
        sign.rest_positional,
        Some((
            "rest".to_string(),
            SyntaxShape::Table,
            "Another rest".to_string()
        ))
    );
}

View File

@ -0,0 +1,100 @@
use crate::{lex::tokens::LiteCommand, ParserScope};
use nu_errors::{ArgumentError, ParseError};
use nu_path::{canonicalize, canonicalize_with};
use nu_protocol::hir::{Expression, InternalCommand};
use std::path::Path;
use nu_source::SpannedItem;
/// Validates a `source` command invocation and loads/parses the referenced file.
///
/// Expects exactly two parts: the `source` keyword itself and a literal file
/// path. Returns the first parse error encountered while loading the file, or
/// `Ok(())` when the sourced file parses cleanly.
pub fn parse_source_internal(
    lite_cmd: &LiteCommand,
    command: &InternalCommand,
    scope: &dyn ParserScope,
) -> Result<(), ParseError> {
    // NOTE(review): `parts[0]` assumes the command name token is always present;
    // this would panic on an empty `parts` — confirm callers guarantee it.
    if lite_cmd.parts.len() != 2 {
        return Err(ParseError::argument_error(
            lite_cmd.parts[0].clone(),
            ArgumentError::MissingMandatoryPositional("a path for sourcing".into()),
        ));
    }
    // Sourcing is resolved at parse time, so the path must be a constant;
    // anything starting with `$` (a variable) is rejected.
    if lite_cmd.parts[1].item.starts_with('$') {
        return Err(ParseError::mismatch(
            "a filepath constant",
            lite_cmd.parts[1].clone(),
        ));
    }
    // look for source files in lib dirs first
    // if no files are found, try the current path
    // first file found wins.
    find_source_file(lite_cmd, command, scope)
}
/// Resolves the path of a `source` command and parses the file's contents.
///
/// Search order: the file is first looked up relative to each directory in the
/// user's `lib_dirs` configuration; the first readable match wins. If none is
/// found, the path is resolved against the current directory.
fn find_source_file(
    lite_cmd: &LiteCommand,
    command: &InternalCommand,
    scope: &dyn ParserScope,
) -> Result<(), ParseError> {
    // Prefer the already-classified FilePath expression; otherwise fall back to
    // the raw token text of the lite command.
    // NOTE(review): `positional_args[0]` assumes at least one positional exists
    // whenever `positional` is Some — confirm the parser guarantees this.
    let (file, file_span) = if let Some(ref positional_args) = command.args.positional {
        if let Expression::FilePath(ref p) = positional_args[0].expr {
            (p.as_path(), &positional_args[0].span)
        } else {
            (Path::new(&lite_cmd.parts[1].item), &lite_cmd.parts[1].span)
        }
    } else {
        (Path::new(&lite_cmd.parts[1].item), &lite_cmd.parts[1].span)
    };
    // Read the `lib_dirs` list from the user's configuration, if present.
    // Entries that are not valid strings surface as Err and are skipped below.
    let lib_dirs = nu_data::config::config(nu_source::Tag::unknown())
        .ok()
        .as_ref()
        .map(|configuration| match configuration.get("lib_dirs") {
            Some(paths) => paths
                .table_entries()
                .cloned()
                .map(|path| path.as_string())
                .collect(),
            None => vec![],
        });
    // Try the file relative to each lib dir; the first readable file is parsed.
    if let Some(dir) = lib_dirs {
        for lib_path in dir.into_iter().flatten() {
            let path = if let Ok(p) = canonicalize_with(&file, lib_path) {
                p
            } else {
                continue;
            };
            if let Ok(contents) = std::fs::read_to_string(&path) {
                return parse(&contents, 0, scope);
            }
        }
    }
    // Fall back to resolving the path against the current directory.
    let path = canonicalize(&file).map_err(|e| {
        ParseError::general_error(
            format!("Can't load source file. Reason: {}", e),
            "Can't load this file".spanned(file_span),
        )
    })?;
    let contents = std::fs::read_to_string(&path);
    match contents {
        Ok(contents) => parse(&contents, 0, scope),
        Err(e) => Err(ParseError::general_error(
            format!("Can't load source file. Reason: {}", e),
            "Can't load this file".spanned(file_span),
        )),
    }
}
/// Parses `input` in the given scope, keeping only the parse error (if any)
/// and discarding the parsed block itself.
pub fn parse(input: &str, span_offset: usize, scope: &dyn ParserScope) -> Result<(), ParseError> {
    match super::parse(input, span_offset, scope) {
        (_, Some(parse_error)) => Err(parse_error),
        (_, None) => Ok(()),
    }
}

View File

@ -0,0 +1,46 @@
use nu_errors::ParseError;
use nu_protocol::hir::{Expression, SpannedExpression};
use nu_source::{Span, Spanned, SpannedItem};
use crate::lex::lexer::Token;
/// Returns the token's raw text paired with the token's source span.
pub(crate) fn token_to_spanned_string(token: &Token) -> Spanned<String> {
    token.contents.to_string().spanned(token.span)
}
/// Easy shorthand function to create a garbage expression at the given span.
/// Garbage stands in for unparseable input so parsing can continue and report
/// errors instead of aborting on the first bad token.
pub fn garbage(span: Span) -> SpannedExpression {
    SpannedExpression::new(Expression::Garbage, span)
}
/// Removes one matching pair of surrounding quotes (single or double) from
/// `input`. Input that is not wrapped in a matching pair — including a lone
/// quote character — is returned unchanged.
pub(crate) fn trim_quotes(input: &str) -> String {
    for &quote in &['\'', '"'] {
        let inner = input
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner.to_string();
        }
    }
    input.to_string()
}
/// Checks that `contents` is wrapped in the delimiters `left` and `right` and
/// returns the inner text. On a delimiter mismatch, returns an empty string
/// together with a mismatch parse error.
pub(crate) fn verify_and_strip(
    contents: &Spanned<String>,
    left: char,
    right: char,
) -> (String, Option<ParseError>) {
    let mut chars = contents.item.chars();
    let (first, last) = (chars.next(), chars.next_back());
    if first == Some(left) && last == Some(right) {
        // Delimiters matched: the remaining iterator is exactly the inner text.
        (chars.collect(), None)
    } else {
        let err = ParseError::mismatch(format!("value in {} {}", left, right), contents.clone());
        (String::new(), Some(err))
    }
}

View File

@ -0,0 +1,23 @@
use nu_protocol::hir::Block;
use nu_source::Spanned;
use std::{fmt::Debug, sync::Arc};
/// The scope the parser records definitions, aliases, and signatures into.
/// Kept abstract so the parser crate stays independent of the evaluator's
/// concrete scope implementation.
pub trait ParserScope: Debug {
    /// Looks up the signature of a command or custom definition by name.
    fn get_signature(&self, name: &str) -> Option<nu_protocol::Signature>;

    /// Returns true if a signature with the given name is known.
    fn has_signature(&self, name: &str) -> bool;

    /// Registers a parsed definition (custom command) in the current scope.
    fn add_definition(&self, block: Arc<Block>);

    /// Returns all definitions visible from the current scope.
    fn get_definitions(&self) -> Vec<Arc<Block>>;

    /// Looks up an alias by name, returning its replacement token stream.
    fn get_alias(&self, name: &str) -> Option<Vec<Spanned<String>>>;

    /// Removes the alias with the given name.
    fn remove_alias(&self, name: &str);

    /// Registers an alias mapping `name` to the given replacement tokens.
    fn add_alias(&self, name: &str, replacement: Vec<Spanned<String>>);

    /// Pushes a new nested scope frame.
    fn enter_scope(&self);

    /// Pops the innermost scope frame.
    fn exit_scope(&self);
}

View File

@ -0,0 +1,153 @@
use nu_protocol::hir::*;
use nu_protocol::UnspannedPathMember;
use nu_source::{Spanned, SpannedItem};
/// Converts a SpannedExpression into a spanned shape(s) ready for color-highlighting
///
/// Compound expressions (lists, tables, binaries, ranges, ...) are flattened by
/// recursing into their sub-expressions in source order; leaf expressions map to
/// a single shape covering the whole expression span.
pub fn expression_to_flat_shape(e: &SpannedExpression) -> Vec<Spanned<FlatShape>> {
    match &e.expr {
        Expression::Block(exprs) => shapes(exprs),
        Expression::Subexpression(exprs) => shapes(exprs),
        Expression::FilePath(_) => vec![FlatShape::Path.spanned(e.span)],
        Expression::Garbage => vec![FlatShape::Garbage.spanned(e.span)],
        Expression::List(exprs) => {
            let mut output = vec![];
            for expr in exprs {
                output.append(&mut expression_to_flat_shape(expr));
            }
            output
        }
        Expression::Table(headers, cells) => {
            // Header expressions first, then every cell row by row.
            let mut output = vec![];
            for header in headers {
                output.append(&mut expression_to_flat_shape(header));
            }
            for row in cells {
                for cell in row {
                    output.append(&mut expression_to_flat_shape(cell));
                }
            }
            output
        }
        Expression::FullColumnPath(exprs) => {
            // The head expression, followed by a shape per string path member.
            let mut output = vec![];
            output.append(&mut expression_to_flat_shape(&exprs.head));
            for member in &exprs.tail {
                if let UnspannedPathMember::String(_) = &member.unspanned {
                    output.push(FlatShape::StringMember.spanned(member.span));
                }
            }
            output
        }
        Expression::Command => vec![FlatShape::InternalCommand.spanned(e.span)],
        Expression::Literal(Literal::Bare(_)) => vec![FlatShape::BareMember.spanned(e.span)],
        Expression::Literal(Literal::ColumnPath(_)) => vec![FlatShape::Path.spanned(e.span)],
        Expression::Literal(Literal::GlobPattern(_)) => {
            vec![FlatShape::GlobPattern.spanned(e.span)]
        }
        // All numeric literals are highlighted with the Int shape.
        Expression::Literal(Literal::Number(_)) => vec![FlatShape::Int.spanned(e.span)],
        Expression::Literal(Literal::Operator(_)) => vec![FlatShape::Operator.spanned(e.span)],
        // A size literal yields a compound shape carrying both sub-spans.
        Expression::Literal(Literal::Size(number, unit)) => vec![FlatShape::Size {
            number: number.span,
            unit: unit.span,
        }
        .spanned(e.span)],
        Expression::Literal(Literal::String(_)) => vec![FlatShape::String.spanned(e.span)],
        Expression::ExternalWord => vec![FlatShape::ExternalWord.spanned(e.span)],
        Expression::ExternalCommand(_) => vec![FlatShape::ExternalCommand.spanned(e.span)],
        Expression::Synthetic(_) => vec![FlatShape::BareMember.spanned(e.span)],
        Expression::Variable(_, _) => vec![FlatShape::Variable.spanned(e.span)],
        Expression::Binary(binary) => {
            // Left operand, operator, right operand — in source order.
            let mut output = vec![];
            output.append(&mut expression_to_flat_shape(&binary.left));
            output.append(&mut expression_to_flat_shape(&binary.op));
            output.append(&mut expression_to_flat_shape(&binary.right));
            output
        }
        Expression::Range(range) => {
            // Optional left bound, the range operator itself, optional right bound.
            let mut output = vec![];
            if let Some(left) = &range.left {
                output.append(&mut expression_to_flat_shape(left));
            }
            output.push(
                match &range.operator.item {
                    RangeOperator::Inclusive => FlatShape::DotDot,
                    RangeOperator::RightExclusive => FlatShape::DotDotLeftAngleBracket,
                }
                .spanned(&range.operator.span),
            );
            if let Some(right) = &range.right {
                output.append(&mut expression_to_flat_shape(right));
            }
            output
        }
        Expression::Boolean(_) => vec![FlatShape::Keyword.spanned(e.span)],
    }
}
/// Converts a series of commands into a vec of spanned shapes ready for color-highlighting
pub fn shapes(commands: &Block) -> Vec<Spanned<FlatShape>> {
let mut output = vec![];
for group in &commands.block {
for pipeline in &group.pipelines {
for command in &pipeline.list {
match command {
ClassifiedCommand::Internal(internal) => {
output.append(&mut expression_to_flat_shape(&internal.args.head));
if let Some(positionals) = &internal.args.positional {
for positional_arg in positionals {
output.append(&mut expression_to_flat_shape(positional_arg));
}
}
if let Some(named) = &internal.args.named {
for (_, named_arg) in named {
match named_arg {
NamedValue::PresentSwitch(span) => {
output.push(FlatShape::Flag.spanned(*span));
}
NamedValue::Value(span, expr) => {
output.push(FlatShape::Flag.spanned(*span));
output.append(&mut expression_to_flat_shape(expr));
}
_ => {}
}
}
}
}
ClassifiedCommand::Expr(expr) => {
output.append(&mut expression_to_flat_shape(expr))
}
ClassifiedCommand::Dynamic(call) => {
output.append(&mut expression_to_flat_shape(&call.head));
if let Some(positionals) = &call.positional {
for positional_arg in positionals {
output.append(&mut expression_to_flat_shape(positional_arg));
}
}
if let Some(named) = &call.named {
for (_, named_arg) in named {
match named_arg {
NamedValue::PresentSwitch(span) => {
output.push(FlatShape::Flag.spanned(*span));
}
NamedValue::Value(span, expr) => {
output.push(FlatShape::Flag.spanned(*span));
output.append(&mut expression_to_flat_shape(expr));
}
_ => {}
}
}
}
}
_ => {}
}
}
}
}
output
}

View File

@ -0,0 +1,66 @@
use nu_test_support::fs::Stub::FileWithContent;
use nu_test_support::nu;
use nu_test_support::playground::Playground;
#[test]
fn defs_contain_comment_in_help() {
    // A comment written directly above a custom `def` should be picked up as its
    // description and shown by `help` after sourcing the file.
    Playground::setup("comment_test", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContent(
            "my_def.nu",
            r#"
            # I comment and test. I am a good boy.
            def comment_philosphy [] {
                echo Its not a bug its an undocumented feature. (Anonymous)
            }
            "#,
        )]);
        let actual = nu!(cwd: dirs.test(), r#"
            source my_def.nu
            help comment_philosphy
            "#);
        assert!(actual.out.contains("I comment and test. I am a good boy."));
    });
}
#[test]
fn defs_contain_multiple_comments_in_help() {
    // Several commented `def`s in one sourced file: each definition's own comment
    // must appear in its respective `help` output.
    Playground::setup("comment_test_2", |dirs, sandbox| {
        sandbox.with_files(vec![FileWithContent(
            "my_def.nu",
            r#"
            # I comment and test. I am a good boy.
            def comment_philosphy [] {
                echo Its not a bug its an undocumented feature. (Anonymous)
            }

            # I comment and test all my functions. I am a very good boy.
            def comment_philosphy_2 [] {
                echo Its not a bug its an undocumented feature. (Anonymous)
            }

            # I comment and test all my functions. I am the best boy.
            def comment_philosphy_3 [] {
                echo Its not a bug its an undocumented feature. (Anonymous)
            }
            "#,
        )]);
        let actual = nu!(cwd: dirs.test(), r#"
            source my_def.nu
            help comment_philosphy
            help comment_philosphy_2
            help comment_philosphy_3
            "#);
        assert!(actual.out.contains("I comment and test. I am a good boy."));
        assert!(actual
            .out
            .contains("I comment and test all my functions. I am a very good boy."));
        assert!(actual
            .out
            .contains("I comment and test all my functions. I am the best boy."));
    });
}