From 4d3e7efe25135e3e7bb83250cc17d645a1bac178 Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Mon, 9 Sep 2019 10:43:10 -0700 Subject: [PATCH 1/3] Close a bunch of holes in external command args Previously, there was a single parsing rule for "bare words" that applied to both internal and external commands. This meant that, because `cargo +nightly` needed to work, we needed to add `+` as a valid character in bare words. The number of characters continued to grow, and the situation was becoming untenable. The current strategy would eventually eat up all syntax and make it impossible to add syntax like `@foo` to internal commands. This patch significantly restricts bare words and introduces a new token type (`ExternalWord`). An `ExternalWord` expands to an error in the internal syntax, but expands to a bare word in the external syntax. `ExternalWords` are highlighted in grey in the shell. --- src/errors.rs | 15 +++ src/evaluate/evaluator.rs | 7 +- src/parser/hir.rs | 8 ++ src/parser/hir/baseline_parse.rs | 46 ++++--- src/parser/hir/baseline_parse_tokens.rs | 16 +-- src/parser/parse/call_node.rs | 16 +++ src/parser/parse/parser.rs | 161 +++++++++++++++++++----- src/parser/parse/pipeline.rs | 36 ++++++ src/parser/parse/token_tree.rs | 21 ++-- src/parser/parse/token_tree_builder.rs | 32 ++++- src/parser/parse/tokens.rs | 6 +- src/parser/parse_command.rs | 3 +- src/shell/helper.rs | 6 +- 13 files changed, 300 insertions(+), 73 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 3eb8e33e9..d97435d22 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -38,6 +38,7 @@ pub enum ArgumentError { MissingMandatoryFlag(String), MissingMandatoryPositional(String), MissingValueForName(String), + InvalidExternalWord, } #[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Serialize, Deserialize)] @@ -136,6 +137,16 @@ impl ShellError { .start() } + pub(crate) fn invalid_external_word(span: Span) -> ShellError { + ProximateShellError::ArgumentError { + command: "Invalid argument to Nu command (did you mean to call an external command?)" + .into(), + error: ArgumentError::InvalidExternalWord, + span, + } + .start() + } + pub(crate) fn parse_error( error: nom::Err<(nom5_locate::LocatedSpan<&str>, nom::error::ErrorKind)>, ) -> ShellError { @@ -190,6 +201,10 @@ impl ShellError { error, span, } => match error { + ArgumentError::InvalidExternalWord => Diagnostic::new( + Severity::Error, + format!("Invalid bare word for Nu command (did you intend to invoke an external command?)")) + .with_label(Label::new_primary(span)), ArgumentError::MissingMandatoryFlag(name) => Diagnostic::new( Severity::Error, format!( diff --git a/src/evaluate/evaluator.rs b/src/evaluate/evaluator.rs index ee241583f..6419ab73a 100644 --- a/src/evaluate/evaluator.rs +++ b/src/evaluate/evaluator.rs @@ -1,5 +1,5 @@ use crate::data::base::Block; -use crate::errors::Description; +use crate::errors::{ArgumentError, Description}; use crate::parser::{ hir::{self, Expression, RawExpression}, CommandRegistry, Text, @@ -39,6 +39,11 @@ pub(crate) fn evaluate_baseline_expr( ) -> Result, ShellError> { match &expr.item { RawExpression::Literal(literal) => Ok(evaluate_literal(expr.copy_span(literal), source)), + RawExpression::ExternalWord => Err(ShellError::argument_error( + "Invalid external word", + ArgumentError::InvalidExternalWord, + expr.span(), + )), RawExpression::FilePath(path) => Ok(Value::path(path.clone()).tagged(expr.span())), RawExpression::Synthetic(hir::Synthetic::String(s)) => Ok(Value::string(s).tagged_unknown()), RawExpression::Variable(var) => evaluate_reference(var, scope, source), diff --git a/src/parser/hir.rs b/src/parser/hir.rs index 3e155cc05..aaf5bb771 100644 --- a/src/parser/hir.rs +++ b/src/parser/hir.rs @@ -83,6 +83,7 @@ impl ToDebug for Call { #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] pub enum RawExpression { Literal(Literal), + ExternalWord, Synthetic(Synthetic), Variable(Variable), Binary(Box), @@ -113,6 +114,7 @@ impl RawExpression { match self { RawExpression::Literal(literal) => literal.type_name(), RawExpression::Synthetic(synthetic) => synthetic.type_name(), + RawExpression::ExternalWord => "externalword", RawExpression::FilePath(..) => "filepath", RawExpression::Variable(..) => "variable", RawExpression::List(..) => "list", @@ -189,6 +191,7 @@ impl ToDebug for Expression { match self.item() { RawExpression::Literal(l) => l.tagged(self.span()).fmt_debug(f, source), RawExpression::FilePath(p) => write!(f, "{}", p.display()), + RawExpression::ExternalWord => write!(f, "{}", self.span().slice(source)), RawExpression::Synthetic(Synthetic::String(s)) => write!(f, "{:?}", s), RawExpression::Variable(Variable::It(_)) => write!(f, "$it"), RawExpression::Variable(Variable::Other(s)) => write!(f, "${}", s.slice(source)), @@ -225,6 +228,11 @@ impl From> for Expression { } } +/// Literals are expressions that are: +/// +/// 1. Copy +/// 2. Can be evaluated without additional context +/// 3. Evaluation cannot produce an error #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] pub enum Literal { Number(Number), diff --git a/src/parser/hir/baseline_parse.rs b/src/parser/hir/baseline_parse.rs index d76a88d51..4437a6d38 100644 --- a/src/parser/hir/baseline_parse.rs +++ b/src/parser/hir/baseline_parse.rs @@ -1,10 +1,14 @@ use crate::context::Context; +use crate::errors::ShellError; use crate::parser::{hir, RawToken, Token}; use crate::Text; use std::path::PathBuf; -pub fn baseline_parse_single_token(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_single_token( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.span()), RawToken::Size(int, unit) => { hir::Expression::size(int.to_number(source), unit, token.span()) @@ -14,17 +18,22 @@ pub fn baseline_parse_single_token(token: &Token, source: &Text) -> hir::Express hir::Expression::it_variable(span, token.span()) } RawToken::Variable(span) => hir::Expression::variable(span, token.span()), - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Bare => hir::Expression::bare(token.span()), - } + }) } -pub fn baseline_parse_token_as_number(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_token_as_number( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.span()), RawToken::Size(number, unit) => { @@ -32,33 +41,38 @@ pub fn baseline_parse_token_as_number(token: &Token, source: &Text) -> hir::Expr } RawToken::Bare => hir::Expression::bare(token.span()), RawToken::String(span) => hir::Expression::string(span, token.span()), - } + }) } -pub fn baseline_parse_token_as_string(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_token_as_string( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), RawToken::Bare => hir::Expression::bare(token.span()), RawToken::String(span) => hir::Expression::string(span, token.span()), - } + }) } pub fn baseline_parse_token_as_path( token: &Token, context: &Context, source: &Text, -) -> hir::Expression { - match *token.item() { +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), @@ -69,7 +83,7 @@ pub fn baseline_parse_token_as_path( RawToken::String(span) => { hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) } - } + }) } pub fn expand_path(string: &str, context: &Context) -> PathBuf { diff --git a/src/parser/hir/baseline_parse_tokens.rs b/src/parser/hir/baseline_parse_tokens.rs index ca9b0bb37..13c7630fe 100644 --- a/src/parser/hir/baseline_parse_tokens.rs +++ b/src/parser/hir/baseline_parse_tokens.rs @@ -33,7 +33,6 @@ pub fn baseline_parse_tokens( Ok(exprs) } - #[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub enum SyntaxType { Any, @@ -62,7 +61,7 @@ impl std::fmt::Display for SyntaxType { SyntaxType::Path => write!(f, "Path"), SyntaxType::Binary => write!(f, "Binary"), SyntaxType::Block => write!(f, "Block"), - SyntaxType::Boolean => write!(f, "Boolean") + SyntaxType::Boolean => write!(f, "Boolean"), } } } @@ -81,7 +80,7 @@ pub fn baseline_parse_next_expr( match (syntax_type, next) { (SyntaxType::Path, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_path(token, context, source)) + return baseline_parse_token_as_path(token, context, source) } (SyntaxType::Path, token) => { @@ -92,7 +91,7 @@ pub fn baseline_parse_next_expr( } (SyntaxType::String, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_string(token, source)); + return baseline_parse_token_as_string(token, source); } (SyntaxType::String, token) => { @@ -103,7 +102,7 @@ pub fn baseline_parse_next_expr( } (SyntaxType::Number, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_number(token, source)); + return Ok(baseline_parse_token_as_number(token, source)?); } (SyntaxType::Number, token) => { @@ -115,7 +114,7 @@ pub fn baseline_parse_next_expr( // TODO: More legit member processing (SyntaxType::Member, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_string(token, source)); + return baseline_parse_token_as_string(token, source); } (SyntaxType::Member, token) => { @@ -245,7 +244,7 @@ pub fn baseline_parse_semantic_token( source: &Text, ) -> Result { match token { - TokenNode::Token(token) => Ok(baseline_parse_single_token(token, source)), + TokenNode::Token(token) => baseline_parse_single_token(token, source), TokenNode::Call(_call) => unimplemented!(), TokenNode::Delimited(delimited) => baseline_parse_delimited(delimited, context, source), TokenNode::Pipeline(_pipeline) => unimplemented!(), @@ -315,7 +314,8 @@ pub fn baseline_parse_path( RawToken::Number(_) | RawToken::Size(..) | RawToken::Variable(_) - | RawToken::External(_) => { + | RawToken::ExternalCommand(_) + | RawToken::ExternalWord => { return Err(ShellError::type_error( "String", token.type_name().simple_spanned(part), diff --git a/src/parser/parse/call_node.rs b/src/parser/parse/call_node.rs index 2869abb44..eb715cd37 100644 --- a/src/parser/parse/call_node.rs +++ b/src/parser/parse/call_node.rs @@ -1,5 +1,7 @@ use crate::parser::TokenNode; +use crate::traits::ToDebug; use getset::Getters; +use std::fmt; #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters)] pub struct CallNode { @@ -24,3 +26,17 @@ impl CallNode { } } } + +impl ToDebug for CallNode { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + write!(f, "{}", self.head.debug(source))?; + + if let Some(children) = &self.children { + for child in children { + write!(f, "{}", child.debug(source))? + } + } + + Ok(()) + } +} diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index f230c36c2..a691fb244 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -236,12 +236,34 @@ pub fn bare(input: NomSpan) -> IResult { let start = input.offset; let (input, _) = take_while1(is_start_bare_char)(input)?; let (input, _) = take_while(is_bare_char)(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); + } + } + let end = input.offset; Ok((input, TokenTreeBuilder::spanned_bare((start, end)))) }) } +pub fn external_word(input: NomSpan) -> IResult { + trace_step(input, "bare", move |input| { + let start = input.offset; + let (input, _) = take_while1(is_external_word_char)(input)?; + let end = input.offset; + + Ok((input, TokenTreeBuilder::spanned_external_word((start, end)))) + }) +} + pub fn var(input: NomSpan) -> IResult { trace_step(input, "var", move |input| { let start = input.offset; @@ -364,8 +386,17 @@ pub fn size(input: NomSpan) -> IResult { pub fn leaf(input: NomSpan) -> IResult { trace_step(input, "leaf", move |input| { - let (input, node) = - alt((size, string, operator, flag, shorthand, var, external, bare))(input)?; + let (input, node) = alt(( + size, + string, + operator, + flag, + shorthand, + var, + external, + bare, + external_word, + ))(input)?; Ok((input, node)) }) @@ -582,26 +613,13 @@ pub fn pipeline(input: NomSpan) -> IResult { } fn make_call_list( - head: Option<( - Option, - Tagged, - Option - )>, - items: Vec<( - NomSpan, - Option, - Tagged, - Option, - )>, + head: Option<(Option, Tagged, Option)>, + items: Vec<(NomSpan, Option, Tagged, Option)>, ) -> Vec { let mut out = vec![]; if let Some(head) = head { - let el = PipelineElement::new( - None, - head.0.map(Span::from), - head.1, - head.2.map(Span::from)); + let el = PipelineElement::new(None, head.0.map(Span::from), head.1, head.2.map(Span::from)); out.push(el); } @@ -611,7 +629,8 @@ fn make_call_list( Some(pipe).map(Span::from), ws1.map(Span::from), call, - ws2.map(Span::from)); + ws2.map(Span::from), + ); out.push(el); } @@ -628,40 +647,39 @@ fn int(frag: &str, neg: Option) -> i64 { } } +fn is_external_word_char(c: char) -> bool { + match c { + ';' | '|' | '#' | '-' | '"' | '\'' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '`' => false, + other if other.is_whitespace() => false, + _ => true, + } +} + fn is_start_bare_char(c: char) -> bool { match c { + '+' => false, _ if c.is_alphabetic() => true, - _ if c.is_numeric() => true, '.' => true, '\\' => true, '/' => true, '_' => true, '-' => true, - '@' => true, - '*' => true, - '?' => true, '~' => true, - '+' => true, _ => false, } } fn is_bare_char(c: char) -> bool { match c { + '+' => false, _ if c.is_alphanumeric() => true, - ':' => true, '.' => true, '\\' => true, '/' => true, '_' => true, '-' => true, - '@' => true, - '*' => true, - '?' => true, '=' => true, '~' => true, - '+' => true, - '%' => true, _ => false, } } @@ -724,6 +742,44 @@ mod tests { } } + macro_rules! equal_tokens { + ($source:tt -> $tokens:expr) => { + let result = apply(pipeline, "pipeline", $source); + let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens); + + if result != expected_tree { + let debug_result = format!("{}", result.debug($source)); + let debug_expected = format!("{}", expected_tree.debug(&expected_source)); + + if debug_result == debug_expected { + assert_eq!( + result, expected_tree, + "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}", + $source, + debug_expected + ) + } else { + assert_eq!(debug_result, debug_expected) + } + } + + // apply(pipeline, "pipeline", r#"cargo +nightly run"#), + // build_token(b::pipeline(vec![( + // None, + // b::call( + // b::bare("cargo"), + // vec![ + // b::sp(), + // b::external_word("+nightly"), + // b::sp(), + // b::bare("run") + // ] + // ), + // None + // )])) + }; + } + #[test] fn test_integer() { assert_leaf! { @@ -854,7 +910,7 @@ mod tests { fn test_external() { assert_leaf! { parsers [ external ] - "^ls" -> 0..3 { External(span(1, 3)) } + "^ls" -> 0..3 { ExternalCommand(span(1, 3)) } } } @@ -1058,6 +1114,46 @@ mod tests { ); } + #[test] + fn test_external_word() { + let _ = pretty_env_logger::try_init(); + + equal_tokens!( + "cargo +nightly run" -> + b::pipeline(vec![( + None, + b::call( + b::bare("cargo"), + vec![ + b::sp(), + b::external_word("+nightly"), + b::sp(), + b::bare("run") + ] + ), + None + )]) + ); + + equal_tokens!( + "rm foo%bar" -> + b::pipeline(vec![( + None, + b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), + None + )]) + ); + + equal_tokens!( + "rm foo%bar" -> + b::pipeline(vec![( + None, + b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), + None + )]) + ); + } + #[test] fn test_smoke_pipeline() { let _ = pretty_env_logger::try_init(); @@ -1178,7 +1274,6 @@ mod tests { } fn build_token(block: CurriedToken) -> TokenNode { - let mut builder = TokenTreeBuilder::new(); - block(&mut builder) + TokenTreeBuilder::build(block).0 } } diff --git a/src/parser/parse/pipeline.rs b/src/parser/parse/pipeline.rs index 75155d143..64a899c17 100644 --- a/src/parser/parse/pipeline.rs +++ b/src/parser/parse/pipeline.rs @@ -1,7 +1,9 @@ use crate::parser::CallNode; +use crate::traits::ToDebug; use crate::{Span, Tagged}; use derive_new::new; use getset::Getters; +use std::fmt; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, new)] pub struct Pipeline { @@ -9,6 +11,20 @@ pub struct Pipeline { pub(crate) post_ws: Option, } +impl ToDebug for Pipeline { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + for part in &self.parts { + write!(f, "{}", part.debug(source))?; + } + + if let Some(post_ws) = self.post_ws { + write!(f, "{}", post_ws.slice(source))? + } + + Ok(()) + } +} + #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters, new)] pub struct PipelineElement { pub pipe: Option, @@ -17,3 +33,23 @@ pub struct PipelineElement { call: Tagged, pub post_ws: Option, } + +impl ToDebug for PipelineElement { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + if let Some(pipe) = self.pipe { + write!(f, "{}", pipe.slice(source))?; + } + + if let Some(pre_ws) = self.pre_ws { + write!(f, "{}", pre_ws.slice(source))?; + } + + write!(f, "{}", self.call.debug(source))?; + + if let Some(post_ws) = self.post_ws { + write!(f, "{}", post_ws.slice(source))?; + } + + Ok(()) + } +} diff --git a/src/parser/parse/token_tree.rs b/src/parser/parse/token_tree.rs index df189a1a0..f69c176e9 100644 --- a/src/parser/parse/token_tree.rs +++ b/src/parser/parse/token_tree.rs @@ -1,5 +1,6 @@ use crate::errors::ShellError; use crate::parser::parse::{call_node::*, flag::*, operator::*, pipeline::*, tokens::*}; +use crate::traits::ToDebug; use crate::{Span, Tagged, Text}; use derive_new::new; use enum_utils::FromStr; @@ -22,6 +23,12 @@ pub enum TokenNode { Path(Tagged), } +impl ToDebug for TokenNode { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + write!(f, "{:?}", self.old_debug(&Text::from(source))) + } +} + pub struct DebugTokenNode<'a> { node: &'a TokenNode, source: &'a Text, @@ -34,11 +41,11 @@ impl fmt::Debug for DebugTokenNode<'_> { TokenNode::Call(s) => { write!(f, "(")?; - write!(f, "{:?}", s.head().debug(self.source))?; + write!(f, "{}", s.head().debug(self.source))?; if let Some(children) = s.children() { for child in children { - write!(f, "{:?}", child.debug(self.source))?; + write!(f, "{}", child.debug(self.source))?; } } @@ -57,7 +64,7 @@ impl fmt::Debug for DebugTokenNode<'_> { )?; for child in d.children() { - write!(f, "{:?}", child.debug(self.source))?; + write!(f, "{:?}", child.old_debug(self.source))?; } write!( @@ -70,7 +77,7 @@ impl fmt::Debug for DebugTokenNode<'_> { } ) } - TokenNode::Pipeline(_) => write!(f, ""), + TokenNode::Pipeline(pipeline) => write!(f, "{}", pipeline.debug(self.source)), TokenNode::Error(s) => write!(f, " for {:?}", s.span().slice(self.source)), rest => write!(f, "{}", rest.span().slice(self.source)), } @@ -115,7 +122,7 @@ impl TokenNode { .to_string() } - pub fn debug<'a>(&'a self, source: &'a Text) -> DebugTokenNode<'a> { + pub fn old_debug<'a>(&'a self, source: &'a Text) -> DebugTokenNode<'a> { DebugTokenNode { node: self, source } } @@ -140,7 +147,7 @@ impl TokenNode { pub fn is_external(&self) -> bool { match self { TokenNode::Token(Tagged { - item: RawToken::External(..), + item: RawToken::ExternalCommand(..), .. }) => true, _ => false, @@ -150,7 +157,7 @@ impl TokenNode { pub fn expect_external(&self) -> Span { match self { TokenNode::Token(Tagged { - item: RawToken::External(span), + item: RawToken::ExternalCommand(span), .. }) => *span, _ => panic!("Only call expect_external if you checked is_external first"), diff --git a/src/parser/parse/token_tree_builder.rs b/src/parser/parse/token_tree_builder.rs index 9dd1ebc16..8034e8b0c 100644 --- a/src/parser/parse/token_tree_builder.rs +++ b/src/parser/parse/token_tree_builder.rs @@ -14,15 +14,19 @@ use derive_new::new; pub struct TokenTreeBuilder { #[new(default)] pos: usize, + + #[new(default)] + output: String, } pub type CurriedToken = Box TokenNode + 'static>; pub type CurriedCall = Box Tagged + 'static>; impl TokenTreeBuilder { - pub fn build(block: impl FnOnce(&mut Self) -> TokenNode) -> TokenNode { + pub fn build(block: impl FnOnce(&mut Self) -> TokenNode) -> (TokenNode, String) { let mut builder = TokenTreeBuilder::new(); - block(&mut builder) + let node = block(&mut builder); + (node, builder.output) } pub fn pipeline(input: Vec<(Option<&str>, CurriedCall, Option<&str>)>) -> CurriedToken { @@ -56,7 +60,8 @@ impl TokenTreeBuilder { pipe, pre_span.map(Span::from), call, - post_span.map(Span::from))); + post_span.map(Span::from), + )); loop { match input.next() { @@ -147,9 +152,27 @@ impl TokenTreeBuilder { )) } + pub fn external_word(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + b.pos = end; + + TokenTreeBuilder::spanned_external_word((start, end)) + }) + } + + pub fn spanned_external_word(input: impl Into) -> TokenNode { + TokenNode::Token(Tagged::from_simple_spanned_item( + RawToken::ExternalWord, + input.into(), + )) + } + pub fn spanned_external(input: impl Into, span: impl Into) -> TokenNode { TokenNode::Token(Tagged::from_simple_spanned_item( - RawToken::External(input.into()), + RawToken::ExternalCommand(input.into()), span.into(), )) } @@ -422,6 +445,7 @@ impl TokenTreeBuilder { fn consume(&mut self, input: &str) -> (usize, usize) { let start = self.pos; self.pos += input.len(); + self.output.push_str(input); (start, self.pos) } } diff --git a/src/parser/parse/tokens.rs b/src/parser/parse/tokens.rs index ed9c1f72a..0bb2e3f17 100644 --- a/src/parser/parse/tokens.rs +++ b/src/parser/parse/tokens.rs @@ -10,7 +10,8 @@ pub enum RawToken { Size(RawNumber, Unit), String(Span), Variable(Span), - External(Span), + ExternalCommand(Span), + ExternalWord, Bare, } @@ -50,7 +51,8 @@ impl RawToken { RawToken::Size(..) => "Size", RawToken::String(_) => "String", RawToken::Variable(_) => "Variable", - RawToken::External(_) => "External", + RawToken::ExternalCommand(_) => "ExternalCommand", + RawToken::ExternalWord => "ExternalWord", RawToken::Bare => "String", } } diff --git a/src/parser/parse_command.rs b/src/parser/parse_command.rs index 33ad25e6f..e0fc9d86f 100644 --- a/src/parser/parse_command.rs +++ b/src/parser/parse_command.rs @@ -6,6 +6,7 @@ use crate::parser::{ hir::{self, NamedArguments}, Flag, RawToken, TokenNode, }; +use crate::traits::ToDebug; use crate::{Span, Tag, Tagged, Text}; use log::trace; @@ -248,7 +249,7 @@ pub fn trace_remaining(desc: &'static str, tail: hir::TokensIterator<'_>, source itertools::join( tail.debug_remaining() .iter() - .map(|i| format!("%{:?}%", i.debug(source))), + .map(|i| format!("%{}%", i.debug(&source))), " " ) ); diff --git a/src/shell/helper.rs b/src/shell/helper.rs index 8e21c50ec..462f37529 100644 --- a/src/shell/helper.rs +++ b/src/shell/helper.rs @@ -136,9 +136,13 @@ fn paint_token_node(token_node: &TokenNode, line: &str) -> String { .. }) => Color::Green.normal().paint(token_node.span().slice(line)), TokenNode::Token(Tagged { - item: RawToken::External(..), + item: RawToken::ExternalCommand(..), .. }) => Color::Cyan.bold().paint(token_node.span().slice(line)), + TokenNode::Token(Tagged { + item: RawToken::ExternalWord, + .. + }) => Color::Black.bold().paint(token_node.span().slice(line)), }; styled.to_string() From b15bb2c667c122b161a01131c2143e96b39430e3 Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Tue, 10 Sep 2019 08:31:21 -0700 Subject: [PATCH 2/3] Added glob patterns to the syntax shapes Bare words now represent literal file names, and globs are a different syntax shape called "Pattern". This allows commands like `cp` to ask for a pattern as a source and a literal file as a target. This also means that attempting to pass a glob to a command that expects a literal path will produce an error. --- src/commands/cp.rs | 2 +- src/commands/to_bson.rs | 1 + src/commands/to_json.rs | 1 + src/commands/to_sqlite.rs | 1 + src/commands/to_toml.rs | 1 + src/commands/to_yaml.rs | 1 + src/data/base.rs | 8 ++++ src/evaluate/evaluator.rs | 1 + src/parser/hir.rs | 10 ++++- src/parser/hir/baseline_parse.rs | 49 +++++++++++++++++++++++++ src/parser/hir/baseline_parse_tokens.rs | 16 +++++++- src/parser/parse/parser.rs | 35 +++++++++++++++++- src/parser/parse/token_tree_builder.rs | 18 +++++++++ src/parser/parse/tokens.rs | 2 + src/shell/helper.rs | 4 ++ 15 files changed, 146 insertions(+), 4 deletions(-) diff --git a/src/commands/cp.rs b/src/commands/cp.rs index 8160fc9d2..491e18b1a 100644 --- a/src/commands/cp.rs +++ b/src/commands/cp.rs @@ -21,7 +21,7 @@ impl PerItemCommand for Cpy { fn signature(&self) -> Signature { Signature::build("cp") - .required("src", SyntaxType::Path) + .required("src", SyntaxType::Pattern) .required("dst", SyntaxType::Path) .named("file", SyntaxType::Any) .switch("recursive") diff --git a/src/commands/to_bson.rs b/src/commands/to_bson.rs index bb0355a5e..a77bebeac 100644 --- a/src/commands/to_bson.rs +++ b/src/commands/to_bson.rs @@ -50,6 +50,7 @@ pub fn value_to_bson_value(v: &Tagged) -> Result { } Value::Primitive(Primitive::Nothing) => Bson::Null, Value::Primitive(Primitive::String(s)) => Bson::String(s.clone()), + Value::Primitive(Primitive::Pattern(p)) => Bson::String(p.clone()), Value::Primitive(Primitive::Path(s)) => Bson::String(s.display().to_string()), Value::Table(l) => Bson::Array( l.iter() diff --git a/src/commands/to_json.rs b/src/commands/to_json.rs index f53fbd8d2..35c03af32 100644 --- a/src/commands/to_json.rs +++ b/src/commands/to_json.rs @@ -45,6 +45,7 @@ pub fn value_to_json_value(v: &Tagged) -> Result::coerce_into(i.tagged(v.tag), "converting to JSON number")?, )), Value::Primitive(Primitive::Nothing) => serde_json::Value::Null, + Value::Primitive(Primitive::Pattern(s)) => serde_json::Value::String(s.clone()), Value::Primitive(Primitive::String(s)) => serde_json::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => serde_json::Value::String(s.display().to_string()), diff --git a/src/commands/to_sqlite.rs b/src/commands/to_sqlite.rs index 7580c3f4b..0fd392f34 100644 --- a/src/commands/to_sqlite.rs +++ b/src/commands/to_sqlite.rs @@ -91,6 +91,7 @@ fn nu_value_to_sqlite_string(v: Value) -> String { Primitive::Int(i) => format!("{}", i), Primitive::Decimal(f) => format!("{}", f), Primitive::Bytes(u) => format!("{}", u), + Primitive::Pattern(s) => format!("'{}'", s.replace("'", "''")), Primitive::String(s) => format!("'{}'", s.replace("'", "''")), Primitive::Boolean(true) => "1".into(), Primitive::Boolean(_) => "0".into(), diff --git a/src/commands/to_toml.rs b/src/commands/to_toml.rs index 7bca9840e..e18e15236 100644 --- a/src/commands/to_toml.rs +++ b/src/commands/to_toml.rs @@ -44,6 +44,7 @@ pub fn value_to_toml_value(v: &Tagged) -> Result toml::Value::Integer(i.tagged(v.tag).coerce_into("converting to TOML integer")?) } Value::Primitive(Primitive::Nothing) => toml::Value::String("".to_string()), + Value::Primitive(Primitive::Pattern(s)) => toml::Value::String(s.clone()), Value::Primitive(Primitive::String(s)) => toml::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => toml::Value::String(s.display().to_string()), diff --git a/src/commands/to_yaml.rs b/src/commands/to_yaml.rs index 129deebdf..915827252 100644 --- a/src/commands/to_yaml.rs +++ b/src/commands/to_yaml.rs @@ -42,6 +42,7 @@ pub fn value_to_yaml_value(v: &Tagged) -> Result::coerce_into(i.tagged(v.tag), "converting to YAML number")?, )), Value::Primitive(Primitive::Nothing) => serde_yaml::Value::Null, + Value::Primitive(Primitive::Pattern(s)) => serde_yaml::Value::String(s.clone()), Value::Primitive(Primitive::String(s)) => serde_yaml::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => serde_yaml::Value::String(s.display().to_string()), diff --git a/src/data/base.rs b/src/data/base.rs index b48d69212..6707d6402 100644 --- a/src/data/base.rs +++ b/src/data/base.rs @@ -20,6 +20,7 @@ pub enum Primitive { Decimal(BigDecimal), Bytes(u64), String(String), + Pattern(String), Boolean(bool), Date(DateTime), Path(PathBuf), @@ -53,6 +54,7 @@ impl Primitive { Int(_) => "int", Decimal(_) => "decimal", Bytes(_) => "bytes", + Pattern(_) => "pattern", String(_) => "string", Boolean(_) => "boolean", Date(_) => "date", @@ -71,6 +73,7 @@ impl Primitive { Path(path) => write!(f, "{}", path.display()), Decimal(decimal) => write!(f, "{}", decimal), Bytes(bytes) => write!(f, "{}", bytes), + Pattern(string) => write!(f, "{:?}", string), String(string) => write!(f, "{:?}", string), Boolean(boolean) => write!(f, "{}", boolean), Date(date) => write!(f, "{}", date), @@ -108,6 +111,7 @@ impl Primitive { } Primitive::Int(i) => format!("{}", i), Primitive::Decimal(decimal) => format!("{}", decimal), + Primitive::Pattern(s) => format!("{}", s), Primitive::String(s) => format!("{}", s), Primitive::Boolean(b) => match (b, field_name) { (true, None) => format!("Yes"), @@ -577,6 +581,10 @@ impl Value { Value::Primitive(Primitive::String(s.into())) } + pub fn pattern(s: impl Into) -> Value { + Value::Primitive(Primitive::String(s.into())) + } + pub fn path(s: impl Into) -> Value { Value::Primitive(Primitive::Path(s.into())) } diff --git a/src/evaluate/evaluator.rs b/src/evaluate/evaluator.rs index 6419ab73a..52edf6981 100644 --- a/src/evaluate/evaluator.rs +++ b/src/evaluate/evaluator.rs @@ -114,6 +114,7 @@ fn evaluate_literal(literal: Tagged<&hir::Literal>, source: &Text) -> Tagged int.into(), hir::Literal::Size(int, unit) => unit.compute(int), hir::Literal::String(span) => Value::string(span.slice(source)), + hir::Literal::GlobPattern => Value::pattern(literal.span().slice(source)), hir::Literal::Bare => Value::string(literal.span().slice(source)), }; diff --git a/src/parser/hir.rs b/src/parser/hir.rs index aaf5bb771..90bb38796 100644 --- a/src/parser/hir.rs +++ b/src/parser/hir.rs @@ -17,7 +17,7 @@ use crate::evaluate::Scope; pub(crate) use self::baseline_parse::{ baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_string, + baseline_parse_token_as_pattern, baseline_parse_token_as_string, }; pub(crate) use self::baseline_parse_tokens::{baseline_parse_next_expr, TokensIterator}; pub(crate) use self::binary::Binary; @@ -90,6 +90,7 @@ pub enum RawExpression { Block(Vec), List(Vec), Path(Box), + FilePath(PathBuf), ExternalCommand(ExternalCommand), @@ -164,6 +165,10 @@ impl Expression { Tagged::from_simple_spanned_item(RawExpression::Literal(Literal::Bare), span.into()) } + pub(crate) fn pattern(tag: impl Into) -> Expression { + RawExpression::Literal(Literal::GlobPattern).tagged(tag.into()) + } + pub(crate) fn variable(inner: impl Into, outer: impl Into) -> Expression { Tagged::from_simple_spanned_item( RawExpression::Variable(Variable::Other(inner.into())), @@ -238,6 +243,7 @@ pub enum Literal { Number(Number), Size(Number, Unit), String(Span), + GlobPattern, Bare, } @@ -247,6 +253,7 @@ impl ToDebug for Tagged<&Literal> { Literal::Number(number) => write!(f, "{:?}", *number), Literal::Size(number, unit) => write!(f, "{:?}{:?}", *number, unit), Literal::String(span) => write!(f, "{}", span.slice(source)), + Literal::GlobPattern => write!(f, "{}", self.span().slice(source)), Literal::Bare => write!(f, "{}", self.span().slice(source)), } } @@ -259,6 +266,7 @@ impl Literal { Literal::Size(..) => "size", Literal::String(..) => "string", Literal::Bare => "string", + Literal::GlobPattern => "pattern", } } } diff --git a/src/parser/hir/baseline_parse.rs b/src/parser/hir/baseline_parse.rs index 4437a6d38..5248bde5f 100644 --- a/src/parser/hir/baseline_parse.rs +++ b/src/parser/hir/baseline_parse.rs @@ -1,6 +1,7 @@ use crate::context::Context; use crate::errors::ShellError; use crate::parser::{hir, RawToken, Token}; +use crate::TaggedItem; use crate::Text; use std::path::PathBuf; @@ -20,6 +21,7 @@ pub fn baseline_parse_single_token( RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), + RawToken::GlobPattern => hir::Expression::pattern(token.span()), RawToken::Bare => hir::Expression::bare(token.span()), }) } @@ -40,6 +42,12 @@ pub fn baseline_parse_token_as_number( hir::Expression::size(number.to_number(source), unit, token.span()) } RawToken::Bare => hir::Expression::bare(token.span()), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Number", + "glob pattern".to_string().tagged(token.tag()), + )) + } RawToken::String(span) => hir::Expression::string(span, token.span()), }) } @@ -58,6 +66,12 @@ pub fn baseline_parse_token_as_string( RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), RawToken::Bare => hir::Expression::bare(token.span()), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "String", + "glob pattern".tagged(token.tag()), + )) + } RawToken::String(span) => hir::Expression::string(span, token.span()), }) } @@ -80,6 +94,41 @@ pub fn baseline_parse_token_as_path( expand_path(token.span().slice(source), context), token.span(), ), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Path", + "glob pattern".tagged(token.tag()), + )) + } + RawToken::String(span) => { + hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) + } + }) +} + +pub fn baseline_parse_token_as_pattern( + token: &Token, + context: &Context, + source: &Text, +) -> Result { + Ok(match *token.item() { + RawToken::Variable(span) if span.slice(source) == "it" => { + hir::Expression::it_variable(span, token.span()) + } + RawToken::ExternalCommand(_) => { + return Err(ShellError::syntax_error( + "Invalid external command".to_string().tagged(token.tag()), + )) + } + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), + RawToken::Variable(span) => hir::Expression::variable(span, token.span()), + RawToken::Number(_) => hir::Expression::bare(token.span()), + RawToken::Size(_, _) => hir::Expression::bare(token.span()), + RawToken::GlobPattern => hir::Expression::pattern(token.span()), + RawToken::Bare => hir::Expression::file_path( + expand_path(token.span().slice(source), context), + token.span(), + ), RawToken::String(span) => { hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) } diff --git a/src/parser/hir/baseline_parse_tokens.rs b/src/parser/hir/baseline_parse_tokens.rs index 13c7630fe..ac2c703d3 100644 --- a/src/parser/hir/baseline_parse_tokens.rs +++ b/src/parser/hir/baseline_parse_tokens.rs @@ -4,7 +4,7 @@ use crate::parser::{ hir, hir::{ baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_string, + baseline_parse_token_as_pattern, baseline_parse_token_as_string, }, DelimitedNode, Delimiter, PathNode, RawToken, TokenNode, }; @@ -43,6 +43,7 @@ pub enum SyntaxType { Variable, Number, Path, + Pattern, Binary, Block, Boolean, @@ -59,6 +60,7 @@ impl std::fmt::Display for SyntaxType { SyntaxType::Variable => write!(f, "Variable"), SyntaxType::Number => write!(f, "Number"), SyntaxType::Path => write!(f, "Path"), + SyntaxType::Pattern => write!(f, "Pattern"), SyntaxType::Binary => write!(f, "Binary"), SyntaxType::Block => write!(f, "Block"), SyntaxType::Boolean => write!(f, "Boolean"), @@ -90,6 +92,17 @@ pub fn baseline_parse_next_expr( )) } + (SyntaxType::Pattern, TokenNode::Token(token)) => { + return baseline_parse_token_as_pattern(token, context, source) + } + + (SyntaxType::Pattern, token) => { + return Err(ShellError::type_error( + "Path", + token.type_name().simple_spanned(token.span()), + )) + } + (SyntaxType::String, TokenNode::Token(token)) => { return baseline_parse_token_as_string(token, source); } @@ -315,6 +328,7 @@ pub fn baseline_parse_path( | RawToken::Size(..) | RawToken::Variable(_) | RawToken::ExternalCommand(_) + | RawToken::GlobPattern | RawToken::ExternalWord => { return Err(ShellError::type_error( "String", diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index a691fb244..66656619d 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -231,6 +231,29 @@ pub fn external(input: NomSpan) -> IResult { }) } +pub fn pattern(input: NomSpan) -> IResult { + trace_step(input, "bare", move |input| { + let start = input.offset; + let (input, _) = take_while1(is_start_glob_char)(input)?; + let (input, _) = take_while(is_glob_char)(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); + } + } + + let end = input.offset; + + Ok((input, TokenTreeBuilder::spanned_pattern((start, end)))) + }) +} + pub fn bare(input: NomSpan) -> IResult { trace_step(input, "bare", move |input| { let start = input.offset; @@ -240,7 +263,7 @@ pub fn bare(input: NomSpan) -> IResult { let next_char = &input.fragment.chars().nth(0); if let Some(next_char) = next_char { - if is_external_word_char(*next_char) { + if is_external_word_char(*next_char) || *next_char == '*' { return Err(nom::Err::Error(nom::error::make_error( input, nom::error::ErrorKind::TakeWhile1, @@ -395,6 +418,7 @@ pub fn leaf(input: NomSpan) -> IResult { var, external, bare, + pattern, external_word, ))(input)?; @@ -655,6 +679,14 @@ fn is_external_word_char(c: char) -> bool { } } +fn is_start_glob_char(c: char) -> bool { + is_start_bare_char(c) || c == '*' +} + +fn is_glob_char(c: char) -> bool { + is_bare_char(c) || c == '*' +} + fn is_start_bare_char(c: char) -> bool { match c { '+' => false, @@ -680,6 +712,7 @@ fn is_bare_char(c: char) -> bool { '-' => true, '=' => true, '~' => true, + ':' => true, _ => false, } } diff --git a/src/parser/parse/token_tree_builder.rs b/src/parser/parse/token_tree_builder.rs index 8034e8b0c..ae1b344c4 100644 --- a/src/parser/parse/token_tree_builder.rs +++ b/src/parser/parse/token_tree_builder.rs @@ -152,6 +152,24 @@ impl TokenTreeBuilder { )) } + pub fn pattern(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + b.pos = end; + + TokenTreeBuilder::spanned_pattern((start, end)) + }) + } + + pub fn spanned_pattern(input: impl Into) -> TokenNode { + TokenNode::Token(Tagged::from_simple_spanned_item( + RawToken::Bare, + input.into(), + )) + } + pub fn external_word(input: impl Into) -> CurriedToken { let input = input.into(); diff --git a/src/parser/parse/tokens.rs b/src/parser/parse/tokens.rs index 0bb2e3f17..b59985249 100644 --- a/src/parser/parse/tokens.rs +++ b/src/parser/parse/tokens.rs @@ -12,6 +12,7 @@ pub enum RawToken { Variable(Span), ExternalCommand(Span), ExternalWord, + GlobPattern, Bare, } @@ -53,6 +54,7 @@ impl RawToken { RawToken::Variable(_) => "Variable", RawToken::ExternalCommand(_) => "ExternalCommand", RawToken::ExternalWord => "ExternalWord", + RawToken::GlobPattern => "GlobPattern", RawToken::Bare => "String", } } diff --git a/src/shell/helper.rs b/src/shell/helper.rs index 462f37529..16802657d 100644 --- a/src/shell/helper.rs +++ b/src/shell/helper.rs @@ -123,6 +123,10 @@ fn paint_token_node(token_node: &TokenNode, line: &str) -> String { item: RawToken::Size(..), .. }) => Color::Purple.bold().paint(token_node.span().slice(line)), + TokenNode::Token(Tagged { + item: RawToken::GlobPattern, + .. + }) => Color::Cyan.normal().paint(token_node.span().slice(line)), TokenNode::Token(Tagged { item: RawToken::String(..), .. From 540e93aa3ada4d722cc6eac7df64322cb83e9097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Tue, 10 Sep 2019 12:26:56 -0500 Subject: [PATCH 3/3] question mark character can also be in glob patterns. --- src/commands/ls.rs | 2 +- src/parser/parse/parser.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/commands/ls.rs b/src/commands/ls.rs index 709614150..d2a0f6c03 100644 --- a/src/commands/ls.rs +++ b/src/commands/ls.rs @@ -10,7 +10,7 @@ impl WholeStreamCommand for LS { } fn signature(&self) -> Signature { - Signature::build("ls").optional("path", SyntaxType::Path) + Signature::build("ls").optional("path", SyntaxType::Pattern) } fn usage(&self) -> &str { diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index 66656619d..0be05af06 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -713,6 +713,7 @@ fn is_bare_char(c: char) -> bool { '=' => true, '~' => true, ':' => true, + '?' => true, _ => false, } }