From 1ad9d6f199556c706812d992f22a7a91e8668259 Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Tue, 17 Sep 2019 15:26:27 -0700 Subject: [PATCH] Overhaul the expansion system The main thrust of this (very large) commit is an overhaul of the expansion system. The parsing pipeline is: - Lightly parse the source file for atoms, basic delimiters and pipeline structure into a token tree. - Expand the token tree into a HIR (high-level intermediate representation) based upon the baseline syntax rules for expressions and the syntactic shape of commands. Somewhat non-traditionally, nu doesn't have an AST at all. It goes from the token tree, which doesn't represent many important distinctions (like the difference between `hello` and `5KB`), directly into a high-level representation that doesn't have a direct correspondence to the source code. At a high level, nu commands work like macros, in the sense that the syntactic shape of the invocation of a command depends on the definition of the command. However, commands do not have the ability to perform unrestricted expansions of the token tree. Instead, they describe their arguments in terms of syntactic shapes, and the expander expands the token tree into HIR based upon that definition. For example, the `where` command says that it takes a block as its first required argument, and the description of the block syntactic shape expands the syntax `cpu > 10` into HIR that represents `{ $it.cpu > 10 }`. This commit overhauls that system so that the syntactic shapes are described in terms of a few new traits (`ExpandSyntax` and `ExpandExpression` are the primary ones) that are more composable than the previous system. The first big win of this new system is the addition of the `ColumnPath` shape, which looks like `cpu."max ghz"` or `package.version`. Previously, while a variable path could look like `$it.cpu."max ghz"`, the tail of a variable path could not be easily reused in other contexts. 
Now, that tail is its own syntactic shape, and it can be used as part of a command's signature. This cleans up commands like `inc`, `add` and `edit` as well as shorthand blocks, which can now look like `| where cpu."max ghz" > 10` --- Cargo.lock | 22 + Cargo.toml | 3 + src/cli.rs | 190 +- src/commands.rs | 1 + src/commands/autoview.rs | 33 +- src/commands/classified.rs | 35 +- src/commands/command.rs | 9 + src/commands/echo.rs | 9 +- src/commands/enter.rs | 10 +- src/commands/fetch.rs | 2 +- src/commands/first.rs | 2 +- src/commands/get.rs | 61 +- src/commands/open.rs | 2 +- src/commands/save.rs | 7 +- src/commands/skip_while.rs | 3 + src/commands/tags.rs | 4 +- src/context.rs | 24 +- src/data/base.rs | 133 +- src/data/meta.rs | 123 +- src/errors.rs | 31 +- src/evaluate/evaluator.rs | 30 +- src/lib.rs | 2 +- src/parser.rs | 6 +- src/parser/deserializer.rs | 9 +- src/parser/hir.rs | 138 +- src/parser/hir/baseline_parse.rs | 142 +- src/parser/hir/baseline_parse/tests.rs | 144 ++ src/parser/hir/baseline_parse_tokens.rs | 459 ----- src/parser/hir/binary.rs | 6 + src/parser/hir/expand_external_tokens.rs | 87 + src/parser/hir/external_command.rs | 2 +- src/parser/hir/path.rs | 34 +- src/parser/hir/syntax_shape.rs | 662 +++++++ src/parser/hir/syntax_shape/block.rs | 168 ++ src/parser/hir/syntax_shape/expression.rs | 188 ++ .../hir/syntax_shape/expression/delimited.rs | 38 + .../hir/syntax_shape/expression/file_path.rs | 59 + .../hir/syntax_shape/expression/list.rs | 43 + .../hir/syntax_shape/expression/number.rs | 97 + .../hir/syntax_shape/expression/pattern.rs | 86 + .../hir/syntax_shape/expression/string.rs | 60 + .../hir/syntax_shape/expression/unit.rs | 89 + .../syntax_shape/expression/variable_path.rs | 396 ++++ src/parser/hir/tokens_iterator.rs | 365 ++++ src/parser/hir/tokens_iterator/debug.rs | 30 + src/parser/parse/files.rs | 29 +- src/parser/parse/operator.rs | 3 + src/parser/parse/parser.rs | 1586 ++++++++--------- src/parser/parse/pipeline.rs | 27 +- 
src/parser/parse/token_tree.rs | 112 +- src/parser/parse/token_tree_builder.rs | 110 +- src/parser/parse/tokens.rs | 11 +- src/parser/parse_command.rs | 155 +- src/parser/registry.rs | 19 +- src/plugins/add.rs | 23 +- src/plugins/edit.rs | 18 +- src/plugins/inc.rs | 37 +- src/plugins/str.rs | 47 +- src/shell/helper.rs | 38 +- tests/command_open_tests.rs | 2 +- tests/helpers/mod.rs | 1 + 61 files changed, 4310 insertions(+), 1952 deletions(-) create mode 100644 src/parser/hir/baseline_parse/tests.rs delete mode 100644 src/parser/hir/baseline_parse_tokens.rs create mode 100644 src/parser/hir/expand_external_tokens.rs create mode 100644 src/parser/hir/syntax_shape.rs create mode 100644 src/parser/hir/syntax_shape/block.rs create mode 100644 src/parser/hir/syntax_shape/expression.rs create mode 100644 src/parser/hir/syntax_shape/expression/delimited.rs create mode 100644 src/parser/hir/syntax_shape/expression/file_path.rs create mode 100644 src/parser/hir/syntax_shape/expression/list.rs create mode 100644 src/parser/hir/syntax_shape/expression/number.rs create mode 100644 src/parser/hir/syntax_shape/expression/pattern.rs create mode 100644 src/parser/hir/syntax_shape/expression/string.rs create mode 100644 src/parser/hir/syntax_shape/expression/unit.rs create mode 100644 src/parser/hir/syntax_shape/expression/variable_path.rs create mode 100644 src/parser/hir/tokens_iterator.rs create mode 100644 src/parser/hir/tokens_iterator/debug.rs diff --git a/Cargo.lock b/Cargo.lock index 852fbd610..af1d46aa0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1491,6 +1491,25 @@ dependencies = [ "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "nom-tracable" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "nom-tracable-macros 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "nom_locate 1.0.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "nom-tracable-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "nom_locate" version = "1.0.0" @@ -1550,6 +1569,7 @@ dependencies = [ "natural 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "neso 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "nom-tracable 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "nom_locate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "num-bigint 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3140,6 +3160,8 @@ dependencies = [ "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" "checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b" +"checksum nom-tracable 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "edaa64ad2837d831d4a17966c9a83aa5101cc320730f5b724811c8f7442a2528" +"checksum nom-tracable-macros 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fd25f70877a9fe68bd406b3dd3ff99e94ce9de776cf2a96e0d99de90b53d4765" "checksum nom_locate 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f932834fd8e391fc7710e2ba17e8f9f8645d846b55aa63207e17e110a1e1ce35" "checksum ntapi 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" 
= "f26e041cd983acbc087e30fcba770380cfa352d0e392e175b2344ebaf7ea0602" "checksum num-bigint 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f9c3f34cdd24f334cb265d9bf8bfa8a241920d026916785747a92f0e55541a1a" diff --git a/Cargo.toml b/Cargo.toml index f51ea06d8..66bd695c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ surf = "1.0.2" url = "2.1.0" roxmltree = "0.7.0" nom_locate = "1.0.0" +nom-tracable = "0.4.0" enum-utils = "0.1.1" unicode-xid = "0.2.0" serde_ini = "0.2.0" @@ -95,6 +96,8 @@ textview = ["syntect", "onig_sys", "crossterm"] binaryview = ["image", "crossterm"] sys = ["heim", "battery"] ps = ["heim"] +trace = ["nom-tracable/trace"] +all = ["raw-key", "textview", "binaryview", "sys", "ps", "clipboard", "ptree"] [dependencies.rusqlite] version = "0.20.0" diff --git a/src/cli.rs b/src/cli.rs index 38e2474fa..6a35608d9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,4 +1,3 @@ -use crate::commands::autoview; use crate::commands::classified::{ ClassifiedCommand, ClassifiedInputStream, ClassifiedPipeline, ExternalCommand, InternalCommand, StreamNext, @@ -13,7 +12,12 @@ pub(crate) use crate::errors::ShellError; use crate::fuzzysearch::{interactive_fuzzy_search, SelectionResult}; use crate::git::current_branch; use crate::parser::registry::Signature; -use crate::parser::{hir, CallNode, Pipeline, PipelineElement, TokenNode}; +use crate::parser::{ + hir, + hir::syntax_shape::{CommandHeadShape, CommandSignature, ExpandSyntax}, + hir::{expand_external_tokens::expand_external_tokens, tokens_iterator::TokensIterator}, + parse_command_tail, Pipeline, PipelineElement, TokenNode, +}; use crate::prelude::*; use log::{debug, trace}; @@ -25,6 +29,7 @@ use std::io::{BufRead, BufReader, Write}; use std::iter::Iterator; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; #[derive(Debug)] pub enum MaybeOwned<'a, T> { @@ -75,7 +80,7 @@ fn load_plugin(path: &std::path::Path, context: &mut Context) -> Result<(), Shel 
let name = params.name.clone(); let fname = fname.to_string(); - if context.has_command(&name) { + if let Some(_) = context.get_command(&name) { trace!("plugin {:?} already loaded.", &name); } else { if params.is_filter { @@ -428,21 +433,11 @@ pub async fn cli() -> Result<(), Box> { } } - LineResult::Error(mut line, err) => { + LineResult::Error(line, err) => { rl.add_history_entry(line.clone()); - let diag = err.to_diagnostic(); + context.with_host(|host| { - let writer = host.err_termcolor(); - line.push_str(" "); - let files = crate::parser::Files::new(line); - let _ = std::panic::catch_unwind(move || { - let _ = language_reporting::emit( - &mut writer.lock(), - &files, - &diag, - &language_reporting::DefaultConfig, - ); - }); + print_err(err, host, &Text::from(line)); }) } @@ -459,6 +454,14 @@ pub async fn cli() -> Result<(), Box> { Ok(()) } +fn chomp_newline(s: &str) -> &str { + if s.ends_with('\n') { + &s[..s.len() - 1] + } else { + s + } +} + enum LineResult { Success(String), Error(String, ShellError), @@ -471,9 +474,11 @@ async fn process_line(readline: Result, ctx: &mut Context Ok(line) if line.trim() == "" => LineResult::Success(line.clone()), Ok(line) => { - let result = match crate::parser::parse(&line, uuid::Uuid::nil()) { + let line = chomp_newline(line); + + let result = match crate::parser::parse(&line, uuid::Uuid::new_v4()) { Err(err) => { - return LineResult::Error(line.clone(), err); + return LineResult::Error(line.to_string(), err); } Ok(val) => val, @@ -484,7 +489,7 @@ async fn process_line(readline: Result, ctx: &mut Context let mut pipeline = match classify_pipeline(&result, ctx, &Text::from(line)) { Ok(pipeline) => pipeline, - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), }; match pipeline.commands.last() { @@ -492,7 +497,7 @@ async fn process_line(readline: Result, ctx: &mut Context _ => pipeline .commands .push(ClassifiedCommand::Internal(InternalCommand { - 
command: whole_stream_command(autoview::Autoview), + name: "autoview".to_string(), name_tag: Tag::unknown(), args: hir::Call::new( Box::new(hir::Expression::synthetic_string("autoview")), @@ -514,16 +519,24 @@ async fn process_line(readline: Result, ctx: &mut Context input = match (item, next) { (None, _) => break, + (Some(ClassifiedCommand::Dynamic(_)), _) + | (_, Some(ClassifiedCommand::Dynamic(_))) => { + return LineResult::Error( + line.to_string(), + ShellError::unimplemented("Dynamic commands"), + ) + } + (Some(ClassifiedCommand::Expr(_)), _) => { return LineResult::Error( - line.clone(), + line.to_string(), ShellError::unimplemented("Expression-only commands"), ) } (_, Some(ClassifiedCommand::Expr(_))) => { return LineResult::Error( - line.clone(), + line.to_string(), ShellError::unimplemented("Expression-only commands"), ) } @@ -536,7 +549,7 @@ async fn process_line(readline: Result, ctx: &mut Context .await { Ok(val) => ClassifiedInputStream::from_input_stream(val), - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), }, (Some(ClassifiedCommand::Internal(left)), Some(_)) => { @@ -545,7 +558,7 @@ async fn process_line(readline: Result, ctx: &mut Context .await { Ok(val) => ClassifiedInputStream::from_input_stream(val), - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), } } @@ -555,7 +568,7 @@ async fn process_line(readline: Result, ctx: &mut Context .await { Ok(val) => ClassifiedInputStream::from_input_stream(val), - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), } } @@ -564,20 +577,20 @@ async fn process_line(readline: Result, ctx: &mut Context Some(ClassifiedCommand::External(_)), ) => match left.run(ctx, input, StreamNext::External).await { Ok(val) => val, - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return 
LineResult::Error(line.to_string(), err), }, (Some(ClassifiedCommand::External(left)), Some(_)) => { match left.run(ctx, input, StreamNext::Internal).await { Ok(val) => val, - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), } } (Some(ClassifiedCommand::External(left)), None) => { match left.run(ctx, input, StreamNext::Last).await { Ok(val) => val, - Err(err) => return LineResult::Error(line.clone(), err), + Err(err) => return LineResult::Error(line.to_string(), err), } } }; @@ -585,7 +598,7 @@ async fn process_line(readline: Result, ctx: &mut Context is_first_command = false; } - LineResult::Success(line.clone()) + LineResult::Success(line.to_string()) } Err(ReadlineError::Interrupted) => LineResult::CtrlC, Err(ReadlineError::Eof) => LineResult::Break, @@ -616,80 +629,91 @@ fn classify_pipeline( } fn classify_command( - command: &PipelineElement, + command: &Tagged, context: &Context, source: &Text, ) -> Result { - let call = command.call(); + let mut iterator = TokensIterator::new(&command.tokens.item, command.tag, true); + + let head = CommandHeadShape + .expand_syntax(&mut iterator, &context.expand_context(source, command.tag))?; + + match &head { + CommandSignature::Expression(_) => Err(ShellError::syntax_error( + "Unexpected expression in command position".tagged(command.tag), + )), - match call { // If the command starts with `^`, treat it as an external command no matter what - call if call.head().is_external() => { - let name_tag = call.head().expect_external(); - let name = name_tag.slice(source); + CommandSignature::External(name) => { + let name_str = name.slice(source); - Ok(external_command(call, source, name.tagged(name_tag))) + external_command(&mut iterator, source, name_str.tagged(name)) } - // Otherwise, if the command is a bare word, we'll need to triage it - call if call.head().is_bare() => { - let head = call.head(); - let name = head.source(source); + 
CommandSignature::LiteralExternal { outer, inner } => { + let name_str = inner.slice(source); - match context.has_command(name) { - // if the command is in the registry, it's an internal command - true => { - let command = context.get_command(name); - let config = command.signature(); - - trace!(target: "nu::build_pipeline", "classifying {:?}", config); - - let args: hir::Call = config.parse_args(call, &context, source)?; - - trace!(target: "nu::build_pipeline", "args :: {}", args.debug(source)); - - Ok(ClassifiedCommand::Internal(InternalCommand { - command, - name_tag: head.tag(), - args, - })) - } - - // otherwise, it's an external command - false => Ok(external_command(call, source, name.tagged(head.tag()))), - } + external_command(&mut iterator, source, name_str.tagged(outer)) } - // If the command is something else (like a number or a variable), that is currently unsupported. - // We might support `$somevar` as a curried command in the future. - call => Err(ShellError::invalid_command(call.head().tag())), + CommandSignature::Internal(command) => { + let tail = parse_command_tail( + &command.signature(), + &context.expand_context(source, command.tag), + &mut iterator, + command.tag, + )?; + + let (positional, named) = match tail { + None => (None, None), + Some((positional, named)) => (positional, named), + }; + + let call = hir::Call { + head: Box::new(head.to_expression()), + positional, + named, + }; + + Ok(ClassifiedCommand::Internal(InternalCommand::new( + command.name().to_string(), + command.tag, + call, + ))) + } } } // Classify this command as an external command, which doesn't give special meaning // to nu syntactic constructs, and passes all arguments to the external command as // strings. 
-fn external_command( - call: &Tagged, +pub(crate) fn external_command( + tokens: &mut TokensIterator, source: &Text, name: Tagged<&str>, -) -> ClassifiedCommand { - let arg_list_strings: Vec> = match call.children() { - Some(args) => args - .iter() - .filter_map(|i| match i { - TokenNode::Whitespace(_) => None, - other => Some(other.as_external_arg(source).tagged(other.tag())), - }) - .collect(), - None => vec![], - }; +) -> Result { + let arg_list_strings = expand_external_tokens(tokens, source)?; - let (name, tag) = name.into_parts(); - - ClassifiedCommand::External(ExternalCommand { + Ok(ClassifiedCommand::External(ExternalCommand { name: name.to_string(), - name_tag: tag, + name_tag: name.tag(), args: arg_list_strings, - }) + })) +} + +pub fn print_err(err: ShellError, host: &dyn Host, source: &Text) { + let diag = err.to_diagnostic(); + + let writer = host.err_termcolor(); + let mut source = source.to_string(); + source.push_str(" "); + let files = crate::parser::Files::new(source); + let _ = std::panic::catch_unwind(move || { + let _ = language_reporting::emit( + &mut writer.lock(), + &files, + &diag, + &language_reporting::DefaultConfig, + ); + }); } diff --git a/src/commands.rs b/src/commands.rs index 72c07e38e..4eb733edd 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -75,6 +75,7 @@ pub(crate) use command::{ UnevaluatedCallInfo, WholeStreamCommand, }; +pub(crate) use classified::ClassifiedCommand; pub(crate) use config::Config; pub(crate) use cp::Cpy; pub(crate) use date::Date; diff --git a/src/commands/autoview.rs b/src/commands/autoview.rs index a0e7e9a8a..57ab6269b 100644 --- a/src/commands/autoview.rs +++ b/src/commands/autoview.rs @@ -58,21 +58,21 @@ pub fn autoview( } } }; - } else if is_single_anchored_text_value(&input) { - let text = context.get_command("textview"); - if let Some(text) = text { - let result = text.run(raw.with_input(input), &context.commands, false); - result.collect::>().await; - } else { - for i in input { - match i.item 
{ - Value::Primitive(Primitive::String(s)) => { - println!("{}", s); - } - _ => {} - } - } - } + // } else if is_single_origined_text_value(&input) { + // let text = context.get_command("textview"); + // if let Some(text) = text { + // let result = text.run(raw.with_input(input), &context.commands); + // result.collect::>().await; + // } else { + // for i in input { + // match i.item { + // Value::Primitive(Primitive::String(s)) => { + // println!("{}", s); + // } + // _ => {} + // } + // } + // } } else if is_single_text_value(&input) { for i in input { match i.item { @@ -111,7 +111,8 @@ fn is_single_text_value(input: &Vec>) -> bool { } } -fn is_single_anchored_text_value(input: &Vec>) -> bool { +#[allow(unused)] +fn is_single_origined_text_value(input: &Vec>) -> bool { if input.len() != 1 { return false; } diff --git a/src/commands/classified.rs b/src/commands/classified.rs index 0e5cd95d8..d30025b94 100644 --- a/src/commands/classified.rs +++ b/src/commands/classified.rs @@ -1,12 +1,11 @@ -use crate::commands::Command; use crate::parser::{hir, TokenNode}; use crate::prelude::*; use bytes::{BufMut, BytesMut}; +use derive_new::new; use futures::stream::StreamExt; use futures_codec::{Decoder, Encoder, Framed}; use log::{log_enabled, trace}; use std::io::{Error, ErrorKind}; -use std::sync::Arc; use subprocess::Exec; /// A simple `Codec` implementation that splits up data into lines. 
@@ -77,19 +76,28 @@ pub(crate) struct ClassifiedPipeline { pub(crate) commands: Vec, } +#[derive(Debug, Eq, PartialEq)] pub(crate) enum ClassifiedCommand { #[allow(unused)] Expr(TokenNode), Internal(InternalCommand), + #[allow(unused)] + Dynamic(hir::Call), External(ExternalCommand), } +#[derive(new, Debug, Eq, PartialEq)] pub(crate) struct InternalCommand { - pub(crate) command: Arc, + pub(crate) name: String, pub(crate) name_tag: Tag, pub(crate) args: hir::Call, } +#[derive(new, Debug, Eq, PartialEq)] +pub(crate) struct DynamicCommand { + pub(crate) args: hir::Call, +} + impl InternalCommand { pub(crate) async fn run( self, @@ -100,15 +108,17 @@ impl InternalCommand { ) -> Result { if log_enabled!(log::Level::Trace) { trace!(target: "nu::run::internal", "->"); - trace!(target: "nu::run::internal", "{}", self.command.name()); + trace!(target: "nu::run::internal", "{}", self.name); trace!(target: "nu::run::internal", "{}", self.args.debug(&source)); } let objects: InputStream = trace_stream!(target: "nu::trace_stream::internal", "input" = input.objects); + let command = context.expect_command(&self.name); + let result = context.run_command( - self.command, + command, self.name_tag.clone(), context.source_map.clone(), self.args, @@ -185,6 +195,7 @@ impl InternalCommand { } } +#[derive(Debug, Eq, PartialEq)] pub(crate) struct ExternalCommand { pub(crate) name: String, @@ -192,6 +203,7 @@ pub(crate) struct ExternalCommand { pub(crate) args: Vec>, } +#[derive(Debug)] pub(crate) enum StreamNext { Last, External, @@ -221,6 +233,8 @@ impl ExternalCommand { process = Exec::cmd(&self.name); + trace!(target: "nu::run::external", "command = {:?}", process); + if arg_string.contains("$it") { let mut first = true; @@ -275,6 +289,8 @@ impl ExternalCommand { process = process.cwd(context.shell_manager.path()); + trace!(target: "nu::run::external", "cwd = {:?}", context.shell_manager.path()); + let mut process = match stream_next { StreamNext::Last => process, StreamNext::External 
| StreamNext::Internal => { @@ -282,11 +298,18 @@ impl ExternalCommand { } }; + trace!(target: "nu::run::external", "set up stdout pipe"); + if let Some(stdin) = stdin { process = process.stdin(stdin); } - let mut popen = process.popen()?; + trace!(target: "nu::run::external", "set up stdin pipe"); + trace!(target: "nu::run::external", "built process {:?}", process); + + let mut popen = process.popen().unwrap(); + + trace!(target: "nu::run::external", "next = {:?}", stream_next); match stream_next { StreamNext::Last => { diff --git a/src/commands/command.rs b/src/commands/command.rs index 95732abac..7fb08bcef 100644 --- a/src/commands/command.rs +++ b/src/commands/command.rs @@ -507,6 +507,15 @@ pub enum Command { PerItem(Arc), } +impl std::fmt::Debug for Command { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Command::WholeStream(command) => write!(f, "WholeStream({})", command.name()), + Command::PerItem(command) => write!(f, "PerItem({})", command.name()), + } + } +} + impl Command { pub fn name(&self) -> &str { match self { diff --git a/src/commands/echo.rs b/src/commands/echo.rs index 21188f54f..5bfc12efb 100644 --- a/src/commands/echo.rs +++ b/src/commands/echo.rs @@ -54,11 +54,10 @@ fn run( output.push_str(&s); } _ => { - return Err(ShellError::labeled_error( - "Expect a string from pipeline", - "not a string-compatible value", - i.tag(), - )); + return Err(ShellError::type_error( + "a string-compatible value", + i.tagged_type_name(), + )) } } } diff --git a/src/commands/enter.rs b/src/commands/enter.rs index 2d96fe865..94688acd5 100644 --- a/src/commands/enter.rs +++ b/src/commands/enter.rs @@ -15,7 +15,7 @@ impl PerItemCommand for Enter { } fn signature(&self) -> registry::Signature { - Signature::build("enter").required("location", SyntaxShape::Block) + Signature::build("enter").required("location", SyntaxShape::Path) } fn usage(&self) -> &str { @@ -33,14 +33,14 @@ impl PerItemCommand for Enter { let raw_args = 
raw_args.clone(); match call_info.args.expect_nth(0)? { Tagged { - item: Value::Primitive(Primitive::String(location)), + item: Value::Primitive(Primitive::Path(location)), .. } => { - let location = location.to_string(); - let location_clone = location.to_string(); + let location_string = location.display().to_string(); + let location_clone = location_string.clone(); if location.starts_with("help") { - let spec = location.split(":").collect::>(); + let spec = location_string.split(":").collect::>(); let (_, command) = (spec[0], spec[1]); diff --git a/src/commands/fetch.rs b/src/commands/fetch.rs index 652ec77eb..21ef7fbfd 100644 --- a/src/commands/fetch.rs +++ b/src/commands/fetch.rs @@ -53,7 +53,7 @@ fn run( }; let path_buf = path.as_path()?; let path_str = path_buf.display().to_string(); - let path_span = path.span(); + let path_span = path.tag.span; let has_raw = call_info.args.has("raw"); let registry = registry.clone(); let raw_args = raw_args.clone(); diff --git a/src/commands/first.rs b/src/commands/first.rs index e39b5155d..71d05be7e 100644 --- a/src/commands/first.rs +++ b/src/commands/first.rs @@ -16,7 +16,7 @@ impl WholeStreamCommand for First { } fn signature(&self) -> Signature { - Signature::build("first").required("amount", SyntaxShape::Literal) + Signature::build("first").required("amount", SyntaxShape::Int) } fn usage(&self) -> &str { diff --git a/src/commands/get.rs b/src/commands/get.rs index afa550c72..4b0916c5d 100644 --- a/src/commands/get.rs +++ b/src/commands/get.rs @@ -1,14 +1,16 @@ use crate::commands::WholeStreamCommand; +use crate::data::meta::tag_for_tagged_list; use crate::data::Value; use crate::errors::ShellError; use crate::prelude::*; +use log::trace; pub struct Get; #[derive(Deserialize)] pub struct GetArgs { - member: Tagged, - rest: Vec>, + member: ColumnPath, + rest: Vec, } impl WholeStreamCommand for Get { @@ -18,8 +20,8 @@ impl WholeStreamCommand for Get { fn signature(&self) -> Signature { Signature::build("get") - 
.required("member", SyntaxShape::Member) - .rest(SyntaxShape::Member) + .required("member", SyntaxShape::ColumnPath) + .rest(SyntaxShape::ColumnPath) } fn usage(&self) -> &str { @@ -35,39 +37,34 @@ impl WholeStreamCommand for Get { } } -fn get_member(path: &Tagged, obj: &Tagged) -> Result, ShellError> { +pub type ColumnPath = Vec>; + +pub fn get_column_path( + path: &ColumnPath, + obj: &Tagged, +) -> Result, ShellError> { let mut current = Some(obj); - for p in path.split(".") { + for p in path.iter() { if let Some(obj) = current { - current = match obj.get_data_by_key(p) { + current = match obj.get_data_by_key(&p) { Some(v) => Some(v), None => // Before we give up, see if they gave us a path that matches a field name by itself { - match obj.get_data_by_key(&path.item) { - Some(v) => return Ok(v.clone()), - None => { - let possibilities = obj.data_descriptors(); + let possibilities = obj.data_descriptors(); - let mut possible_matches: Vec<_> = possibilities - .iter() - .map(|x| { - (natural::distance::levenshtein_distance(x, &path.item), x) - }) - .collect(); + let mut possible_matches: Vec<_> = possibilities + .iter() + .map(|x| (natural::distance::levenshtein_distance(x, &p), x)) + .collect(); - possible_matches.sort(); + possible_matches.sort(); - if possible_matches.len() > 0 { - return Err(ShellError::labeled_error( - "Unknown column", - format!("did you mean '{}'?", possible_matches[0].1), - path.tag(), - )); - } - None - } - } + return Err(ShellError::labeled_error( + "Unknown column", + format!("did you mean '{}'?", possible_matches[0].1), + tag_for_tagged_list(path.iter().map(|p| p.tag())), + )); } } } @@ -97,6 +94,8 @@ pub fn get( }: GetArgs, RunnableContext { input, .. 
}: RunnableContext, ) -> Result { + trace!("get {:?} {:?}", member, fields); + let stream = input .values .map(move |item| { @@ -107,10 +106,10 @@ pub fn get( let fields = vec![&member, &fields] .into_iter() .flatten() - .collect::>>(); + .collect::>(); - for field in &fields { - match get_member(field, &item) { + for column_path in &fields { + match get_column_path(column_path, &item) { Ok(Tagged { item: Value::Table(l), .. diff --git a/src/commands/open.rs b/src/commands/open.rs index 254b0bd7b..97b0df274 100644 --- a/src/commands/open.rs +++ b/src/commands/open.rs @@ -54,7 +54,7 @@ fn run( }; let path_buf = path.as_path()?; let path_str = path_buf.display().to_string(); - let path_span = path.span(); + let path_span = path.tag.span; let has_raw = call_info.args.has("raw"); let registry = registry.clone(); let raw_args = raw_args.clone(); diff --git a/src/commands/save.rs b/src/commands/save.rs index 47f1a17e9..44e07da5e 100644 --- a/src/commands/save.rs +++ b/src/commands/save.rs @@ -143,15 +143,16 @@ fn save( } _ => { yield Err(ShellError::labeled_error( - "Save requires a filepath", + "Save requires a filepath (1)", "needs path", name_tag, )); } }, None => { + eprintln!("{:?} {:?}", anchor, source_map); yield Err(ShellError::labeled_error( - "Save requires a filepath", + "Save requires a filepath (2)", "needs path", name_tag, )); @@ -159,7 +160,7 @@ fn save( } } else { yield Err(ShellError::labeled_error( - "Save requires a filepath", + "Save requires a filepath (3)", "needs path", name_tag, )); diff --git a/src/commands/skip_while.rs b/src/commands/skip_while.rs index 041caf300..a768ae613 100644 --- a/src/commands/skip_while.rs +++ b/src/commands/skip_while.rs @@ -1,6 +1,7 @@ use crate::commands::WholeStreamCommand; use crate::errors::ShellError; use crate::prelude::*; +use log::trace; pub struct SkipWhile; @@ -38,7 +39,9 @@ pub fn skip_while( RunnableContext { input, .. 
}: RunnableContext, ) -> Result { let objects = input.values.skip_while(move |item| { + trace!("ITEM = {:?}", item); let result = condition.invoke(&item); + trace!("RESULT = {:?}", result); let return_value = match result { Ok(ref v) if v.is_true() => true, diff --git a/src/commands/tags.rs b/src/commands/tags.rs index 0cef300b0..2b710d1b6 100644 --- a/src/commands/tags.rs +++ b/src/commands/tags.rs @@ -38,8 +38,8 @@ fn tags(args: CommandArgs, _registry: &CommandRegistry) -> Result Arc { + self.get_command(name).unwrap() + } + pub(crate) fn has(&self, name: &str) -> bool { let registry = self.registry.lock().unwrap(); registry.contains_key(name) } - fn insert(&mut self, name: impl Into, command: Arc) { + pub(crate) fn insert(&mut self, name: impl Into, command: Arc) { let mut registry = self.registry.lock().unwrap(); registry.insert(name.into(), command); } @@ -83,6 +87,14 @@ impl Context { &self.registry } + pub(crate) fn expand_context<'context>( + &'context self, + source: &'context Text, + tag: Tag, + ) -> ExpandContext<'context> { + ExpandContext::new(&self.registry, tag, source, self.shell_manager.homedir()) + } + pub(crate) fn basic() -> Result> { let registry = CommandRegistry::new(); Ok(Context { @@ -109,12 +121,12 @@ impl Context { self.source_map.insert(uuid, anchor_location); } - pub(crate) fn has_command(&self, name: &str) -> bool { - self.registry.has(name) + pub(crate) fn get_command(&self, name: &str) -> Option> { + self.registry.get_command(name) } - pub(crate) fn get_command(&self, name: &str) -> Arc { - self.registry.get_command(name).unwrap() + pub(crate) fn expect_command(&self, name: &str) -> Arc { + self.registry.expect_command(name) } pub(crate) fn run_command<'a>( diff --git a/src/data/base.rs b/src/data/base.rs index 04465181a..176560137 100644 --- a/src/data/base.rs +++ b/src/data/base.rs @@ -8,6 +8,7 @@ use crate::Text; use chrono::{DateTime, Utc}; use chrono_humanize::Humanize; use derive_new::new; +use log::trace; use 
serde::{Deserialize, Serialize}; use std::fmt; use std::path::PathBuf; @@ -217,6 +218,14 @@ impl Block { let mut last = None; + trace!( + "EXPRS = {:?}", + self.expressions + .iter() + .map(|e| format!("{}", e)) + .collect::>() + ); + for expr in self.expressions.iter() { last = Some(evaluate_baseline_expr( &expr, @@ -394,13 +403,34 @@ impl Tagged { pub(crate) fn debug(&self) -> ValueDebug<'_> { ValueDebug { value: self } } + + pub fn as_column_path(&self) -> Result>>, ShellError> { + let mut out: Vec> = vec![]; + + match &self.item { + Value::Table(table) => { + for item in table { + out.push(item.as_string()?.tagged(item.tag)); + } + } + + other => { + return Err(ShellError::type_error( + "column name", + other.type_name().tagged(self.tag), + )) + } + } + + Ok(out.tagged(self.tag)) + } } impl Value { pub(crate) fn type_name(&self) -> String { match self { Value::Primitive(p) => p.type_name(), - Value::Row(_) => format!("object"), + Value::Row(_) => format!("row"), Value::Table(_) => format!("list"), Value::Block(_) => format!("block"), } @@ -443,6 +473,22 @@ impl Value { } } + pub fn get_data_by_column_path( + &self, + tag: Tag, + path: &Vec>, + ) -> Option> { + let mut current = self; + for p in path { + match current.get_data_by_key(p) { + Some(v) => current = v, + None => return None, + } + } + + Some(Tagged::from_item(current, tag)) + } + pub fn get_data_by_path(&self, tag: Tag, path: &str) -> Option> { let mut current = self; for p in path.split(".") { @@ -508,6 +554,58 @@ impl Value { None } + pub fn insert_data_at_column_path( + &self, + tag: Tag, + split_path: &Vec>, + new_value: Value, + ) -> Option> { + let mut new_obj = self.clone(); + + if let Value::Row(ref mut o) = new_obj { + let mut current = o; + + if split_path.len() == 1 { + // Special case for inserting at the top level + current.entries.insert( + split_path[0].item.clone(), + Tagged::from_item(new_value, tag), + ); + return Some(Tagged::from_item(new_obj, tag)); + } + + for idx in 
0..split_path.len() { + match current.entries.get_mut(&split_path[idx].item) { + Some(next) => { + if idx == (split_path.len() - 2) { + match &mut next.item { + Value::Row(o) => { + o.entries.insert( + split_path[idx + 1].to_string(), + Tagged::from_item(new_value, tag), + ); + } + _ => {} + } + + return Some(Tagged::from_item(new_obj, tag)); + } else { + match next.item { + Value::Row(ref mut o) => { + current = o; + } + _ => return None, + } + } + } + _ => return None, + } + } + } + + None + } + pub fn replace_data_at_path( &self, tag: Tag, @@ -543,6 +641,39 @@ impl Value { None } + pub fn replace_data_at_column_path( + &self, + tag: Tag, + split_path: &Vec>, + replaced_value: Value, + ) -> Option> { + let mut new_obj = self.clone(); + + if let Value::Row(ref mut o) = new_obj { + let mut current = o; + for idx in 0..split_path.len() { + match current.entries.get_mut(&split_path[idx].item) { + Some(next) => { + if idx == (split_path.len() - 1) { + *next = Tagged::from_item(replaced_value, tag); + return Some(Tagged::from_item(new_obj, tag)); + } else { + match next.item { + Value::Row(ref mut o) => { + current = o; + } + _ => return None, + } + } + } + _ => return None, + } + } + } + + None + } + pub fn get_data(&self, desc: &String) -> MaybeOwned<'_, Value> { match self { p @ Value::Primitive(_) => MaybeOwned::Borrowed(p), diff --git a/src/data/meta.rs b/src/data/meta.rs index 0a56198e6..b66b009cc 100644 --- a/src/data/meta.rs +++ b/src/data/meta.rs @@ -1,4 +1,5 @@ use crate::context::{AnchorLocation, SourceMap}; +use crate::parser::parse::parser::TracableContext; use crate::prelude::*; use crate::Text; use derive_new::new; @@ -119,10 +120,7 @@ impl From<&Tag> for Tag { impl From> for Span { fn from(input: nom_locate::LocatedSpanEx<&str, Uuid>) -> Span { - Span { - start: input.offset, - end: input.offset + input.fragment.len(), - } + Span::new(input.offset, input.offset + input.fragment.len()) } } @@ -147,10 +145,7 @@ impl impl From<(usize, usize)> for Span { fn 
from(input: (usize, usize)) -> Span { - Span { - start: input.0, - end: input.1, - } + Span::new(input.0, input.1) } } @@ -164,7 +159,7 @@ impl From<&std::ops::Range> for Span { } #[derive( - Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Hash, Getters, + Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Hash, Getters, new, )] pub struct Tag { pub anchor: Uuid, @@ -189,11 +184,20 @@ impl From<&Span> for Tag { } } +impl From<(usize, usize, TracableContext)> for Tag { + fn from((start, end, context): (usize, usize, TracableContext)) -> Self { + Tag { + anchor: context.origin, + span: Span::new(start, end), + } + } +} + impl From<(usize, usize, Uuid)> for Tag { fn from((start, end, anchor): (usize, usize, Uuid)) -> Self { Tag { anchor, - span: Span { start, end }, + span: Span::new(start, end), } } } @@ -201,24 +205,17 @@ impl From<(usize, usize, Uuid)> for Tag { impl From<(usize, usize, Option)> for Tag { fn from((start, end, anchor): (usize, usize, Option)) -> Self { Tag { - anchor: if let Some(uuid) = anchor { - uuid - } else { - uuid::Uuid::nil() - }, - span: Span { start, end }, + anchor: anchor.unwrap_or(uuid::Uuid::nil()), + span: Span::new(start, end), } } } -impl From> for Tag { - fn from(input: nom_locate::LocatedSpanEx<&str, Uuid>) -> Tag { +impl From> for Tag { + fn from(input: nom_locate::LocatedSpanEx<&str, TracableContext>) -> Tag { Tag { - anchor: input.extra, - span: Span { - start: input.offset, - end: input.offset + input.fragment.len(), - }, + anchor: input.extra.origin, + span: Span::new(input.offset, input.offset + input.fragment.len()), } } } @@ -265,10 +262,7 @@ impl Tag { ); Tag { - span: Span { - start: self.span.start, - end: other.span.end, - }, + span: Span::new(self.span.start, other.span.end), anchor: self.anchor, } } @@ -276,18 +270,46 @@ impl Tag { pub fn slice<'a>(&self, source: &'a str) -> &'a str { self.span.slice(source) } + + pub fn string<'a>(&self, source: &'a str) -> String 
{ + self.span.slice(source).to_string() + } + + pub fn tagged_slice<'a>(&self, source: &'a str) -> Tagged<&'a str> { + self.span.slice(source).tagged(self) + } + + pub fn tagged_string<'a>(&self, source: &'a str) -> Tagged { + self.span.slice(source).to_string().tagged(self) + } +} + +pub fn tag_for_tagged_list(mut iter: impl Iterator) -> Tag { + let first = iter.next(); + + let first = match first { + None => return Tag::unknown(), + Some(first) => first, + }; + + let last = iter.last(); + + match last { + None => first, + Some(last) => first.until(last), + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Hash)] pub struct Span { - pub(crate) start: usize, - pub(crate) end: usize, + start: usize, + end: usize, } impl From> for Span { fn from(input: Option) -> Span { match input { - None => Span { start: 0, end: 0 }, + None => Span::new(0, 0), Some(span) => span, } } @@ -295,7 +317,18 @@ impl From> for Span { impl Span { pub fn unknown() -> Span { - Span { start: 0, end: 0 } + Span::new(0, 0) + } + + pub fn new(start: usize, end: usize) -> Span { + assert!( + end >= start, + "Can't create a Span whose end < start, start={}, end={}", + start, + end + ); + + Span { start, end } } /* @@ -308,6 +341,14 @@ impl Span { } */ + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } + pub fn is_unknown(&self) -> bool { self.start == 0 && self.end == 0 } @@ -319,17 +360,11 @@ impl Span { impl language_reporting::ReportingSpan for Span { fn with_start(&self, start: usize) -> Self { - Span { - start, - end: self.end, - } + Span::new(start, self.end) } fn with_end(&self, end: usize) -> Self { - Span { - start: self.start, - end, - } + Span::new(self.start, end) } fn start(&self) -> usize { @@ -344,20 +379,14 @@ impl language_reporting::ReportingSpan for Span { impl language_reporting::ReportingSpan for Tag { fn with_start(&self, start: usize) -> Self { Tag { - span: Span { - start, - end: 
self.span.end, - }, + span: Span::new(start, self.span.end), anchor: self.anchor, } } fn with_end(&self, end: usize) -> Self { Tag { - span: Span { - start: self.span.start, - end, - }, + span: Span::new(self.span.start, end), anchor: self.anchor, } } diff --git a/src/errors.rs b/src/errors.rs index 7e9c14b23..a070f6f54 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,5 +1,6 @@ use crate::prelude::*; +use crate::parser::parse::parser::TracableContext; use ansi_term::Color; use derive_new::new; use language_reporting::{Diagnostic, Label, Severity}; @@ -62,6 +63,14 @@ impl ShellError { .start() } + pub(crate) fn unexpected_eof(expected: impl Into, tag: Tag) -> ShellError { + ProximateShellError::UnexpectedEof { + expected: expected.into(), + tag, + } + .start() + } + pub(crate) fn range_error( expected: impl Into, actual: &Tagged, @@ -82,6 +91,7 @@ impl ShellError { .start() } + #[allow(unused)] pub(crate) fn invalid_command(problem: impl Into) -> ShellError { ProximateShellError::InvalidCommand { command: problem.into(), @@ -133,7 +143,7 @@ impl ShellError { pub(crate) fn parse_error( error: nom::Err<( - nom_locate::LocatedSpanEx<&str, uuid::Uuid>, + nom_locate::LocatedSpanEx<&str, TracableContext>, nom::error::ErrorKind, )>, ) -> ShellError { @@ -235,7 +245,6 @@ impl ShellError { Label::new_primary(tag) .with_message(format!("Expected {}, found {}", expected, actual)), ), - ProximateShellError::TypeError { expected, actual: @@ -246,6 +255,11 @@ impl ShellError { } => Diagnostic::new(Severity::Error, "Type Error") .with_label(Label::new_primary(tag).with_message(expected)), + ProximateShellError::UnexpectedEof { + expected, tag + } => Diagnostic::new(Severity::Error, format!("Unexpected end of input")) + .with_label(Label::new_primary(tag).with_message(format!("Expected {}", expected))), + ProximateShellError::RangeError { kind, operation, @@ -267,10 +281,10 @@ impl ShellError { problem: Tagged { tag, - .. 
+ item }, } => Diagnostic::new(Severity::Error, "Syntax Error") - .with_label(Label::new_primary(tag).with_message("Unexpected external command")), + .with_label(Label::new_primary(tag).with_message(item)), ProximateShellError::MissingProperty { subpath, expr } => { let subpath = subpath.into_label(); @@ -340,6 +354,10 @@ impl ShellError { pub(crate) fn unexpected(title: impl Into) -> ShellError { ShellError::string(&format!("Unexpected: {}", title.into())) } + + pub(crate) fn unreachable(title: impl Into) -> ShellError { + ShellError::string(&format!("BUG: Unreachable: {}", title.into())) + } } #[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Serialize, Deserialize)] @@ -387,6 +405,10 @@ pub enum ProximateShellError { SyntaxError { problem: Tagged, }, + UnexpectedEof { + expected: String, + tag: Tag, + }, InvalidCommand { command: Tag, }, @@ -473,6 +495,7 @@ impl std::fmt::Display for ShellError { ProximateShellError::MissingValue { .. } => write!(f, "MissingValue"), ProximateShellError::InvalidCommand { .. } => write!(f, "InvalidCommand"), ProximateShellError::TypeError { .. } => write!(f, "TypeError"), + ProximateShellError::UnexpectedEof { .. } => write!(f, "UnexpectedEof"), ProximateShellError::RangeError { .. } => write!(f, "RangeError"), ProximateShellError::SyntaxError { .. } => write!(f, "SyntaxError"), ProximateShellError::MissingProperty { .. 
} => write!(f, "MissingProperty"), diff --git a/src/evaluate/evaluator.rs b/src/evaluate/evaluator.rs index a111d3964..248d2a081 100644 --- a/src/evaluate/evaluator.rs +++ b/src/evaluate/evaluator.rs @@ -7,6 +7,8 @@ use crate::parser::{ use crate::prelude::*; use derive_new::new; use indexmap::IndexMap; +use log::trace; +use std::fmt; #[derive(new)] pub struct Scope { @@ -15,6 +17,15 @@ pub struct Scope { vars: IndexMap>, } +impl fmt::Display for Scope { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_map() + .entry(&"$it", &format!("{:?}", self.it.item)) + .entries(self.vars.iter().map(|(k, v)| (k, &v.item))) + .finish() + } +} + impl Scope { pub(crate) fn empty() -> Scope { Scope { @@ -48,12 +59,15 @@ pub(crate) fn evaluate_baseline_expr( RawExpression::Synthetic(hir::Synthetic::String(s)) => { Ok(Value::string(s).tagged_unknown()) } - RawExpression::Variable(var) => evaluate_reference(var, scope, source), + RawExpression::Variable(var) => evaluate_reference(var, scope, source, expr.tag()), + RawExpression::Command(_) => evaluate_command(expr.tag(), scope, source), RawExpression::ExternalCommand(external) => evaluate_external(external, scope, source), RawExpression::Binary(binary) => { let left = evaluate_baseline_expr(binary.left(), registry, scope, source)?; let right = evaluate_baseline_expr(binary.right(), registry, scope, source)?; + trace!("left={:?} right={:?}", left.item, right.item); + match left.compare(binary.op(), &*right) { Ok(result) => Ok(Value::boolean(result).tagged(expr.tag())), Err((left_type, right_type)) => Err(ShellError::coerce_error( @@ -130,14 +144,16 @@ fn evaluate_reference( name: &hir::Variable, scope: &Scope, source: &Text, + tag: Tag, ) -> Result, ShellError> { + trace!("Evaluating {} with Scope {}", name, scope); match name { - hir::Variable::It(tag) => Ok(scope.it.item.clone().tagged(*tag)), - hir::Variable::Other(tag) => Ok(scope + hir::Variable::It(_) => Ok(scope.it.item.clone().tagged(tag)), + 
hir::Variable::Other(inner) => Ok(scope .vars - .get(tag.slice(source)) + .get(inner.slice(source)) .map(|v| v.clone()) - .unwrap_or_else(|| Value::nothing().tagged(*tag))), + .unwrap_or_else(|| Value::nothing().tagged(tag))), } } @@ -150,3 +166,7 @@ fn evaluate_external( "Unexpected external command".tagged(*external.name()), )) } + +fn evaluate_command(tag: Tag, _scope: &Scope, _source: &Text) -> Result, ShellError> { + Err(ShellError::syntax_error("Unexpected command".tagged(tag))) +} diff --git a/src/lib.rs b/src/lib.rs index e8e09aacd..b955f426e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,7 @@ pub use cli::cli; pub use data::base::{Primitive, Value}; pub use data::config::{config_path, APP_INFO}; pub use data::dict::{Dictionary, TaggedDictBuilder}; -pub use data::meta::{Tag, Tagged, TaggedItem}; +pub use data::meta::{Span, Tag, Tagged, TaggedItem}; pub use errors::{CoerceInto, ShellError}; pub use num_traits::cast::ToPrimitive; pub use parser::parse::text::Text; diff --git a/src/parser.rs b/src/parser.rs index 138125769..5fcfaaa27 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,7 +7,7 @@ pub(crate) mod registry; use crate::errors::ShellError; pub(crate) use deserializer::ConfigDeserializer; -pub(crate) use hir::baseline_parse_tokens::baseline_parse_tokens; +pub(crate) use hir::TokensIterator; pub(crate) use parse::call_node::CallNode; pub(crate) use parse::files::Files; pub(crate) use parse::flag::Flag; @@ -15,10 +15,10 @@ pub(crate) use parse::operator::Operator; pub(crate) use parse::parser::{nom_input, pipeline}; pub(crate) use parse::pipeline::{Pipeline, PipelineElement}; pub(crate) use parse::text::Text; -pub(crate) use parse::token_tree::{DelimitedNode, Delimiter, PathNode, TokenNode}; +pub(crate) use parse::token_tree::{DelimitedNode, Delimiter, TokenNode}; pub(crate) use parse::tokens::{RawToken, Token}; pub(crate) use parse::unit::Unit; -pub(crate) use parse_command::parse_command; +pub(crate) use parse_command::parse_command_tail; 
pub(crate) use registry::CommandRegistry; pub fn parse(input: &str, anchor: uuid::Uuid) -> Result { diff --git a/src/parser/deserializer.rs b/src/parser/deserializer.rs index f9b9146e5..43409fc4d 100644 --- a/src/parser/deserializer.rs +++ b/src/parser/deserializer.rs @@ -310,9 +310,10 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut ConfigDeserializer<'de> { return Ok(r); } trace!( - "deserializing struct {:?} {:?} (stack={:?})", + "deserializing struct {:?} {:?} (saw_root={} stack={:?})", name, fields, + self.saw_root, self.stack ); @@ -326,6 +327,12 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut ConfigDeserializer<'de> { let type_name = std::any::type_name::(); let tagged_val_name = std::any::type_name::>(); + trace!( + "type_name={} tagged_val_name={}", + type_name, + tagged_val_name + ); + if type_name == tagged_val_name { return visit::, _>(value.val, name, fields, visitor); } diff --git a/src/parser/hir.rs b/src/parser/hir.rs index 96eb7272a..4fd0a71b3 100644 --- a/src/parser/hir.rs +++ b/src/parser/hir.rs @@ -1,11 +1,13 @@ pub(crate) mod baseline_parse; -pub(crate) mod baseline_parse_tokens; pub(crate) mod binary; +pub(crate) mod expand_external_tokens; pub(crate) mod external_command; pub(crate) mod named; pub(crate) mod path; +pub(crate) mod syntax_shape; +pub(crate) mod tokens_iterator; -use crate::parser::{registry, Unit}; +use crate::parser::{registry, Operator, Unit}; use crate::prelude::*; use derive_new::new; use getset::Getters; @@ -14,27 +16,18 @@ use std::fmt; use std::path::PathBuf; use crate::evaluate::Scope; +use crate::parser::parse::tokens::RawNumber; +use crate::traits::ToDebug; -pub(crate) use self::baseline_parse::{ - baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_pattern, baseline_parse_token_as_string, -}; -pub(crate) use self::baseline_parse_tokens::{baseline_parse_next_expr, TokensIterator}; pub(crate) use self::binary::Binary; pub(crate) use 
self::external_command::ExternalCommand; pub(crate) use self::named::NamedArguments; pub(crate) use self::path::Path; +pub(crate) use self::syntax_shape::ExpandContext; +pub(crate) use self::tokens_iterator::debug::debug_tokens; +pub(crate) use self::tokens_iterator::TokensIterator; -pub use self::baseline_parse_tokens::SyntaxShape; - -pub fn path(head: impl Into, tail: Vec>>) -> Path { - Path::new( - head.into(), - tail.into_iter() - .map(|item| item.map(|string| string.into())) - .collect(), - ) -} +pub use self::syntax_shape::SyntaxShape; #[derive(Debug, Clone, Eq, PartialEq, Getters, Serialize, Deserialize, new)] pub struct Call { @@ -93,6 +86,7 @@ pub enum RawExpression { FilePath(PathBuf), ExternalCommand(ExternalCommand), + Command(Tag), Boolean(bool), } @@ -115,13 +109,14 @@ impl RawExpression { match self { RawExpression::Literal(literal) => literal.type_name(), RawExpression::Synthetic(synthetic) => synthetic.type_name(), - RawExpression::ExternalWord => "externalword", - RawExpression::FilePath(..) => "filepath", + RawExpression::Command(..) => "command", + RawExpression::ExternalWord => "external word", + RawExpression::FilePath(..) => "file path", RawExpression::Variable(..) => "variable", RawExpression::List(..) => "list", RawExpression::Binary(..) => "binary", RawExpression::Block(..) => "block", - RawExpression::Path(..) => "path", + RawExpression::Path(..) => "variable path", RawExpression::Boolean(..) => "boolean", RawExpression::ExternalCommand(..) 
=> "external", } @@ -130,6 +125,39 @@ impl RawExpression { pub type Expression = Tagged; +impl std::fmt::Display for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let span = self.tag.span; + + match &self.item { + RawExpression::Literal(literal) => write!(f, "{}", literal.tagged(self.tag)), + RawExpression::Synthetic(Synthetic::String(s)) => write!(f, "{}", s), + RawExpression::Command(_) => write!(f, "Command{{ {}..{} }}", span.start(), span.end()), + RawExpression::ExternalWord => { + write!(f, "ExternalWord{{ {}..{} }}", span.start(), span.end()) + } + RawExpression::FilePath(file) => write!(f, "Path{{ {} }}", file.display()), + RawExpression::Variable(variable) => write!(f, "{}", variable), + RawExpression::List(list) => f + .debug_list() + .entries(list.iter().map(|e| format!("{}", e))) + .finish(), + RawExpression::Binary(binary) => write!(f, "{}", binary), + RawExpression::Block(items) => { + write!(f, "Block")?; + f.debug_set() + .entries(items.iter().map(|i| format!("{}", i))) + .finish() + } + RawExpression::Path(path) => write!(f, "{}", path), + RawExpression::Boolean(b) => write!(f, "${}", b), + RawExpression::ExternalCommand(..) 
=> { + write!(f, "ExternalComment{{ {}..{} }}", span.start(), span.end()) + } + } + } +} + impl Expression { pub(crate) fn number(i: impl Into, tag: impl Into) -> Expression { RawExpression::Literal(Literal::Number(i.into())).tagged(tag.into()) @@ -151,10 +179,50 @@ impl Expression { RawExpression::Literal(Literal::String(inner.into())).tagged(outer.into()) } + pub(crate) fn path( + head: Expression, + tail: Vec>>, + tag: impl Into, + ) -> Expression { + let tail = tail.into_iter().map(|t| t.map(|s| s.into())).collect(); + RawExpression::Path(Box::new(Path::new(head, tail))).tagged(tag.into()) + } + + pub(crate) fn dot_member(head: Expression, next: Tagged>) -> Expression { + let Tagged { item, tag } = head; + let new_tag = head.tag.until(next.tag); + + match item { + RawExpression::Path(path) => { + let (head, mut tail) = path.parts(); + + tail.push(next.map(|i| i.into())); + Expression::path(head, tail, new_tag) + } + + other => Expression::path(other.tagged(tag), vec![next], new_tag), + } + } + + pub(crate) fn infix( + left: Expression, + op: Tagged>, + right: Expression, + ) -> Expression { + let new_tag = left.tag.until(right.tag); + + RawExpression::Binary(Box::new(Binary::new(left, op.map(|o| o.into()), right))) + .tagged(new_tag) + } + pub(crate) fn file_path(path: impl Into, outer: impl Into) -> Expression { RawExpression::FilePath(path.into()).tagged(outer) } + pub(crate) fn list(list: Vec, tag: impl Into) -> Expression { + RawExpression::List(list).tagged(tag) + } + pub(crate) fn bare(tag: impl Into) -> Expression { RawExpression::Literal(Literal::Bare).tagged(tag) } @@ -182,6 +250,7 @@ impl ToDebug for Expression { RawExpression::Literal(l) => l.tagged(self.tag()).fmt_debug(f, source), RawExpression::FilePath(p) => write!(f, "{}", p.display()), RawExpression::ExternalWord => write!(f, "{}", self.tag().slice(source)), + RawExpression::Command(tag) => write!(f, "{}", tag.slice(source)), RawExpression::Synthetic(Synthetic::String(s)) => write!(f, "{:?}", 
s), RawExpression::Variable(Variable::It(_)) => write!(f, "$it"), RawExpression::Variable(Variable::Other(s)) => write!(f, "${}", s.slice(source)), @@ -232,6 +301,26 @@ pub enum Literal { Bare, } +impl std::fmt::Display for Tagged { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", Tagged::new(self.tag, &self.item)) + } +} + +impl std::fmt::Display for Tagged<&Literal> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let span = self.tag.span; + + match &self.item { + Literal::Number(number) => write!(f, "{}", number), + Literal::Size(number, unit) => write!(f, "{}{}", number, unit.as_str()), + Literal::String(_) => write!(f, "String{{ {}..{} }}", span.start(), span.end()), + Literal::GlobPattern => write!(f, "Glob{{ {}..{} }}", span.start(), span.end()), + Literal::Bare => write!(f, "Bare{{ {}..{} }}", span.start(), span.end()), + } + } +} + impl ToDebug for Tagged<&Literal> { fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { match self.item() { @@ -261,3 +350,12 @@ pub enum Variable { It(Tag), Other(Tag), } + +impl std::fmt::Display for Variable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Variable::It(_) => write!(f, "$it"), + Variable::Other(tag) => write!(f, "${{ {}..{} }}", tag.span.start(), tag.span.end()), + } + } +} diff --git a/src/parser/hir/baseline_parse.rs b/src/parser/hir/baseline_parse.rs index 267494f27..87c277195 100644 --- a/src/parser/hir/baseline_parse.rs +++ b/src/parser/hir/baseline_parse.rs @@ -1,140 +1,2 @@ -use crate::context::Context; -use crate::errors::ShellError; -use crate::parser::{hir, RawToken, Token}; -use crate::TaggedItem; -use crate::Text; -use std::path::PathBuf; - -pub fn baseline_parse_single_token( - token: &Token, - source: &Text, -) -> Result { - Ok(match *token.item() { - RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.tag()), - RawToken::Size(int, unit) 
=> { - hir::Expression::size(int.to_number(source), unit, token.tag()) - } - RawToken::String(tag) => hir::Expression::string(tag, token.tag()), - RawToken::Variable(tag) if tag.slice(source) == "it" => { - hir::Expression::it_variable(tag, token.tag()) - } - RawToken::Variable(tag) => hir::Expression::variable(tag, token.tag()), - RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token.tag()), - RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.tag())), - RawToken::GlobPattern => hir::Expression::pattern(token.tag()), - RawToken::Bare => hir::Expression::bare(token.tag()), - }) -} - -pub fn baseline_parse_token_as_number( - token: &Token, - source: &Text, -) -> Result { - Ok(match *token.item() { - RawToken::Variable(tag) if tag.slice(source) == "it" => { - hir::Expression::it_variable(tag, token.tag()) - } - RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token.tag()), - RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.tag())), - RawToken::Variable(tag) => hir::Expression::variable(tag, token.tag()), - RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.tag()), - RawToken::Size(number, unit) => { - hir::Expression::size(number.to_number(source), unit, token.tag()) - } - RawToken::Bare => hir::Expression::bare(token.tag()), - RawToken::GlobPattern => { - return Err(ShellError::type_error( - "Number", - "glob pattern".to_string().tagged(token.tag()), - )) - } - RawToken::String(tag) => hir::Expression::string(tag, token.tag()), - }) -} - -pub fn baseline_parse_token_as_string( - token: &Token, - source: &Text, -) -> Result { - Ok(match *token.item() { - RawToken::Variable(tag) if tag.slice(source) == "it" => { - hir::Expression::it_variable(tag, token.tag()) - } - RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token.tag()), - RawToken::ExternalWord => return 
Err(ShellError::invalid_external_word(token.tag())), - RawToken::Variable(tag) => hir::Expression::variable(tag, token.tag()), - RawToken::Number(_) => hir::Expression::bare(token.tag()), - RawToken::Size(_, _) => hir::Expression::bare(token.tag()), - RawToken::Bare => hir::Expression::bare(token.tag()), - RawToken::GlobPattern => { - return Err(ShellError::type_error( - "String", - "glob pattern".tagged(token.tag()), - )) - } - RawToken::String(tag) => hir::Expression::string(tag, token.tag()), - }) -} - -pub fn baseline_parse_token_as_path( - token: &Token, - context: &Context, - source: &Text, -) -> Result { - Ok(match *token.item() { - RawToken::Variable(tag) if tag.slice(source) == "it" => { - hir::Expression::it_variable(tag, token.tag()) - } - RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token.tag()), - RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.tag())), - RawToken::Variable(tag) => hir::Expression::variable(tag, token.tag()), - RawToken::Number(_) => hir::Expression::bare(token.tag()), - RawToken::Size(_, _) => hir::Expression::bare(token.tag()), - RawToken::Bare => { - hir::Expression::file_path(expand_path(token.tag().slice(source), context), token.tag()) - } - RawToken::GlobPattern => { - return Err(ShellError::type_error( - "Path", - "glob pattern".tagged(token.tag()), - )) - } - RawToken::String(tag) => { - hir::Expression::file_path(expand_path(tag.slice(source), context), token.tag()) - } - }) -} - -pub fn baseline_parse_token_as_pattern( - token: &Token, - context: &Context, - source: &Text, -) -> Result { - Ok(match *token.item() { - RawToken::Variable(tag) if tag.slice(source) == "it" => { - hir::Expression::it_variable(tag, token.tag()) - } - RawToken::ExternalCommand(_) => { - return Err(ShellError::syntax_error( - "Invalid external command".to_string().tagged(token.tag()), - )) - } - RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.tag())), - 
RawToken::Variable(tag) => hir::Expression::variable(tag, token.tag()), - RawToken::Number(_) => hir::Expression::bare(token.tag()), - RawToken::Size(_, _) => hir::Expression::bare(token.tag()), - RawToken::GlobPattern => hir::Expression::pattern(token.tag()), - RawToken::Bare => { - hir::Expression::file_path(expand_path(token.tag().slice(source), context), token.tag()) - } - RawToken::String(tag) => { - hir::Expression::file_path(expand_path(tag.slice(source), context), token.tag()) - } - }) -} - -pub fn expand_path(string: &str, context: &Context) -> PathBuf { - let expanded = shellexpand::tilde_with_context(string, || context.shell_manager.homedir()); - - PathBuf::from(expanded.as_ref()) -} +#[cfg(test)] +mod tests; diff --git a/src/parser/hir/baseline_parse/tests.rs b/src/parser/hir/baseline_parse/tests.rs new file mode 100644 index 000000000..badb17751 --- /dev/null +++ b/src/parser/hir/baseline_parse/tests.rs @@ -0,0 +1,144 @@ +use crate::commands::classified::InternalCommand; +use crate::commands::ClassifiedCommand; +use crate::env::host::BasicHost; +use crate::parser::hir; +use crate::parser::hir::syntax_shape::*; +use crate::parser::hir::TokensIterator; +use crate::parser::parse::token_tree_builder::{CurriedToken, TokenTreeBuilder as b}; +use crate::parser::TokenNode; +use crate::{Span, Tag, Tagged, TaggedItem, Text}; +use pretty_assertions::assert_eq; +use std::fmt::Debug; +use uuid::Uuid; + +#[test] +fn test_parse_string() { + parse_tokens(StringShape, vec![b::string("hello")], |tokens| { + hir::Expression::string(inner_string_tag(tokens[0].tag()), tokens[0].tag()) + }); +} + +#[test] +fn test_parse_path() { + parse_tokens( + VariablePathShape, + vec![b::var("it"), b::op("."), b::bare("cpu")], + |tokens| { + let (outer_var, inner_var) = tokens[0].expect_var(); + let bare = tokens[2].expect_bare(); + hir::Expression::path( + hir::Expression::it_variable(inner_var, outer_var), + vec!["cpu".tagged(bare)], + outer_var.until(bare), + ) + }, + ); + + 
parse_tokens( + VariablePathShape, + vec![ + b::var("cpu"), + b::op("."), + b::bare("amount"), + b::op("."), + b::string("max ghz"), + ], + |tokens| { + let (outer_var, inner_var) = tokens[0].expect_var(); + let amount = tokens[2].expect_bare(); + let (outer_max_ghz, _) = tokens[4].expect_string(); + + hir::Expression::path( + hir::Expression::variable(inner_var, outer_var), + vec!["amount".tagged(amount), "max ghz".tagged(outer_max_ghz)], + outer_var.until(outer_max_ghz), + ) + }, + ); +} + +#[test] +fn test_parse_command() { + parse_tokens( + ClassifiedCommandShape, + vec![b::bare("ls"), b::sp(), b::pattern("*.txt")], + |tokens| { + let bare = tokens[0].expect_bare(); + let pat = tokens[2].tag(); + + ClassifiedCommand::Internal(InternalCommand::new( + "ls".to_string(), + bare, + hir::Call { + head: Box::new(hir::RawExpression::Command(bare).tagged(bare)), + positional: Some(vec![hir::Expression::pattern(pat)]), + named: None, + }, + )) + // hir::Expression::path( + // hir::Expression::variable(inner_var, outer_var), + // vec!["cpu".tagged(bare)], + // outer_var.until(bare), + // ) + }, + ); + + parse_tokens( + VariablePathShape, + vec![ + b::var("cpu"), + b::op("."), + b::bare("amount"), + b::op("."), + b::string("max ghz"), + ], + |tokens| { + let (outer_var, inner_var) = tokens[0].expect_var(); + let amount = tokens[2].expect_bare(); + let (outer_max_ghz, _) = tokens[4].expect_string(); + + hir::Expression::path( + hir::Expression::variable(inner_var, outer_var), + vec!["amount".tagged(amount), "max ghz".tagged(outer_max_ghz)], + outer_var.until(outer_max_ghz), + ) + }, + ); +} + +fn parse_tokens( + shape: impl ExpandSyntax, + tokens: Vec, + expected: impl FnOnce(Tagged<&[TokenNode]>) -> T, +) { + let tokens = b::token_list(tokens); + let (tokens, source) = b::build(test_origin(), tokens); + + ExpandContext::with_empty(&Text::from(source), |context| { + let tokens = tokens.expect_list(); + let mut iterator = TokensIterator::all(tokens.item, *context.tag()); + + 
let expr = expand_syntax(&shape, &mut iterator, &context); + + let expr = match expr { + Ok(expr) => expr, + Err(err) => { + crate::cli::print_err(err, &BasicHost, context.source().clone()); + panic!("Parse failed"); + } + }; + + assert_eq!(expr, expected(tokens)); + }) +} + +fn test_origin() -> Uuid { + Uuid::nil() +} + +fn inner_string_tag(tag: Tag) -> Tag { + Tag { + span: Span::new(tag.span.start() + 1, tag.span.end() - 1), + anchor: tag.anchor, + } +} diff --git a/src/parser/hir/baseline_parse_tokens.rs b/src/parser/hir/baseline_parse_tokens.rs deleted file mode 100644 index 8413bd07e..000000000 --- a/src/parser/hir/baseline_parse_tokens.rs +++ /dev/null @@ -1,459 +0,0 @@ -use crate::context::Context; -use crate::errors::ShellError; -use crate::parser::{ - hir, - hir::{ - baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_pattern, baseline_parse_token_as_string, - }, - DelimitedNode, Delimiter, PathNode, RawToken, TokenNode, -}; -use crate::{Tag, Tagged, TaggedItem, Text}; -use derive_new::new; -use log::trace; -use serde::{Deserialize, Serialize}; - -pub fn baseline_parse_tokens( - token_nodes: &mut TokensIterator<'_>, - context: &Context, - source: &Text, - syntax_type: SyntaxShape, -) -> Result, ShellError> { - let mut exprs: Vec = vec![]; - - loop { - if token_nodes.at_end() { - break; - } - - let expr = baseline_parse_next_expr(token_nodes, context, source, syntax_type)?; - exprs.push(expr); - } - - Ok(exprs) -} - -#[derive(Debug, Copy, Clone, Serialize, Deserialize)] -pub enum SyntaxShape { - Any, - List, - Literal, - String, - Member, - Variable, - Number, - Path, - Pattern, - Binary, - Block, - Boolean, -} - -impl std::fmt::Display for SyntaxShape { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - SyntaxShape::Any => write!(f, "Any"), - SyntaxShape::List => write!(f, "List"), - SyntaxShape::Literal => write!(f, "Literal"), - SyntaxShape::String => 
write!(f, "String"), - SyntaxShape::Member => write!(f, "Member"), - SyntaxShape::Variable => write!(f, "Variable"), - SyntaxShape::Number => write!(f, "Number"), - SyntaxShape::Path => write!(f, "Path"), - SyntaxShape::Pattern => write!(f, "Pattern"), - SyntaxShape::Binary => write!(f, "Binary"), - SyntaxShape::Block => write!(f, "Block"), - SyntaxShape::Boolean => write!(f, "Boolean"), - } - } -} - -pub fn baseline_parse_next_expr( - tokens: &mut TokensIterator, - context: &Context, - source: &Text, - syntax_type: SyntaxShape, -) -> Result { - let next = tokens - .next() - .ok_or_else(|| ShellError::string("Expected token, found none"))?; - - trace!(target: "nu::parser::parse_one_expr", "syntax_type={:?}, token={:?}", syntax_type, next); - - match (syntax_type, next) { - (SyntaxShape::Path, TokenNode::Token(token)) => { - return baseline_parse_token_as_path(token, context, source) - } - - (SyntaxShape::Path, token) => { - return Err(ShellError::type_error( - "Path", - token.type_name().tagged(token.tag()), - )) - } - - (SyntaxShape::Pattern, TokenNode::Token(token)) => { - return baseline_parse_token_as_pattern(token, context, source) - } - - (SyntaxShape::Pattern, token) => { - return Err(ShellError::type_error( - "Path", - token.type_name().tagged(token.tag()), - )) - } - - (SyntaxShape::String, TokenNode::Token(token)) => { - return baseline_parse_token_as_string(token, source); - } - - (SyntaxShape::String, token) => { - return Err(ShellError::type_error( - "String", - token.type_name().tagged(token.tag()), - )) - } - - (SyntaxShape::Number, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_number(token, source)?); - } - - (SyntaxShape::Number, token) => { - return Err(ShellError::type_error( - "Numeric", - token.type_name().tagged(token.tag()), - )) - } - - // TODO: More legit member processing - (SyntaxShape::Member, TokenNode::Token(token)) => { - return baseline_parse_token_as_string(token, source); - } - - (SyntaxShape::Member, token) => 
{ - return Err(ShellError::type_error( - "member", - token.type_name().tagged(token.tag()), - )) - } - - (SyntaxShape::Any, _) => {} - (SyntaxShape::List, _) => {} - (SyntaxShape::Literal, _) => {} - (SyntaxShape::Variable, _) => {} - (SyntaxShape::Binary, _) => {} - (SyntaxShape::Block, _) => {} - (SyntaxShape::Boolean, _) => {} - }; - - let first = baseline_parse_semantic_token(next, context, source)?; - - let possible_op = tokens.peek(); - - let op = match possible_op { - Some(TokenNode::Operator(op)) => op.clone(), - _ => return Ok(first), - }; - - tokens.next(); - - let second = match tokens.next() { - None => { - return Err(ShellError::labeled_error( - "Expected something after an operator", - "operator", - op.tag(), - )) - } - Some(token) => baseline_parse_semantic_token(token, context, source)?, - }; - - // We definitely have a binary expression here -- let's see if we should coerce it into a block - - match syntax_type { - SyntaxShape::Any => { - let tag = first.tag().until(second.tag()); - let binary = hir::Binary::new(first, op, second); - let binary = hir::RawExpression::Binary(Box::new(binary)); - let binary = binary.tagged(tag); - - Ok(binary) - } - - SyntaxShape::Block => { - let tag = first.tag().until(second.tag()); - - let path: Tagged = match first { - Tagged { - item: hir::RawExpression::Literal(hir::Literal::Bare), - tag, - } => { - let string = tag.slice(source).to_string().tagged(tag); - let path = hir::Path::new( - // TODO: Deal with synthetic nodes that have no representation at all in source - hir::RawExpression::Variable(hir::Variable::It(Tag::unknown())) - .tagged(Tag::unknown()), - vec![string], - ); - let path = hir::RawExpression::Path(Box::new(path)); - path.tagged(first.tag()) - } - Tagged { - item: hir::RawExpression::Literal(hir::Literal::String(inner)), - tag, - } => { - let string = inner.slice(source).to_string().tagged(tag); - let path = hir::Path::new( - // TODO: Deal with synthetic nodes that have no representation at all in 
source - hir::RawExpression::Variable(hir::Variable::It(Tag::unknown())) - .tagged_unknown(), - vec![string], - ); - let path = hir::RawExpression::Path(Box::new(path)); - path.tagged(first.tag()) - } - Tagged { - item: hir::RawExpression::Variable(..), - .. - } => first, - Tagged { tag, item } => { - return Err(ShellError::labeled_error( - "The first part of an un-braced block must be a column name", - item.type_name(), - tag, - )) - } - }; - - let binary = hir::Binary::new(path, op, second); - let binary = hir::RawExpression::Binary(Box::new(binary)); - let binary = binary.tagged(tag); - - let block = hir::RawExpression::Block(vec![binary]); - let block = block.tagged(tag); - - Ok(block) - } - - other => Err(ShellError::unimplemented(format!( - "coerce hint {:?}", - other - ))), - } -} - -pub fn baseline_parse_semantic_token( - token: &TokenNode, - context: &Context, - source: &Text, -) -> Result { - match token { - TokenNode::Token(token) => baseline_parse_single_token(token, source), - TokenNode::Call(_call) => unimplemented!(), - TokenNode::Delimited(delimited) => baseline_parse_delimited(delimited, context, source), - TokenNode::Pipeline(_pipeline) => unimplemented!(), - TokenNode::Operator(op) => Err(ShellError::syntax_error( - "Unexpected operator".tagged(op.tag), - )), - TokenNode::Flag(flag) => Err(ShellError::syntax_error("Unexpected flag".tagged(flag.tag))), - TokenNode::Member(tag) => Err(ShellError::syntax_error( - "BUG: Top-level member".tagged(*tag), - )), - TokenNode::Whitespace(tag) => Err(ShellError::syntax_error( - "BUG: Whitespace found during parse".tagged(*tag), - )), - TokenNode::Error(error) => Err(*error.item.clone()), - TokenNode::Path(path) => baseline_parse_path(path, context, source), - } -} - -pub fn baseline_parse_delimited( - token: &Tagged, - context: &Context, - source: &Text, -) -> Result { - match token.delimiter() { - Delimiter::Brace => { - let children = token.children(); - let exprs = baseline_parse_tokens( - &mut 
TokensIterator::new(children), - context, - source, - SyntaxShape::Any, - )?; - - let expr = hir::RawExpression::Block(exprs); - Ok(expr.tagged(token.tag())) - } - Delimiter::Paren => unimplemented!(), - Delimiter::Square => { - let children = token.children(); - let exprs = baseline_parse_tokens( - &mut TokensIterator::new(children), - context, - source, - SyntaxShape::Any, - )?; - - let expr = hir::RawExpression::List(exprs); - Ok(expr.tagged(token.tag())) - } - } -} - -pub fn baseline_parse_path( - token: &Tagged, - context: &Context, - source: &Text, -) -> Result { - let head = baseline_parse_semantic_token(token.head(), context, source)?; - - let mut tail = vec![]; - - for part in token.tail() { - let string = match part { - TokenNode::Token(token) => match token.item() { - RawToken::Bare => token.tag().slice(source), - RawToken::String(tag) => tag.slice(source), - RawToken::Number(_) - | RawToken::Size(..) - | RawToken::Variable(_) - | RawToken::ExternalCommand(_) - | RawToken::GlobPattern - | RawToken::ExternalWord => { - return Err(ShellError::type_error( - "String", - token.type_name().tagged(part.tag()), - )) - } - }, - - TokenNode::Member(tag) => tag.slice(source), - - // TODO: Make this impossible - other => { - return Err(ShellError::syntax_error( - format!("{} in path", other.type_name()).tagged(other.tag()), - )) - } - } - .to_string(); - - tail.push(string.tagged(part.tag())); - } - - Ok(hir::path(head, tail).tagged(token.tag()).into()) -} - -#[derive(Debug, new)] -pub struct TokensIterator<'a> { - tokens: &'a [TokenNode], - #[new(default)] - index: usize, - #[new(default)] - seen: indexmap::IndexSet, -} - -impl TokensIterator<'_> { - pub fn remove(&mut self, position: usize) { - self.seen.insert(position); - } - - pub fn len(&self) -> usize { - self.tokens.len() - } - - pub fn at_end(&self) -> bool { - for index in self.index..self.tokens.len() { - if !self.seen.contains(&index) { - return false; - } - } - - true - } - - pub fn advance(&mut self) { 
- self.seen.insert(self.index); - self.index += 1; - } - - pub fn extract(&mut self, f: impl Fn(&TokenNode) -> Option) -> Option<(usize, T)> { - for (i, item) in self.tokens.iter().enumerate() { - if self.seen.contains(&i) { - continue; - } - - match f(item) { - None => { - continue; - } - Some(value) => { - self.seen.insert(i); - return Some((i, value)); - } - } - } - - None - } - - pub fn move_to(&mut self, pos: usize) { - self.index = pos; - } - - pub fn restart(&mut self) { - self.index = 0; - } - - pub fn clone(&self) -> TokensIterator { - TokensIterator { - tokens: self.tokens, - index: self.index, - seen: self.seen.clone(), - } - } - - pub fn peek(&self) -> Option<&TokenNode> { - let mut tokens = self.clone(); - - tokens.next() - } - - pub fn debug_remaining(&self) -> Vec { - let mut tokens = self.clone(); - tokens.restart(); - tokens.cloned().collect() - } -} - -impl<'a> Iterator for TokensIterator<'a> { - type Item = &'a TokenNode; - - fn next(&mut self) -> Option<&'a TokenNode> { - loop { - if self.index >= self.tokens.len() { - return None; - } - - if self.seen.contains(&self.index) { - self.advance(); - continue; - } - - if self.index >= self.tokens.len() { - return None; - } - - match &self.tokens[self.index] { - TokenNode::Whitespace(_) => { - self.advance(); - } - other => { - self.advance(); - return Some(other); - } - } - } - } -} diff --git a/src/parser/hir/binary.rs b/src/parser/hir/binary.rs index 02a4d416e..a44c41d63 100644 --- a/src/parser/hir/binary.rs +++ b/src/parser/hir/binary.rs @@ -16,6 +16,12 @@ pub struct Binary { right: Expression, } +impl fmt::Display for Binary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "({} {} {})", self.op.as_str(), self.left, self.right) + } +} + impl ToDebug for Binary { fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { write!(f, "{}", self.left.debug(source))?; diff --git a/src/parser/hir/expand_external_tokens.rs 
b/src/parser/hir/expand_external_tokens.rs
new file mode 100644
index 000000000..30a2a90aa
--- /dev/null
+++ b/src/parser/hir/expand_external_tokens.rs
@@ -0,0 +1,87 @@
+use crate::errors::ShellError;
+use crate::parser::{TokenNode, TokensIterator};
+use crate::{Tag, Tagged, Text};
+
+/// Expands the remaining tokens of an external command into its arguments.
+///
+/// Each argument is the source text covered by one tag returned from
+/// `expand_next_expression`, kept together with that tag. Expansion stops when
+/// `expand_next_expression` reports that nothing is left.
+///
+/// # Errors
+///
+/// Propagates any error produced while triaging a token.
+pub fn expand_external_tokens(
+    token_nodes: &mut TokensIterator<'_>,
+    source: &Text,
+) -> Result<Vec<Tagged<String>>, ShellError> {
+    let mut out: Vec<Tagged<String>> = vec![];
+
+    while let Some(tag) = expand_next_expression(token_nodes)? {
+        out.push(tag.tagged_string(source));
+    }
+
+    Ok(out)
+}
+
+/// Consumes one external argument and returns the tag spanning it.
+///
+/// The argument starts at the token returned by `next_non_ws()` and is widened over
+/// every following token accepted by `triage_continuation`. Returns `Ok(None)` when
+/// `next_non_ws()` yields no token.
+pub fn expand_next_expression(
+    token_nodes: &mut TokensIterator<'_>,
+) -> Result<Option<Tag>, ShellError> {
+    let first = match token_nodes.next_non_ws() {
+        None => return Ok(None),
+        Some(node) => node,
+    };
+
+    let first = triage_external_head(first)?;
+    let mut last = first;
+
+    // Keep extending the argument until a continuation is refused.
+    while let Some(continuation) = triage_continuation(token_nodes)? {
+        last = continuation;
+    }
+
+    Ok(Some(first.until(last)))
+}
+
+/// Returns the tag of the first token of an external argument.
+///
+/// # Panics
+///
+/// Calls, node lists, delimited groups, pipelines and error nodes are not handled
+/// yet and panic via `unimplemented!()`. A whitespace node panics via
+/// `unreachable!`, because callers are expected to have skipped whitespace.
+fn triage_external_head(node: &TokenNode) -> Result<Tag, ShellError> {
+    Ok(match node {
+        TokenNode::Token(token) => token.tag(),
+        TokenNode::Call(_call) => unimplemented!(),
+        TokenNode::Nodes(_nodes) => unimplemented!(),
+        TokenNode::Delimited(_delimited) => unimplemented!(),
+        TokenNode::Pipeline(_pipeline) => unimplemented!(),
+        TokenNode::Flag(flag) => flag.tag(),
+        TokenNode::Member(member) => *member,
+        TokenNode::Whitespace(_whitespace) => {
+            unreachable!("This function should be called after next_non_ws()")
+        }
+        TokenNode::Error(_error) => unimplemented!(),
+    })
+}
+
+/// Peeks at the next token and, if it continues the current external argument,
+/// commits it and returns its tag.
+///
+/// Returns `Ok(None)` without consuming anything when there is no next token or
+/// when the next token is whitespace. Tokens, flags and members are accepted.
+///
+/// # Panics
+///
+/// Every other node kind panics via `unimplemented!`.
+fn triage_continuation<'a, 'b>(
+    nodes: &'a mut TokensIterator<'b>,
+) -> Result<Option<Tag>, ShellError> {
+    let mut peeked = nodes.peek_any();
+
+    let node = match peeked.node {
+        None => return Ok(None),
+        Some(node) => node,
+    };
+
+    match &node {
+        node if node.is_whitespace() => return Ok(None),
+        TokenNode::Token(..) | TokenNode::Flag(..) | TokenNode::Member(..) => {}
+        TokenNode::Call(..) => unimplemented!("call"),
+        TokenNode::Nodes(..) => unimplemented!("nodes"),
+        TokenNode::Delimited(..) => unimplemented!("delimited"),
+        TokenNode::Pipeline(..) => unimplemented!("pipeline"),
+        TokenNode::Whitespace(..) => unimplemented!("whitespace"),
+        TokenNode::Error(..) => unimplemented!("error"),
+    }
+
+    // Only an accepted token reaches this point; consume it.
+    peeked.commit();
+    Ok(Some(node.tag()))
+}
diff --git a/src/parser/hir/external_command.rs b/src/parser/hir/external_command.rs
index 28865330d..2dd42c131 100644
--- a/src/parser/hir/external_command.rs
+++ b/src/parser/hir/external_command.rs
@@ -9,7 +9,7 @@ use std::fmt;
 )]
 #[get = "pub(crate)"]
 pub struct ExternalCommand {
-    name: Tag,
+    pub(crate) name: Tag,
 }
 
 impl ToDebug for ExternalCommand {
diff --git a/src/parser/hir/path.rs b/src/parser/hir/path.rs
index f43edf176..a1925102f 100644
--- a/src/parser/hir/path.rs
+++ b/src/parser/hir/path.rs
@@ -2,19 +2,49 @@ use crate::parser::hir::Expression;
 use crate::prelude::*;
 use crate::Tagged;
 use derive_new::new;
-use getset::Getters;
+use getset::{Getters, MutGetters};
 use serde::{Deserialize, Serialize};
 use std::fmt;
 
 #[derive(
-    Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Getters, Serialize, Deserialize, new,
+    Debug,
+    Clone,
+    Eq,
+    PartialEq,
+    Ord,
+    PartialOrd,
+    Hash,
+    Getters,
+    MutGetters,
+    Serialize,
+    Deserialize,
+    new,
 )]
 #[get = "pub(crate)"]
 pub struct Path {
     head: Expression,
+    #[get_mut = "pub(crate)"]
     tail: Vec<Tagged<String>>,
 }
 
+impl fmt::Display for Path {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.head)?;
+
+        for entry in &self.tail {
+            write!(f, ".{}", entry.item)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Path {
+    pub(crate) fn parts(self) -> (Expression, Vec<Tagged<String>>) {
+        (self.head, self.tail)
+    }
+}
+
 impl ToDebug for Path {
     fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result {
         write!(f, "{}", self.head.debug(source))?;
diff --git a/src/parser/hir/syntax_shape.rs
b/src/parser/hir/syntax_shape.rs
new file mode 100644
index 000000000..5dcbd0fb7
--- /dev/null
+++ b/src/parser/hir/syntax_shape.rs
@@ -0,0 +1,662 @@
+mod block;
+mod expression;
+
+use crate::cli::external_command;
+use crate::commands::{classified::InternalCommand, ClassifiedCommand, Command};
+use crate::parser::hir::syntax_shape::block::AnyBlockShape;
+use crate::parser::hir::tokens_iterator::Peeked;
+use crate::parser::parse_command::parse_command_tail;
+use crate::parser::{
+    hir,
+    hir::{debug_tokens, TokensIterator},
+    Operator, RawToken, TokenNode,
+};
+use crate::prelude::*;
+use derive_new::new;
+use getset::Getters;
+use log::trace;
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+
+pub(crate) use self::expression::file_path::FilePathShape;
+pub(crate) use self::expression::list::ExpressionListShape;
+pub(crate) use self::expression::number::{IntShape, NumberShape};
+pub(crate) use self::expression::pattern::PatternShape;
+pub(crate) use self::expression::string::StringShape;
+pub(crate) use self::expression::unit::UnitShape;
+pub(crate) use self::expression::variable_path::{
+    ColumnPathShape, DotShape, ExpressionContinuation, ExpressionContinuationShape, MemberShape,
+    PathTailShape, VariablePathShape,
+};
+pub(crate) use self::expression::{continue_expression, AnyExpressionShape};
+
+/// The syntactic shape a command declares for one of its arguments.
+///
+/// Expanding a `SyntaxShape` dispatches to the concrete shape type that knows how
+/// to turn tokens into an expression of that shape.
+#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
+pub enum SyntaxShape {
+    Any,
+    List,
+    String,
+    Member,
+    ColumnPath,
+    Number,
+    Int,
+    Path,
+    Pattern,
+    Binary,
+    Block,
+    Boolean,
+}
+
+impl ExpandExpression for SyntaxShape {
+    /// Expands the next tokens according to this shape.
+    ///
+    /// `List`, `Binary` and `Boolean` are not supported yet and return an
+    /// "unimplemented" error.
+    fn expand_expr<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<hir::Expression, ShellError> {
+        match self {
+            SyntaxShape::Any => expand_expr(&AnyExpressionShape, token_nodes, context),
+            SyntaxShape::List => Err(ShellError::unimplemented("SyntaxShape:List")),
+            SyntaxShape::Int => expand_expr(&IntShape, token_nodes, context),
+            SyntaxShape::String => expand_expr(&StringShape, token_nodes, context),
+            SyntaxShape::Member => {
+                let syntax = expand_syntax(&MemberShape, token_nodes, context)?;
+                Ok(syntax.to_expr())
+            }
+            SyntaxShape::ColumnPath => {
+                let Tagged { item: members, tag } =
+                    expand_syntax(&ColumnPathShape, token_nodes, context)?;
+
+                // A column path is represented as a list of its member expressions.
+                Ok(hir::Expression::list(
+                    members.into_iter().map(|s| s.to_expr()).collect(),
+                    tag,
+                ))
+            }
+            SyntaxShape::Number => expand_expr(&NumberShape, token_nodes, context),
+            SyntaxShape::Path => expand_expr(&FilePathShape, token_nodes, context),
+            SyntaxShape::Pattern => expand_expr(&PatternShape, token_nodes, context),
+            SyntaxShape::Binary => Err(ShellError::unimplemented("SyntaxShape:Binary")),
+            SyntaxShape::Block => expand_expr(&AnyBlockShape, token_nodes, context),
+            SyntaxShape::Boolean => Err(ShellError::unimplemented("SyntaxShape:Boolean")),
+        }
+    }
+}
+
+impl std::fmt::Display for SyntaxShape {
+    /// Writes the shape's display name; note that `Int` is shown as "Integer".
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            SyntaxShape::Any => write!(f, "Any"),
+            SyntaxShape::List => write!(f, "List"),
+            SyntaxShape::String => write!(f, "String"),
+            SyntaxShape::Int => write!(f, "Integer"),
+            SyntaxShape::Member => write!(f, "Member"),
+            SyntaxShape::ColumnPath => write!(f, "ColumnPath"),
+            SyntaxShape::Number => write!(f, "Number"),
+            SyntaxShape::Path => write!(f, "Path"),
+            SyntaxShape::Pattern => write!(f, "Pattern"),
+            SyntaxShape::Binary => write!(f, "Binary"),
+            SyntaxShape::Block => write!(f, "Block"),
+            SyntaxShape::Boolean => write!(f, "Boolean"),
+        }
+    }
+}
+
+/// Everything a shape needs besides the tokens themselves: the command registry,
+/// a tag, the source text and an optional home directory.
+#[derive(Getters, new)]
+pub struct ExpandContext<'context> {
+    #[get = "pub(crate)"]
+    registry: &'context CommandRegistry,
+    #[get = "pub(crate)"]
+    tag: Tag,
+    #[get = "pub(crate)"]
+    source: &'context Text,
+    homedir: Option<PathBuf>,
+}
+
+impl<'context> ExpandContext<'context> {
+    /// Returns the home directory as a borrowed path, if one was supplied.
+    pub(crate) fn homedir(&self) -> Option<&Path> {
+        self.homedir.as_ref().map(|h| h.as_path())
+    }
+
+    /// Test helper: runs `callback` with a context whose registry contains only
+    /// `ls`, whose tag is unknown and which has no home directory.
+    #[cfg(test)]
+    pub fn with_empty(source: &Text, callback: impl FnOnce(ExpandContext)) {
+        let mut registry = CommandRegistry::new();
+        registry.insert(
+            "ls",
+            crate::commands::whole_stream_command(crate::commands::LS),
+        );
+
+        callback(ExpandContext {
+            registry: &registry,
+            tag: Tag::unknown(),
+            source,
+            homedir: None,
+        })
+    }
+}
+
+/// A shape that can check whether the upcoming token matches it, handing back the
+/// peeked token on success so the caller can decide whether to commit it.
+pub trait TestSyntax: std::fmt::Debug + Copy {
+    // NOTE(review): the lifetime arguments of `Peeked` were lost in extraction;
+    // `<'a, 'b>` is assumed here -- confirm against tokens_iterator.rs.
+    fn test<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Option<Peeked<'a, 'b>>;
+}
+
+/// A shape whose expansion is always an `hir::Expression`.
+pub trait ExpandExpression: std::fmt::Debug + Copy {
+    fn expand_expr<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<hir::Expression, ShellError>;
+}
+
+/// A shape that expands to an arbitrary, shape-specific `Output`.
+pub(crate) trait ExpandSyntax: std::fmt::Debug + Copy {
+    type Output: std::fmt::Debug;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<Self::Output, ShellError>;
+}
+
+/// Runs `shape.expand_syntax`, tracing the token state before the call and the
+/// outcome (error or result) afterwards under the `nu::expand_syntax` target.
+pub(crate) fn expand_syntax<'a, 'b, T: ExpandSyntax>(
+    shape: &T,
+    token_nodes: &'b mut TokensIterator<'a>,
+    context: &ExpandContext,
+) -> Result<T::Output, ShellError> {
+    trace!(target: "nu::expand_syntax", "before {} :: {:?}", std::any::type_name::<T>(), debug_tokens(token_nodes, context.source));
+
+    let result = shape.expand_syntax(token_nodes, context);
+
+    match result {
+        Err(err) => {
+            trace!(target: "nu::expand_syntax", "error :: {} :: {:?}", err, debug_tokens(token_nodes, context.source));
+            Err(err)
+        }
+
+        Ok(result) => {
+            trace!(target: "nu::expand_syntax", "ok :: {:?} :: {:?}", result, debug_tokens(token_nodes, context.source));
+            Ok(result)
+        }
+    }
+}
+
+/// Runs `shape.expand_expr` with the same tracing as `expand_syntax`.
+pub(crate) fn expand_expr<'a, 'b, T: ExpandExpression>(
+    shape: &T,
+    token_nodes: &'b mut TokensIterator<'a>,
+    context: &ExpandContext,
+) -> Result<hir::Expression, ShellError> {
+    trace!(target: "nu::expand_syntax", "before {} :: {:?}", std::any::type_name::<T>(), debug_tokens(token_nodes, context.source));
+
+    // Call the expression trait directly rather than detouring through the
+    // blanket `ExpandSyntax` impl below, which forwards to the same method.
+    let result = shape.expand_expr(token_nodes, context);
+
+    match result {
+        Err(err) => {
+            trace!(target: "nu::expand_syntax", "error :: {} :: {:?}", err, debug_tokens(token_nodes, context.source));
+            Err(err)
+        }
+
+        Ok(result) => {
+            trace!(target: "nu::expand_syntax", "ok :: {:?} :: {:?}", result, debug_tokens(token_nodes, context.source));
+            Ok(result)
+        }
+    }
+}
+
+/// Every expression shape is also a syntax shape whose output is the expression.
+impl<T: ExpandExpression> ExpandSyntax for T {
+    type Output = hir::Expression;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<Self::Output, ShellError> {
+        ExpandExpression::expand_expr(self, token_nodes, context)
+    }
+}
+
+/// A shape that consumes tokens without producing a value.
+pub trait SkipSyntax: std::fmt::Debug + Copy {
+    fn skip<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<(), ShellError>;
+}
+
+/// Accumulator used by `expand_bare`: nothing seen yet, a run of tokens from a
+/// start tag to an end tag, or a failure.
+enum BarePathState {
+    Initial,
+    Seen(Tag, Tag),
+    Error(ShellError),
+}
+
+impl BarePathState {
+    /// Records another accepted token, extending the end of the run.
+    pub fn seen(self, tag: Tag) -> BarePathState {
+        match self {
+            BarePathState::Initial => BarePathState::Seen(tag, tag),
+            BarePathState::Seen(start, _) => BarePathState::Seen(start, tag),
+            BarePathState::Error(err) => BarePathState::Error(err),
+        }
+    }
+
+    /// Finishes the run. Ending before any token was accepted turns into a type
+    /// error for `reason` at the peeked position.
+    pub fn end(self, peeked: Peeked, reason: impl Into<String>) -> BarePathState {
+        match self {
+            BarePathState::Initial => BarePathState::Error(peeked.type_error(reason)),
+            BarePathState::Seen(start, end) => BarePathState::Seen(start, end),
+            BarePathState::Error(err) => BarePathState::Error(err),
+        }
+    }
+
+    /// Converts a finished state into the tag spanning the whole run.
+    ///
+    /// Must only be called after `end`, which never leaves the state `Initial`.
+    pub fn into_bare(self) -> Result<Tag, ShellError> {
+        match self {
+            BarePathState::Initial => unreachable!("into_bare in initial state"),
+            BarePathState::Seen(start, end) => Ok(start.until(end)),
+            BarePathState::Error(err) => Err(err),
+        }
+    }
+}
+
+/// Consumes consecutive tokens for as long as `predicate` accepts them and returns
+/// the tag covering the run.
+///
+/// # Errors
+///
+/// Returns a type error ("word") if not even the first token is accepted.
+pub fn expand_bare<'a, 'b>(
+    token_nodes: &'b mut TokensIterator<'a>,
+    _context: &ExpandContext,
+    predicate: impl Fn(&TokenNode) -> bool,
+) -> Result<Tag, ShellError> {
+    let mut state = BarePathState::Initial;
+
+    loop {
+        // Whitespace ends a word
+        let mut peeked = token_nodes.peek_any();
+
+        match peeked.node {
+            None => {
+                state = state.end(peeked, "word");
+                break;
+            }
+            Some(node) => {
+                if predicate(node) {
+                    state = state.seen(node.tag());
+                    peeked.commit();
+                } else {
+                    state = state.end(peeked, "word");
+                    break;
+                }
+            }
+        }
+    }
+
+    state.into_bare()
+}
+
+/// A run of bare tokens and `.` operators, returned as a single tag.
+#[derive(Debug, Copy, Clone)]
+pub struct BarePathShape;
+
+impl ExpandSyntax for BarePathShape {
+    type Output = Tag;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<Tag, ShellError> {
+        expand_bare(token_nodes, context, |token| match token {
+            TokenNode::Token(Tagged {
+                item: RawToken::Bare,
+                ..
+            })
+            | TokenNode::Token(Tagged {
+                item: RawToken::Operator(Operator::Dot),
+                ..
+            }) => true,
+
+            _ => false,
+        })
+    }
+}
+
+/// A single bare token, returned as its source text tagged with its position.
+#[derive(Debug, Copy, Clone)]
+pub struct BareShape;
+
+impl ExpandSyntax for BareShape {
+    type Output = Tagged<String>;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<Tagged<String>, ShellError> {
+        let peeked = token_nodes.peek_any().not_eof("word")?;
+
+        match peeked.node {
+            TokenNode::Token(Tagged {
+                item: RawToken::Bare,
+                tag,
+            }) => {
+                peeked.commit();
+                Ok(tag.tagged_string(context.source))
+            }
+
+            other => Err(ShellError::type_error("word", other.tagged_type_name())),
+        }
+    }
+}
+
+impl TestSyntax for BareShape {
+    /// Returns the peeked token if it is a bare token, without committing it.
+    fn test<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        _context: &ExpandContext,
+    ) -> Option<Peeked<'a, 'b>> {
+        let peeked = token_nodes.peek_any();
+
+        match peeked.node {
+            Some(TokenNode::Token(token)) => match token.item {
+                RawToken::Bare => Some(peeked),
+                _ => None,
+            },
+
+            _ => None,
+        }
+    }
+}
+
+/// What the head of a command turned out to be.
+#[derive(Debug)]
+pub enum CommandSignature {
+    // NOTE(review): the type argument of `Tagged` was lost in extraction;
+    // `Arc<Command>` is assumed from `registry.expect_command` -- confirm.
+    Internal(Tagged<Arc<Command>>),
+    LiteralExternal { outer: Tag, inner: Tag },
+    External(Tag),
+    Expression(hir::Expression),
+}
+
+impl CommandSignature {
+    /// Builds the expression used as the head of a call for this signature.
+    pub fn to_expression(&self) -> hir::Expression {
+        match self {
+            CommandSignature::Internal(command) => {
+                let tag = command.tag;
+                hir::RawExpression::Command(tag).tagged(tag)
+            }
+            CommandSignature::LiteralExternal { outer, inner } => {
+                hir::RawExpression::ExternalCommand(hir::ExternalCommand::new(*inner)).tagged(outer)
+            }
+            CommandSignature::External(tag) => {
+                hir::RawExpression::ExternalCommand(hir::ExternalCommand::new(*tag)).tagged(tag)
+            }
+            CommandSignature::Expression(expr) => expr.clone(),
+        }
+    }
+}
+
+/// Classifies the first token of a command.
+///
+/// An external-command token becomes `LiteralExternal`; a bare word becomes
+/// `Internal` when the registry knows it and `External` otherwise. If the head is
+/// neither, any expression is accepted as a fallback.
+#[derive(Debug, Copy, Clone)]
+pub struct CommandHeadShape;
+
+impl ExpandSyntax for CommandHeadShape {
+    type Output = CommandSignature;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &mut TokensIterator<'_>,
+        context: &ExpandContext,
+    ) -> Result<CommandSignature, ShellError> {
+        // The "command headN" labels carry a distinct numeric suffix per failure
+        // site; they are kept verbatim because they are part of the error output.
+        let node =
+            parse_single_node_skipping_ws(token_nodes, "command head1", |token, token_tag| {
+                Ok(match token {
+                    RawToken::ExternalCommand(tag) => CommandSignature::LiteralExternal {
+                        outer: token_tag,
+                        inner: tag,
+                    },
+                    RawToken::Bare => {
+                        let name = token_tag.slice(context.source);
+                        if context.registry.has(name) {
+                            let command = context.registry.expect_command(name);
+                            CommandSignature::Internal(command.tagged(token_tag))
+                        } else {
+                            CommandSignature::External(token_tag)
+                        }
+                    }
+                    _ => {
+                        return Err(ShellError::type_error(
+                            "command head2",
+                            token.type_name().tagged(token_tag),
+                        ))
+                    }
+                })
+            });
+
+        if let Ok(signature) = node {
+            return Ok(signature);
+        }
+
+        // Not a recognisable command token: fall back to a general expression.
+        match expand_expr(&AnyExpressionShape, token_nodes, context) {
+            Ok(expr) => Ok(CommandSignature::Expression(expr)),
+            Err(_) => Err(token_nodes.peek_non_ws().type_error("command head3")),
+        }
+    }
+}
+
+/// Expands a whole command (head plus arguments) into a `ClassifiedCommand`.
+#[derive(Debug, Copy, Clone)]
+pub struct ClassifiedCommandShape;
+
+impl ExpandSyntax for ClassifiedCommandShape {
+    type Output = ClassifiedCommand;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        iterator: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<ClassifiedCommand, ShellError> {
+        let head = expand_syntax(&CommandHeadShape, iterator, context)?;
+
+        match &head {
+            CommandSignature::Expression(expr) => Err(ShellError::syntax_error(
+                "Unexpected expression in command position".tagged(expr.tag),
+            )),
+
+            // A bare word that is not a registered internal command
+            CommandSignature::External(name) => {
+                let name_str = name.slice(&context.source);
+
+                external_command(iterator, &context.source, name_str.tagged(name))
+            }
+
+            // If the command starts with `^`, treat it as an external command no matter what
+            CommandSignature::LiteralExternal { outer, inner } => {
+                let name_str = inner.slice(&context.source);
+
+                external_command(iterator, &context.source, name_str.tagged(outer))
+            }
+
+            CommandSignature::Internal(command) => {
+                let tail =
+                    parse_command_tail(&command.signature(), &context, iterator, command.tag)?;
+
+                // No tail at all means neither positional nor named arguments.
+                let (positional, named) = tail.unwrap_or((None, None));
+
+                let call = hir::Call {
+                    head: Box::new(head.to_expression()),
+                    positional,
+                    named,
+                };
+
+                Ok(ClassifiedCommand::Internal(InternalCommand::new(
+                    command.item.name().to_string(),
+                    command.tag,
+                    call,
+                )))
+            }
+        }
+    }
+}
+
+/// The head of an internal command: a bare word or a string literal.
+#[derive(Debug, Copy, Clone)]
+pub struct InternalCommandHeadShape;
+
+impl ExpandExpression for InternalCommandHeadShape {
+    fn expand_expr(
+        &self,
+        token_nodes: &mut TokensIterator<'_>,
+        _context: &ExpandContext,
+    ) -> Result<hir::Expression, ShellError> {
+        let peeked_head = token_nodes.peek_non_ws().not_eof("command head4")?;
+
+        let expr = match peeked_head.node {
+            TokenNode::Token(
+                spanned @ Tagged {
+                    item: RawToken::Bare,
+                    ..
+                },
+            ) => spanned.map(|_| hir::RawExpression::Literal(hir::Literal::Bare)),
+
+            TokenNode::Token(Tagged {
+                item: RawToken::String(inner_tag),
+                tag,
+            }) => hir::RawExpression::Literal(hir::Literal::String(*inner_tag)).tagged(*tag),
+
+            node => {
+                return Err(ShellError::type_error(
+                    "command head5",
+                    node.tagged_type_name(),
+                ))
+            }
+        };
+
+        // Only consume the token once it is known to be a valid head.
+        peeked_head.commit();
+
+        Ok(expr)
+    }
+}
+
+/// Peeks at the very next node (whitespace included), requires it to be a token
+/// and hands its raw token and tag to `callback`. The node is committed only if
+/// the callback succeeds.
+fn parse_single_node<'a, 'b, T>(
+    token_nodes: &'b mut TokensIterator<'a>,
+    expected: &'static str,
+    callback: impl FnOnce(RawToken, Tag) -> Result<T, ShellError>,
+) -> Result<T, ShellError> {
+    let peeked = token_nodes.peek_any().not_eof(expected)?;
+
+    let expr = match peeked.node {
+        TokenNode::Token(token) => callback(token.item, token.tag())?,
+
+        other => return Err(ShellError::type_error(expected, other.tagged_type_name())),
+    };
+
+    peeked.commit();
+
+    Ok(expr)
+}
+
+/// Same as `parse_single_node`, but peeks with `peek_non_ws` instead of `peek_any`.
+fn parse_single_node_skipping_ws<'a, 'b, T>(
+    token_nodes: &'b mut TokensIterator<'a>,
+    expected: &'static str,
+    callback: impl FnOnce(RawToken, Tag) -> Result<T, ShellError>,
+) -> Result<T, ShellError> {
+    let peeked = token_nodes.peek_non_ws().not_eof(expected)?;
+
+    let expr = match peeked.node {
+        TokenNode::Token(token) => callback(token.item, token.tag())?,
+
+        other => return Err(ShellError::type_error(expected, other.tagged_type_name())),
+    };
+
+    peeked.commit();
+
+    Ok(expr)
+}
+
+/// A single whitespace node, returned as its tag.
+#[derive(Debug, Copy, Clone)]
+pub struct WhitespaceShape;
+
+impl ExpandSyntax for WhitespaceShape {
+    type Output = Tag;
+
+    fn expand_syntax<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        _context: &ExpandContext,
+    ) -> Result<Tag, ShellError> {
+        let peeked = token_nodes.peek_any().not_eof("whitespace")?;
+
+        let tag = match peeked.node {
+            TokenNode::Whitespace(tag) => *tag,
+
+            other => {
+                return Err(ShellError::type_error(
+                    "whitespace",
+                    other.tagged_type_name(),
+                ))
+            }
+        };
+
+        peeked.commit();
+
+        Ok(tag)
+    }
+}
+
+/// An expression that must be preceded by whitespace.
+#[derive(Debug, Copy, Clone)]
+pub struct SpacedExpression<T: ExpandExpression> {
+    inner: T,
+}
+
+impl<T: ExpandExpression> ExpandExpression for SpacedExpression<T> {
+    fn expand_expr<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<hir::Expression, ShellError> {
+        // TODO: Make the name part of the trait
+        let peeked = token_nodes.peek_any().not_eof("whitespace")?;
+
+        match peeked.node {
+            TokenNode::Whitespace(_) => {
+                peeked.commit();
+                expand_expr(&self.inner, token_nodes, context)
+            }
+
+            other => Err(ShellError::type_error(
+                "whitespace",
+                other.tagged_type_name(),
+            )),
+        }
+    }
+}
+
+/// Wraps `inner` so that one leading whitespace node, if present, is skipped.
+pub fn maybe_spaced<T: ExpandExpression>(inner: T) -> MaybeSpacedExpression<T> {
+    MaybeSpacedExpression { inner }
+}
+
+/// An expression that may be preceded by whitespace.
+#[derive(Debug, Copy, Clone)]
+pub struct MaybeSpacedExpression<T: ExpandExpression> {
+    inner: T,
+}
+
+impl<T: ExpandExpression> ExpandExpression for MaybeSpacedExpression<T> {
+    fn expand_expr<'a, 'b>(
+        &self,
+        token_nodes: &'b mut TokensIterator<'a>,
+        context: &ExpandContext,
+    ) -> Result<hir::Expression, ShellError> {
+        // TODO: Make the name part of the trait
+        let peeked = token_nodes.peek_any().not_eof("whitespace")?;
+
+        match peeked.node {
+            TokenNode::Whitespace(_) => {
+                peeked.commit();
+                expand_expr(&self.inner, token_nodes, context)
+            }
+
+            _ => {
+                // Not whitespace: leave the token for the inner shape.
+                peeked.rollback();
+                expand_expr(&self.inner, token_nodes, context)
+            }
+        }
+    }
+}
+
+/// Wraps `inner` so that it must be preceded by a whitespace node.
+pub fn spaced<T: ExpandExpression>(inner: T) -> SpacedExpression<T> {
+    SpacedExpression { inner }
+}
+
+/// Builds the expression for a variable token; the name `it` gets the dedicated
+/// `it` variable expression, every other name a regular variable.
+fn expand_variable(tag: Tag, token_tag: Tag, source: &Text) -> hir::Expression {
+    if tag.slice(source) == "it" {
+        hir::Expression::it_variable(tag, token_tag)
+    } else {
+        hir::Expression::variable(tag, token_tag)
+    }
+}
diff --git a/src/parser/hir/syntax_shape/block.rs b/src/parser/hir/syntax_shape/block.rs
new file mode 100644
index 000000000..a78292b34
--- /dev/null
+++ b/src/parser/hir/syntax_shape/block.rs
@@ -0,0 +1,168 @@
+use crate::errors::ShellError;
+use crate::parser::{
+    hir,
+    hir::syntax_shape::{
+        continue_expression, expand_expr, expand_syntax, ExpandContext, ExpandExpression,
+        ExpressionListShape, PathTailShape, VariablePathShape,
+    },
+    hir::tokens_iterator::TokensIterator,
+    RawToken, TokenNode,
+};
+use crate::{Tag, Tagged, TaggedItem};
+
+#[derive(Debug, Copy, Clone)] +pub struct AnyBlockShape; + +impl ExpandExpression for AnyBlockShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + let block = token_nodes.peek_non_ws().not_eof("block")?; + + // Is the next non-whitespace token just a block? + let block = block.node.as_block(); + + match block { + Some(block) => { + let mut iterator = TokensIterator::new(&block.item, context.tag, false); + + let exprs = expand_syntax(&ExpressionListShape, &mut iterator, context)?; + + return Ok(hir::RawExpression::Block(exprs).tagged(block.tag)); + } + _ => {} + } + + expand_syntax(&ShorthandBlock, token_nodes, context) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct ShorthandBlock; + +impl ExpandExpression for ShorthandBlock { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + let path = expand_expr(&ShorthandPath, token_nodes, context)?; + let start = path.tag; + let expr = continue_expression(path, token_nodes, context)?; + let end = expr.tag; + let block = hir::RawExpression::Block(vec![expr]).tagged(start.until(end)); + + Ok(block) + } +} + +/// A shorthand for `$it.foo."bar"`, used inside of a shorthand block +#[derive(Debug, Copy, Clone)] +pub struct ShorthandPath; + +impl ExpandExpression for ShorthandPath { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + // If it's a variable path, it already is a complete path: return it as-is + let path = expand_expr(&VariablePathShape, token_nodes, context); + + match path { + Ok(path) => return Ok(path), + Err(_) => {} + } + + // Synthesize the head of the shorthand path (`<member>` -> `$it.<member>`) + let mut head = expand_expr(&ShorthandHeadShape, token_nodes, context)?; + + // Now that we've synthesized the head of the path, proceed to expand the tail of the path + // like any other path. 
+ let tail = expand_syntax(&PathTailShape, token_nodes, context); + + match tail { + // No path tail could be expanded, so the synthesized head is the whole path + Err(_) => return Ok(head), + Ok((tail, _)) => { + // For each member that `PathTailShape` expanded, join it onto the existing expression + // to form a new path + for member in tail { + head = hir::Expression::dot_member(head, member); + } + + Ok(head) + } + } + } +} + +/// The head of a shorthand path: a bare word or string token that is expanded into a +/// path on a synthesized `$it` (`foo` -> `$it.foo`) +#[derive(Debug, Copy, Clone)] +pub struct ShorthandHeadShape; + +impl ExpandExpression for ShorthandHeadShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result<hir::Expression, ShellError> { + // A shorthand path must not be at EOF + let peeked = token_nodes.peek_non_ws().not_eof("shorthand path")?; + + match peeked.node { + // If the head of a shorthand path is a bare token, it expands to `$it.bare` + TokenNode::Token(Tagged { + item: RawToken::Bare, + tag, + }) => { + // Commit the peeked token + peeked.commit(); + + // Synthesize an `$it` expression + let it = synthetic_it(token_nodes.anchor()); + + // Make a path out of `$it` and the bare token as a member + Ok(hir::Expression::path( + it, + vec![tag.tagged_string(context.source)], + tag, + )) + } + + // If the head of a shorthand path is a string, it expands to `$it."some string"` + TokenNode::Token(Tagged { + item: RawToken::String(inner), + tag: outer, + }) => { + // Commit the peeked token + peeked.commit(); + + // Synthesize an `$it` expression + let it = synthetic_it(token_nodes.anchor()); + + // Make a path out of `$it` and the string token as a member + Ok(hir::Expression::path( + it, + vec![inner.string(context.source).tagged(outer)], + outer, + )) + } + + // Any other token is not a valid shorthand head + other => { + return Err(ShellError::type_error( + "shorthand path", + other.tagged_type_name(), + )) + } + } + } +} + +fn synthetic_it(origin: uuid::Uuid) -> hir::Expression { + 
hir::Expression::it_variable(Tag::unknown_span(origin), Tag::unknown_span(origin)) +} diff --git a/src/parser/hir/syntax_shape/expression.rs b/src/parser/hir/syntax_shape/expression.rs new file mode 100644 index 000000000..58cfa4a1a --- /dev/null +++ b/src/parser/hir/syntax_shape/expression.rs @@ -0,0 +1,188 @@ +pub(crate) mod delimited; +pub(crate) mod file_path; +pub(crate) mod list; +pub(crate) mod number; +pub(crate) mod pattern; +pub(crate) mod string; +pub(crate) mod unit; +pub(crate) mod variable_path; + +use crate::parser::hir::syntax_shape::{ + expand_expr, expand_syntax, expand_variable, expression::delimited::expand_delimited_expr, + BareShape, DotShape, ExpandContext, ExpandExpression, ExpandSyntax, ExpressionContinuation, + ExpressionContinuationShape, UnitShape, +}; +use crate::parser::{ + hir, + hir::{Expression, Operator, TokensIterator}, + RawToken, Token, TokenNode, +}; +use crate::prelude::*; +use std::path::PathBuf; + +#[derive(Debug, Copy, Clone)] +pub struct AnyExpressionShape; + +impl ExpandExpression for AnyExpressionShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + // Look for an expression at the cursor + let head = expand_expr(&AnyExpressionStartShape, token_nodes, context)?; + + continue_expression(head, token_nodes, context) + } +} + +pub(crate) fn continue_expression( + mut head: hir::Expression, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, +) -> Result { + loop { + // Check to see whether there's any continuation after the head expression + let continuation = expand_syntax(&ExpressionContinuationShape, token_nodes, context); + + match continuation { + // If there's no continuation, return the head + Err(_) => return Ok(head), + // Otherwise, form a new expression by combining the head with the continuation + Ok(continuation) => match continuation { + // If the continuation is a `.member`, form a path with the new member + 
ExpressionContinuation::DotSuffix(_dot, member) => { + head = Expression::dot_member(head, member); + } + + // Otherwise, if the continuation is an infix suffix, form an infix expression + ExpressionContinuation::InfixSuffix(op, expr) => { + head = Expression::infix(head, op, expr); + } + }, + } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct AnyExpressionStartShape; + +impl ExpandExpression for AnyExpressionStartShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + let size = expand_expr(&UnitShape, token_nodes, context); + + match size { + Ok(expr) => return Ok(expr), + Err(_) => {} + } + + let peek_next = token_nodes.peek_any().not_eof("expression")?; + + let head = match peek_next.node { + TokenNode::Token(token) => match token.item { + RawToken::Bare | RawToken::Operator(Operator::Dot) => { + let start = token.tag; + peek_next.commit(); + + let end = expand_syntax(&BareTailShape, token_nodes, context)?; + + match end { + Some(end) => return Ok(hir::Expression::bare(start.until(end))), + None => return Ok(hir::Expression::bare(start)), + } + } + _ => { + peek_next.commit(); + expand_one_context_free_token(*token, context) + } + }, + node @ TokenNode::Call(_) + | node @ TokenNode::Nodes(_) + | node @ TokenNode::Pipeline(_) + | node @ TokenNode::Flag(_) + | node @ TokenNode::Member(_) + | node @ TokenNode::Whitespace(_) => { + return Err(ShellError::type_error( + "expression", + node.tagged_type_name(), + )) + } + TokenNode::Delimited(delimited) => { + peek_next.commit(); + expand_delimited_expr(delimited, context) + } + + TokenNode::Error(error) => return Err(*error.item.clone()), + }?; + + Ok(head) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct BareTailShape; + +impl ExpandSyntax for BareTailShape { + type Output = Option; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result, ShellError> { + let mut end: Option 
= None; + + loop { + match expand_syntax(&BareShape, token_nodes, context) { + Ok(bare) => { + end = Some(bare.tag); + continue; + } + + Err(_) => match expand_syntax(&DotShape, token_nodes, context) { + Ok(dot) => { + end = Some(dot); + continue; + } + + Err(_) => break, + }, + } + } + + Ok(end) + } +} + +fn expand_one_context_free_token<'a, 'b>( + token: Token, + context: &ExpandContext, +) -> Result { + Ok(match token.item { + RawToken::Number(number) => { + hir::Expression::number(number.to_number(context.source), token.tag) + } + RawToken::Operator(..) => { + return Err(ShellError::syntax_error( + "unexpected operator, expected an expression".tagged(token.tag), + )) + } + RawToken::Size(..) => unimplemented!("size"), + RawToken::String(tag) => hir::Expression::string(tag, token.tag), + RawToken::Variable(tag) => expand_variable(tag, token.tag, &context.source), + RawToken::ExternalCommand(_) => unimplemented!(), + RawToken::ExternalWord => unimplemented!(), + RawToken::GlobPattern => hir::Expression::pattern(token.tag), + RawToken::Bare => hir::Expression::string(token.tag, token.tag), + }) +} + +pub fn expand_file_path(string: &str, context: &ExpandContext) -> PathBuf { + let expanded = shellexpand::tilde_with_context(string, || context.homedir()); + + PathBuf::from(expanded.as_ref()) +} diff --git a/src/parser/hir/syntax_shape/expression/delimited.rs b/src/parser/hir/syntax_shape/expression/delimited.rs new file mode 100644 index 000000000..0a01b0fc2 --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/delimited.rs @@ -0,0 +1,38 @@ +use crate::parser::hir::syntax_shape::{expand_syntax, ExpandContext, ExpressionListShape}; +use crate::parser::{hir, hir::TokensIterator}; +use crate::parser::{DelimitedNode, Delimiter}; +use crate::prelude::*; + +pub fn expand_delimited_expr( + delimited: &Tagged, + context: &ExpandContext, +) -> Result { + match &delimited.item { + DelimitedNode { + delimiter: Delimiter::Square, + children, + } => { + let mut tokens = 
TokensIterator::new(&children, delimited.tag, false); + + let list = expand_syntax(&ExpressionListShape, &mut tokens, context); + + Ok(hir::Expression::list(list?, delimited.tag)) + } + + DelimitedNode { + delimiter: Delimiter::Paren, + .. + } => Err(ShellError::type_error( + "expression", + "unimplemented call expression".tagged(delimited.tag), + )), + + DelimitedNode { + delimiter: Delimiter::Brace, + .. + } => Err(ShellError::type_error( + "expression", + "unimplemented block expression".tagged(delimited.tag), + )), + } +} diff --git a/src/parser/hir/syntax_shape/expression/file_path.rs b/src/parser/hir/syntax_shape/expression/file_path.rs new file mode 100644 index 000000000..c0e5c7c2a --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/file_path.rs @@ -0,0 +1,59 @@ +use crate::parser::hir::syntax_shape::{ + expand_syntax, expression::expand_file_path, parse_single_node, BarePathShape, ExpandContext, + ExpandExpression, +}; +use crate::parser::{hir, hir::TokensIterator, RawToken}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone)] +pub struct FilePathShape; + +impl ExpandExpression for FilePathShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + let bare = expand_syntax(&BarePathShape, token_nodes, context); + + match bare { + Ok(tag) => { + let string = tag.slice(context.source); + let path = expand_file_path(string, context); + return Ok(hir::Expression::file_path(path, tag)); + } + Err(_) => {} + } + + parse_single_node(token_nodes, "Path", |token, token_tag| { + Ok(match token { + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Path", + "glob pattern".tagged(token_tag), + )) + } + RawToken::Operator(..) 
=> { + return Err(ShellError::type_error("Path", "operator".tagged(token_tag))) + } + RawToken::Variable(tag) if tag.slice(context.source) == "it" => { + hir::Expression::it_variable(tag, token_tag) + } + RawToken::Variable(tag) => hir::Expression::variable(tag, token_tag), + RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token_tag), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token_tag)), + RawToken::Number(_) => hir::Expression::bare(token_tag), + RawToken::Size(_, _) => hir::Expression::bare(token_tag), + RawToken::Bare => hir::Expression::file_path( + expand_file_path(token_tag.slice(context.source), context), + token_tag, + ), + + RawToken::String(tag) => hir::Expression::file_path( + expand_file_path(tag.slice(context.source), context), + token_tag, + ), + }) + }) + } +} diff --git a/src/parser/hir/syntax_shape/expression/list.rs b/src/parser/hir/syntax_shape/expression/list.rs new file mode 100644 index 000000000..9d28f4414 --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/list.rs @@ -0,0 +1,43 @@ +use crate::errors::ShellError; +use crate::parser::{ + hir, + hir::syntax_shape::{ + expand_expr, maybe_spaced, spaced, AnyExpressionShape, ExpandContext, ExpandSyntax, + }, + hir::TokensIterator, +}; + +/// Expands expressions one after another until the token iterator reports its end +#[derive(Debug, Copy, Clone)] +pub struct ExpressionListShape; + +impl ExpandSyntax for ExpressionListShape { + type Output = Vec<hir::Expression>; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result<Vec<hir::Expression>, ShellError> { + let mut exprs = vec![]; + + if token_nodes.at_end_possible_ws() { + return Ok(exprs); + } + + let expr = expand_expr(&maybe_spaced(AnyExpressionShape), token_nodes, context)?; + + exprs.push(expr); + + loop { + if token_nodes.at_end_possible_ws() { + return Ok(exprs); + } + + // Every expression after the first must be preceded by whitespace + let expr = expand_expr(&spaced(AnyExpressionShape), token_nodes, context)?; + + 
exprs.push(expr); + } + } +} diff --git a/src/parser/hir/syntax_shape/expression/number.rs b/src/parser/hir/syntax_shape/expression/number.rs new file mode 100644 index 000000000..5b77044a2 --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/number.rs @@ -0,0 +1,97 @@ +use crate::parser::hir::syntax_shape::{parse_single_node, ExpandContext, ExpandExpression}; +use crate::parser::{ + hir, + hir::{RawNumber, TokensIterator}, + RawToken, +}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone)] +pub struct NumberShape; + +impl ExpandExpression for NumberShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + parse_single_node(token_nodes, "Number", |token, token_tag| { + Ok(match token { + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Number", + "glob pattern".to_string().tagged(token_tag), + )) + } + RawToken::Operator(..) => { + return Err(ShellError::type_error( + "Number", + "operator".to_string().tagged(token_tag), + )) + } + RawToken::Variable(tag) if tag.slice(context.source) == "it" => { + hir::Expression::it_variable(tag, token_tag) + } + RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token_tag), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token_tag)), + RawToken::Variable(tag) => hir::Expression::variable(tag, token_tag), + RawToken::Number(number) => { + hir::Expression::number(number.to_number(context.source), token_tag) + } + RawToken::Size(number, unit) => { + hir::Expression::size(number.to_number(context.source), unit, token_tag) + } + RawToken::Bare => hir::Expression::bare(token_tag), + RawToken::String(tag) => hir::Expression::string(tag, token_tag), + }) + }) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct IntShape; + +impl ExpandExpression for IntShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + 
parse_single_node(token_nodes, "Integer", |token, token_tag| { + Ok(match token { + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Integer", + "glob pattern".to_string().tagged(token_tag), + )) + } + RawToken::Operator(..) => { + return Err(ShellError::type_error( + "Integer", + "operator".to_string().tagged(token_tag), + )) + } + RawToken::Variable(tag) if tag.slice(context.source) == "it" => { + hir::Expression::it_variable(tag, token_tag) + } + RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token_tag), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token_tag)), + RawToken::Variable(tag) => hir::Expression::variable(tag, token_tag), + RawToken::Number(number @ RawNumber::Int(_)) => { + hir::Expression::number(number.to_number(context.source), token_tag) + } + token @ RawToken::Number(_) => { + return Err(ShellError::type_error( + "Integer", + token.type_name().tagged(token_tag), + )); + } + RawToken::Size(number, unit) => { + hir::Expression::size(number.to_number(context.source), unit, token_tag) + } + RawToken::Bare => hir::Expression::bare(token_tag), + RawToken::String(tag) => hir::Expression::string(tag, token_tag), + }) + }) + } +} diff --git a/src/parser/hir/syntax_shape/expression/pattern.rs b/src/parser/hir/syntax_shape/expression/pattern.rs new file mode 100644 index 000000000..4105b79b4 --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/pattern.rs @@ -0,0 +1,86 @@ +use crate::parser::hir::syntax_shape::{ + expand_bare, expand_syntax, expression::expand_file_path, parse_single_node, ExpandContext, + ExpandExpression, ExpandSyntax, +}; +use crate::parser::{hir, hir::TokensIterator, Operator, RawToken, TokenNode}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone)] +pub struct PatternShape; + +impl ExpandExpression for PatternShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + let pattern = 
expand_syntax(&BarePatternShape, token_nodes, context); + + match pattern { + Ok(tag) => { + return Ok(hir::Expression::pattern(tag)); + } + Err(_) => {} + } + + parse_single_node(token_nodes, "Pattern", |token, token_tag| { + Ok(match token { + RawToken::GlobPattern => { + return Err(ShellError::unreachable( + "glob pattern after glob already returned", + )) + } + RawToken::Operator(..) => { + return Err(ShellError::unreachable("dot after glob already returned")) + } + RawToken::Bare => { + return Err(ShellError::unreachable("bare after glob already returned")) + } + + RawToken::Variable(tag) if tag.slice(context.source) == "it" => { + hir::Expression::it_variable(tag, token_tag) + } + RawToken::Variable(tag) => hir::Expression::variable(tag, token_tag), + RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token_tag), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token_tag)), + RawToken::Number(_) => hir::Expression::bare(token_tag), + RawToken::Size(_, _) => hir::Expression::bare(token_tag), + + RawToken::String(tag) => hir::Expression::file_path( + expand_file_path(tag.slice(context.source), context), + token_tag, + ), + }) + }) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct BarePatternShape; + +impl ExpandSyntax for BarePatternShape { + type Output = Tag; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + expand_bare(token_nodes, context, |token| match token { + TokenNode::Token(Tagged { + item: RawToken::Bare, + .. + }) + | TokenNode::Token(Tagged { + item: RawToken::Operator(Operator::Dot), + .. + }) + | TokenNode::Token(Tagged { + item: RawToken::GlobPattern, + .. 
+ }) => true, + + _ => false, + }) + } +} diff --git a/src/parser/hir/syntax_shape/expression/string.rs b/src/parser/hir/syntax_shape/expression/string.rs new file mode 100644 index 000000000..6a4973feb --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/string.rs @@ -0,0 +1,60 @@ +use crate::parser::hir::syntax_shape::{ + expand_variable, parse_single_node, ExpandContext, ExpandExpression, TestSyntax, +}; +use crate::parser::hir::tokens_iterator::Peeked; +use crate::parser::{hir, hir::TokensIterator, RawToken, TokenNode}; +use crate::prelude::*; + +#[derive(Debug, Copy, Clone)] +pub struct StringShape; + +impl ExpandExpression for StringShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + parse_single_node(token_nodes, "String", |token, token_tag| { + Ok(match token { + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "String", + "glob pattern".tagged(token_tag), + )) + } + RawToken::Operator(..) 
=> { + return Err(ShellError::type_error( + "String", + "operator".tagged(token_tag), + )) + } + RawToken::Variable(tag) => expand_variable(tag, token_tag, &context.source), + RawToken::ExternalCommand(tag) => hir::Expression::external_command(tag, token_tag), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token_tag)), + RawToken::Number(_) => hir::Expression::bare(token_tag), + RawToken::Size(_, _) => hir::Expression::bare(token_tag), + RawToken::Bare => hir::Expression::bare(token_tag), + RawToken::String(tag) => hir::Expression::string(tag, token_tag), + }) + }) + } +} + +impl TestSyntax for StringShape { + fn test<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + _context: &ExpandContext, + ) -> Option> { + let peeked = token_nodes.peek_any(); + + match peeked.node { + Some(TokenNode::Token(token)) => match token.item { + RawToken::String(_) => Some(peeked), + _ => None, + }, + + _ => None, + } + } +} diff --git a/src/parser/hir/syntax_shape/expression/unit.rs b/src/parser/hir/syntax_shape/expression/unit.rs new file mode 100644 index 000000000..cc3642bda --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/unit.rs @@ -0,0 +1,89 @@ +use crate::parser::hir::syntax_shape::{ExpandContext, ExpandExpression}; +use crate::parser::parse::tokens::RawNumber; +use crate::parser::parse::unit::Unit; +use crate::parser::{hir, hir::TokensIterator, RawToken, TokenNode}; +use crate::prelude::*; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::digit1; +use nom::combinator::{all_consuming, opt, value}; +use nom::IResult; + +#[derive(Debug, Copy, Clone)] +pub struct UnitShape; + +impl ExpandExpression for UnitShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + let peeked = token_nodes.peek_any().not_eof("unit")?; + + let tag = match peeked.node { + TokenNode::Token(Tagged { + item: RawToken::Bare, + tag, + }) => tag, + _ 
=> return Err(peeked.type_error("unit")), + }; + + let unit = unit_size(tag.slice(context.source), *tag); + + let (_, (number, unit)) = match unit { + Err(_) => return Err(ShellError::type_error("unit", "word".tagged(tag))), + Ok((number, unit)) => (number, unit), + }; + + // Consume the token only now that it is known to be a valid size + peeked.commit(); + + Ok(hir::Expression::size( + number.to_number(context.source), + unit, + tag, + )) + } +} + +/// Parses `input` as digits, an optional `.` with more digits, and a byte-unit suffix +/// (`B`, `KB`, `MB`, `GB`, `TB` or `PB`) that must consume the rest of the input +fn unit_size(input: &str, bare_tag: Tag) -> IResult<&str, (Tagged<RawNumber>, Unit)> { + let (input, digits) = digit1(input)?; + + let (input, dot) = opt(tag("."))(input)?; + + let (input, number) = match dot { + Some(dot) => { + let (input, rest) = digit1(input)?; + ( + input, + RawNumber::decimal(( + bare_tag.span.start(), + bare_tag.span.start() + digits.len() + dot.len() + rest.len(), + bare_tag.anchor, + )), + ) + } + + None => ( + input, + RawNumber::int(( + bare_tag.span.start(), + bare_tag.span.start() + digits.len(), + bare_tag.anchor, + )), + ), + }; + + let (input, unit) = all_consuming(alt(( + value(Unit::B, alt((tag("B"), tag("b")))), + value(Unit::KB, alt((tag("KB"), tag("kb"), tag("Kb")))), + value(Unit::MB, alt((tag("MB"), tag("mb"), tag("Mb")))), + value(Unit::GB, alt((tag("GB"), tag("gb"), tag("Gb")))), + value(Unit::TB, alt((tag("TB"), tag("tb"), tag("Tb")))), + value(Unit::PB, alt((tag("PB"), tag("pb"), tag("Pb")))), + )))(input)?; + + Ok((input, (number, unit))) +} diff --git a/src/parser/hir/syntax_shape/expression/variable_path.rs b/src/parser/hir/syntax_shape/expression/variable_path.rs new file mode 100644 index 000000000..afea1b149 --- /dev/null +++ b/src/parser/hir/syntax_shape/expression/variable_path.rs @@ -0,0 +1,396 @@ +use crate::parser::hir::syntax_shape::{ + expand_expr, expand_syntax, parse_single_node, AnyExpressionShape, BareShape, ExpandContext, + ExpandExpression, ExpandSyntax, Peeked, SkipSyntax, StringShape, TestSyntax, WhitespaceShape, +}; +use crate::parser::{hir, hir::Expression, hir::TokensIterator, Operator, RawToken}; +use crate::prelude::*; + 
+#[derive(Debug, Copy, Clone)] +pub struct VariablePathShape; + +impl ExpandExpression for VariablePathShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + // 1. let the head be the first token, expecting a variable + // 2. let the tail be an empty list of members + // 3. while the next token is a dot: + // 1. consume the dot + // 2. consume the next token as a member (an error if it is not one) and push it onto tail + + let head = expand_expr(&VariableShape, token_nodes, context)?; + let start = head.tag(); + let mut end = start; + let mut tail: Vec> = vec![]; + + loop { + match DotShape.skip(token_nodes, context) { + Err(_) => break, + Ok(_) => {} + } + + let syntax = expand_syntax(&MemberShape, token_nodes, context)?; + let member = syntax.to_tagged_string(context.source); + + end = member.tag(); + tail.push(member); + } + + Ok(hir::Expression::path(head, tail, start.until(end))) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct PathTailShape; + +impl ExpandSyntax for PathTailShape { + type Output = (Vec>, Tag); + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + let mut end: Option = None; + let mut tail = vec![]; + + loop { + match DotShape.skip(token_nodes, context) { + Err(_) => break, + Ok(_) => {} + } + + let syntax = expand_syntax(&MemberShape, token_nodes, context)?; + let member = syntax.to_tagged_string(context.source); + end = Some(member.tag()); + tail.push(member); + } + + match end { + None => { + return Err(ShellError::type_error( + "path tail", + token_nodes.typed_tag_at_cursor(), + )) + } + + Some(end) => Ok((tail, end)), + } + } +} + +#[derive(Debug)] +pub enum ExpressionContinuation { + DotSuffix(Tag, Tagged), + InfixSuffix(Tagged, Expression), +} + +/// The shape of an expression continuation: a `.member` suffix, or an infix operator and its operand +#[derive(Debug, Copy, Clone)] +pub struct ExpressionContinuationShape; + +impl ExpandSyntax for ExpressionContinuationShape { + 
type Output = ExpressionContinuation; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + // Try to expand a `.` + let dot = expand_syntax(&DotShape, token_nodes, context); + + match dot { + // If a `.` was matched, it's a `Path`, and we expect a `Member` next + Ok(dot) => { + let syntax = expand_syntax(&MemberShape, token_nodes, context)?; + let member = syntax.to_tagged_string(context.source); + + Ok(ExpressionContinuation::DotSuffix(dot, member)) + } + + // Otherwise, we expect an infix operator and an expression next + Err(_) => { + let (_, op, _) = expand_syntax(&InfixShape, token_nodes, context)?; + let next = expand_expr(&AnyExpressionShape, token_nodes, context)?; + + Ok(ExpressionContinuation::InfixSuffix(op, next)) + } + } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct VariableShape; + +impl ExpandExpression for VariableShape { + fn expand_expr<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + parse_single_node(token_nodes, "variable", |token, token_tag| { + Ok(match token { + RawToken::Variable(tag) => { + if tag.slice(context.source) == "it" { + hir::Expression::it_variable(tag, token_tag) + } else { + hir::Expression::variable(tag, token_tag) + } + } + _ => { + return Err(ShellError::type_error( + "variable", + token.type_name().tagged(token_tag), + )) + } + }) + }) + } +} + +#[derive(Debug, Clone, Copy)] +pub enum Member { + String(/* outer */ Tag, /* inner */ Tag), + Bare(Tag), +} + +impl Member { + pub(crate) fn to_expr(&self) -> hir::Expression { + match self { + Member::String(outer, inner) => hir::Expression::string(inner, outer), + Member::Bare(tag) => hir::Expression::string(tag, tag), + } + } + + pub(crate) fn tag(&self) -> Tag { + match self { + Member::String(outer, _inner) => *outer, + Member::Bare(tag) => *tag, + } + } + + pub(crate) fn to_tagged_string(&self, source: &str) -> Tagged { + match self { + 
Member::String(outer, inner) => inner.string(source).tagged(outer), + Member::Bare(tag) => tag.tagged_string(source), + } + } + + pub(crate) fn tagged_type_name(&self) -> Tagged<&'static str> { + match self { + Member::String(outer, _inner) => "string".tagged(outer), + Member::Bare(tag) => "word".tagged(tag), + } + } +} + +enum ColumnPathState { + Initial, + LeadingDot(Tag), + Dot(Tag, Vec, Tag), + Member(Tag, Vec), + Error(ShellError), +} + +impl ColumnPathState { + pub fn dot(self, dot: Tag) -> ColumnPathState { + match self { + ColumnPathState::Initial => ColumnPathState::LeadingDot(dot), + ColumnPathState::LeadingDot(_) => { + ColumnPathState::Error(ShellError::type_error("column", "dot".tagged(dot))) + } + ColumnPathState::Dot(..) => { + ColumnPathState::Error(ShellError::type_error("column", "dot".tagged(dot))) + } + ColumnPathState::Member(tag, members) => ColumnPathState::Dot(tag, members, dot), + ColumnPathState::Error(err) => ColumnPathState::Error(err), + } + } + + pub fn member(self, member: Member) -> ColumnPathState { + match self { + ColumnPathState::Initial => ColumnPathState::Member(member.tag(), vec![member]), + ColumnPathState::LeadingDot(tag) => { + ColumnPathState::Member(tag.until(member.tag()), vec![member]) + } + + ColumnPathState::Dot(tag, mut tags, _) => { + ColumnPathState::Member(tag.until(member.tag()), { + tags.push(member); + tags + }) + } + ColumnPathState::Member(..) 
=> { + ColumnPathState::Error(ShellError::type_error("column", member.tagged_type_name())) + } + ColumnPathState::Error(err) => ColumnPathState::Error(err), + } + } + + pub fn into_path(self, next: Peeked) -> Result>, ShellError> { + match self { + ColumnPathState::Initial => Err(next.type_error("column path")), + ColumnPathState::LeadingDot(dot) => { + Err(ShellError::type_error("column", "dot".tagged(dot))) + } + ColumnPathState::Dot(_tag, _members, dot) => { + Err(ShellError::type_error("column", "dot".tagged(dot))) + } + ColumnPathState::Member(tag, tags) => Ok(tags.tagged(tag)), + ColumnPathState::Error(err) => Err(err), + } + } +} + +pub fn expand_column_path<'a, 'b>( + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, +) -> Result>, ShellError> { + let mut state = ColumnPathState::Initial; + + loop { + let member = MemberShape.expand_syntax(token_nodes, context); + + match member { + Err(_) => break, + Ok(member) => state = state.member(member), + } + + let dot = DotShape.expand_syntax(token_nodes, context); + + match dot { + Err(_) => break, + Ok(dot) => state = state.dot(dot), + } + } + + state.into_path(token_nodes.peek_non_ws()) +} + +#[derive(Debug, Copy, Clone)] +pub struct ColumnPathShape; + +impl ExpandSyntax for ColumnPathShape { + type Output = Tagged>; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + expand_column_path(token_nodes, context) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct MemberShape; + +impl ExpandSyntax for MemberShape { + type Output = Member; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result { + let bare = BareShape.test(token_nodes, context); + if let Some(peeked) = bare { + let node = peeked.not_eof("column")?.commit(); + return Ok(Member::Bare(node.tag())); + } + + let string = StringShape.test(token_nodes, context); + + if let Some(peeked) = string { + let 
node = peeked.not_eof("column")?.commit(); + let (outer, inner) = node.expect_string(); + + return Ok(Member::String(outer, inner)); + } + + Err(token_nodes.peek_any().type_error("column")) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct DotShape; + +impl SkipSyntax for DotShape { + fn skip<'a, 'b>( + &self, + token_nodes: &mut TokensIterator<'_>, + context: &ExpandContext, + ) -> Result<(), ShellError> { + expand_syntax(self, token_nodes, context)?; + + Ok(()) + } +} + +impl ExpandSyntax for DotShape { + type Output = Tag; + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + _context: &ExpandContext, + ) -> Result { + parse_single_node(token_nodes, "dot", |token, token_tag| { + Ok(match token { + RawToken::Operator(Operator::Dot) => token_tag, + _ => { + return Err(ShellError::type_error( + "dot", + token.type_name().tagged(token_tag), + )) + } + }) + }) + } +} + +#[derive(Debug, Copy, Clone)] +pub struct InfixShape; + +impl ExpandSyntax for InfixShape { + type Output = (Tag, Tagged, Tag); + + fn expand_syntax<'a, 'b>( + &self, + token_nodes: &'b mut TokensIterator<'a>, + context: &ExpandContext, + ) -> Result { + let checkpoint = token_nodes.checkpoint(); + + // An infix operator must be prefixed by whitespace + let start = expand_syntax(&WhitespaceShape, checkpoint.iterator, context)?; + + // Parse the next TokenNode after the whitespace + let operator = + parse_single_node(checkpoint.iterator, "infix operator", |token, token_tag| { + Ok(match token { + // If it's an operator (and not `.`), it's a match + RawToken::Operator(operator) if operator != Operator::Dot => { + operator.tagged(token_tag) + } + + // Otherwise, it's not a match + _ => { + return Err(ShellError::type_error( + "infix operator", + token.type_name().tagged(token_tag), + )) + } + }) + })?; + + // An infix operator must be followed by whitespace + let end = expand_syntax(&WhitespaceShape, checkpoint.iterator, context)?; + + checkpoint.commit(); + + Ok((start, 
operator, end)) + } +} diff --git a/src/parser/hir/tokens_iterator.rs b/src/parser/hir/tokens_iterator.rs new file mode 100644 index 000000000..c0dd9c50f --- /dev/null +++ b/src/parser/hir/tokens_iterator.rs @@ -0,0 +1,365 @@ +pub(crate) mod debug; + +use crate::errors::ShellError; +use crate::parser::TokenNode; +use crate::{Tag, Tagged, TaggedItem}; +use derive_new::new; + +#[derive(Debug, new)] +pub struct TokensIterator<'a> { + tokens: &'a [TokenNode], + tag: Tag, + skip_ws: bool, + #[new(default)] + index: usize, + #[new(default)] + seen: indexmap::IndexSet, +} + +#[derive(Debug)] +pub struct Checkpoint<'content, 'me> { + pub(crate) iterator: &'me mut TokensIterator<'content>, + index: usize, + seen: indexmap::IndexSet, + committed: bool, +} + +impl<'content, 'me> Checkpoint<'content, 'me> { + pub(crate) fn commit(mut self) { + self.committed = true; + } +} + +impl<'content, 'me> std::ops::Drop for Checkpoint<'content, 'me> { + fn drop(&mut self) { + if !self.committed { + self.iterator.index = self.index; + self.iterator.seen = self.seen.clone(); + } + } +} + +#[derive(Debug)] +pub struct Peeked<'content, 'me> { + pub(crate) node: Option<&'content TokenNode>, + iterator: &'me mut TokensIterator<'content>, + from: usize, + to: usize, +} + +impl<'content, 'me> Peeked<'content, 'me> { + pub fn commit(&mut self) -> Option<&'content TokenNode> { + let Peeked { + node, + iterator, + from, + to, + } = self; + + let node = (*node)?; + iterator.commit(*from, *to); + Some(node) + } + + pub fn not_eof( + self, + expected: impl Into, + ) -> Result, ShellError> { + match self.node { + None => Err(ShellError::unexpected_eof( + expected, + self.iterator.eof_tag(), + )), + Some(node) => Ok(PeekedNode { + node, + iterator: self.iterator, + from: self.from, + to: self.to, + }), + } + } + + pub fn type_error(&self, expected: impl Into) -> ShellError { + peek_error(&self.node, self.iterator.eof_tag(), expected) + } +} + +#[derive(Debug)] +pub struct PeekedNode<'content, 'me> { + 
pub(crate) node: &'content TokenNode, + iterator: &'me mut TokensIterator<'content>, + from: usize, + to: usize, +} + +impl<'content, 'me> PeekedNode<'content, 'me> { + pub fn commit(self) -> &'content TokenNode { + let PeekedNode { + node, + iterator, + from, + to, + } = self; + + iterator.commit(from, to); + node + } + + pub fn rollback(self) {} + + pub fn type_error(&self, expected: impl Into) -> ShellError { + peek_error(&Some(self.node), self.iterator.eof_tag(), expected) + } +} + +pub fn peek_error( + node: &Option<&TokenNode>, + eof_tag: Tag, + expected: impl Into, +) -> ShellError { + match node { + None => ShellError::unexpected_eof(expected, eof_tag), + Some(node) => ShellError::type_error(expected, node.tagged_type_name()), + } +} + +impl<'content> TokensIterator<'content> { + #[cfg(test)] + pub fn all(tokens: &'content [TokenNode], tag: Tag) -> TokensIterator<'content> { + TokensIterator::new(tokens, tag, false) + } + + /// Use a checkpoint when you need to peek more than one token ahead, but can't be sure + /// that you'll succeed. 
+ pub fn checkpoint<'me>(&'me mut self) -> Checkpoint<'content, 'me> { + let index = self.index; + let seen = self.seen.clone(); + + Checkpoint { + iterator: self, + index, + seen, + committed: false, + } + } + + pub fn anchor(&self) -> uuid::Uuid { + self.tag.anchor + } + + fn eof_tag(&self) -> Tag { + Tag::from((self.tag.span.end(), self.tag.span.end(), self.tag.anchor)) + } + + pub fn typed_tag_at_cursor(&mut self) -> Tagged<&'static str> { + let next = self.peek_any(); + + match next.node { + None => "end".tagged(self.eof_tag()), + Some(node) => node.tagged_type_name(), + } + } + + pub fn remove(&mut self, position: usize) { + self.seen.insert(position); + } + + pub fn at_end(&self) -> bool { + peek(self, self.skip_ws).is_none() + } + + pub fn at_end_possible_ws(&self) -> bool { + peek(self, true).is_none() + } + + pub fn advance(&mut self) { + self.seen.insert(self.index); + self.index += 1; + } + + pub fn extract(&mut self, f: impl Fn(&TokenNode) -> Option) -> Option<(usize, T)> { + for (i, item) in self.tokens.iter().enumerate() { + if self.seen.contains(&i) { + continue; + } + + match f(item) { + None => { + continue; + } + Some(value) => { + self.seen.insert(i); + return Some((i, value)); + } + } + } + + None + } + + pub fn move_to(&mut self, pos: usize) { + self.index = pos; + } + + pub fn restart(&mut self) { + self.index = 0; + } + + pub fn clone(&self) -> TokensIterator<'content> { + TokensIterator { + tokens: self.tokens, + tag: self.tag, + index: self.index, + seen: self.seen.clone(), + skip_ws: self.skip_ws, + } + } + + // Get the next token, not including whitespace + pub fn next_non_ws(&mut self) -> Option<&TokenNode> { + let mut peeked = start_next(self, true); + peeked.commit() + } + + // Peek the next token, not including whitespace + pub fn peek_non_ws<'me>(&'me mut self) -> Peeked<'content, 'me> { + start_next(self, true) + } + + // Peek the next token, including whitespace + pub fn peek_any<'me>(&'me mut self) -> Peeked<'content, 'me> { + 
start_next(self, false) + } + + fn commit(&mut self, from: usize, to: usize) { + for index in from..to { + self.seen.insert(index); + } + + self.index = to; + } + + pub fn debug_remaining(&self) -> Vec { + let mut tokens = self.clone(); + tokens.restart(); + tokens.cloned().collect() + } +} + +impl<'a> Iterator for TokensIterator<'a> { + type Item = &'a TokenNode; + + fn next(&mut self) -> Option<&'a TokenNode> { + next(self, self.skip_ws) + } +} + +fn peek<'content, 'me>( + iterator: &TokensIterator<'content>, + skip_ws: bool, +) -> Option<&'content TokenNode> { + let mut to = iterator.index; + + loop { + if to >= iterator.tokens.len() { + return None; + } + + if iterator.seen.contains(&to) { + to += 1; + continue; + } + + if to >= iterator.tokens.len() { + return None; + } + + let node = &iterator.tokens[to]; + + match node { + TokenNode::Whitespace(_) if skip_ws => { + to += 1; + } + _ => { + return Some(node); + } + } + } +} + +fn start_next<'content, 'me>( + iterator: &'me mut TokensIterator<'content>, + skip_ws: bool, +) -> Peeked<'content, 'me> { + let from = iterator.index; + let mut to = iterator.index; + + loop { + if to >= iterator.tokens.len() { + return Peeked { + node: None, + iterator, + from, + to, + }; + } + + if iterator.seen.contains(&to) { + to += 1; + continue; + } + + if to >= iterator.tokens.len() { + return Peeked { + node: None, + iterator, + from, + to, + }; + } + + let node = &iterator.tokens[to]; + + match node { + TokenNode::Whitespace(_) if skip_ws => { + to += 1; + } + _ => { + to += 1; + return Peeked { + node: Some(node), + iterator, + from, + to, + }; + } + } + } +} + +fn next<'a>(iterator: &mut TokensIterator<'a>, skip_ws: bool) -> Option<&'a TokenNode> { + loop { + if iterator.index >= iterator.tokens.len() { + return None; + } + + if iterator.seen.contains(&iterator.index) { + iterator.advance(); + continue; + } + + if iterator.index >= iterator.tokens.len() { + return None; + } + + match &iterator.tokens[iterator.index] { + 
TokenNode::Whitespace(_) if skip_ws => { + iterator.advance(); + } + other => { + iterator.advance(); + return Some(other); + } + } + } +} diff --git a/src/parser/hir/tokens_iterator/debug.rs b/src/parser/hir/tokens_iterator/debug.rs new file mode 100644 index 000000000..2e2672015 --- /dev/null +++ b/src/parser/hir/tokens_iterator/debug.rs @@ -0,0 +1,30 @@ +use crate::parser::hir::tokens_iterator::TokensIterator; +use crate::traits::ToDebug; + +#[derive(Debug)] +pub(crate) enum DebugIteratorToken { + Seen(String), + Unseen(String), + Cursor, +} + +pub(crate) fn debug_tokens(iterator: &TokensIterator, source: &str) -> Vec { + let mut out = vec![]; + + for (i, token) in iterator.tokens.iter().enumerate() { + if iterator.index == i { + out.push(DebugIteratorToken::Cursor); + } + + if iterator.seen.contains(&i) { + out.push(DebugIteratorToken::Seen(format!("{}", token.debug(source)))); + } else { + out.push(DebugIteratorToken::Unseen(format!( + "{}", + token.debug(source) + ))); + } + } + + out +} diff --git a/src/parser/parse/files.rs b/src/parser/parse/files.rs index afe75ddb2..3c28237f5 100644 --- a/src/parser/parse/files.rs +++ b/src/parser/parse/files.rs @@ -1,6 +1,7 @@ use crate::Tag; use derive_new::new; use language_reporting::{FileName, Location}; +use log::trace; use uuid::Uuid; #[derive(new, Debug, Clone)] @@ -18,7 +19,7 @@ impl language_reporting::ReportingFiles for Files { from_index: usize, to_index: usize, ) -> Option { - Some(Tag::from((from_index, to_index, file))) + Some(Tag::new(file, (from_index, to_index).into())) } fn file_id(&self, tag: Self::Span) -> Self::FileId { @@ -38,8 +39,18 @@ impl language_reporting::ReportingFiles for Files { let mut seen_lines = 0; let mut seen_bytes = 0; - for (pos, _) in source.match_indices('\n') { - if pos > byte_index { + for (pos, slice) in source.match_indices('\n') { + trace!( + "SEARCH={} SEEN={} POS={} SLICE={:?} LEN={} ALL={:?}", + byte_index, + seen_bytes, + pos, + slice, + source.len(), + source + ); + + 
if pos >= byte_index { return Some(language_reporting::Location::new( seen_lines, byte_index - seen_bytes, @@ -53,7 +64,7 @@ impl language_reporting::ReportingFiles for Files { if seen_lines == 0 { Some(language_reporting::Location::new(0, byte_index)) } else { - None + panic!("byte index {} wasn't valid", byte_index); } } @@ -64,7 +75,7 @@ impl language_reporting::ReportingFiles for Files { for (pos, _) in source.match_indices('\n') { if seen_lines == lineno { - return Some(Tag::from((seen_bytes, pos, file))); + return Some(Tag::new(file, (seen_bytes, pos + 1).into())); } else { seen_lines += 1; seen_bytes = pos + 1; @@ -72,16 +83,18 @@ impl language_reporting::ReportingFiles for Files { } if seen_lines == 0 { - Some(Tag::from((0, self.snippet.len() - 1, file))) + Some(Tag::new(file, (0, self.snippet.len() - 1).into())) } else { None } } fn source(&self, tag: Self::Span) -> Option { - if tag.span.start > tag.span.end { + trace!("source(tag={:?}) snippet={:?}", tag, self.snippet); + + if tag.span.start() > tag.span.end() { return None; - } else if tag.span.end >= self.snippet.len() { + } else if tag.span.end() > self.snippet.len() { return None; } Some(tag.slice(&self.snippet).to_string()) diff --git a/src/parser/parse/operator.rs b/src/parser/parse/operator.rs index 82a04ed79..7b5a5c77d 100644 --- a/src/parser/parse/operator.rs +++ b/src/parser/parse/operator.rs @@ -11,6 +11,7 @@ pub enum Operator { GreaterThan, LessThanOrEqual, GreaterThanOrEqual, + Dot, } impl ToDebug for Operator { @@ -32,6 +33,7 @@ impl Operator { Operator::GreaterThan => ">", Operator::LessThanOrEqual => "<=", Operator::GreaterThanOrEqual => ">=", + Operator::Dot => ".", } } } @@ -52,6 +54,7 @@ impl FromStr for Operator { ">" => Ok(Operator::GreaterThan), "<=" => Ok(Operator::LessThanOrEqual), ">=" => Ok(Operator::GreaterThanOrEqual), + "." 
=> Ok(Operator::Dot), _ => Err(()), } } diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index 33903ba37..93ba043ba 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -14,24 +14,54 @@ use nom::combinator::*; use nom::multi::*; use nom::sequence::*; +use derive_new::new; use log::trace; use nom::dbg; use nom::*; use nom::{AsBytes, FindSubstring, IResult, InputLength, InputTake, Slice}; use nom_locate::{position, LocatedSpanEx}; +use nom_tracable::{tracable_parser, HasTracableInfo, TracableInfo}; use serde::{Deserialize, Serialize}; use std::fmt::Debug; use std::str::FromStr; use uuid::Uuid; -pub type NomSpan<'a> = LocatedSpanEx<&'a str, Uuid>; +pub type NomSpan<'a> = LocatedSpanEx<&'a str, TracableContext>; + +#[derive(Debug, Clone, Copy, PartialEq, new)] +pub struct TracableContext { + pub(crate) origin: Uuid, + pub(crate) info: TracableInfo, +} + +impl HasTracableInfo for TracableContext { + fn get_tracable_info(&self) -> TracableInfo { + self.info + } + + fn set_tracable_info(mut self, info: TracableInfo) -> Self { + TracableContext { + origin: self.origin, + info, + } + } +} + +impl std::ops::Deref for TracableContext { + type Target = TracableInfo; + + fn deref(&self) -> &TracableInfo { + &self.info + } +} pub fn nom_input(s: &str, anchor: Uuid) -> NomSpan<'_> { - LocatedSpanEx::new_extra(s, anchor) + LocatedSpanEx::new_extra(s, TracableContext::new(anchor, TracableInfo::new())) } macro_rules! operator { ($name:tt : $token:tt ) => { + #[tracable_parser] pub fn $name(input: NomSpan) -> IResult { let start = input.offset; let (input, tag) = tag(stringify!($token))(input)?; @@ -51,25 +81,7 @@ operator! { gte: >= } operator! { lte: <= } operator! { eq: == } operator! 
{ neq: != } - -fn trace_step<'a, T: Debug>( - input: NomSpan<'a>, - name: &str, - block: impl FnOnce(NomSpan<'a>) -> IResult, T>, -) -> IResult, T> { - trace!(target: "nu::lite_parse", "+ before {} @ {:?}", name, input); - match block(input) { - Ok((input, result)) => { - trace!(target: "nu::lite_parse", "after {} @ {:?} -> {:?}", name, input, result); - Ok((input, result)) - } - - Err(e) => { - trace!(target: "nu::lite_parse", "- failed {} :: {:?}", name, e); - Err(e) - } - } -} +operator! { dot: . } #[derive(Debug, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)] pub enum Number { @@ -77,6 +89,15 @@ pub enum Number { Decimal(BigDecimal), } +impl std::fmt::Display for Number { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Number::Int(int) => write!(f, "{}", int), + Number::Decimal(decimal) => write!(f, "{}", decimal), + } + } +} + macro_rules! primitive_int { ($($ty:ty)*) => { $( @@ -148,540 +169,479 @@ impl Into for BigDecimal { } } +#[tracable_parser] +pub fn number(input: NomSpan) -> IResult { + let (input, number) = raw_number(input)?; + + Ok(( + input, + TokenTreeBuilder::tagged_number(number.item, number.tag), + )) +} + +#[tracable_parser] pub fn raw_number(input: NomSpan) -> IResult> { let anchoral = input; let start = input.offset; - trace_step(input, "raw_decimal", move |input| { - let (input, neg) = opt(tag("-"))(input)?; - let (input, head) = digit1(input)?; - let dot: IResult = tag(".")(input); + let (input, neg) = opt(tag("-"))(input)?; + let (input, head) = digit1(input)?; - let input = match dot { - Ok((input, dot)) => input, + match input.fragment.chars().next() { + None => return Ok((input, RawNumber::int((start, input.offset, input.extra)))), + Some('.') => (), + Some(other) if other.is_whitespace() => { + return Ok((input, RawNumber::int((start, input.offset, input.extra)))) + } + _ => { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::Tag, + ))) 
+ } + } - // it's just an integer - Err(_) => return Ok((input, RawNumber::int((start, input.offset, input.extra)))), - }; + let dot: IResult = tag(".")(input); - let (input, tail) = digit1(input)?; + let input = match dot { + Ok((input, dot)) => input, - let end = input.offset; + // it's just an integer + Err(_) => return Ok((input, RawNumber::int((start, input.offset, input.extra)))), + }; - Ok((input, RawNumber::decimal((start, end, input.extra)))) - }) + let (input, tail) = digit1(input)?; + + let end = input.offset; + + let next = input.fragment.chars().next(); + + if let Some(next) = next { + if !next.is_whitespace() { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::Tag, + ))); + } + } + + Ok((input, RawNumber::decimal((start, end, input.extra)))) } +#[tracable_parser] pub fn operator(input: NomSpan) -> IResult { - trace_step(input, "operator", |input| { - let (input, operator) = alt((gte, lte, neq, gt, lt, eq))(input)?; + let (input, operator) = alt((gte, lte, neq, gt, lt, eq))(input)?; - Ok((input, operator)) - }) + Ok((input, operator)) } +#[tracable_parser] pub fn dq_string(input: NomSpan) -> IResult { - trace_step(input, "dq_string", |input| { - let start = input.offset; - let (input, _) = char('"')(input)?; - let start1 = input.offset; - let (input, _) = many0(none_of("\""))(input)?; - let end1 = input.offset; - let (input, _) = char('"')(input)?; - let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_string((start1, end1, input.extra), (start, end, input.extra)), - )) - }) + let start = input.offset; + let (input, _) = char('"')(input)?; + let start1 = input.offset; + let (input, _) = many0(none_of("\""))(input)?; + let end1 = input.offset; + let (input, _) = char('"')(input)?; + let end = input.offset; + Ok(( + input, + TokenTreeBuilder::tagged_string((start1, end1, input.extra), (start, end, input.extra)), + )) } +#[tracable_parser] pub fn sq_string(input: NomSpan) -> IResult { - trace_step(input, 
"sq_string", move |input| { - let start = input.offset; - let (input, _) = char('\'')(input)?; - let start1 = input.offset; - let (input, _) = many0(none_of("\'"))(input)?; - let end1 = input.offset; - let (input, _) = char('\'')(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = char('\'')(input)?; + let start1 = input.offset; + let (input, _) = many0(none_of("\'"))(input)?; + let end1 = input.offset; + let (input, _) = char('\'')(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_string((start1, end1, input.extra), (start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_string((start1, end1, input.extra), (start, end, input.extra)), + )) } +#[tracable_parser] pub fn string(input: NomSpan) -> IResult { - trace_step(input, "string", move |input| { - alt((sq_string, dq_string))(input) - }) + alt((sq_string, dq_string))(input) } +#[tracable_parser] pub fn external(input: NomSpan) -> IResult { - trace_step(input, "external", move |input| { - let start = input.offset; - let (input, _) = tag("^")(input)?; - let (input, bare) = take_while(is_bare_char)(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = tag("^")(input)?; + let (input, bare) = take_while(is_bare_char)(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_external(bare, (start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_external_command(bare, (start, end, input.extra)), + )) } +#[tracable_parser] pub fn pattern(input: NomSpan) -> IResult { - trace_step(input, "bare", move |input| { - let start = input.offset; - let (input, _) = take_while1(is_start_glob_char)(input)?; - let (input, _) = take_while(is_glob_char)(input)?; + let start = input.offset; + let (input, _) = take_while1(is_start_glob_char)(input)?; + let (input, _) = take_while(is_glob_char)(input)?; - let next_char = &input.fragment.chars().nth(0); + let next_char = 
&input.fragment.chars().nth(0); - if let Some(next_char) = next_char { - if is_external_word_char(*next_char) { - return Err(nom::Err::Error(nom::error::make_error( - input, - nom::error::ErrorKind::TakeWhile1, - ))); - } + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); } + } - let end = input.offset; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_pattern((start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_pattern((start, end, input.extra)), + )) } +#[tracable_parser] pub fn bare(input: NomSpan) -> IResult { - trace_step(input, "bare", move |input| { - let start = input.offset; - let (input, _) = take_while1(is_start_bare_char)(input)?; - let (input, _) = take_while(is_bare_char)(input)?; + let start = input.offset; + let (input, _) = take_while1(is_start_bare_char)(input)?; + let (input, last) = take_while(is_bare_char)(input)?; - let next_char = &input.fragment.chars().nth(0); + let next_char = &input.fragment.chars().nth(0); + let prev_char = last.fragment.chars().nth(0); - if let Some(next_char) = next_char { - if is_external_word_char(*next_char) || is_glob_specific_char(*next_char) { - return Err(nom::Err::Error(nom::error::make_error( - input, - nom::error::ErrorKind::TakeWhile1, - ))); - } + // if let (Some(prev), Some(next)) = (prev_char, next_char) { + // if prev == '.' 
&& is_member_start(*next) { + // return Err(nom::Err::Error(nom::error::make_error( + // input, + // nom::error::ErrorKind::TakeWhile1, + // ))); + // } + // } + + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) || is_glob_specific_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); } + } - let end = input.offset; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_bare((start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_bare((start, end, input.extra)), + )) } +#[tracable_parser] pub fn external_word(input: NomSpan) -> IResult { - trace_step(input, "bare", move |input| { - let start = input.offset; - let (input, _) = take_while1(is_external_word_char)(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = take_while1(is_external_word_char)(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_external_word((start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_external_word((start, end, input.extra)), + )) } +#[tracable_parser] pub fn var(input: NomSpan) -> IResult { - trace_step(input, "var", move |input| { - let start = input.offset; - let (input, _) = tag("$")(input)?; - let (input, bare) = member(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = tag("$")(input)?; + let (input, bare) = ident(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_var(bare.tag(), (start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_var(bare, (start, end, input.extra)), + )) } -pub fn member(input: NomSpan) -> IResult { - trace_step(input, "identifier", move |input| { - let start = input.offset; - let (input, _) = take_while1(is_id_start)(input)?; - let (input, _) = take_while(is_id_continue)(input)?; +#[tracable_parser] +pub fn ident(input: NomSpan) -> IResult { + let start = 
input.offset; + let (input, _) = take_while1(is_start_bare_char)(input)?; + let (input, _) = take_while(is_bare_char)(input)?; + let end = input.offset; - let end = input.offset; - - Ok(( - input, - TokenTreeBuilder::tagged_member((start, end, input.extra)), - )) - }) + Ok((input, Tag::from((start, end, input.extra.origin)))) } +#[tracable_parser] pub fn flag(input: NomSpan) -> IResult { - trace_step(input, "flag", move |input| { - let start = input.offset; - let (input, _) = tag("--")(input)?; - let (input, bare) = bare(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = tag("--")(input)?; + let (input, bare) = bare(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_flag(bare.tag(), (start, end, input.extra)), - )) - }) + Ok(( + input, + TokenTreeBuilder::tagged_flag(bare.tag(), (start, end, input.extra)), + )) } +#[tracable_parser] pub fn shorthand(input: NomSpan) -> IResult { - trace_step(input, "shorthand", move |input| { - let start = input.offset; - let (input, _) = tag("-")(input)?; - let (input, bare) = bare(input)?; - let end = input.offset; + let start = input.offset; + let (input, _) = tag("-")(input)?; + let (input, bare) = bare(input)?; + let end = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_shorthand(bare.tag(), (start, end, input.extra)), - )) - }) -} - -pub fn raw_unit(input: NomSpan) -> IResult> { - trace_step(input, "raw_unit", move |input| { - let start = input.offset; - let (input, unit) = alt(( - tag("B"), - tag("b"), - tag("KB"), - tag("kb"), - tag("Kb"), - tag("K"), - tag("k"), - tag("MB"), - tag("mb"), - tag("Mb"), - tag("GB"), - tag("gb"), - tag("Gb"), - tag("TB"), - tag("tb"), - tag("Tb"), - tag("PB"), - tag("pb"), - tag("Pb"), - ))(input)?; - let end = input.offset; - - Ok(( - input, - Unit::from(unit.fragment).tagged((start, end, input.extra)), - )) - }) -} - -pub fn size(input: NomSpan) -> IResult { - trace_step(input, "size", move |input| { - let mut is_size = false; 
- let start = input.offset; - let (input, number) = raw_number(input)?; - if let Ok((input, Some(size))) = opt(raw_unit)(input) { - let end = input.offset; - - // Check to make sure there is no trailing parseable characters - if let Ok((input, Some(extra))) = opt(bare)(input) { - return Err(nom::Err::Error((input, nom::error::ErrorKind::Char))); - } - - Ok(( - input, - TokenTreeBuilder::tagged_size((number.item, *size), (start, end, input.extra)), - )) - } else { - let end = input.offset; - - // Check to make sure there is no trailing parseable characters - if let Ok((input, Some(extra))) = opt(bare)(input) { - return Err(nom::Err::Error((input, nom::error::ErrorKind::Char))); - } - - Ok(( - input, - TokenTreeBuilder::tagged_number(number.item, number.tag), - )) - } - }) + Ok(( + input, + TokenTreeBuilder::tagged_shorthand(bare.tag(), (start, end, input.extra)), + )) } +#[tracable_parser] pub fn leaf(input: NomSpan) -> IResult { - trace_step(input, "leaf", move |input| { - let (input, node) = alt(( - size, - string, - operator, - flag, - shorthand, - var, - external, - bare, - pattern, - external_word, - ))(input)?; + let (input, node) = alt((number, string, operator, flag, shorthand, var, external))(input)?; - Ok((input, node)) - }) + Ok((input, node)) } -pub fn token_list(input: NomSpan) -> IResult> { - trace_step(input, "token_list", move |input| { - let (input, first) = node(input)?; - let (input, list) = many0(pair(space1, node))(input)?; +#[tracable_parser] +pub fn token_list(input: NomSpan) -> IResult>> { + let start = input.offset; + let (input, first) = node(input)?; - Ok((input, make_token_list(None, first, list, None))) - }) + let (input, mut list) = many0(pair(alt((whitespace, dot)), node))(input)?; + + let end = input.offset; + + Ok(( + input, + make_token_list(first, list, None).tagged((start, end, input.extra.origin)), + )) } -pub fn spaced_token_list(input: NomSpan) -> IResult> { - trace_step(input, "spaced_token_list", move |input| { - let (input, 
sp_left) = opt(space1)(input)?; - let (input, first) = node(input)?; - let (input, list) = many0(pair(space1, node))(input)?; - let (input, sp_right) = opt(space1)(input)?; +#[tracable_parser] +pub fn spaced_token_list(input: NomSpan) -> IResult>> { + let start = input.offset; + let (input, pre_ws) = opt(whitespace)(input)?; + let (input, items) = token_list(input)?; + let (input, post_ws) = opt(whitespace)(input)?; + let end = input.offset; - Ok((input, make_token_list(sp_left, first, list, sp_right))) - }) + let mut out = vec![]; + + out.extend(pre_ws); + out.extend(items.item); + out.extend(post_ws); + + Ok((input, out.tagged((start, end, input.extra.origin)))) } fn make_token_list( - sp_left: Option, - first: TokenNode, - list: Vec<(NomSpan, TokenNode)>, - sp_right: Option, + first: Vec, + list: Vec<(TokenNode, Vec)>, + sp_right: Option, ) -> Vec { let mut nodes = vec![]; - if let Some(sp_left) = sp_left { - nodes.push(TokenNode::Whitespace(Tag::from(sp_left))); - } + nodes.extend(first); - nodes.push(first); - - for (ws, token) in list { - nodes.push(TokenNode::Whitespace(Tag::from(ws))); - nodes.push(token); + for (left, right) in list { + nodes.push(left); + nodes.extend(right); } if let Some(sp_right) = sp_right { - nodes.push(TokenNode::Whitespace(Tag::from(sp_right))); + nodes.push(sp_right); } nodes } +#[tracable_parser] pub fn whitespace(input: NomSpan) -> IResult { - trace_step(input, "whitespace", move |input| { - let left = input.offset; - let (input, ws1) = space1(input)?; - let right = input.offset; + let left = input.offset; + let (input, ws1) = space1(input)?; + let right = input.offset; - Ok(( - input, - TokenTreeBuilder::tagged_ws((left, right, input.extra)), - )) - }) -} - -pub fn delimited_paren(input: NomSpan) -> IResult { - trace_step(input, "delimited_paren", move |input| { - let left = input.offset; - let (input, _) = char('(')(input)?; - let (input, ws1) = opt(whitespace)(input)?; - let (input, inner_items) = opt(token_list)(input)?; - 
let (input, ws2) = opt(whitespace)(input)?; - let (input, _) = char(')')(input)?; - let right = input.offset; - - let mut items = vec![]; - - if let Some(space) = ws1 { - items.push(space); - } - - if let Some(inner_items) = inner_items { - items.extend(inner_items); - } - - if let Some(space) = ws2 { - items.push(space); - } - - Ok(( - input, - TokenTreeBuilder::tagged_parens(items, (left, right, input.extra)), - )) - }) -} - -pub fn delimited_square(input: NomSpan) -> IResult { - trace_step(input, "delimited_paren", move |input| { - let left = input.offset; - let (input, _) = char('[')(input)?; - let (input, ws1) = opt(whitespace)(input)?; - let (input, inner_items) = opt(token_list)(input)?; - let (input, ws2) = opt(whitespace)(input)?; - let (input, _) = char(']')(input)?; - let right = input.offset; - - let mut items = vec![]; - - if let Some(space) = ws1 { - items.push(space); - } - - if let Some(inner_items) = inner_items { - items.extend(inner_items); - } - - if let Some(space) = ws2 { - items.push(space); - } - - Ok(( - input, - TokenTreeBuilder::tagged_square(items, (left, right, input.extra)), - )) - }) -} - -pub fn delimited_brace(input: NomSpan) -> IResult { - trace_step(input, "delimited_brace", move |input| { - let left = input.offset; - let (input, _) = char('{')(input)?; - let (input, _) = opt(space1)(input)?; - let (input, items) = opt(token_list)(input)?; - let (input, _) = opt(space1)(input)?; - let (input, _) = char('}')(input)?; - let right = input.offset; - - Ok(( - input, - TokenTreeBuilder::tagged_brace( - items.unwrap_or_else(|| vec![]), - (left, right, input.extra), - ), - )) - }) -} - -pub fn raw_call(input: NomSpan) -> IResult> { - trace_step(input, "raw_call", move |input| { - let left = input.offset; - let (input, items) = token_list(input)?; - let right = input.offset; - - Ok(( - input, - TokenTreeBuilder::tagged_call(items, (left, right, input.extra)), - )) - }) -} - -pub fn path(input: NomSpan) -> IResult { - trace_step(input, 
"path", move |input| { - let left = input.offset; - let (input, head) = node1(input)?; - let (input, _) = tag(".")(input)?; - let (input, tail) = separated_list(tag("."), alt((member, string)))(input)?; - let right = input.offset; - - Ok(( - input, - TokenTreeBuilder::tagged_path((head, tail), (left, right, input.extra)), - )) - }) -} - -pub fn node1(input: NomSpan) -> IResult { - trace_step(input, "node1", alt((leaf, delimited_paren))) -} - -pub fn node(input: NomSpan) -> IResult { - trace_step( + Ok(( input, - "node", - alt(( - path, - leaf, - delimited_paren, - delimited_brace, - delimited_square, - )), - ) + TokenTreeBuilder::tagged_ws((left, right, input.extra)), + )) } +pub fn delimited(input: NomSpan, delimiter: Delimiter) -> IResult>> { + let left = input.offset; + let (input, _) = char(delimiter.open())(input)?; + let (input, inner_items) = opt(spaced_token_list)(input)?; + let (input, _) = char(delimiter.close())(input)?; + let right = input.offset; + + let mut items = vec![]; + + if let Some(inner_items) = inner_items { + items.extend(inner_items.item); + } + + Ok((input, items.tagged((left, right, input.extra.origin)))) +} + +#[tracable_parser] +pub fn delimited_paren(input: NomSpan) -> IResult { + let (input, tokens) = delimited(input, Delimiter::Paren)?; + + Ok(( + input, + TokenTreeBuilder::tagged_parens(tokens.item, tokens.tag), + )) +} + +#[tracable_parser] +pub fn delimited_square(input: NomSpan) -> IResult { + let (input, tokens) = delimited(input, Delimiter::Square)?; + + Ok(( + input, + TokenTreeBuilder::tagged_square(tokens.item, tokens.tag), + )) +} + +#[tracable_parser] +pub fn delimited_brace(input: NomSpan) -> IResult { + let (input, tokens) = delimited(input, Delimiter::Brace)?; + + Ok(( + input, + TokenTreeBuilder::tagged_brace(tokens.item, tokens.tag), + )) +} + +#[tracable_parser] +pub fn raw_call(input: NomSpan) -> IResult> { + let left = input.offset; + let (input, items) = token_list(input)?; + let right = input.offset; + + Ok(( + 
input, + TokenTreeBuilder::tagged_call(items.item, (left, right, input.extra)), + )) +} + +#[tracable_parser] +pub fn bare_path(input: NomSpan) -> IResult> { + let (input, head) = alt((bare, dot))(input)?; + + let (input, tail) = many0(alt((bare, dot, string)))(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if is_boundary(*next_char) { + let mut result = vec![head]; + result.extend(tail); + + Ok((input, result)) + } else { + Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::Many0, + ))) + } +} + +#[tracable_parser] +pub fn pattern_path(input: NomSpan) -> IResult> { + let (input, head) = alt((pattern, dot))(input)?; + + let (input, tail) = many0(alt((pattern, dot, string)))(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if is_boundary(*next_char) { + let mut result = vec![head]; + result.extend(tail); + + Ok((input, result)) + } else { + Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::Many0, + ))) + } +} + +#[tracable_parser] +pub fn node1(input: NomSpan) -> IResult { + alt((leaf, bare, pattern, external_word, delimited_paren))(input) +} + +#[tracable_parser] +pub fn node(input: NomSpan) -> IResult> { + alt(( + to_list(leaf), + bare_path, + pattern_path, + to_list(external_word), + to_list(delimited_paren), + to_list(delimited_brace), + to_list(delimited_square), + ))(input) +} + +fn to_list( + parser: impl Fn(NomSpan) -> IResult, +) -> impl Fn(NomSpan) -> IResult> { + move |input| { + let (input, next) = parser(input)?; + + Ok((input, vec![next])) + } +} + +#[tracable_parser] +pub fn nodes(input: NomSpan) -> IResult { + let (input, tokens) = token_list(input)?; + + Ok(( + input, + TokenTreeBuilder::tagged_token_list(tokens.item, tokens.tag), + )) +} + +#[tracable_parser] pub fn pipeline(input: NomSpan) -> IResult { - trace_step(input, "pipeline", |input| { - let start = input.offset; - let (input, head) = opt(tuple((opt(space1), raw_call, opt(space1))))(input)?; - let 
(input, items) = trace_step( + let start = input.offset; + let (input, head) = spaced_token_list(input)?; + let (input, items) = many0(tuple((tag("|"), spaced_token_list)))(input)?; + + if input.input_len() != 0 { + return Err(Err::Error(error_position!( input, - "many0", - many0(tuple((tag("|"), opt(space1), raw_call, opt(space1)))), - )?; - - let (input, tail) = opt(space1)(input)?; - let (input, newline) = opt(multispace1)(input)?; - - if input.input_len() != 0 { - return Err(Err::Error(error_position!( - input, - nom::error::ErrorKind::Eof - ))); - } - - let end = input.offset; - - Ok(( - input, - TokenTreeBuilder::tagged_pipeline( - (make_call_list(head, items), tail.map(Tag::from)), - (start, end, input.extra), - ), - )) - }) -} - -fn make_call_list( - head: Option<(Option, Tagged, Option)>, - items: Vec<(NomSpan, Option, Tagged, Option)>, -) -> Vec { - let mut out = vec![]; - - if let Some(head) = head { - let el = PipelineElement::new(None, head.0.map(Tag::from), head.1, head.2.map(Tag::from)); - - out.push(el); + nom::error::ErrorKind::Eof + ))); } - for (pipe, ws1, call, ws2) in items { - let el = PipelineElement::new( - Some(pipe).map(Tag::from), - ws1.map(Tag::from), - call, - ws2.map(Tag::from), - ); + let end = input.offset; - out.push(el); - } + let head_tag = head.tag(); + let mut all_items: Vec> = + vec![PipelineElement::new(None, head).tagged(head_tag)]; - out + all_items.extend(items.into_iter().map(|(pipe, items)| { + let items_tag = items.tag(); + PipelineElement::new(Some(Tag::from(pipe)), items).tagged(Tag::from(pipe).until(items_tag)) + })); + + Ok(( + input, + TokenTreeBuilder::tagged_pipeline(all_items, (start, end, input.extra)), + )) } fn int(frag: &str, neg: Option) -> i64 { @@ -693,9 +653,19 @@ fn int(frag: &str, neg: Option) -> i64 { } } +fn is_boundary(c: Option) -> bool { + match c { + None => true, + Some(')') | Some(']') | Some('}') => true, + Some(c) if c.is_whitespace() => true, + _ => false, + } +} + fn is_external_word_char(c: 
char) -> bool { match c { - ';' | '|' | '#' | '-' | '"' | '\'' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '`' => false, + ';' | '|' | '#' | '-' | '"' | '\'' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '`' + | '.' => false, other if other.is_whitespace() => false, _ => true, } @@ -717,8 +687,7 @@ fn is_glob_char(c: char) -> bool { fn is_start_bare_char(c: char) -> bool { match c { '+' => false, - _ if c.is_alphabetic() => true, - '.' => true, + _ if c.is_alphanumeric() => true, '\\' => true, '/' => true, '_' => true, @@ -732,7 +701,6 @@ fn is_bare_char(c: char) -> bool { match c { '+' => false, _ if c.is_alphanumeric() => true, - '.' => true, '\\' => true, '/' => true, '_' => true, @@ -759,6 +727,16 @@ fn is_id_continue(c: char) -> bool { } } +fn is_member_start(c: char) -> bool { + match c { + '"' | '\'' => true, + '1'..='9' => true, + + other if is_id_start(other) => true, + _ => false, + } +} + #[cfg(test)] mod tests { use super::*; @@ -768,41 +746,6 @@ mod tests { pub type CurriedNode = Box T + 'static>; - macro_rules! assert_leaf { - (parsers [ $($name:tt)* ] $input:tt -> $left:tt .. $right:tt { $kind:tt $parens:tt } ) => { - $( - assert_eq!( - apply($name, stringify!($name), $input), - token(RawToken::$kind $parens, $left, $right) - ); - )* - - assert_eq!( - apply(leaf, "leaf", $input), - token(RawToken::$kind $parens, $left, $right) - ); - - assert_eq!( - apply(leaf, "leaf", $input), - token(RawToken::$kind $parens, $left, $right) - ); - - assert_eq!( - apply(node, "node", $input), - token(RawToken::$kind $parens, $left, $right) - ); - }; - - (parsers [ $($name:tt)* ] $input:tt -> $left:tt .. $right:tt { $kind:tt } ) => { - $( - assert_eq!( - apply($name, stringify!($name), $input), - token(RawToken::$kind, $left, $right) - ); - )* - } - } - macro_rules! 
equal_tokens { ($source:tt -> $tokens:expr) => { let result = apply(pipeline, "pipeline", $source); @@ -823,53 +766,50 @@ mod tests { assert_eq!(debug_result, debug_expected) } } - - // apply(pipeline, "pipeline", r#"cargo +nightly run"#), - // build_token(b::pipeline(vec![( - // None, - // b::call( - // b::bare("cargo"), - // vec![ - // b::sp(), - // b::external_word("+nightly"), - // b::sp(), - // b::bare("run") - // ] - // ), - // None - // )])) }; + + (<$parser:tt> $source:tt -> $tokens:expr) => { + let result = apply($parser, stringify!($parser), $source); + let (expected_tree, expected_source) = TokenTreeBuilder::build(uuid::Uuid::nil(), $tokens); + + if result != expected_tree { + let debug_result = format!("{}", result.debug($source)); + let debug_expected = format!("{}", expected_tree.debug(&expected_source)); + + if debug_result == debug_expected { + assert_eq!( + result, expected_tree, + "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}", + $source, + debug_expected + ) + } else { + assert_eq!(debug_result, debug_expected) + } + } + }; + } #[test] fn test_integer() { - assert_leaf! { - parsers [ size ] - "123" -> 0..3 { Number(RawNumber::int((0, 3, test_uuid())).item) } + equal_tokens! { + + "123" -> b::token_list(vec![b::int(123)]) } - assert_leaf! { - parsers [ size ] - "-123" -> 0..4 { Number(RawNumber::int((0, 4, test_uuid())).item) } - } - } - - #[test] - fn test_size() { - assert_leaf! { - parsers [ size ] - "123MB" -> 0..5 { Size(RawNumber::int((0, 3, test_uuid())).item, Unit::MB) } - } - - assert_leaf! { - parsers [ size ] - "10GB" -> 0..4 { Size(RawNumber::int((0, 2, test_uuid())).item, Unit::GB) } + equal_tokens! { + + "-123" -> b::token_list(vec![b::int(-123)]) } } #[test] fn test_operator() { - assert_eq!(apply(node, "node", ">"), build_token(b::op(">"))); + equal_tokens! { + + ">" -> b::token_list(vec![b::op(">")]) + } // assert_leaf! 
{ // parsers [ operator ] @@ -899,37 +839,50 @@ mod tests { #[test] fn test_string() { - assert_leaf! { - parsers [ string dq_string ] - r#""hello world""# -> 0..13 { String(tag(1, 12)) } + equal_tokens! { + + r#""hello world""# -> b::token_list(vec![b::string("hello world")]) } - assert_leaf! { - parsers [ string sq_string ] - r"'hello world'" -> 0..13 { String(tag(1, 12)) } + equal_tokens! { + + r#"'hello world'"# -> b::token_list(vec![b::string("hello world")]) } } #[test] fn test_bare() { - assert_leaf! { - parsers [ bare ] - "hello" -> 0..5 { Bare } + equal_tokens! { + + "hello" -> b::token_list(vec![b::bare("hello")]) + } + } + + #[test] + fn test_simple_path() { + equal_tokens! { + + "450MB" -> b::token_list(vec![b::bare("450MB")]) } - assert_leaf! { - parsers [ bare ] - "chrome.exe" -> 0..10 { Bare } + equal_tokens! { + + "chrome.exe" -> b::token_list(vec![b::bare("chrome"), b::op(Operator::Dot), b::bare("exe")]) } - assert_leaf! { - parsers [ bare ] - r"C:\windows\system.dll" -> 0..21 { Bare } + equal_tokens! { + + ".azure" -> b::token_list(vec![b::op(Operator::Dot), b::bare("azure")]) } - assert_leaf! { - parsers [ bare ] - r"C:\Code\-testing\my_tests.js" -> 0..28 { Bare } + equal_tokens! { + + r"C:\windows\system.dll" -> b::token_list(vec![b::bare(r"C:\windows\system"), b::op(Operator::Dot), b::bare("dll")]) + } + + equal_tokens! { + + r"C:\Code\-testing\my_tests.js" -> b::token_list(vec![b::bare(r"C:\Code\-testing\my_tests"), b::op(Operator::Dot), b::bare("js")]) } } @@ -956,223 +909,170 @@ mod tests { #[test] fn test_variable() { - assert_leaf! { - parsers [ var ] - "$it" -> 0..3 { Variable(tag(1, 3)) } + equal_tokens! { + + "$it" -> b::token_list(vec![b::var("it")]) } - assert_leaf! { - parsers [ var ] - "$name" -> 0..5 { Variable(tag(1, 5)) } + equal_tokens! { + + "$name" -> b::token_list(vec![b::var("name")]) } } #[test] fn test_external() { - assert_leaf! { - parsers [ external ] - "^ls" -> 0..3 { ExternalCommand(tag(1, 3)) } + equal_tokens! 
{ + + "^ls" -> b::token_list(vec![b::external_command("ls")]) + } + } + + #[test] + fn test_dot_prefixed_name() { + equal_tokens! { + + ".azure" -> b::token_list(vec![b::op("."), b::bare("azure")]) } } #[test] fn test_delimited_paren() { - assert_eq!( - apply(node, "node", "(abc)"), - build_token(b::parens(vec![b::bare("abc")])) - ); + equal_tokens! { + + "(abc)" -> b::token_list(vec![b::parens(vec![b::bare("abc")])]) + } - assert_eq!( - apply(node, "node", "( abc )"), - build_token(b::parens(vec![b::ws(" "), b::bare("abc"), b::ws(" ")])) - ); + equal_tokens! { + + "( abc )" -> b::token_list(vec![b::parens(vec![b::ws(" "), b::bare("abc"), b::ws(" ")])]) + } - assert_eq!( - apply(node, "node", "( abc def )"), - build_token(b::parens(vec![ - b::ws(" "), - b::bare("abc"), - b::sp(), - b::bare("def"), - b::sp() - ])) - ); + equal_tokens! { + + "( abc def )" -> b::token_list(vec![b::parens(vec![b::ws(" "), b::bare("abc"), b::ws(" "), b::bare("def"), b::ws(" ")])]) + } - assert_eq!( - apply(node, "node", "( abc def 123 456GB )"), - build_token(b::parens(vec![ - b::ws(" "), - b::bare("abc"), - b::sp(), - b::bare("def"), - b::sp(), - b::int(123), - b::sp(), - b::size(456, "GB"), - b::sp() - ])) - ); + equal_tokens! { + + "( abc def 123 456GB )" -> b::token_list(vec![b::parens(vec![ + b::ws(" "), b::bare("abc"), b::ws(" "), b::bare("def"), b::ws(" "), b::int(123), b::ws(" "), b::bare("456GB"), b::ws(" ") + ])]) + } } #[test] fn test_delimited_square() { - assert_eq!( - apply(node, "node", "[abc]"), - build_token(b::square(vec![b::bare("abc")])) - ); + equal_tokens! { + + "[abc]" -> b::token_list(vec![b::square(vec![b::bare("abc")])]) + } - assert_eq!( - apply(node, "node", "[ abc ]"), - build_token(b::square(vec![b::ws(" "), b::bare("abc"), b::ws(" ")])) - ); + equal_tokens! 
{ + + "[ abc ]" -> b::token_list(vec![b::square(vec![b::ws(" "), b::bare("abc"), b::ws(" ")])]) + } - assert_eq!( - apply(node, "node", "[ abc def ]"), - build_token(b::square(vec![ - b::ws(" "), - b::bare("abc"), - b::sp(), - b::bare("def"), - b::sp() - ])) - ); + equal_tokens! { + + "[ abc def ]" -> b::token_list(vec![b::square(vec![b::ws(" "), b::bare("abc"), b::ws(" "), b::bare("def"), b::ws(" ")])]) + } - assert_eq!( - apply(node, "node", "[ abc def 123 456GB ]"), - build_token(b::square(vec![ - b::ws(" "), - b::bare("abc"), - b::sp(), - b::bare("def"), - b::sp(), - b::int(123), - b::sp(), - b::size(456, "GB"), - b::sp() - ])) - ); + equal_tokens! { + + "[ abc def 123 456GB ]" -> b::token_list(vec![b::square(vec![ + b::ws(" "), b::bare("abc"), b::ws(" "), b::bare("def"), b::ws(" "), b::int(123), b::ws(" "), b::bare("456GB"), b::ws(" ") + ])]) + } } #[test] fn test_path() { let _ = pretty_env_logger::try_init(); - assert_eq!( - apply(node, "node", "$it.print"), - build_token(b::path(b::var("it"), vec![b::member("print")])) - ); - assert_eq!( - apply(node, "node", "$head.part1.part2"), - build_token(b::path( - b::var("head"), - vec![b::member("part1"), b::member("part2")] - )) - ); + equal_tokens! { + + "$it.print" -> b::token_list(vec![b::var("it"), b::op("."), b::bare("print")]) + } - assert_eq!( - apply(node, "node", "( hello ).world"), - build_token(b::path( - b::parens(vec![b::sp(), b::bare("hello"), b::sp()]), - vec![b::member("world")] - )) - ); + equal_tokens! { + + "$head.part1.part2" -> b::token_list(vec![b::var("head"), b::op("."), b::bare("part1"), b::op("."), b::bare("part2")]) + } - assert_eq!( - apply(node, "node", "( hello ).\"world\""), - build_token(b::path( - b::parens(vec![b::sp(), b::bare("hello"), b::sp()],), - vec![b::string("world")] - )) - ); + equal_tokens! { + + "( hello ).world" -> b::token_list(vec![b::parens(vec![b::sp(), b::bare("hello"), b::sp()]), b::op("."), b::bare("world")]) + } + + equal_tokens! 
{ + + r#"( hello )."world""# -> b::token_list(vec![b::parens(vec![b::sp(), b::bare("hello"), b::sp()]), b::op("."), b::string("world")]) + } } #[test] fn test_nested_path() { - assert_eq!( - apply( - node, - "node", - "( $it.is.\"great news\".right yep $yep ).\"world\"" - ), - build_token(b::path( - b::parens(vec![ - b::sp(), - b::path( + equal_tokens! { + + r#"( $it.is."great news".right yep $yep )."world""# -> b::token_list( + vec![ + b::parens(vec![ + b::sp(), b::var("it"), - vec![b::member("is"), b::string("great news"), b::member("right")] - ), - b::sp(), - b::bare("yep"), - b::sp(), - b::var("yep"), - b::sp() - ]), - vec![b::string("world")] - )) - ) + b::op("."), + b::bare("is"), + b::op("."), + b::string("great news"), + b::op("."), + b::bare("right"), + b::sp(), + b::bare("yep"), + b::sp(), + b::var("yep"), + b::sp() + ]), + b::op("."), b::string("world")] + ) + } } #[test] fn test_smoke_single_command() { - assert_eq!( - apply(raw_call, "raw_call", "git add ."), - build(b::call( - b::bare("git"), - vec![b::sp(), b::bare("add"), b::sp(), b::bare(".")] - )) - ); + equal_tokens! { + + "git add ." -> b::token_list(vec![b::bare("git"), b::sp(), b::bare("add"), b::sp(), b::op(".")]) + } - assert_eq!( - apply(raw_call, "raw_call", "open Cargo.toml"), - build(b::call( - b::bare("open"), - vec![b::sp(), b::bare("Cargo.toml")] - )) - ); + equal_tokens! { + + "open Cargo.toml" -> b::token_list(vec![b::bare("open"), b::sp(), b::bare("Cargo"), b::op("."), b::bare("toml")]) + } - assert_eq!( - apply(raw_call, "raw_call", "select package.version"), - build(b::call( - b::bare("select"), - vec![b::sp(), b::bare("package.version")] - )) - ); + equal_tokens! { + + "select package.version" -> b::token_list(vec![b::bare("select"), b::sp(), b::bare("package"), b::op("."), b::bare("version")]) + } - assert_eq!( - apply(raw_call, "raw_call", "echo $it"), - build(b::call(b::bare("echo"), vec![b::sp(), b::var("it")])) - ); + equal_tokens! 
{ + + "echo $it" -> b::token_list(vec![b::bare("echo"), b::sp(), b::var("it")]) + } - assert_eq!( - apply(raw_call, "raw_call", "open Cargo.toml --raw"), - build(b::call( - b::bare("open"), - vec![b::sp(), b::bare("Cargo.toml"), b::sp(), b::flag("raw")] - )) - ); + equal_tokens! { + + "open Cargo.toml --raw" -> b::token_list(vec![b::bare("open"), b::sp(), b::bare("Cargo"), b::op("."), b::bare("toml"), b::sp(), b::flag("raw")]) + } - assert_eq!( - apply(raw_call, "raw_call", "open Cargo.toml -r"), - build(b::call( - b::bare("open"), - vec![b::sp(), b::bare("Cargo.toml"), b::sp(), b::shorthand("r")] - )) - ); + equal_tokens! { + + "open Cargo.toml -r" -> b::token_list(vec![b::bare("open"), b::sp(), b::bare("Cargo"), b::op("."), b::bare("toml"), b::sp(), b::shorthand("r")]) + } - assert_eq!( - apply(raw_call, "raw_call", "config --set tabs 2"), - build(b::call( - b::bare("config"), - vec![ - b::sp(), - b::flag("set"), - b::sp(), - b::bare("tabs"), - b::sp(), - b::int(2) - ] - )) - ); + equal_tokens! 
{ + + "config --set tabs 2" -> b::token_list(vec![b::bare("config"), b::sp(), b::flag("set"), b::sp(), b::bare("tabs"), b::sp(), b::int(2)]) + } } #[test] @@ -1181,120 +1081,159 @@ mod tests { equal_tokens!( "cargo +nightly run" -> - b::pipeline(vec![( - None, - b::call( - b::bare("cargo"), - vec![ - b::sp(), - b::external_word("+nightly"), - b::sp(), - b::bare("run") - ] - ), - None - )]) + b::pipeline(vec![vec![ + b::bare("cargo"), + b::sp(), + b::external_word("+nightly"), + b::sp(), + b::bare("run") + ]]) ); equal_tokens!( "rm foo%bar" -> - b::pipeline(vec![( - None, - b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), - None - )]) + b::pipeline(vec![vec![ + b::bare("rm"), b::sp(), b::external_word("foo%bar") + ]]) ); equal_tokens!( "rm foo%bar" -> - b::pipeline(vec![( - None, - b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), - None - )]) + b::pipeline(vec![vec![ + b::bare("rm"), b::sp(), b::external_word("foo%bar"), + ]]) ); } #[test] - fn test_smoke_pipeline() { + fn test_pipeline() { let _ = pretty_env_logger::try_init(); - assert_eq!( - apply( - pipeline, - "pipeline", - r#"git branch --merged | split-row "`n" | where $it != "* master""# - ), - build_token(b::pipeline(vec![ - ( - None, - b::call( - b::bare("git"), - vec![b::sp(), b::bare("branch"), b::sp(), b::flag("merged")] - ), - Some(" ") - ), - ( - Some(" "), - b::call(b::bare("split-row"), vec![b::sp(), b::string("`n")]), - Some(" ") - ), - ( - Some(" "), - b::call( - b::bare("where"), - vec![ - b::sp(), - b::var("it"), - b::sp(), - b::op("!="), - b::sp(), - b::string("* master") - ] - ), - None - ) - ])) - ); - - assert_eq!( - apply(pipeline, "pipeline", "ls | where { $it.size > 100 }"), - build_token(b::pipeline(vec![ - (None, b::call(b::bare("ls"), vec![]), Some(" ")), - ( - Some(" "), - b::call( - b::bare("where"), - vec![ - b::sp(), - b::braced(vec![ - b::path(b::var("it"), vec![b::member("size")]), - b::sp(), - b::op(">"), - b::sp(), - b::int(100) - ]) - ] 
- ), - None - ) - ])) - ) + equal_tokens! { + "sys | echo" -> b::pipeline(vec![ + vec![ + b::bare("sys"), b::sp() + ], + vec![ + b::sp(), b::bare("echo") + ] + ]) + } } - fn apply( - f: impl Fn(NomSpan) -> Result<(NomSpan, T), nom::Err<(NomSpan, nom::error::ErrorKind)>>, + #[test] + fn test_patterns() { + equal_tokens! { + + "cp ../formats/*" -> b::pipeline(vec![vec![b::bare("cp"), b::ws(" "), b::op("."), b::op("."), b::pattern("/formats/*")]]) + } + + equal_tokens! { + + "cp * /dev/null" -> b::pipeline(vec![vec![b::bare("cp"), b::ws(" "), b::pattern("*"), b::ws(" "), b::bare("/dev/null")]]) + } + } + + // #[test] + // fn test_pseudo_paths() { + // let _ = pretty_env_logger::try_init(); + + // equal_tokens!( + // r#"sys | where cpu."max ghz" > 1"# -> + // b::pipeline(vec![ + // (None, b::call(b::bare("sys"), vec![]), Some(" ")), + // ( + // Some(" "), + // b::call( + // b::bare("where"), + // vec![ + // b::sp(), + // b::path(b::bare("cpu"), vec![b::string("max ghz")]), + // b::sp(), + // b::op(">"), + // b::sp(), + // b::int(1) + // ] + // ), + // None + // ) + // ]) + // ); + // } + + // #[test] + // fn test_smoke_pipeline() { + // let _ = pretty_env_logger::try_init(); + + // assert_eq!( + // apply( + // pipeline, + // "pipeline", + // r#"git branch --merged | split-row "`n" | where $it != "* master""# + // ), + // build_token(b::pipeline(vec![ + // ( + // None, + // b::call( + // b::bare("git"), + // vec![b::sp(), b::bare("branch"), b::sp(), b::flag("merged")] + // ), + // Some(" ") + // ), + // ( + // Some(" "), + // b::call(b::bare("split-row"), vec![b::sp(), b::string("`n")]), + // Some(" ") + // ), + // ( + // Some(" "), + // b::call( + // b::bare("where"), + // vec![ + // b::sp(), + // b::var("it"), + // b::sp(), + // b::op("!="), + // b::sp(), + // b::string("* master") + // ] + // ), + // None + // ) + // ])) + // ); + + // assert_eq!( + // apply(pipeline, "pipeline", "ls | where { $it.size > 100 }"), + // build_token(b::pipeline(vec![ + // (None, 
b::call(b::bare("ls"), vec![]), Some(" ")), + // ( + // Some(" "), + // b::call( + // b::bare("where"), + // vec![ + // b::sp(), + // b::braced(vec![ + // b::path(b::var("it"), vec![b::member("size")]), + // b::sp(), + // b::op(">"), + // b::sp(), + // b::int(100) + // ]) + // ] + // ), + // None + // ) + // ])) + // ) + // } + + fn apply( + f: impl Fn(NomSpan) -> Result<(NomSpan, TokenNode), nom::Err<(NomSpan, nom::error::ErrorKind)>>, desc: &str, string: &str, - ) -> T { - match f(NomSpan::new_extra(string, uuid::Uuid::nil())) { - Ok(v) => v.1, - Err(other) => { - println!("{:?}", other); - println!("for {} @ {}", string, desc); - panic!("No dice"); - } - } + ) -> TokenNode { + f(nom_input(string, uuid::Uuid::nil())).unwrap().1 } fn tag(left: usize, right: usize) -> Tag { @@ -1312,17 +1251,6 @@ mod tests { TokenNode::Delimited(spanned) } - fn path(head: TokenNode, tail: Vec, left: usize, right: usize) -> TokenNode { - let tag = head.tag(); - - let node = PathNode::new( - Box::new(head), - tail.into_iter().map(TokenNode::Token).collect(), - ); - let spanned = node.tagged((left, right, tag.anchor)); - TokenNode::Path(spanned) - } - fn token(token: RawToken, left: usize, right: usize) -> TokenNode { TokenNode::Token(token.tagged((left, right, uuid::Uuid::nil()))) } diff --git a/src/parser/parse/pipeline.rs b/src/parser/parse/pipeline.rs index 42bbe23a1..36813e39c 100644 --- a/src/parser/parse/pipeline.rs +++ b/src/parser/parse/pipeline.rs @@ -1,4 +1,4 @@ -use crate::parser::CallNode; +use crate::parser::TokenNode; use crate::traits::ToDebug; use crate::{Tag, Tagged}; use derive_new::new; @@ -7,20 +7,16 @@ use std::fmt; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, new)] pub struct Pipeline { - pub(crate) parts: Vec, - pub(crate) post_ws: Option, + pub(crate) parts: Vec>, + // pub(crate) post_ws: Option, } impl ToDebug for Pipeline { fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { - for part in &self.parts { + for part in 
self.parts.iter() { write!(f, "{}", part.debug(source))?; } - if let Some(post_ws) = self.post_ws { - write!(f, "{}", post_ws.slice(source))? - } - Ok(()) } } @@ -28,10 +24,7 @@ impl ToDebug for Pipeline { #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters, new)] pub struct PipelineElement { pub pipe: Option, - pub pre_ws: Option, - #[get = "pub(crate)"] - call: Tagged, - pub post_ws: Option, + pub tokens: Tagged>, } impl ToDebug for PipelineElement { @@ -40,14 +33,8 @@ impl ToDebug for PipelineElement { write!(f, "{}", pipe.slice(source))?; } - if let Some(pre_ws) = self.pre_ws { - write!(f, "{}", pre_ws.slice(source))?; - } - - write!(f, "{}", self.call.debug(source))?; - - if let Some(post_ws) = self.post_ws { - write!(f, "{}", post_ws.slice(source))?; + for token in &self.tokens.item { + write!(f, "{}", token.debug(source))?; } Ok(()) diff --git a/src/parser/parse/token_tree.rs b/src/parser/parse/token_tree.rs index e0072360e..8cbb28264 100644 --- a/src/parser/parse/token_tree.rs +++ b/src/parser/parse/token_tree.rs @@ -1,5 +1,6 @@ use crate::errors::ShellError; -use crate::parser::parse::{call_node::*, flag::*, operator::*, pipeline::*, tokens::*}; +use crate::parser::parse::{call_node::*, flag::*, pipeline::*, tokens::*}; +use crate::prelude::*; use crate::traits::ToDebug; use crate::{Tag, Tagged, Text}; use derive_new::new; @@ -12,15 +13,14 @@ pub enum TokenNode { Token(Token), Call(Tagged), + Nodes(Tagged>), Delimited(Tagged), Pipeline(Tagged), - Operator(Tagged), Flag(Tagged), Member(Tag), Whitespace(Tag), Error(Tagged>), - Path(Tagged), } impl ToDebug for TokenNode { @@ -94,32 +94,33 @@ impl TokenNode { pub fn tag(&self) -> Tag { match self { TokenNode::Token(t) => t.tag(), + TokenNode::Nodes(t) => t.tag(), TokenNode::Call(s) => s.tag(), TokenNode::Delimited(s) => s.tag(), TokenNode::Pipeline(s) => s.tag(), - TokenNode::Operator(s) => s.tag(), TokenNode::Flag(s) => s.tag(), TokenNode::Member(s) => *s, TokenNode::Whitespace(s) => *s, 
TokenNode::Error(s) => s.tag(), - TokenNode::Path(s) => s.tag(), } } - pub fn type_name(&self) -> String { + pub fn type_name(&self) -> &'static str { match self { TokenNode::Token(t) => t.type_name(), + TokenNode::Nodes(_) => "nodes", TokenNode::Call(_) => "command", TokenNode::Delimited(d) => d.type_name(), TokenNode::Pipeline(_) => "pipeline", - TokenNode::Operator(_) => "operator", TokenNode::Flag(_) => "flag", TokenNode::Member(_) => "member", TokenNode::Whitespace(_) => "whitespace", TokenNode::Error(_) => "error", - TokenNode::Path(_) => "path", } - .to_string() + } + + pub fn tagged_type_name(&self) -> Tagged<&'static str> { + self.type_name().tagged(self.tag()) } pub fn old_debug<'a>(&'a self, source: &'a Text) -> DebugTokenNode<'a> { @@ -134,6 +135,16 @@ impl TokenNode { self.tag().slice(source) } + pub fn get_variable(&self) -> Result<(Tag, Tag), ShellError> { + match self { + TokenNode::Token(Tagged { + item: RawToken::Variable(inner_tag), + tag: outer_tag, + }) => Ok((*outer_tag, *inner_tag)), + _ => Err(ShellError::type_error("variable", self.tagged_type_name())), + } + } + pub fn is_bare(&self) -> bool { match self { TokenNode::Token(Tagged { @@ -144,6 +155,20 @@ impl TokenNode { } } + pub fn as_block(&self) -> Option> { + match self { + TokenNode::Delimited(Tagged { + item: + DelimitedNode { + delimiter, + children, + }, + tag, + }) if *delimiter == Delimiter::Brace => Some((&children[..]).tagged(tag)), + _ => None, + } + } + pub fn is_external(&self) -> bool { match self { TokenNode::Token(Tagged { @@ -181,13 +206,60 @@ impl TokenNode { _ => Err(ShellError::string("unimplemented")), } } + + pub fn is_whitespace(&self) -> bool { + match self { + TokenNode::Whitespace(_) => true, + _ => false, + } + } + + pub fn expect_string(&self) -> (Tag, Tag) { + match self { + TokenNode::Token(Tagged { + item: RawToken::String(inner_tag), + tag: outer_tag, + }) => (*outer_tag, *inner_tag), + other => panic!("Expected string, found {:?}", other), + } + } +} + 
+#[cfg(test)] +impl TokenNode { + pub fn expect_list(&self) -> Tagged<&[TokenNode]> { + match self { + TokenNode::Nodes(Tagged { item, tag }) => (&item[..]).tagged(tag), + other => panic!("Expected list, found {:?}", other), + } + } + + pub fn expect_var(&self) -> (Tag, Tag) { + match self { + TokenNode::Token(Tagged { + item: RawToken::Variable(inner_tag), + tag: outer_tag, + }) => (*outer_tag, *inner_tag), + other => panic!("Expected var, found {:?}", other), + } + } + + pub fn expect_bare(&self) -> Tag { + match self { + TokenNode::Token(Tagged { + item: RawToken::Bare, + tag, + }) => *tag, + other => panic!("Expected var, found {:?}", other), + } + } } #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters, new)] #[get = "pub(crate)"] pub struct DelimitedNode { - delimiter: Delimiter, - children: Vec, + pub(crate) delimiter: Delimiter, + pub(crate) children: Vec, } impl DelimitedNode { @@ -207,6 +279,24 @@ pub enum Delimiter { Square, } +impl Delimiter { + pub(crate) fn open(&self) -> char { + match self { + Delimiter::Paren => '(', + Delimiter::Brace => '{', + Delimiter::Square => '[', + } + } + + pub(crate) fn close(&self) -> char { + match self { + Delimiter::Paren => ')', + Delimiter::Brace => '}', + Delimiter::Square => ']', + } + } +} + #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters, new)] #[get = "pub(crate)"] pub struct PathNode { diff --git a/src/parser/parse/token_tree_builder.rs b/src/parser/parse/token_tree_builder.rs index 9a2e6ab72..67298987a 100644 --- a/src/parser/parse/token_tree_builder.rs +++ b/src/parser/parse/token_tree_builder.rs @@ -3,7 +3,7 @@ use crate::prelude::*; use crate::parser::parse::flag::{Flag, FlagKind}; use crate::parser::parse::operator::Operator; use crate::parser::parse::pipeline::{Pipeline, PipelineElement}; -use crate::parser::parse::token_tree::{DelimitedNode, Delimiter, PathNode, TokenNode}; +use crate::parser::parse::token_tree::{DelimitedNode, Delimiter, TokenNode}; use 
crate::parser::parse::tokens::{RawNumber, RawToken}; use crate::parser::parse::unit::Unit; use crate::parser::CallNode; @@ -31,60 +31,68 @@ impl TokenTreeBuilder { (node, builder.output) } - pub fn pipeline(input: Vec<(Option<&str>, CurriedCall, Option<&str>)>) -> CurriedToken { - let input: Vec<(Option, CurriedCall, Option)> = input - .into_iter() - .map(|(pre, call, post)| { - ( - pre.map(|s| s.to_string()), - call, - post.map(|s| s.to_string()), - ) - }) - .collect(); + fn build_tagged(&mut self, callback: impl FnOnce(&mut TokenTreeBuilder) -> T) -> Tagged { + let start = self.pos; + let ret = callback(self); + let end = self.pos; + ret.tagged((start, end, self.anchor)) + } + + pub fn pipeline(input: Vec>) -> CurriedToken { Box::new(move |b| { let start = b.pos; - let mut out: Vec = vec![]; + let mut out: Vec> = vec![]; let mut input = input.into_iter().peekable(); - let (pre, call, post) = input + let head = input .next() .expect("A pipeline must contain at least one element"); let pipe = None; - let pre_tag = pre.map(|pre| b.consume_tag(&pre)); - let call = call(b); - let post_tag = post.map(|post| b.consume_tag(&post)); + let head = b.build_tagged(|b| head.into_iter().map(|node| node(b)).collect()); - out.push(PipelineElement::new(pipe, pre_tag, call, post_tag)); + let head_tag: Tag = head.tag; + out.push(PipelineElement::new(pipe, head).tagged(head_tag)); loop { match input.next() { None => break, - Some((pre, call, post)) => { + Some(node) => { + let start = b.pos; let pipe = Some(b.consume_tag("|")); - let pre_span = pre.map(|pre| b.consume_tag(&pre)); - let call = call(b); - let post_span = post.map(|post| b.consume_tag(&post)); + let node = + b.build_tagged(|b| node.into_iter().map(|node| node(b)).collect()); + let end = b.pos; - out.push(PipelineElement::new(pipe, pre_span, call, post_span)); + out.push(PipelineElement::new(pipe, node).tagged((start, end, b.anchor))); } } } let end = b.pos; - TokenTreeBuilder::tagged_pipeline((out, None), (start, end, 
b.anchor)) + TokenTreeBuilder::tagged_pipeline(out, (start, end, b.anchor)) }) } - pub fn tagged_pipeline( - input: (Vec, Option), - tag: impl Into, - ) -> TokenNode { - TokenNode::Pipeline(Pipeline::new(input.0, input.1.into()).tagged(tag.into())) + pub fn tagged_pipeline(input: Vec>, tag: impl Into) -> TokenNode { + TokenNode::Pipeline(Pipeline::new(input).tagged(tag.into())) + } + + pub fn token_list(input: Vec) -> CurriedToken { + Box::new(move |b| { + let start = b.pos; + let tokens = input.into_iter().map(|i| i(b)).collect(); + let end = b.pos; + + TokenTreeBuilder::tagged_token_list(tokens, (start, end, b.anchor)) + }) + } + + pub fn tagged_token_list(input: Vec, tag: impl Into) -> TokenNode { + TokenNode::Nodes(input.tagged(tag)) } pub fn op(input: impl Into) -> CurriedToken { @@ -100,7 +108,7 @@ impl TokenTreeBuilder { } pub fn tagged_op(input: impl Into, tag: impl Into) -> TokenNode { - TokenNode::Operator(input.into().tagged(tag.into())) + TokenNode::Token(RawToken::Operator(input.into()).tagged(tag.into())) } pub fn string(input: impl Into) -> CurriedToken { @@ -168,8 +176,23 @@ impl TokenTreeBuilder { TokenNode::Token(RawToken::ExternalWord.tagged(input.into())) } - pub fn tagged_external(input: impl Into, tag: impl Into) -> TokenNode { - TokenNode::Token(RawToken::ExternalCommand(input.into()).tagged(tag.into())) + pub fn external_command(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (outer_start, _) = b.consume("^"); + let (inner_start, end) = b.consume(&input); + b.pos = end; + + TokenTreeBuilder::tagged_external_command( + (inner_start, end, b.anchor), + (outer_start, end, b.anchor), + ) + }) + } + + pub fn tagged_external_command(inner: impl Into, outer: impl Into) -> TokenNode { + TokenNode::Token(RawToken::ExternalCommand(inner.into()).tagged(outer.into())) } pub fn int(input: impl Into) -> CurriedToken { @@ -229,29 +252,6 @@ impl TokenTreeBuilder { TokenNode::Token(RawToken::Size(int, 
unit).tagged(tag.into())) } - pub fn path(head: CurriedToken, tail: Vec) -> CurriedToken { - Box::new(move |b| { - let start = b.pos; - let head = head(b); - - let mut output = vec![]; - - for item in tail { - b.consume("."); - - output.push(item(b)); - } - - let end = b.pos; - - TokenTreeBuilder::tagged_path((head, output), (start, end, b.anchor)) - }) - } - - pub fn tagged_path(input: (TokenNode, Vec), tag: impl Into) -> TokenNode { - TokenNode::Path(PathNode::new(Box::new(input.0), input.1).tagged(tag.into())) - } - pub fn var(input: impl Into) -> CurriedToken { let input = input.into(); diff --git a/src/parser/parse/tokens.rs b/src/parser/parse/tokens.rs index d796a8fcb..77a856af3 100644 --- a/src/parser/parse/tokens.rs +++ b/src/parser/parse/tokens.rs @@ -1,4 +1,5 @@ use crate::parser::parse::unit::*; +use crate::parser::Operator; use crate::prelude::*; use crate::{Tagged, Text}; use std::fmt; @@ -7,6 +8,7 @@ use std::str::FromStr; #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum RawToken { Number(RawNumber), + Operator(Operator), Size(RawNumber, Unit), String(Tag), Variable(Tag), @@ -49,12 +51,13 @@ impl RawToken { pub fn type_name(&self) -> &'static str { match self { RawToken::Number(_) => "Number", + RawToken::Operator(..) => "operator", RawToken::Size(..) 
=> "Size", RawToken::String(_) => "String", - RawToken::Variable(_) => "Variable", - RawToken::ExternalCommand(_) => "ExternalCommand", - RawToken::ExternalWord => "ExternalWord", - RawToken::GlobPattern => "GlobPattern", + RawToken::Variable(_) => "variable", + RawToken::ExternalCommand(_) => "external command", + RawToken::ExternalWord => "external word", + RawToken::GlobPattern => "glob pattern", RawToken::Bare => "String", } } diff --git a/src/parser/parse_command.rs b/src/parser/parse_command.rs index 36ba82f8e..d383689fd 100644 --- a/src/parser/parse_command.rs +++ b/src/parser/parse_command.rs @@ -1,92 +1,35 @@ -use crate::context::Context; use crate::errors::{ArgumentError, ShellError}; +use crate::parser::hir::syntax_shape::{expand_expr, spaced}; use crate::parser::registry::{NamedType, PositionalType, Signature}; -use crate::parser::{baseline_parse_tokens, CallNode}; +use crate::parser::TokensIterator; use crate::parser::{ - hir::{self, NamedArguments}, - Flag, RawToken, TokenNode, + hir::{self, ExpandContext, NamedArguments}, + Flag, }; use crate::traits::ToDebug; -use crate::{Tag, Tagged, TaggedItem, Text}; +use crate::{Tag, Tagged, Text}; use log::trace; -pub fn parse_command( +pub fn parse_command_tail( config: &Signature, - context: &Context, - call: &Tagged, - source: &Text, -) -> Result { - let Tagged { item: raw_call, .. } = call; - - trace!("Processing {:?}", config); - - let head = parse_command_head(call.head())?; - - let children: Option> = raw_call.children().as_ref().map(|nodes| { - nodes - .iter() - .cloned() - .filter(|node| match node { - TokenNode::Whitespace(_) => false, - _ => true, - }) - .collect() - }); - - match parse_command_tail(&config, context, children, source, call.tag())? 
{ - None => Ok(hir::Call::new(Box::new(head), None, None)), - Some((positional, named)) => Ok(hir::Call::new(Box::new(head), positional, named)), - } -} - -fn parse_command_head(head: &TokenNode) -> Result { - match head { - TokenNode::Token( - spanned @ Tagged { - item: RawToken::Bare, - .. - }, - ) => Ok(spanned.map(|_| hir::RawExpression::Literal(hir::Literal::Bare))), - - TokenNode::Token(Tagged { - item: RawToken::String(inner_tag), - tag, - }) => Ok(hir::RawExpression::Literal(hir::Literal::String(*inner_tag)).tagged(*tag)), - - other => Err(ShellError::unexpected(&format!( - "command head -> {:?}", - other - ))), - } -} - -fn parse_command_tail( - config: &Signature, - context: &Context, - tail: Option>, - source: &Text, + context: &ExpandContext, + tail: &mut TokensIterator, command_tag: Tag, ) -> Result>, Option)>, ShellError> { - let tail = &mut match &tail { - None => hir::TokensIterator::new(&[]), - Some(tail) => hir::TokensIterator::new(tail), - }; - let mut named = NamedArguments::new(); - - trace_remaining("nodes", tail.clone(), source); + trace_remaining("nodes", tail.clone(), context.source()); for (name, kind) in &config.named { trace!(target: "nu::parse", "looking for {} : {:?}", name, kind); match kind { NamedType::Switch => { - let flag = extract_switch(name, tail, source); + let flag = extract_switch(name, tail, context.source()); named.insert_switch(name, flag); } NamedType::Mandatory(syntax_type) => { - match extract_mandatory(config, name, tail, source, command_tag) { + match extract_mandatory(config, name, tail, context.source(), command_tag) { Err(err) => return Err(err), // produce a correct diagnostic Ok((pos, flag)) => { tail.move_to(pos); @@ -99,42 +42,47 @@ fn parse_command_tail( )); } - let expr = - hir::baseline_parse_next_expr(tail, context, source, *syntax_type)?; + let expr = expand_expr(&spaced(*syntax_type), tail, context)?; tail.restart(); named.insert_mandatory(name, expr); } } } - NamedType::Optional(syntax_type) => match 
extract_optional(name, tail, source) { - Err(err) => return Err(err), // produce a correct diagnostic - Ok(Some((pos, flag))) => { - tail.move_to(pos); + NamedType::Optional(syntax_type) => { + match extract_optional(name, tail, context.source()) { + Err(err) => return Err(err), // produce a correct diagnostic + Ok(Some((pos, flag))) => { + tail.move_to(pos); - if tail.at_end() { - return Err(ShellError::argument_error( - config.name.clone(), - ArgumentError::MissingValueForName(name.to_string()), - flag.tag(), - )); + if tail.at_end() { + return Err(ShellError::argument_error( + config.name.clone(), + ArgumentError::MissingValueForName(name.to_string()), + flag.tag(), + )); + } + + let expr = expand_expr(&spaced(*syntax_type), tail, context); + + match expr { + Err(_) => named.insert_optional(name, None), + Ok(expr) => named.insert_optional(name, Some(expr)), + } + + tail.restart(); } - let expr = hir::baseline_parse_next_expr(tail, context, source, *syntax_type)?; - - tail.restart(); - named.insert_optional(name, Some(expr)); + Ok(None) => { + tail.restart(); + named.insert_optional(name, None); + } } - - Ok(None) => { - tail.restart(); - named.insert_optional(name, None); - } - }, + } }; } - trace_remaining("after named", tail.clone(), source); + trace_remaining("after named", tail.clone(), context.source()); let mut positional = vec![]; @@ -143,7 +91,7 @@ fn parse_command_tail( match arg { PositionalType::Mandatory(..) => { - if tail.len() == 0 { + if tail.at_end() { return Err(ShellError::argument_error( config.name.clone(), ArgumentError::MissingMandatoryPositional(arg.name().to_string()), @@ -153,25 +101,36 @@ fn parse_command_tail( } PositionalType::Optional(..) 
=> { - if tail.len() == 0 { + if tail.at_end() { break; } } } - let result = hir::baseline_parse_next_expr(tail, context, source, arg.syntax_type())?; + let result = expand_expr(&spaced(arg.syntax_type()), tail, context)?; positional.push(result); } - trace_remaining("after positional", tail.clone(), source); + trace_remaining("after positional", tail.clone(), context.source()); if let Some(syntax_type) = config.rest_positional { - let remainder = baseline_parse_tokens(tail, context, source, syntax_type)?; - positional.extend(remainder); + let mut out = vec![]; + + loop { + if tail.at_end_possible_ws() { + break; + } + + let next = expand_expr(&spaced(syntax_type), tail, context)?; + + out.push(next); + } + + positional.extend(out); } - trace_remaining("after rest", tail.clone(), source); + trace_remaining("after rest", tail.clone(), context.source()); trace!("Constructed positional={:?} named={:?}", positional, named); diff --git a/src/parser/registry.rs b/src/parser/registry.rs index 955a1a04c..888e5ae1e 100644 --- a/src/parser/registry.rs +++ b/src/parser/registry.rs @@ -1,11 +1,11 @@ // TODO: Temporary redirect pub(crate) use crate::context::CommandRegistry; use crate::evaluate::{evaluate_baseline_expr, Scope}; -use crate::parser::{hir, hir::SyntaxShape, parse_command, CallNode}; +use crate::parser::{hir, hir::SyntaxShape}; use crate::prelude::*; use derive_new::new; use indexmap::IndexMap; -use log::trace; + use serde::{Deserialize, Serialize}; use std::fmt; @@ -271,21 +271,6 @@ impl<'a> Iterator for PositionalIter<'a> { } } -impl Signature { - pub(crate) fn parse_args( - &self, - call: &Tagged, - context: &Context, - source: &Text, - ) -> Result { - let args = parse_command(self, context, call, source)?; - - trace!("parsed args: {:?}", args); - - Ok(args) - } -} - pub(crate) fn evaluate_args( call: &hir::Call, registry: &CommandRegistry, diff --git a/src/plugins/add.rs b/src/plugins/add.rs index 03e1d4282..997400d67 100644 --- a/src/plugins/add.rs +++ 
b/src/plugins/add.rs @@ -1,10 +1,13 @@ +use itertools::Itertools; use nu::{ - serve_plugin, CallInfo, Plugin, Primitive, ReturnSuccess, ReturnValue, ShellError, Signature, - SyntaxShape, Tagged, Value, + serve_plugin, CallInfo, Plugin, ReturnSuccess, ReturnValue, ShellError, Signature, SyntaxShape, + Tagged, Value, }; +pub type ColumnPath = Vec>; + struct Add { - field: Option, + field: Option, value: Option, } impl Add { @@ -19,12 +22,13 @@ impl Add { let value_tag = value.tag(); match (value.item, self.value.clone()) { (obj @ Value::Row(_), Some(v)) => match &self.field { - Some(f) => match obj.insert_data_at_path(value_tag, &f, v) { + Some(f) => match obj.insert_data_at_column_path(value_tag, &f, v) { Some(v) => return Ok(v), None => { return Err(ShellError::string(format!( "add could not find place to insert field {:?} {}", - obj, f + obj, + f.iter().map(|i| &i.item).join(".") ))) } }, @@ -44,7 +48,7 @@ impl Plugin for Add { fn config(&mut self) -> Result { Ok(Signature::build("add") .desc("Add a new field to the table.") - .required("Field", SyntaxShape::String) + .required("Field", SyntaxShape::ColumnPath) .required("Value", SyntaxShape::String) .rest(SyntaxShape::String) .filter()) @@ -53,12 +57,13 @@ impl Plugin for Add { fn begin_filter(&mut self, call_info: CallInfo) -> Result, ShellError> { if let Some(args) = call_info.args.positional { match &args[0] { - Tagged { - item: Value::Primitive(Primitive::String(s)), + table @ Tagged { + item: Value::Table(_), .. 
} => { - self.field = Some(s.clone()); + self.field = Some(table.as_column_path()?.item); } + _ => { return Err(ShellError::string(format!( "Unrecognized type in params: {:?}", diff --git a/src/plugins/edit.rs b/src/plugins/edit.rs index db116fedf..6d35530ef 100644 --- a/src/plugins/edit.rs +++ b/src/plugins/edit.rs @@ -1,10 +1,12 @@ use nu::{ - serve_plugin, CallInfo, Plugin, Primitive, ReturnSuccess, ReturnValue, ShellError, Signature, - SyntaxShape, Tagged, Value, + serve_plugin, CallInfo, Plugin, ReturnSuccess, ReturnValue, ShellError, Signature, SyntaxShape, + Tagged, Value, }; +pub type ColumnPath = Vec>; + struct Edit { - field: Option, + field: Option, value: Option, } impl Edit { @@ -19,7 +21,7 @@ impl Edit { let value_tag = value.tag(); match (value.item, self.value.clone()) { (obj @ Value::Row(_), Some(v)) => match &self.field { - Some(f) => match obj.replace_data_at_path(value_tag, &f, v) { + Some(f) => match obj.replace_data_at_column_path(value_tag, &f, v) { Some(v) => return Ok(v), None => { return Err(ShellError::string( @@ -43,7 +45,7 @@ impl Plugin for Edit { fn config(&mut self) -> Result { Ok(Signature::build("edit") .desc("Edit an existing column to have a new value.") - .required("Field", SyntaxShape::String) + .required("Field", SyntaxShape::ColumnPath) .required("Value", SyntaxShape::String) .filter()) } @@ -51,11 +53,11 @@ impl Plugin for Edit { fn begin_filter(&mut self, call_info: CallInfo) -> Result, ShellError> { if let Some(args) = call_info.args.positional { match &args[0] { - Tagged { - item: Value::Primitive(Primitive::String(s)), + table @ Tagged { + item: Value::Table(_), .. 
} => { - self.field = Some(s.clone()); + self.field = Some(table.as_column_path()?.item); } _ => { return Err(ShellError::string(format!( diff --git a/src/plugins/inc.rs b/src/plugins/inc.rs index ecab03dc9..4e6f6f0f6 100644 --- a/src/plugins/inc.rs +++ b/src/plugins/inc.rs @@ -14,8 +14,10 @@ pub enum SemVerAction { Patch, } +pub type ColumnPath = Vec>; + struct Inc { - field: Option, + field: Option, error: Option, action: Option, } @@ -85,16 +87,17 @@ impl Inc { } Value::Row(_) => match self.field { Some(ref f) => { - let replacement = match value.item.get_data_by_path(value.tag(), f) { + let replacement = match value.item.get_data_by_column_path(value.tag(), f) { Some(result) => self.inc(result.map(|x| x.clone()))?, None => { return Err(ShellError::string("inc could not find field to replace")) } }; - match value - .item - .replace_data_at_path(value.tag(), f, replacement.item.clone()) - { + match value.item.replace_data_at_column_path( + value.tag(), + f, + replacement.item.clone(), + ) { Some(v) => return Ok(v), None => { return Err(ShellError::string("inc could not find field to replace")) @@ -120,7 +123,7 @@ impl Plugin for Inc { .switch("major") .switch("minor") .switch("patch") - .rest(SyntaxShape::String) + .rest(SyntaxShape::ColumnPath) .filter()) } @@ -138,11 +141,11 @@ impl Plugin for Inc { if let Some(args) = call_info.args.positional { for arg in args { match arg { - Tagged { - item: Value::Primitive(Primitive::String(s)), + table @ Tagged { + item: Value::Table(_), .. 
} => { - self.field = Some(s); + self.field = Some(table.as_column_path()?.item); } _ => { return Err(ShellError::string(format!( @@ -209,8 +212,13 @@ mod tests { } fn with_parameter(&mut self, name: &str) -> &mut Self { + let fields: Vec> = name + .split(".") + .map(|s| Value::string(s.to_string()).tagged(Tag::unknown_span(self.anchor))) + .collect(); + self.positionals - .push(Value::string(name.to_string()).tagged(Tag::unknown_span(self.anchor))); + .push(Value::Table(fields).tagged(Tag::unknown_span(self.anchor))); self } @@ -297,7 +305,12 @@ mod tests { ) .is_ok()); - assert_eq!(plugin.field, Some("package.version".to_string())); + assert_eq!( + plugin + .field + .map(|f| f.into_iter().map(|f| f.item).collect()), + Some(vec!["package".to_string(), "version".to_string()]) + ); } #[test] diff --git a/src/plugins/str.rs b/src/plugins/str.rs index 4b74914f0..7bd35733d 100644 --- a/src/plugins/str.rs +++ b/src/plugins/str.rs @@ -1,6 +1,6 @@ use nu::{ serve_plugin, CallInfo, Plugin, Primitive, ReturnSuccess, ReturnValue, ShellError, Signature, - SyntaxShape, Tagged, Value, + SyntaxShape, Tagged, TaggedItem, Value, }; #[derive(Debug, Eq, PartialEq)] @@ -10,8 +10,10 @@ enum Action { ToInteger, } +pub type ColumnPath = Vec>; + struct Str { - field: Option, + field: Option, params: Option>, error: Option, action: Option, @@ -43,8 +45,8 @@ impl Str { Ok(applied) } - fn for_field(&mut self, field: &str) { - self.field = Some(String::from(field)); + fn for_field(&mut self, column_path: ColumnPath) { + self.field = Some(column_path); } fn permit(&mut self) -> bool { @@ -92,14 +94,15 @@ impl Str { } Value::Row(_) => match self.field { Some(ref f) => { - let replacement = match value.item.get_data_by_path(value.tag(), f) { + let replacement = match value.item.get_data_by_column_path(value.tag(), f) { Some(result) => self.strutils(result.map(|x| x.clone()))?, None => return Ok(Tagged::from_item(Value::nothing(), value.tag)), }; - match value - .item - 
.replace_data_at_path(value.tag(), f, replacement.item.clone()) - { + match value.item.replace_data_at_column_path( + value.tag(), + f, + replacement.item.clone(), + ) { Some(v) => return Ok(v), None => { return Err(ShellError::string("str could not find field to replace")) @@ -127,7 +130,7 @@ impl Plugin for Str { .switch("downcase") .switch("upcase") .switch("to-int") - .rest(SyntaxShape::Member) + .rest(SyntaxShape::ColumnPath) .filter()) } @@ -148,15 +151,21 @@ impl Plugin for Str { match possible_field { Tagged { item: Value::Primitive(Primitive::String(s)), - .. + tag, } => match self.action { Some(Action::Downcase) | Some(Action::Upcase) | Some(Action::ToInteger) | None => { - self.for_field(&s); + self.for_field(vec![s.clone().tagged(tag)]); } }, + table @ Tagged { + item: Value::Table(_), + .. + } => { + self.field = Some(table.as_column_path()?.item); + } _ => { return Err(ShellError::string(format!( "Unrecognized type in params: {:?}", @@ -227,8 +236,13 @@ mod tests { } fn with_parameter(&mut self, name: &str) -> &mut Self { + let fields: Vec> = name + .split(".") + .map(|s| Value::string(s.to_string()).tagged(Tag::unknown_span(self.anchor))) + .collect(); + self.positionals - .push(Value::string(name.to_string()).tagged(Tag::unknown())); + .push(Value::Table(fields).tagged(Tag::unknown_span(self.anchor))); self } @@ -303,7 +317,12 @@ mod tests { ) .is_ok()); - assert_eq!(plugin.field, Some("package.description".to_string())); + assert_eq!( + plugin + .field + .map(|f| f.into_iter().map(|f| f.item).collect()), + Some(vec!["package".to_string(), "description".to_string()]) + ) } #[test] diff --git a/src/shell/helper.rs b/src/shell/helper.rs index 6fb454435..85591cf04 100644 --- a/src/shell/helper.rs +++ b/src/shell/helper.rs @@ -1,3 +1,4 @@ +use crate::parser::hir::TokensIterator; use crate::parser::nom_input; use crate::parser::parse::token_tree::TokenNode; use crate::parser::parse::tokens::RawToken; @@ -77,16 +78,12 @@ impl Highlighter for Helper { 
Ok(v) => v, }; - let Pipeline { parts, post_ws } = pipeline; + let Pipeline { parts } = pipeline; let mut iter = parts.into_iter(); loop { match iter.next() { None => { - if let Some(ws) = post_ws { - out.push_str(ws.slice(line)); - } - return Cow::Owned(out); } Some(token) => { @@ -107,13 +104,12 @@ impl Highlighter for Helper { fn paint_token_node(token_node: &TokenNode, line: &str) -> String { let styled = match token_node { TokenNode::Call(..) => Color::Cyan.bold().paint(token_node.tag().slice(line)), + TokenNode::Nodes(..) => Color::Green.bold().paint(token_node.tag().slice(line)), TokenNode::Whitespace(..) => Color::White.normal().paint(token_node.tag().slice(line)), TokenNode::Flag(..) => Color::Black.bold().paint(token_node.tag().slice(line)), TokenNode::Member(..) => Color::Yellow.bold().paint(token_node.tag().slice(line)), - TokenNode::Path(..) => Color::Green.bold().paint(token_node.tag().slice(line)), TokenNode::Error(..) => Color::Red.bold().paint(token_node.tag().slice(line)), TokenNode::Delimited(..) => Color::White.paint(token_node.tag().slice(line)), - TokenNode::Operator(..) => Color::White.normal().paint(token_node.tag().slice(line)), TokenNode::Pipeline(..) => Color::Blue.normal().paint(token_node.tag().slice(line)), TokenNode::Token(Tagged { item: RawToken::Number(..), @@ -147,6 +143,10 @@ fn paint_token_node(token_node: &TokenNode, line: &str) -> String { item: RawToken::ExternalWord, .. }) => Color::Black.bold().paint(token_node.tag().slice(line)), + TokenNode::Token(Tagged { + item: RawToken::Operator(..), + .. 
+ }) => Color::Black.bold().paint(token_node.tag().slice(line)), }; styled.to_string() @@ -159,25 +159,19 @@ fn paint_pipeline_element(pipeline_element: &PipelineElement, line: &str) -> Str styled.push_str(&Color::Purple.paint("|")); } - if let Some(ws) = pipeline_element.pre_ws { - styled.push_str(&Color::White.normal().paint(ws.slice(line))); - } + let mut tokens = + TokensIterator::new(&pipeline_element.tokens, pipeline_element.tokens.tag, false); + let head = tokens.next(); - styled.push_str( - &Color::Cyan - .bold() - .paint(pipeline_element.call().head().tag().slice(line)) - .to_string(), - ); - - if let Some(children) = pipeline_element.call().children() { - for child in children { - styled.push_str(&paint_token_node(child, line)); + match head { + None => return styled, + Some(head) => { + styled.push_str(&Color::Cyan.bold().paint(head.tag().slice(line)).to_string()) } } - if let Some(ws) = pipeline_element.post_ws { - styled.push_str(&Color::White.normal().paint(ws.slice(line))); + for token in tokens { + styled.push_str(&paint_token_node(token, line)); } styled.to_string() diff --git a/tests/command_open_tests.rs b/tests/command_open_tests.rs index 54dc7ad54..e9047883c 100644 --- a/tests/command_open_tests.rs +++ b/tests/command_open_tests.rs @@ -212,7 +212,7 @@ fn open_can_parse_ini() { fn open_can_parse_utf16_ini() { let actual = nu!( cwd: "tests/fixtures/formats", - "open utf16.ini | get .ShellClassInfo | get IconIndex | echo $it" + "open utf16.ini | get '.ShellClassInfo' | get IconIndex | echo $it" ); assert_eq!(actual, "-236") diff --git a/tests/helpers/mod.rs b/tests/helpers/mod.rs index 04fd88992..199038b53 100644 --- a/tests/helpers/mod.rs +++ b/tests/helpers/mod.rs @@ -93,6 +93,7 @@ macro_rules! nu { .write_all(commands.as_bytes()) .expect("couldn't write to stdin"); + let output = process .wait_with_output() .expect("couldn't read from stdout");