diff --git a/Cargo.lock b/Cargo.lock index a9289a06d..f8070f165 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3125,6 +3125,7 @@ dependencies = [ "derive_is_enum_variant", "dunce", "indexmap", + "itertools", "log 0.4.13", "nu-errors", "nu-protocol", @@ -3134,6 +3135,7 @@ dependencies = [ "num-traits 0.2.14", "serde 1.0.120", "shellexpand", + "smart-default", ] [[package]] @@ -5086,6 +5088,17 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +[[package]] +name = "smart-default" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "133659a15339456eeeb07572eb02a91c91e9815e9cbc89566944d2c8d3efdbf6" +dependencies = [ + "proc-macro2", + "quote 1.0.8", + "syn 1.0.58", +] + [[package]] name = "socket2" version = "0.3.19" diff --git a/Cargo.toml b/Cargo.toml index 84f708cd9..fad3a6142 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,81 +18,115 @@ members = ["crates/*/"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nu-cli = {version = "0.26.0", path = "./crates/nu-cli", default-features=false} -nu-command = {version = "0.26.0", path = "./crates/nu-command"} -nu-data = {version = "0.26.0", path = "./crates/nu-data"} -nu-engine = {version = "0.26.0", path = "./crates/nu-engine"} -nu-errors = {version = "0.26.0", path = "./crates/nu-errors"} -nu-parser = {version = "0.26.0", path = "./crates/nu-parser"} -nu-plugin = {version = "0.26.0", path = "./crates/nu-plugin"} -nu-protocol = {version = "0.26.0", path = "./crates/nu-protocol"} -nu-source = {version = "0.26.0", path = "./crates/nu-source"} -nu-value-ext = {version = "0.26.0", path = "./crates/nu-value-ext"} +nu-cli = { version = "0.26.0", path = "./crates/nu-cli", default-features = false } +nu-command = { version = "0.26.0", path = "./crates/nu-command" } +nu-data = { version = "0.26.0", path = "./crates/nu-data" } +nu-engine = { version = "0.26.0", path = "./crates/nu-engine" } +nu-errors = { version = "0.26.0", path = "./crates/nu-errors" } +nu-parser = { version = "0.26.0", path = "./crates/nu-parser" } +nu-plugin = { version = "0.26.0", path = "./crates/nu-plugin" } +nu-protocol = { version = "0.26.0", path = "./crates/nu-protocol" } +nu-source = { version = "0.26.0", path = "./crates/nu-source" } +nu-value-ext = { version = "0.26.0", path = "./crates/nu-value-ext" } -nu_plugin_binaryview = {version = "0.26.0", path = "./crates/nu_plugin_binaryview", optional = true} -nu_plugin_chart = {version = "0.26.0", path = "./crates/nu_plugin_chart", optional = true} -nu_plugin_fetch = {version = "0.26.0", path = "./crates/nu_plugin_fetch", optional = true} -nu_plugin_from_bson = {version = "0.26.0", path = "./crates/nu_plugin_from_bson", optional = true} -nu_plugin_from_sqlite = {version = "0.26.0", path = "./crates/nu_plugin_from_sqlite", optional = true} -nu_plugin_inc = {version = "0.26.0", path = "./crates/nu_plugin_inc", optional = true} -nu_plugin_match = {version = "0.26.0", path = "./crates/nu_plugin_match", optional = true} -nu_plugin_post = {version = "0.26.0", path = "./crates/nu_plugin_post", optional = true} -nu_plugin_ps = {version = "0.26.0", path = "./crates/nu_plugin_ps", optional = true} -nu_plugin_s3 = {version = "0.26.0", path = "./crates/nu_plugin_s3", optional = true} -nu_plugin_selector = {version = "0.26.0", path = "./crates/nu_plugin_selector", optional = true} -nu_plugin_start = {version = "0.26.0", path = 
"./crates/nu_plugin_start", optional = true} -nu_plugin_sys = {version = "0.26.0", path = "./crates/nu_plugin_sys", optional = true} -nu_plugin_textview = {version = "0.26.0", path = "./crates/nu_plugin_textview", optional = true} -nu_plugin_to_bson = {version = "0.26.0", path = "./crates/nu_plugin_to_bson", optional = true} -nu_plugin_to_sqlite = {version = "0.26.0", path = "./crates/nu_plugin_to_sqlite", optional = true} -nu_plugin_tree = {version = "0.26.0", path = "./crates/nu_plugin_tree", optional = true} -nu_plugin_xpath = {version = "0.26.0", path = "./crates/nu_plugin_xpath", optional = true} +nu_plugin_binaryview = { version = "0.26.0", path = "./crates/nu_plugin_binaryview", optional = true } +nu_plugin_chart = { version = "0.26.0", path = "./crates/nu_plugin_chart", optional = true } +nu_plugin_fetch = { version = "0.26.0", path = "./crates/nu_plugin_fetch", optional = true } +nu_plugin_from_bson = { version = "0.26.0", path = "./crates/nu_plugin_from_bson", optional = true } +nu_plugin_from_sqlite = { version = "0.26.0", path = "./crates/nu_plugin_from_sqlite", optional = true } +nu_plugin_inc = { version = "0.26.0", path = "./crates/nu_plugin_inc", optional = true } +nu_plugin_match = { version = "0.26.0", path = "./crates/nu_plugin_match", optional = true } +nu_plugin_post = { version = "0.26.0", path = "./crates/nu_plugin_post", optional = true } +nu_plugin_ps = { version = "0.26.0", path = "./crates/nu_plugin_ps", optional = true } +nu_plugin_s3 = { version = "0.26.0", path = "./crates/nu_plugin_s3", optional = true } +nu_plugin_selector = { version = "0.26.0", path = "./crates/nu_plugin_selector", optional = true } +nu_plugin_start = { version = "0.26.0", path = "./crates/nu_plugin_start", optional = true } +nu_plugin_sys = { version = "0.26.0", path = "./crates/nu_plugin_sys", optional = true } +nu_plugin_textview = { version = "0.26.0", path = "./crates/nu_plugin_textview", optional = true } +nu_plugin_to_bson = { version = "0.26.0", path = "./crates/nu_plugin_to_bson", optional = true } +nu_plugin_to_sqlite = { version = "0.26.0", path = "./crates/nu_plugin_to_sqlite", optional = true } +nu_plugin_tree = { version = "0.26.0", path = "./crates/nu_plugin_tree", optional = true } +nu_plugin_xpath = { version = "0.26.0", path = "./crates/nu_plugin_xpath", optional = true } # Required to bootstrap the main binary clap = "2.33.3" -ctrlc = {version = "3.1.6", optional = true} -futures = {version = "0.3.5", features = ["compat", "io-compat"]} +ctrlc = { version = "3.1.6", optional = true } +futures = { version = "0.3.5", features = ["compat", "io-compat"] } itertools = "0.10.0" log = "0.4.11" pretty_env_logger = "0.4.0" [dev-dependencies] dunce = "1.0.1" -nu-test-support = {version = "0.26.0", path = "./crates/nu-test-support"} +nu-test-support = { version = "0.26.0", path = "./crates/nu-test-support" } [build-dependencies] [features] ctrlc-support = ["nu-cli/ctrlc", "nu-command/ctrlc"] -directories-support = ["nu-cli/directories", "nu-cli/dirs", "nu-command/directories", "nu-command/dirs", "nu-data/directories", "nu-data/dirs", "nu-engine/dirs"] +directories-support = [ + "nu-cli/directories", + "nu-cli/dirs", + "nu-command/directories", + "nu-command/dirs", + "nu-data/directories", + "nu-data/dirs", + "nu-engine/dirs", +] ptree-support = ["nu-cli/ptree", "nu-command/ptree"] rustyline-support = ["nu-cli/rustyline-support", "nu-command/rustyline-support"] term-support = ["nu-cli/term", "nu-command/term"] uuid-support = ["nu-cli/uuid_crate", "nu-command/uuid_crate"] 
-which-support = ["nu-cli/ichwh", "nu-cli/which", "nu-command/ichwh", "nu-command/which"] +which-support = [ + "nu-cli/ichwh", + "nu-cli/which", + "nu-command/ichwh", + "nu-command/which", +] default = [ - "nu-cli/shadow-rs", - "sys", - "ps", - "textview", - "inc", - "directories-support", - "ctrlc-support", - "which-support", - "ptree-support", - "term-support", - "uuid-support", - "rustyline-support", - "match", - "post", - "fetch", - "zip-support", + "nu-cli/shadow-rs", + "sys", + "ps", + "textview", + "inc", + "directories-support", + "ctrlc-support", + "which-support", + "ptree-support", + "term-support", + "uuid-support", + "rustyline-support", + "match", + "post", + "fetch", + "zip-support", ] -extra = ["default", "binaryview", "tree", "clipboard-cli", "trash-support", "start", "bson", "sqlite", "s3", "chart", "xpath", "selector"] -stable = ["default"] -wasi = ["inc", "match", "ptree-support", "match", "tree", "rustyline-support"] +stable = ["default"] +extra = [ + "default", + "binaryview", + "tree", + "clipboard-cli", + "trash-support", + "start", + "bson", + "sqlite", + "s3", + "chart", + "xpath", + "selector", +] + +wasi = [ + "inc", + "match", + "ptree-support", + "match", + "tree", + "rustyline-support", +] trace = ["nu-parser/trace"] diff --git a/crates/nu-command/src/examples.rs b/crates/nu-command/src/examples.rs index 932011fa6..220ae063f 100644 --- a/crates/nu-command/src/examples.rs +++ b/crates/nu-command/src/examples.rs @@ -178,7 +178,7 @@ pub fn test_anchors(cmd: Command) -> Result<(), ShellError> { let block = parse_line(&pipeline_with_anchor, &ctx)?; - if let Some(_) = &sample_pipeline.result { + if sample_pipeline.result.is_some() { let result = block_on(evaluate_block(block, &mut ctx))?; ctx.with_errors(|reasons| reasons.iter().cloned().take(1).next()) diff --git a/crates/nu-parser/Cargo.toml b/crates/nu-parser/Cargo.toml index 5f8468647..3f18989a8 100644 --- a/crates/nu-parser/Cargo.toml +++ b/crates/nu-parser/Cargo.toml @@ -9,22 +9,24 @@ version = "0.26.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bigdecimal = {version = "0.2.0", features = ["serde"]} +bigdecimal = { version = "0.2.0", features = ["serde"] } codespan-reporting = "0.11.0" derive-new = "0.5.8" derive_is_enum_variant = "0.1.1" -indexmap = {version = "1.6.1", features = ["serde-1"]} +indexmap = { version = "1.6.1", features = ["serde-1"] } log = "0.4.11" -num-bigint = {version = "0.3.1", features = ["serde"]} +num-bigint = { version = "0.3.1", features = ["serde"] } num-traits = "0.2.14" serde = "1.0.118" shellexpand = "2.1.0" +itertools = "0.10.0" +smart-default = "0.6.0" dunce = "1.0.1" -nu-errors = {version = "0.26.0", path = "../nu-errors"} -nu-protocol = {version = "0.26.0", path = "../nu-protocol"} -nu-source = {version = "0.26.0", path = "../nu-source"} -nu-test-support = {version = "0.26.0", path = "../nu-test-support"} +nu-errors = { version = "0.26.0", path = "../nu-errors" } +nu-protocol = { version = "0.26.0", path = "../nu-protocol" } +nu-source = { version = "0.26.0", path = "../nu-source" } +nu-test-support = { version = "0.26.0", path = "../nu-test-support" } [features] stable = [] diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs deleted file mode 100644 index c69f41039..000000000 --- a/crates/nu-parser/src/lex.rs +++ /dev/null @@ -1,993 +0,0 @@ -use std::str::CharIndices; -use std::{fmt, iter::Peekable}; - -use nu_source::{Span, Spanned, SpannedItem}; - -use nu_errors::ParseError; - -type 
Input<'t> = Peekable>; - -#[derive(Debug)] -pub struct Token { - pub contents: TokenContents, - pub span: Span, -} -impl Token { - pub fn new(contents: TokenContents, span: Span) -> Token { - Token { contents, span } - } -} - -#[derive(Debug, PartialEq, is_enum_variant)] -pub enum TokenContents { - /// A baseline token is an atomic chunk of source code. This means that the - /// token contains the entirety of string literals, as well as the entirety - /// of sections delimited by paired delimiters. - /// - /// For example, if the token begins with `{`, the baseline token continues - /// until the closing `}` (after taking comments and string literals into - /// consideration). - Baseline(String), - Comment(String), - Pipe, - Semicolon, - EOL, -} - -impl fmt::Display for TokenContents { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TokenContents::Baseline(base) => write!(f, "{}", base), - TokenContents::Comment(comm) => write!(f, "#{}", comm), - TokenContents::Pipe => write!(f, "|"), - TokenContents::Semicolon => write!(f, ";"), - TokenContents::EOL => write!(f, "\\n"), - } - } -} - -/// A `LiteCommand` is a list of words that will get meaning when processed by -/// the parser. -#[derive(Debug, Clone)] -pub struct LiteCommand { - pub parts: Vec>, - ///Preceding comments. Each String in the vec is one line. The comment literal is not included. - pub comments: Option>>, -} - -impl LiteCommand { - fn new() -> LiteCommand { - LiteCommand { - parts: vec![], - comments: None, - } - } - - pub fn comments_joined(&self) -> String { - match &self.comments { - None => "".to_string(), - Some(text) => text - .iter() - .map(|s| s.item.clone()) - .collect::>() - .join("\n"), - } - } - - pub fn is_empty(&self) -> bool { - self.parts.is_empty() - } - - pub fn has_content(&self) -> bool { - !self.is_empty() - } - - pub fn push(&mut self, item: Spanned) { - self.parts.push(item) - } - - pub(crate) fn span(&self) -> Span { - let start = if let Some(x) = self.parts.first() { - x.span.start() - } else { - 0 - }; - - let end = if let Some(x) = self.parts.last() { - x.span.end() - } else { - 0 - }; - - Span::new(start, end) - } -} - -/// A `LitePipeline` is a series of `LiteCommand`s, separated by `|`. -#[derive(Debug, Clone)] -pub struct LitePipeline { - pub commands: Vec, -} - -impl Default for LitePipeline { - fn default() -> Self { - Self::new() - } -} - -impl LitePipeline { - pub fn new() -> Self { - Self { commands: vec![] } - } - - pub fn is_empty(&self) -> bool { - self.commands.is_empty() - } - - pub fn has_content(&self) -> bool { - !self.commands.is_empty() - } - - pub fn push(&mut self, item: LiteCommand) { - self.commands.push(item) - } - - pub(crate) fn span(&self) -> Span { - let start = if !self.commands.is_empty() { - self.commands[0].span().start() - } else { - 0 - }; - - if let Some((last, _)) = self.commands[..].split_last() { - Span::new(start, last.span().end()) - } else { - Span::new(start, 0) - } - } -} - -/// A `LiteGroup` is a series of `LitePipeline`s, separated by `;`. 
-#[derive(Debug, Clone)] -pub struct LiteGroup { - pub pipelines: Vec, -} - -impl Default for LiteGroup { - fn default() -> Self { - Self::new() - } -} - -impl LiteGroup { - pub fn new() -> Self { - Self { pipelines: vec![] } - } - - pub fn is_empty(&self) -> bool { - self.pipelines.is_empty() - } - - pub fn has_content(&self) -> bool { - !self.pipelines.is_empty() - } - - pub fn push(&mut self, item: LitePipeline) { - self.pipelines.push(item) - } - - #[cfg(test)] - pub(crate) fn span(&self) -> Span { - let start = if !self.pipelines.is_empty() { - self.pipelines[0].span().start() - } else { - 0 - }; - - if let Some((last, _)) = self.pipelines[..].split_last() { - Span::new(start, last.span().end()) - } else { - Span::new(start, 0) - } - } -} - -/// A `LiteBlock` is a series of `LiteGroup`s, separated by newlines. -#[derive(Debug, Clone)] -pub struct LiteBlock { - pub block: Vec, -} - -impl LiteBlock { - pub fn new(block: Vec) -> Self { - Self { block } - } - - pub fn is_empty(&self) -> bool { - self.block.is_empty() - } - - pub fn push(&mut self, item: LiteGroup) { - self.block.push(item) - } - - #[cfg(test)] - pub(crate) fn span(&self) -> Span { - let start = if !self.block.is_empty() { - self.block[0].span().start() - } else { - 0 - }; - - if let Some((last, _)) = self.block[..].split_last() { - Span::new(start, last.span().end()) - } else { - Span::new(start, 0) - } - } -} - -#[derive(Clone, Copy)] -enum BlockKind { - Paren, - CurlyBracket, - SquareBracket, -} - -impl BlockKind { - fn closing(self) -> char { - match self { - BlockKind::Paren => ')', - BlockKind::SquareBracket => ']', - BlockKind::CurlyBracket => '}', - } - } -} - -/// Finds the extents of a basline token, returning the string with its -/// associated span, along with any parse error that was discovered along the -/// way. -/// -/// Baseline tokens are unparsed content separated by spaces or a command -/// separator (like pipe or semicolon) Baseline tokens may be surrounded by -/// quotes (single, double, or backtick) or braces (square, paren, curly) -/// -/// Baseline tokens may be further processed based on the needs of the syntax -/// shape that encounters them. They are still lightly lexed. For example, if a -/// baseline token begins with `{`, the entire token will continue until the -/// closing `}`, taking comments into consideration. -pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned, Option) { - let mut token_contents = String::new(); - let start_offset = if let Some((pos, _)) = src.peek() { - *pos - } else { - 0 - }; - - // This variable tracks the starting character of a string literal, so that - // we remain inside the string literal lexer mode until we encounter the - // closing quote. - let mut quote_start: Option = None; - - // This Vec tracks paired delimiters - let mut block_level: Vec = vec![]; - - // A baseline token is terminated if it's not nested inside of a paired - // delimiter and the next character is one of: `|`, `;`, `#` or any - // whitespace. - fn is_termination(block_level: &[BlockKind], c: char) -> bool { - block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#') - } - - // The process of slurping up a baseline token repeats: - // - // - String literal, which begins with `'`, `"` or `\``, and continues until - // the same character is encountered again. - // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until - // the matching closing delimiter is found, skipping comments and string - // literals. 
- // - When not nested inside of a delimiter pair, when a terminating - // character (whitespace, `|`, `;` or `#`) is encountered, the baseline - // token is done. - // - Otherwise, accumulate the character into the current baseline token. - while let Some((_, c)) = src.peek() { - let c = *c; - - if quote_start.is_some() { - // If we encountered the closing quote character for the current - // string, we're done with the current string. - if Some(c) == quote_start { - quote_start = None; - } - } else if c == '\'' || c == '"' || c == '`' { - // We encountered the opening quote of a string literal. - quote_start = Some(c); - } else if c == '[' { - // We encountered an opening `[` delimiter. - block_level.push(BlockKind::SquareBracket); - } else if c == ']' { - // We encountered a closing `]` delimiter. Pop off the opening `[` - // delimiter. - if let Some(BlockKind::SquareBracket) = block_level.last() { - let _ = block_level.pop(); - } - } else if c == '{' { - // We encountered an opening `{` delimiter. - block_level.push(BlockKind::CurlyBracket); - } else if c == '}' { - // We encountered a closing `}` delimiter. Pop off the opening `{`. - if let Some(BlockKind::CurlyBracket) = block_level.last() { - let _ = block_level.pop(); - } - } else if c == '(' { - // We enceountered an opening `(` delimiter. - block_level.push(BlockKind::Paren); - } else if c == ')' { - // We encountered a closing `)` delimiter. Pop off the opening `(`. - if let Some(BlockKind::Paren) = block_level.last() { - let _ = block_level.pop(); - } - } else if is_termination(&block_level, c) { - break; - } - - // Otherwise, accumulate the character into the current token. - token_contents.push(c); - - // Consume the character. - let _ = src.next(); - } - - let span = Span::new( - start_offset + span_offset, - start_offset + span_offset + token_contents.len(), - ); - - // If there is still unclosed opening delimiters, close them and add - // synthetic closing characters to the accumulated token. - if let Some(block) = block_level.last() { - let delim: char = (*block).closing(); - let cause = ParseError::unexpected_eof(delim.to_string(), span); - - while let Some(bk) = block_level.pop() { - token_contents.push(bk.closing()); - } - - return (token_contents.spanned(span), Some(cause)); - } - - if let Some(delimiter) = quote_start { - // The non-lite parse trims quotes on both sides, so we add the expected quote so that - // anyone wanting to consume this partial parse (e.g., completions) will be able to get - // correct information from the non-lite parse. - token_contents.push(delimiter); - - return ( - token_contents.spanned(span), - Some(ParseError::unexpected_eof(delimiter.to_string(), span)), - ); - } - - // If we didn't accumulate any characters, it's an unexpected error. - if token_contents.is_empty() { - return ( - token_contents.spanned(span), - Some(ParseError::unexpected_eof("command".to_string(), span)), - ); - } - - (token_contents.spanned(span), None) -} - -/// Try to parse a list of tokens into a block. -pub fn block(tokens: Vec) -> (LiteBlock, Option) { - // Accumulate chunks of tokens into groups. 
- let mut groups = vec![]; - - // The current group - let mut group = LiteGroup::new(); - - // The current pipeline - let mut pipeline = LitePipeline::new(); - - // The current command - let mut command = LiteCommand::new(); - - let mut prev_comments = None; - let mut prev_comment_indent = 0; - - let mut prev_token: Option = None; - - // The parsing process repeats: - // - // - newline (`\n` or `\r`) - // - pipes (`|`) - // - semicolon - fn finish_command( - prev_comments: &mut Option>>, - command: &mut LiteCommand, - pipeline: &mut LitePipeline, - ) { - if let Some(prev_comments_) = prev_comments { - //Add previous comments to this command - command.comments = Some(prev_comments_.clone()); - //Reset - *prev_comments = None; - } - pipeline.push(command.clone()); - *command = LiteCommand::new(); - } - - for token in tokens { - match &token.contents { - TokenContents::EOL => { - // We encountered a newline character. If the last token on the - // current line is a `|`, continue the current group on the next - // line. Otherwise, close up the current group by rolling up the - // current command into the current pipeline, and then roll up - // the current pipeline into the group. - - // If the last token on the current line is a `|`, the group - // continues on the next line. - if let Some(prev) = &prev_token { - if let TokenContents::Pipe = prev.contents { - continue; - } - if let TokenContents::EOL = prev.contents { - //If we have an empty line we discard previous comments as they are not - //part of a command - //Example nu Code: - //#I am a comment getting discarded - // - //def e [] {echo hi} - prev_comments = None - } - } - - // If we have an open command, push it into the current - // pipeline. - if command.has_content() { - finish_command(&mut prev_comments, &mut command, &mut pipeline); - } - - // If we have an open pipeline, push it into the current group. - if pipeline.has_content() { - group.push(pipeline); - pipeline = LitePipeline::new(); - } - - // If we have an open group, accumulate it into `groups`. - if group.has_content() { - groups.push(group); - group = LiteGroup::new(); - } - } - TokenContents::Pipe => { - // We encountered a pipe (`|`) character, which terminates a - // command. - - // If the current command has content, accumulate it into - // the current pipeline and start a new command. - if command.has_content() { - finish_command(&mut prev_comments, &mut command, &mut pipeline); - } else { - // If the current command doesn't have content, return an - // error that indicates that the `|` was unexpected. - return ( - LiteBlock::new(groups), - Some(ParseError::extra_tokens( - "|".to_string().spanned(token.span), - )), - ); - } - } - TokenContents::Semicolon => { - // We encountered a semicolon (`;`) character, which terminates - // a pipeline. - - // If the current command has content, accumulate it into the - // current pipeline and start a new command. - if command.has_content() { - finish_command(&mut prev_comments, &mut command, &mut pipeline); - } - - // If the current pipeline has content, accumulate it into the - // current group and start a new pipeline. - if pipeline.has_content() { - group.push(pipeline); - pipeline = LitePipeline::new(); - } - } - TokenContents::Baseline(bare) => { - // We encountered an unclassified character. Accumulate it into - // the current command as a string. 
- - command.push(bare.to_string().spanned(token.span)); - } - TokenContents::Comment(comment) => { - if prev_comments.is_none() { - //Calculate amount of space indent - if let Some((i, _)) = comment.chars().enumerate().find(|(_, ch)| *ch != ' ') { - prev_comment_indent = i; - } - } - let comment: String = comment - .chars() - .enumerate() - .skip_while(|(i, ch)| *i < prev_comment_indent && *ch == ' ') - .map(|(_, ch)| ch) - .collect(); - - //Because we skipped some spaces at start, the span needs to be adjusted - let comment_span = Span::new(token.span.end() - comment.len(), token.span.end()); - - prev_comments - .get_or_insert(vec![]) - .push(comment.spanned(comment_span)); - } - } - prev_token = Some(token); - } - - // If the current command has content, accumulate it into the current pipeline. - if command.has_content() { - finish_command(&mut prev_comments, &mut command, &mut pipeline) - } - - // If the current pipeline has content, accumulate it into the current group. - if pipeline.has_content() { - group.push(pipeline); - } - - // If the current group has content, accumulate it into the list of groups. - if group.has_content() { - groups.push(group); - } - - // Return a new LiteBlock with the accumulated list of groups. - (LiteBlock::new(groups), None) -} - -/// Breaks the input string into a vector of tokens. This tokenization only tries to classify separators like -/// semicolons, pipes, etc from external bare values (values that haven't been classified further) -/// Takes in a string and and offset, which is used to offset the spans created (for when this function is used to parse inner strings) -pub fn lex(input: &str, span_offset: usize) -> (Vec, Option) { - // Break the input slice into an iterator of Unicode characters. - let mut char_indices = input.char_indices().peekable(); - let mut error = None; - - let mut output = vec![]; - let mut is_complete = true; - - // The lexing process repeats. One character of lookahead is sufficient to decide what to do next. - // - // - `|`: the token is either `|` token or a `||` token - // - `;`: the token is a semicolon - // - `\n` or `\r`: the token is an EOL (end of line) token - // - other whitespace: ignored - // - `#` the token starts a line comment, which contains all of the subsequent characters until the next EOL - // - - while let Some((idx, c)) = char_indices.peek() { - if *c == '|' { - // If the next character is `|`, it's either `|` or `||`. - - let idx = *idx; - let prev_idx = idx; - let _ = char_indices.next(); - - // If the next character is `|`, we're looking at a `||`. - if let Some((idx, c)) = char_indices.peek() { - if *c == '|' { - let idx = *idx; - let _ = char_indices.next(); - output.push(Token::new( - TokenContents::Baseline("||".into()), - Span::new(span_offset + prev_idx, span_offset + idx + 1), - )); - continue; - } - } - - // Otherwise, it's just a regular `|` token. - output.push(Token::new( - TokenContents::Pipe, - Span::new(span_offset + idx, span_offset + idx + 1), - )); - is_complete = false; - } else if *c == ';' { - // If the next character is a `;`, we're looking at a semicolon token. 
- - if !is_complete && error.is_none() { - error = Some(ParseError::extra_tokens( - ";".to_string().spanned(Span::new(*idx, idx + 1)), - )); - } - let idx = *idx; - let _ = char_indices.next(); - output.push(Token::new( - TokenContents::Semicolon, - Span::new(span_offset + idx, span_offset + idx + 1), - )); - } else if *c == '\n' || *c == '\r' { - // If the next character is a newline, we're looking at an EOL (end of line) token. - - let idx = *idx; - let _ = char_indices.next(); - output.push(Token::new( - TokenContents::EOL, - Span::new(span_offset + idx, span_offset + idx + 1), - )); - } else if *c == '#' { - let comment_start = *idx + 1; - let mut comment = String::new(); - //Don't copy '#' into comment string - char_indices.next(); - while let Some((_, c)) = char_indices.peek() { - if *c == '\n' { - break; - } - comment.push(*c); - //Advance char_indices - let _ = char_indices.next(); - } - let token = Token::new( - TokenContents::Comment(comment.clone()), - Span::new( - span_offset + comment_start, - span_offset + comment_start + comment.len(), - ), - ); - output.push(token); - } else if c.is_whitespace() { - // If the next character is non-newline whitespace, skip it. - - let _ = char_indices.next(); - } else { - // Otherwise, try to consume an unclassified token. - - let (result, err) = baseline(&mut char_indices, span_offset); - if error.is_none() { - error = err; - } - is_complete = true; - let Spanned { item, span } = result; - output.push(Token::new(TokenContents::Baseline(item), span)); - } - } - - (output, error) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn span(left: usize, right: usize) -> Span { - Span::new(left, right) - } - - mod bare { - use super::*; - - #[test] - fn simple_1() { - let input = "foo bar baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 3)); - } - - #[test] - fn simple_2() { - let input = "'foo bar' baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 9)); - } - - #[test] - fn simple_3() { - let input = "'foo\" bar' baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 10)); - } - - #[test] - fn simple_4() { - let input = "[foo bar] baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 9)); - } - - #[test] - fn simple_5() { - let input = "'foo 'bar baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 9)); - } - - #[test] - fn simple_6() { - let input = "''foo baz"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 5)); - } - - #[test] - fn simple_7() { - let input = "'' foo"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 2)); - } - - #[test] - fn simple_8() { - let input = " '' foo"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(1, 3)); - } - - #[test] - fn simple_9() { - let input = " 'foo' foo"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(1, 6)); - } - - #[test] - fn simple_10() { - let input = "[foo, bar]"; - - let (result, err) = lex(input, 0); - - assert!(err.is_none()); - assert_eq!(result[0].span, span(0, 10)); - } - - #[test] - fn lex_comment() { - let input = r#" -#A comment -def e [] {echo hi} - "#; - - let (result, err) = lex(input, 
0); - assert!(err.is_none()); - //result[0] == EOL - assert_eq!(result[1].span, span(2, 11)); - assert_eq!( - result[1].contents, - TokenContents::Comment("A comment".to_string()) - ); - } - - #[test] - fn ignore_future() { - let input = "foo 'bar"; - - let (result, _) = lex(input, 0); - - assert_eq!(result[0].span, span(0, 3)); - } - - #[test] - fn invalid_1() { - let input = "'foo bar"; - - let (_, err) = lex(input, 0); - - assert!(err.is_some()); - } - - #[test] - fn invalid_2() { - let input = "'bar"; - - let (_, err) = lex(input, 0); - - assert!(err.is_some()); - } - - #[test] - fn invalid_4() { - let input = " 'bar"; - - let (_, err) = lex(input, 0); - - assert!(err.is_some()); - } - } - - mod lite_parse { - use super::*; - - #[test] - fn pipeline() { - let (result, err) = lex("cmd1 | cmd2 ; deploy", 0); - assert!(err.is_none()); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.span(), span(0, 20)); - assert_eq!(result.block[0].pipelines[0].span(), span(0, 11)); - assert_eq!(result.block[0].pipelines[1].span(), span(14, 20)); - } - - #[test] - fn simple_1() { - let (result, err) = lex("foo", 0); - assert!(err.is_none()); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1); - assert_eq!( - result.block[0].pipelines[0].commands[0].parts[0].span, - span(0, 3) - ); - } - - #[test] - fn simple_offset() { - let (result, err) = lex("foo", 10); - assert!(err.is_none()); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1); - assert_eq!( - result.block[0].pipelines[0].commands[0].parts[0].span, - span(10, 13) - ); - } - - #[test] - fn incomplete_result() { - let (result, err) = lex("my_command \"foo' --test", 10); - assert!(matches!(err.unwrap().reason(), nu_errors::ParseErrorReason::Eof { .. 
})); - let (result, _) = block(result); - - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); - - assert_eq!( - result.block[0].pipelines[0].commands[0].parts[0].item, - "my_command" - ); - assert_eq!( - result.block[0].pipelines[0].commands[0].parts[1].item, - "\"foo' --test\"" - ); - } - #[test] - fn command_with_comment() { - let code = r#" -# My echo -# * It's much better :) -def my_echo [arg] { echo $arg } - "#; - let (result, err) = lex(code, 0); - assert!(err.is_none()); - let (result, err) = block(result); - assert!(err.is_none()); - - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4); - assert_eq!( - result.block[0].pipelines[0].commands[0].comments, - Some(vec![ - //Leading space is trimmed - "My echo".to_string().spanned(Span::new(3, 10)), - "* It's much better :)" - .to_string() - .spanned(Span::new(13, 34)) - ]) - ); - } - #[test] - fn discarded_comment() { - let code = r#" -# This comment gets discarded, because of the following empty line - -echo 42 - "#; - let (result, err) = lex(code, 0); - assert!(err.is_none()); - // assert_eq!(format!("{:?}", result), ""); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); - assert_eq!(result.block[0].pipelines[0].commands[0].comments, None); - } - } - - #[test] - fn no_discarded_white_space_start_of_comment() { - let code = r#" -#No white_space at firt line ==> No white_space discarded -# Starting space is not discarded -echo 42 - "#; - let (result, err) = lex(code, 0); - assert!(err.is_none()); - // assert_eq!(format!("{:?}", result), ""); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); - assert_eq!( - result.block[0].pipelines[0].commands[0].comments, - Some(vec![ - "No white_space at firt line ==> No white_space discarded" - .to_string() - .spanned(Span::new(2, 58)), - " Starting space is not discarded" - .to_string() - .spanned(Span::new(60, 94)), - ]) - ); - } - - #[test] - fn multiple_discarded_white_space_start_of_comment() { - let code = r#" -# Discard 2 spaces -# Discard 1 space -# Discard 2 spaces -echo 42 - "#; - let (result, err) = lex(code, 0); - assert!(err.is_none()); - // assert_eq!(format!("{:?}", result), ""); - let (result, err) = block(result); - assert!(err.is_none()); - assert_eq!(result.block.len(), 1); - assert_eq!(result.block[0].pipelines.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands.len(), 1); - assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); - assert_eq!( - result.block[0].pipelines[0].commands[0].comments, - Some(vec![ - "Discard 2 spaces".to_string().spanned(Span::new(4, 20)), - "Discard 1 space".to_string().spanned(Span::new(23, 38)), - "Discard 2 spaces".to_string().spanned(Span::new(42, 58)), - ]) - ); - } -} diff --git a/crates/nu-parser/src/lex/lexer.rs 
b/crates/nu-parser/src/lex/lexer.rs
new file mode 100644
index 000000000..e372da28f
--- /dev/null
+++ b/crates/nu-parser/src/lex/lexer.rs
@@ -0,0 +1,497 @@
+use smart_default::SmartDefault;
+use std::iter::Peekable;
+use std::str::CharIndices;
+
+use nu_errors::ParseError;
+use nu_source::{HasSpan, Span, Spanned, SpannedItem};
+
+use super::token_group::TokenBuilder;
+
+use super::tokens::{
+    CommandBuilder, CommentsBuilder, GroupBuilder, LiteBlock, LiteCommand, LiteComment,
+    PipelineBuilder, TokenContents,
+};
+
+type Input<'t> = Peekable<CharIndices<'t>>;
+
+#[derive(Debug, Clone)]
+pub struct Token {
+    pub contents: TokenContents,
+    pub span: Span,
+}
+
+impl Token {
+    pub fn new(contents: TokenContents, span: Span) -> Token {
+        Token { contents, span }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum BlockKind {
+    Paren,
+    CurlyBracket,
+    SquareBracket,
+}
+
+impl BlockKind {
+    fn closing(self) -> char {
+        match self {
+            BlockKind::Paren => ')',
+            BlockKind::SquareBracket => ']',
+            BlockKind::CurlyBracket => '}',
+        }
+    }
+}
+
+/// Finds the extents of a baseline token, returning the string with its
+/// associated span, along with any parse error that was discovered along the
+/// way.
+///
+/// Baseline tokens are unparsed content separated by spaces or a command
+/// separator (like pipe or semicolon). Baseline tokens may be surrounded by
+/// quotes (single, double, or backtick) or braces (square, paren, curly).
+///
+/// Baseline tokens may be further processed based on the needs of the syntax
+/// shape that encounters them. They are still lightly lexed. For example, if a
+/// baseline token begins with `{`, the entire token will continue until the
+/// closing `}`, taking comments into consideration.
+pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
+    let mut token_contents = String::new();
+    let start_offset = if let Some((pos, _)) = src.peek() {
+        *pos
+    } else {
+        0
+    };
+
+    // This variable tracks the starting character of a string literal, so that
+    // we remain inside the string literal lexer mode until we encounter the
+    // closing quote.
+    let mut quote_start: Option<char> = None;
+
+    // This Vec tracks paired delimiters
+    let mut block_level: Vec<BlockKind> = vec![];
+
+    // A baseline token is terminated if it's not nested inside of a paired
+    // delimiter and the next character is one of: `|`, `;`, `#` or any
+    // whitespace.
+    fn is_termination(block_level: &[BlockKind], c: char) -> bool {
+        block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
+    }
+
+    // The process of slurping up a baseline token repeats:
+    //
+    // - String literal, which begins with `'`, `"` or `\``, and continues until
+    //   the same character is encountered again.
+    // - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
+    //   the matching closing delimiter is found, skipping comments and string
+    //   literals.
+    // - When not nested inside of a delimiter pair, when a terminating
+    //   character (whitespace, `|`, `;` or `#`) is encountered, the baseline
+    //   token is done.
+    // - Otherwise, accumulate the character into the current baseline token.
+    while let Some((_, c)) = src.peek() {
+        let c = *c;
+
+        if quote_start.is_some() {
+            // If we encountered the closing quote character for the current
+            // string, we're done with the current string.
+            if Some(c) == quote_start {
+                quote_start = None;
+            }
+        } else if c == '\'' || c == '"' || c == '`' {
+            // We encountered the opening quote of a string literal.
+            quote_start = Some(c);
+        } else if c == '[' {
+            // We encountered an opening `[` delimiter.
+            block_level.push(BlockKind::SquareBracket);
+        } else if c == ']' {
+            // We encountered a closing `]` delimiter. Pop off the opening `[`
+            // delimiter.
+            if let Some(BlockKind::SquareBracket) = block_level.last() {
+                let _ = block_level.pop();
+            }
+        } else if c == '{' {
+            // We encountered an opening `{` delimiter.
+            block_level.push(BlockKind::CurlyBracket);
+        } else if c == '}' {
+            // We encountered a closing `}` delimiter. Pop off the opening `{`.
+            if let Some(BlockKind::CurlyBracket) = block_level.last() {
+                let _ = block_level.pop();
+            }
+        } else if c == '(' {
+            // We encountered an opening `(` delimiter.
+            block_level.push(BlockKind::Paren);
+        } else if c == ')' {
+            // We encountered a closing `)` delimiter. Pop off the opening `(`.
+            if let Some(BlockKind::Paren) = block_level.last() {
+                let _ = block_level.pop();
+            }
+        } else if is_termination(&block_level, c) {
+            break;
+        }
+
+        // Otherwise, accumulate the character into the current token.
+        token_contents.push(c);
+
+        // Consume the character.
+        let _ = src.next();
+    }
+
+    let span = Span::new(
+        start_offset + span_offset,
+        start_offset + span_offset + token_contents.len(),
+    );
+
+    // If there are still unclosed opening delimiters, close them and add
+    // synthetic closing characters to the accumulated token.
+    if let Some(block) = block_level.last() {
+        let delim: char = (*block).closing();
+        let cause = ParseError::unexpected_eof(delim.to_string(), span);
+
+        while let Some(bk) = block_level.pop() {
+            token_contents.push(bk.closing());
+        }
+
+        return (token_contents.spanned(span), Some(cause));
+    }
+
+    if let Some(delimiter) = quote_start {
+        // The non-lite parse trims quotes on both sides, so we add the expected quote so that
+        // anyone wanting to consume this partial parse (e.g., completions) will be able to get
+        // correct information from the non-lite parse.
+        token_contents.push(delimiter);
+
+        return (
+            token_contents.spanned(span),
+            Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
+        );
+    }
+
+    // If we didn't accumulate any characters, it's an unexpected error.
+    if token_contents.is_empty() {
+        return (
+            token_contents.spanned(span),
+            Some(ParseError::unexpected_eof("command".to_string(), span)),
+        );
+    }
+
+    (token_contents.spanned(span), None)
+}
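Editor's note: the division of labor here is that `lex` (below) only classifies separators, while everything else stays a baseline token whose paired delimiters and quotes keep their contents together. A minimal sketch of that behavior, written as a hypothetical test against the `lex` and `TokenContents` items in this module (not part of the patch):

```rust
#[test]
fn lex_classifies_separators_only() {
    let (tokens, err) = lex("echo [1 2] | first; ls", 0);
    assert!(err.is_none());

    // `[1 2]` survives as a single baseline token because `[`/`]` are
    // paired delimiters; `|` and `;` become dedicated tokens.
    assert!(matches!(&tokens[1].contents, TokenContents::Baseline(b) if b.as_str() == "[1 2]"));
    assert!(matches!(&tokens[2].contents, TokenContents::Pipe));
    assert!(matches!(&tokens[4].contents, TokenContents::Semicolon));
}
```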
+/// We encountered a `#` character. Keep consuming characters until we encounter
+/// a newline character (but don't consume it).
+fn parse_comment(input: &mut Input, hash_offset: usize) -> LiteComment {
+    let mut comment = String::new();
+    let mut in_ws = true;
+    let mut body_start = 0;
+
+    input.next();
+
+    while let Some((_, c)) = input.peek() {
+        if *c == '\n' {
+            break;
+        }
+
+        if in_ws && c.is_whitespace() {
+            body_start += c.len_utf8();
+        } else if in_ws && !c.is_whitespace() {
+            in_ws = false;
+        }
+
+        comment.push(*c);
+        input.next();
+    }
+
+    if body_start == 0 {
+        let len = comment.len();
+
+        LiteComment::new(comment.spanned(Span::new(hash_offset + 1, hash_offset + 1 + len)))
+    } else {
+        let ws = comment[..body_start].to_string();
+        let body = comment[body_start..].to_string();
+
+        let body_len = body.len();
+
+        LiteComment::new_with_ws(
+            ws.spanned(Span::new(hash_offset + 1, hash_offset + 1 + body_start)),
+            body.spanned(Span::new(
+                hash_offset + 1 + body_start,
+                hash_offset + 1 + body_start + body_len,
+            )),
+        )
+    }
+}
+
+#[derive(SmartDefault)]
+struct BlockParser {
+    groups: TokenBuilder<GroupBuilder>,
+    group: GroupBuilder,
+    pipeline: PipelineBuilder,
+    command: CommandBuilder,
+    prev_token: Option<Token>,
+    prev_comments: CommentsBuilder,
+    prev_comment_indent: usize,
+}
+
+impl BlockParser {
+    fn consumed(&mut self, token: Token) {
+        self.prev_token = Some(token);
+    }
+
+    fn success(mut self) -> (LiteBlock, Option<ParseError>) {
+        self.close_group();
+
+        (LiteBlock::new(self.groups.map(|g| g.into())), None)
+    }
+
+    fn fail(self, error: ParseError) -> (LiteBlock, Option<ParseError>) {
+        (LiteBlock::new(self.groups.map(|g| g.into())), Some(error))
+    }
+
+    fn comment(&mut self, token: &LiteComment) {
+        if self.prev_comments.is_empty() {
+            self.prev_comment_indent = token.ws_len();
+        }
+
+        self.prev_comments
+            .push(token.unindent(self.prev_comment_indent));
+    }
+
+    fn eoleol(&mut self) {
+        self.prev_comment_indent = 0;
+        self.prev_comments.take();
+
+        self.eol();
+    }
+
+    fn eol(&mut self) {
+        // If the last token on the current line is a `|`, the group
+        // continues on the next line.
+        if let Some(prev) = &self.prev_token {
+            if let TokenContents::Pipe = prev.contents {
+                return;
+            }
+        }
+
+        self.close_group();
+    }
+
+    fn pipe(&mut self) -> Result<(), ()> {
+        // If the current command has content, accumulate it into
+        // the current pipeline and start a new command.
+
+        match self.close_command() {
+            None => Err(()),
+            Some(command) => {
+                self.pipeline.push(command);
+                Ok(())
+            }
+        }
+    }
+
+    fn semicolon(&mut self) {
+        self.close_pipeline();
+    }
+
+    fn baseline(&mut self, part: Spanned<String>) {
+        // We encountered an unclassified character. Accumulate it into
+        // the current command as a string.
+
+        self.command.push(part);
+    }
+
+    fn close_command(&mut self) -> Option<LiteCommand> {
+        let command = self.command.take()?;
+        let command = LiteCommand {
+            parts: command.into(),
+            comments: self.prev_comments.take().map(|c| c.into()),
+        };
+
+        self.prev_comment_indent = 0;
+
+        Some(command)
+    }
+
+    fn close_pipeline(&mut self) {
+        if let Some(command) = self.close_command() {
+            self.pipeline.push(command);
+        }
+
+        if let Some(pipeline) = self.pipeline.take() {
+            self.group.push(pipeline);
+        }
+    }
+
+    fn close_group(&mut self) {
+        self.close_pipeline();
+
+        if let Some(group) = self.group.take() {
+            self.groups.push(group);
+        }
+    }
+}
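Editor's note: `BlockParser` is an event-driven rewrite of the old monolithic `block` loop. Each token kind maps to one event (`eol`, `eoleol`, `pipe`, `semicolon`, `baseline`, `comment`), and the builders close over each other as command, then pipeline, then group. A hedged sketch of the resulting nesting, as a hypothetical test assuming `lex` and `block` from this module (not part of the patch):

```rust
#[test]
fn block_nests_commands_into_pipelines_and_groups() {
    let (tokens, err) = lex("ls | get name; echo done", 0);
    assert!(err.is_none());

    let (lite_block, err) = block(tokens);
    assert!(err.is_none());

    // One group (no newline), split into two pipelines at the `;`:
    // `ls | get name` holds two commands, `echo done` holds one.
    assert_eq!(lite_block.block.len(), 1);
    assert_eq!(lite_block.block[0].pipelines.len(), 2);
    assert_eq!(lite_block.block[0].pipelines[0].commands.len(), 2);
    assert_eq!(lite_block.block[0].pipelines[1].commands.len(), 1);
}
```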
+/// Try to parse a list of tokens into a block.
+pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
+    let mut parser = BlockParser::default();
+
+    let mut tokens = tokens.iter().peekable();
+
+    // The parsing process repeats:
+    //
+    // - newline (`\n` or `\r`)
+    // - pipes (`|`)
+    // - semicolon
+    while let Some(token) = tokens.next() {
+        match &token.contents {
+            TokenContents::EOL => {
+                // If we encounter two newline characters in a row, use a special eoleol event,
+                // which allows the parser to discard comments that shouldn't be treated as
+                // documentation for the following item.
+                if let Some(Token {
+                    contents: TokenContents::EOL,
+                    ..
+                }) = tokens.peek()
+                {
+                    tokens.next();
+                    parser.eoleol();
+                } else {
+                    // We encountered a newline character. If the last token on the
+                    // current line is a `|`, continue the current group on the next
+                    // line. Otherwise, close up the current group by rolling up the
+                    // current command into the current pipeline, and then roll up
+                    // the current pipeline into the group.
+                    parser.eol();
+                }
+            }
+            TokenContents::Pipe => {
+                // We encountered a pipe (`|`) character, which terminates a
+                // command.
+
+                if parser.pipe().is_err() {
+                    // If the current command doesn't have content, return an
+                    // error that indicates that the `|` was unexpected.
+                    return parser.fail(ParseError::extra_tokens(
+                        "|".to_string().spanned(token.span),
+                    ));
+                }
+            }
+            TokenContents::Semicolon => {
+                // We encountered a semicolon (`;`) character, which terminates
+                // a pipeline.
+
+                parser.semicolon();
+            }
+            TokenContents::Baseline(part) => {
+                // We encountered an unclassified character. Accumulate it into
+                // the current command as a string.
+
+                parser.baseline(part.to_string().spanned(token.span));
+            }
+            TokenContents::Comment(comment) => parser.comment(comment),
+        }
+
+        parser.consumed(token.clone());
+    }
+
+    parser.success()
+}
+
+/// Breaks the input string into a vector of tokens. This tokenization only tries to classify separators like
+/// semicolons, pipes, etc. from external bare values (values that haven't been classified further).
+/// Takes in a string and an offset, which is used to offset the spans created (for when this function is used to parse inner strings).
+pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>) {
+    // Break the input slice into an iterator of Unicode characters.
+    let mut char_indices = input.char_indices().peekable();
+    let mut error = None;
+
+    let mut output = vec![];
+    let mut is_complete = true;
+
+    // The lexing process repeats. One character of lookahead is sufficient to decide what to do next.
+    //
+    // - `|`: the token is either a `|` token or a `||` token
+    // - `;`: the token is a semicolon
+    // - `\n` or `\r`: the token is an EOL (end of line) token
+    // - other whitespace: ignored
+    // - `#` the token starts a line comment, which contains all of the subsequent characters until the next EOL
+    //
+    while let Some((idx, c)) = char_indices.peek() {
+        if *c == '|' {
+            // If the next character is `|`, it's either `|` or `||`.
+
+            let idx = *idx;
+            let prev_idx = idx;
+            let _ = char_indices.next();
+
+            // If the next character is `|`, we're looking at a `||`.
+            if let Some((idx, c)) = char_indices.peek() {
+                if *c == '|' {
+                    let idx = *idx;
+                    let _ = char_indices.next();
+                    output.push(Token::new(
+                        TokenContents::Baseline("||".into()),
+                        Span::new(span_offset + prev_idx, span_offset + idx + 1),
+                    ));
+                    continue;
+                }
+            }
+
+            // Otherwise, it's just a regular `|` token.
+ output.push(Token::new( + TokenContents::Pipe, + Span::new(span_offset + idx, span_offset + idx + 1), + )); + is_complete = false; + } else if *c == ';' { + // If the next character is a `;`, we're looking at a semicolon token. + + if !is_complete && error.is_none() { + error = Some(ParseError::extra_tokens( + ";".to_string().spanned(Span::new(*idx, idx + 1)), + )); + } + let idx = *idx; + let _ = char_indices.next(); + output.push(Token::new( + TokenContents::Semicolon, + Span::new(span_offset + idx, span_offset + idx + 1), + )); + } else if *c == '\n' || *c == '\r' { + // If the next character is a newline, we're looking at an EOL (end of line) token. + + let idx = *idx; + let _ = char_indices.next(); + output.push(Token::new( + TokenContents::EOL, + Span::new(span_offset + idx, span_offset + idx + 1), + )); + } else if *c == '#' { + // If the next character is `#`, we're at the beginning of a line + // comment. The comment continues until the next newline. + let idx = *idx; + + let comment = parse_comment(&mut char_indices, idx); + let span = comment.span(); + + output.push(Token::new(TokenContents::Comment(comment), span)); + } else if c.is_whitespace() { + // If the next character is non-newline whitespace, skip it. + + let _ = char_indices.next(); + } else { + // Otherwise, try to consume an unclassified token. + + let (result, err) = baseline(&mut char_indices, span_offset); + if error.is_none() { + error = err; + } + is_complete = true; + let Spanned { item, span } = result; + output.push(Token::new(TokenContents::Baseline(item), span)); + } + } + + (output, error) +} diff --git a/crates/nu-parser/src/lex/mod.rs b/crates/nu-parser/src/lex/mod.rs new file mode 100644 index 000000000..c9e8ed0f6 --- /dev/null +++ b/crates/nu-parser/src/lex/mod.rs @@ -0,0 +1,6 @@ +pub mod lexer; +mod token_group; +pub mod tokens; + +#[cfg(test)] +mod tests; diff --git a/crates/nu-parser/src/lex/tests.rs b/crates/nu-parser/src/lex/tests.rs new file mode 100644 index 000000000..73941af08 --- /dev/null +++ b/crates/nu-parser/src/lex/tests.rs @@ -0,0 +1,358 @@ +use nu_source::{Span, SpannedItem}; + +use super::lexer::*; +use super::tokens::*; + +fn span(left: usize, right: usize) -> Span { + Span::new(left, right) +} + +mod bare { + + use super::*; + + #[test] + fn simple_1() { + let input = "foo bar baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 3)); + } + + #[test] + fn simple_2() { + let input = "'foo bar' baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 9)); + } + + #[test] + fn simple_3() { + let input = "'foo\" bar' baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 10)); + } + + #[test] + fn simple_4() { + let input = "[foo bar] baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 9)); + } + + #[test] + fn simple_5() { + let input = "'foo 'bar baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 9)); + } + + #[test] + fn simple_6() { + let input = "''foo baz"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 5)); + } + + #[test] + fn simple_7() { + let input = "'' foo"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 2)); + } + + #[test] + fn simple_8() { + let input = " '' foo"; + + let (result, 
err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(1, 3)); + } + + #[test] + fn simple_9() { + let input = " 'foo' foo"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(1, 6)); + } + + #[test] + fn simple_10() { + let input = "[foo, bar]"; + + let (result, err) = lex(input, 0); + + assert!(err.is_none()); + assert_eq!(result[0].span, span(0, 10)); + } + + #[test] + fn lex_comment() { + let input = r#" +#A comment +def e [] {echo hi} + "#; + + let (result, err) = lex(input, 0); + assert!(err.is_none()); + + //result[0] == EOL + assert_eq!(result[1].span, span(2, 11)); + assert_eq!( + result[1].contents, + TokenContents::Comment(LiteComment::new( + "A comment".to_string().spanned(Span::new(2, 11)) + )) + ); + } + + #[test] + fn ignore_future() { + let input = "foo 'bar"; + + let (result, _) = lex(input, 0); + + assert_eq!(result[0].span, span(0, 3)); + } + + #[test] + fn invalid_1() { + let input = "'foo bar"; + + let (_, err) = lex(input, 0); + + assert!(err.is_some()); + } + + #[test] + fn invalid_2() { + let input = "'bar"; + + let (_, err) = lex(input, 0); + + assert!(err.is_some()); + } + + #[test] + fn invalid_4() { + let input = " 'bar"; + + let (_, err) = lex(input, 0); + + assert!(err.is_some()); + } +} + +mod lite_parse { + use nu_source::HasSpan; + + use super::*; + + #[test] + fn pipeline() { + let (result, err) = lex("cmd1 | cmd2 ; deploy", 0); + assert!(err.is_none()); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.span(), span(0, 20)); + assert_eq!(result.block[0].pipelines[0].span(), span(0, 11)); + assert_eq!(result.block[0].pipelines[1].span(), span(14, 20)); + } + + #[test] + fn simple_1() { + let (result, err) = lex("foo", 0); + assert!(err.is_none()); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1); + assert_eq!( + result.block[0].pipelines[0].commands[0].parts[0].span, + span(0, 3) + ); + } + + #[test] + fn simple_offset() { + let (result, err) = lex("foo", 10); + assert!(err.is_none()); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1); + assert_eq!( + result.block[0].pipelines[0].commands[0].parts[0].span, + span(10, 13) + ); + } + + #[test] + fn incomplete_result() { + let (result, err) = lex("my_command \"foo' --test", 10); + assert!(matches!(err.unwrap().reason(), nu_errors::ParseErrorReason::Eof { .. 
})); + let (result, _) = block(result); + + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); + + assert_eq!( + result.block[0].pipelines[0].commands[0].parts[0].item, + "my_command" + ); + assert_eq!( + result.block[0].pipelines[0].commands[0].parts[1].item, + "\"foo' --test\"" + ); + } + #[test] + fn command_with_comment() { + let code = r#" +# My echo +# * It's much better :) +def my_echo [arg] { echo $arg } + "#; + let (result, err) = lex(code, 0); + assert!(err.is_none()); + let (result, err) = block(result); + assert!(err.is_none()); + + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4); + assert_eq!( + result.block[0].pipelines[0].commands[0].comments, + Some(vec![ + //Leading space is trimmed + LiteComment::new_with_ws( + " ".to_string().spanned(Span::new(2, 3)), + "My echo".to_string().spanned(Span::new(3, 10)) + ), + LiteComment::new_with_ws( + " ".to_string().spanned(Span::new(12, 13)), + "* It's much better :)" + .to_string() + .spanned(Span::new(13, 34)) + ) + ]) + ); + } + #[test] + fn discarded_comment() { + let code = r#" +# This comment gets discarded, because of the following empty line + +echo 42 + "#; + let (result, err) = lex(code, 0); + assert!(err.is_none()); + // assert_eq!(format!("{:?}", result), ""); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); + assert_eq!(result.block[0].pipelines[0].commands[0].comments, None); + } +} + +#[test] +fn no_discarded_white_space_start_of_comment() { + let code = r#" +#No white_space at firt line ==> No white_space discarded +# Starting space is not discarded +echo 42 + "#; + let (result, err) = lex(code, 0); + assert!(err.is_none()); + // assert_eq!(format!("{:?}", result), ""); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); + assert_eq!( + result.block[0].pipelines[0].commands[0].comments, + Some(vec![ + LiteComment::new( + "No white_space at firt line ==> No white_space discarded" + .to_string() + .spanned(Span::new(2, 58)) + ), + LiteComment::new( + " Starting space is not discarded" + .to_string() + .spanned(Span::new(60, 94)) + ), + ]) + ); +} + +#[test] +fn multiple_discarded_white_space_start_of_comment() { + let code = r#" +# Discard 2 spaces +# Discard 1 space +# Discard 2 spaces +echo 42 + "#; + let (result, err) = lex(code, 0); + assert!(err.is_none()); + // assert_eq!(format!("{:?}", result), ""); + let (result, err) = block(result); + assert!(err.is_none()); + assert_eq!(result.block.len(), 1); + assert_eq!(result.block[0].pipelines.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands.len(), 1); + assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2); + assert_eq!( + result.block[0].pipelines[0].commands[0].comments, + Some(vec![ + LiteComment::new_with_ws( + " ".to_string().spanned(Span::new(2, 4)), + 
"Discard 2 spaces".to_string().spanned(Span::new(4, 20)) + ), + LiteComment::new_with_ws( + " ".to_string().spanned(Span::new(22, 23)), + "Discard 1 space".to_string().spanned(Span::new(23, 38)) + ), + LiteComment::new_with_ws( + " ".to_string().spanned(Span::new(40, 42)), + "Discard 2 spaces".to_string().spanned(Span::new(42, 58)) + ), + ]) + ); +} diff --git a/crates/nu-parser/src/lex/token_group.rs b/crates/nu-parser/src/lex/token_group.rs new file mode 100644 index 000000000..b9027ba2a --- /dev/null +++ b/crates/nu-parser/src/lex/token_group.rs @@ -0,0 +1,76 @@ +use smart_default::SmartDefault; +use std::iter::FromIterator; + +use derive_new::new; +use nu_source::{HasSpan, Span}; + +#[derive(Debug, Clone, SmartDefault, new)] +pub struct TokenBuilder { + #[default(None)] + contents: Option>, +} + +impl Into> for TokenBuilder +where + T: HasSpan, +{ + fn into(self) -> Vec { + self.contents.unwrap_or_else(Vec::new) + } +} + +impl HasSpan for TokenBuilder +where + T: HasSpan, +{ + fn span(&self) -> Span { + match &self.contents { + Some(vec) => { + let mut iter = vec.iter(); + let head = iter.next(); + let last = iter.last().or(head); + + match (head, last) { + (Some(head), Some(last)) => Span::new(head.span().start(), last.span().end()), + _ => Span::default(), + } + } + None => Span::new(0, 0), + } + } +} + +impl TokenBuilder +where + T: HasSpan, +{ + pub fn is_empty(&self) -> bool { + self.contents.is_none() + } + + pub fn take(&mut self) -> Option> { + self.contents.take().map(|c| TokenBuilder::new(Some(c))) + } + + pub fn map(self, mapper: impl Fn(T) -> U) -> I + where + I: FromIterator, + { + match self.contents { + Some(contents) => contents.into_iter().map(mapper).collect(), + None => I::from_iter(None), + } + } + + pub fn push(&mut self, item: T) { + let contents = match self.contents.take() { + Some(mut contents) => { + contents.push(item); + contents + } + None => vec![item], + }; + + self.contents.replace(contents); + } +} diff --git a/crates/nu-parser/src/lex/tokens.rs b/crates/nu-parser/src/lex/tokens.rs new file mode 100644 index 000000000..baa23a3ea --- /dev/null +++ b/crates/nu-parser/src/lex/tokens.rs @@ -0,0 +1,212 @@ +use derive_new::new; +use itertools::Itertools; +use std::fmt; + +use nu_source::{HasSpan, Span, Spanned, SpannedItem}; + +use super::token_group::TokenBuilder; + +#[derive(Debug, Clone, PartialEq, is_enum_variant)] +pub enum TokenContents { + /// A baseline token is an atomic chunk of source code. This means that the + /// token contains the entirety of string literals, as well as the entirety + /// of sections delimited by paired delimiters. + /// + /// For example, if the token begins with `{`, the baseline token continues + /// until the closing `}` (after taking comments and string literals into + /// consideration). + Baseline(String), + Comment(LiteComment), + Pipe, + Semicolon, + EOL, +} + +impl fmt::Display for TokenContents { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TokenContents::Baseline(base) => write!(f, "{}", base), + TokenContents::Comment(comm) => write!(f, "{}", comm), + TokenContents::Pipe => write!(f, "|"), + TokenContents::Semicolon => write!(f, ";"), + TokenContents::EOL => write!(f, "\\n"), + } + } +} + +pub type CommandBuilder = TokenBuilder>; +pub type CommentsBuilder = TokenBuilder; +pub type PipelineBuilder = TokenBuilder; +pub type GroupBuilder = TokenBuilder; + +/// A LiteComment is a line comment. It begins with `#` and continues until (but not including) the +/// next newline. 
diff --git a/crates/nu-parser/src/lex/tokens.rs b/crates/nu-parser/src/lex/tokens.rs
new file mode 100644
index 000000000..baa23a3ea
--- /dev/null
+++ b/crates/nu-parser/src/lex/tokens.rs
@@ -0,0 +1,212 @@
+use derive_new::new;
+use itertools::Itertools;
+use std::fmt;
+
+use nu_source::{HasSpan, Span, Spanned, SpannedItem};
+
+use super::token_group::TokenBuilder;
+
+#[derive(Debug, Clone, PartialEq, is_enum_variant)]
+pub enum TokenContents {
+    /// A baseline token is an atomic chunk of source code. This means that the
+    /// token contains the entirety of string literals, as well as the entirety
+    /// of sections delimited by paired delimiters.
+    ///
+    /// For example, if the token begins with `{`, the baseline token continues
+    /// until the closing `}` (after taking comments and string literals into
+    /// consideration).
+    Baseline(String),
+    Comment(LiteComment),
+    Pipe,
+    Semicolon,
+    EOL,
+}
+
+impl fmt::Display for TokenContents {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            TokenContents::Baseline(base) => write!(f, "{}", base),
+            TokenContents::Comment(comm) => write!(f, "{}", comm),
+            TokenContents::Pipe => write!(f, "|"),
+            TokenContents::Semicolon => write!(f, ";"),
+            TokenContents::EOL => write!(f, "\\n"),
+        }
+    }
+}
+
+pub type CommandBuilder = TokenBuilder<Spanned<String>>;
+pub type CommentsBuilder = TokenBuilder<LiteComment>;
+pub type PipelineBuilder = TokenBuilder<LiteCommand>;
+pub type GroupBuilder = TokenBuilder<PipelineBuilder>;
+
+/// A LiteComment is a line comment. It begins with `#` and continues until (but not including) the
+/// next newline.
+///
+/// It remembers any leading whitespace, which is used in later processing steps to strip off
+/// leading whitespace for an entire comment block when it is associated with a definition.
+#[derive(Debug, PartialEq, Clone)]
+pub struct LiteComment {
+    leading_ws: Option<Spanned<String>>,
+    rest: Spanned<String>,
+}
+
+impl LiteComment {
+    pub fn new(string: impl Into<Spanned<String>>) -> LiteComment {
+        LiteComment {
+            leading_ws: None,
+            rest: string.into(),
+        }
+    }
+
+    pub fn new_with_ws(
+        ws: impl Into<Spanned<String>>,
+        comment: impl Into<Spanned<String>>,
+    ) -> LiteComment {
+        LiteComment {
+            leading_ws: Some(ws.into()),
+            rest: comment.into(),
+        }
+    }
+
+    pub fn unindent(&self, excluded_spaces: usize) -> LiteComment {
+        match &self.leading_ws {
+            // If there's no leading whitespace, there's no whitespace to exclude
+            None => self.clone(),
+            Some(Spanned { item, span }) => {
+                // If the number of spaces to exclude is larger than the amount of whitespace we
+                // have, there's no whitespace to move into the comment body.
+                if excluded_spaces > item.len() {
+                    self.clone()
+                } else {
+                    // If there are no spaces to exclude, prepend all of the leading whitespace to
+                    // the comment body.
+                    if excluded_spaces == 0 {
+                        let rest_span = self.span();
+                        let rest = format!("{}{}", item, self.rest.item).spanned(rest_span);
+                        return LiteComment {
+                            leading_ws: None,
+                            rest,
+                        };
+                    }
+
+                    // Pull off excluded_spaces number of spaces, and create a new Spanned<String>
+                    // for that whitespace. Any remaining spaces will be added to the comment.
+                    let excluded_ws = item[..excluded_spaces]
+                        .to_string()
+                        .spanned(Span::new(span.start(), span.start() + excluded_spaces));
+
+                    let included_ws = &item[excluded_spaces..];
+                    let rest_start = span.start() + excluded_spaces;
+                    let rest_span = Span::new(rest_start, rest_start + self.rest.len());
+
+                    let rest = format!("{}{}", included_ws, self.rest.item).spanned(rest_span);
+
+                    LiteComment {
+                        leading_ws: Some(excluded_ws),
+                        rest,
+                    }
+                }
+            }
+        }
+    }
+
+    pub fn ws_len(&self) -> usize {
+        match &self.leading_ws {
+            None => 0,
+            Some(ws) => ws.item.len(),
+        }
+    }
+
+    pub(crate) fn trim(&self) -> Spanned<String> {
+        let trimmed = self.rest.trim();
+
+        trimmed.to_string().spanned(Span::new(
+            self.rest.span().start(),
+            self.rest.span().start() + trimmed.len(),
+        ))
+    }
+}
+
+impl fmt::Display for LiteComment {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.leading_ws {
+            None => write!(f, "#{}", self.rest.item),
+            Some(leading) => write!(f, "#{}{}", leading.item, self.rest.item),
+        }
+    }
+}
+
+impl HasSpan for LiteComment {
+    fn span(&self) -> Span {
+        match &self.leading_ws {
+            None => self.rest.span(),
+            Some(leading) => leading.span().until(self.rest.span()),
+        }
+    }
+}
+
+/// A `LiteCommand` is a list of words that will get meaning when processed by
+/// the parser.
+#[derive(Debug, Default, Clone)]
+pub struct LiteCommand {
+    pub parts: Vec<Spanned<String>>,
+    /// Preceding comments.
+    pub comments: Option<Vec<LiteComment>>,
+}
+
+impl HasSpan for LiteCommand {
+    fn span(&self) -> Span {
+        Span::from_list(&self.parts)
+    }
+}
+
+impl LiteCommand {
+    pub fn comments_joined(&self) -> String {
+        match &self.comments {
+            None => "".to_string(),
+            Some(text) => text.iter().map(|s| s.trim().item).join("\n"),
+        }
+    }
+}
+
+/// A `LitePipeline` is a series of `LiteCommand`s, separated by `|`.
+#[derive(Debug, Clone, new)]
+pub struct LitePipeline {
+    pub commands: Vec<LiteCommand>,
+}
+
+impl HasSpan for LitePipeline {
+    fn span(&self) -> Span {
+        Span::from_list(&self.commands)
+    }
+}
+
+/// A `LiteGroup` is a series of `LitePipeline`s, separated by `;`.
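To make the `LiteComment` bookkeeping concrete, here is a hedged round-trip sketch, again assuming crate-internal access (`trim` is `pub(crate)`). It models the line `#  Discard 2 spaces` from the lexer tests above:

    use nu_source::{HasSpan, Span, SpannedItem};

    // Inside nu-parser, where `LiteComment` is visible:
    fn demo() {
        // `#` sits at offset 1, the two spaces at 2..4, the body at 4..20.
        let comment = LiteComment::new_with_ws(
            "  ".to_string().spanned(Span::new(2, 4)),
            "Discard 2 spaces".to_string().spanned(Span::new(4, 20)),
        );

        // Display reinstates the `#` plus the recorded whitespace...
        assert_eq!(comment.to_string(), "#  Discard 2 spaces");
        // ...while the span covers whitespace and body, but not the `#`.
        assert_eq!(comment.span(), Span::new(2, 20));
        // `trim` yields the bare text used when comments become documentation.
        assert_eq!(comment.trim().item, "Discard 2 spaces");
    }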
+#[derive(Debug, Clone, new)]
+pub struct LiteGroup {
+    pub pipelines: Vec<LitePipeline>,
+}
+
+impl From<GroupBuilder> for LiteGroup {
+    fn from(group: GroupBuilder) -> Self {
+        LiteGroup::new(group.map(|p| LitePipeline::new(p.into())))
+    }
+}
+
+impl HasSpan for LiteGroup {
+    fn span(&self) -> Span {
+        Span::from_list(&self.pipelines)
+    }
+}
+
+/// A `LiteBlock` is a series of `LiteGroup`s, separated by newlines.
+#[derive(Debug, Clone, new)]
+pub struct LiteBlock {
+    pub block: Vec<LiteGroup>,
+}
+
+impl HasSpan for LiteBlock {
+    fn span(&self) -> Span {
+        Span::from_list(&self.block)
+    }
+}
diff --git a/crates/nu-parser/src/lib.rs b/crates/nu-parser/src/lib.rs
index e532a2048..4565c8da1 100644
--- a/crates/nu-parser/src/lib.rs
+++ b/crates/nu-parser/src/lib.rs
@@ -9,7 +9,8 @@ mod scope;
 mod shapes;
 mod signature;
 
-pub use lex::{block, lex, LiteBlock, LiteCommand, LiteGroup, LitePipeline};
+pub use lex::lexer::{block, lex};
+pub use lex::tokens::{LiteBlock, LiteCommand, LiteGroup, LitePipeline};
 pub use parse::{classify_block, garbage, parse, parse_full_column_path, parse_math_expression};
 pub use path::expand_ndots;
 pub use scope::ParserScope;
diff --git a/crates/nu-parser/src/parse.rs b/crates/nu-parser/src/parse.rs
index 57411fcb6..a0122c226 100644
--- a/crates/nu-parser/src/parse.rs
+++ b/crates/nu-parser/src/parse.rs
@@ -9,10 +9,11 @@ use nu_protocol::hir::{
     Unit,
 };
 use nu_protocol::{NamedType, PositionalType, Signature, SyntaxShape, UnspannedPathMember};
-use nu_source::{Span, Spanned, SpannedItem};
+use nu_source::{HasSpan, Span, Spanned, SpannedItem};
 use num_bigint::BigInt;
 
-use crate::lex::{block, lex, LiteBlock, LiteCommand, LitePipeline};
+use crate::lex::lexer::{block, lex};
+use crate::lex::tokens::{LiteBlock, LiteCommand, LitePipeline};
 use crate::path::expand_path;
 use crate::scope::ParserScope;
 use bigdecimal::BigDecimal;
@@ -2142,7 +2143,7 @@ fn unit_parse_byte_units() -> Result<(), ParseError> {
         string: String,
         value: i64,
         unit: Unit,
-    };
+    }
 
     let cases = [
         TestCase {
diff --git a/crates/nu-parser/src/parse/def.rs b/crates/nu-parser/src/parse/def.rs
index b7c85586e..055fff095 100644
--- a/crates/nu-parser/src/parse/def.rs
+++ b/crates/nu-parser/src/parse/def.rs
@@ -1,12 +1,15 @@
-use crate::parse::{classify_block, util::trim_quotes};
+use crate::{
+    lex::tokens::LiteCommand,
+    parse::{classify_block, util::trim_quotes},
+};
 use indexmap::IndexMap;
 use nu_errors::ParseError;
 use nu_protocol::hir::Block;
-use nu_source::SpannedItem;
+use nu_source::{HasSpan, SpannedItem};
 
 //use crate::errors::{ParseError, ParseResult};
-use crate::lex::{block, lex, LiteCommand};
+use crate::lex::lexer::{block, lex};
 
 use crate::ParserScope;
diff --git a/crates/nu-parser/src/parse/def/param_flag_list.rs b/crates/nu-parser/src/parse/def/param_flag_list.rs
index ccdfc883f..d753cd38e 100644
--- a/crates/nu-parser/src/parse/def/param_flag_list.rs
+++ b/crates/nu-parser/src/parse/def/param_flag_list.rs
@@ -14,7 +14,10 @@ use log::debug;
 
 use crate::{
-    lex::{lex, Token, TokenContents},
+    lex::{
+        lexer::{lex, Token},
+        tokens::TokenContents,
+    },
     parse::util::token_to_spanned_string,
 };
 use nu_errors::ParseError;
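With `lib.rs` re-exporting the lexer entry points from their new submodules, downstream callers keep the same one-stop imports. A small usage sketch matching the signatures exercised by the tests above (both `lex` and `block` return a result plus an optional error):

    use nu_parser::{block, lex};

    fn main() {
        // Lex the source (span offsets starting at 0), then group the token
        // stream into a LiteBlock of groups, pipelines, and commands.
        let (tokens, err) = lex("echo 42", 0);
        assert!(err.is_none());

        let (lite_block, err) = block(tokens);
        assert!(err.is_none());

        // One group with one pipeline, whose single command has two parts.
        assert_eq!(lite_block.block.len(), 1);
        assert_eq!(lite_block.block[0].pipelines[0].commands[0].parts.len(), 2);
        assert_eq!(
            lite_block.block[0].pipelines[0].commands[0].parts[0].item,
            "echo"
        );
    }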
diff --git a/crates/nu-parser/src/parse/util.rs b/crates/nu-parser/src/parse/util.rs
index b09a8dc1b..95d28fe36 100644
--- a/crates/nu-parser/src/parse/util.rs
+++ b/crates/nu-parser/src/parse/util.rs
@@ -2,7 +2,7 @@ use nu_errors::ParseError;
 use nu_protocol::hir::{Expression, SpannedExpression};
 use nu_source::{Span, Spanned, SpannedItem};
 
-use crate::lex::Token;
+use crate::lex::lexer::Token;
 
 pub(crate) fn token_to_spanned_string(token: &Token) -> Spanned<String> {
     token.contents.to_string().spanned(token.span)
diff --git a/crates/nu-source/src/meta.rs b/crates/nu-source/src/meta.rs
index 91d8e9ae2..671c21509 100644
--- a/crates/nu-source/src/meta.rs
+++ b/crates/nu-source/src/meta.rs
@@ -500,6 +500,19 @@ impl Span {
         Span::new(0, 0)
     }
 
+    pub fn from_list(list: &[impl HasSpan]) -> Span {
+        let mut iterator = list.iter();
+
+        match iterator.next() {
+            None => Span::new(0, 0),
+            Some(first) => {
+                let last = iterator.last().unwrap_or(first);
+
+                Span::new(first.span().start, last.span().end)
+            }
+        }
+    }
+
     /// Creates a new `Span` from start and end inputs. The end parameter must be greater than or equal to the start parameter.
     pub fn new(start: usize, end: usize) -> Span {
         assert!(