nushell/crates/nu-parser/src/lite_parser.rs
Devyn Cairns 8e2917b9ae
Make assignment and const consistent with let/mut (#13385)
# Description

This makes assignment operations and `const` behave the same way `let`
and `mut` do, absorbing the rest of the pipeline.

Changes the lexer to be able to recognize assignment operators as a
separate token, and then makes the lite parser continue to push spans
into the same command regardless of any redirections or pipes if an
assignment operator is encountered. Because the pipeline is no longer
split up by the lite parser at this point, it's trivial to just parse
the right hand side as if it were a subexpression not contained within
parentheses.

# User-Facing Changes
Big breaking change. These are all now possible:

```nushell
const path = 'a' | path join 'b'

mut x = 2
$x = random int
$x = [1 2 3] | math sum

$env.FOO = random chars
```

In the past, these would have led to (an attempt at) bare word string
parsing. So while `$env.FOO = bar` would have previously set the
environment variable `FOO` to the string `"bar"`, it now tries to run
the command named `bar`, hence the major breaking change.

However, this is desirable because it is very consistent - if you see
the `=`, you can just assume it absorbs everything else to the right of
it.

# Tests + Formatting
Added tests for the new behaviour. Adjusted some existing tests that
depended on the right hand side of assignments being parsed as
barewords.

# After Submitting
- [ ] release notes (breaking change!)
2024-07-30 18:55:22 -05:00

442 lines
17 KiB
Rust

//! Lite parsing converts a flat stream of tokens from the lexer to a syntax element structure that
//! can be parsed.
use crate::{Token, TokenContents};
use itertools::{Either, Itertools};
use nu_protocol::{ast::RedirectionSource, ParseError, Span};
use std::mem;
#[derive(Debug, Clone, Copy)]
pub enum LiteRedirectionTarget {
File {
connector: Span,
file: Span,
append: bool,
},
Pipe {
connector: Span,
},
}
impl LiteRedirectionTarget {
pub fn connector(&self) -> Span {
match self {
LiteRedirectionTarget::File { connector, .. }
| LiteRedirectionTarget::Pipe { connector } => *connector,
}
}
pub fn spans(&self) -> impl Iterator<Item = Span> {
match *self {
LiteRedirectionTarget::File {
connector, file, ..
} => Either::Left([connector, file].into_iter()),
LiteRedirectionTarget::Pipe { connector } => Either::Right(std::iter::once(connector)),
}
}
}
#[derive(Debug, Clone)]
pub enum LiteRedirection {
Single {
source: RedirectionSource,
target: LiteRedirectionTarget,
},
Separate {
out: LiteRedirectionTarget,
err: LiteRedirectionTarget,
},
}
impl LiteRedirection {
pub fn spans(&self) -> impl Iterator<Item = Span> {
match self {
LiteRedirection::Single { target, .. } => Either::Left(target.spans()),
LiteRedirection::Separate { out, err } => {
Either::Right(out.spans().chain(err.spans()).sorted())
}
}
}
}
#[derive(Debug, Clone, Default)]
pub struct LiteCommand {
pub pipe: Option<Span>,
pub comments: Vec<Span>,
pub parts: Vec<Span>,
pub redirection: Option<LiteRedirection>,
}
impl LiteCommand {
fn push(&mut self, span: Span) {
self.parts.push(span);
}
fn check_accepts_redirection(&self, span: Span) -> Option<ParseError> {
self.parts
.is_empty()
.then_some(ParseError::UnexpectedRedirection { span })
}
fn try_add_redirection(
&mut self,
source: RedirectionSource,
target: LiteRedirectionTarget,
) -> Result<(), ParseError> {
let redirection = match (self.redirection.take(), source) {
(None, _) if self.parts.is_empty() => Err(ParseError::UnexpectedRedirection {
span: target.connector(),
}),
(None, source) => Ok(LiteRedirection::Single { source, target }),
(
Some(LiteRedirection::Single {
source: RedirectionSource::Stdout,
target: out,
}),
RedirectionSource::Stderr,
) => Ok(LiteRedirection::Separate { out, err: target }),
(
Some(LiteRedirection::Single {
source: RedirectionSource::Stderr,
target: err,
}),
RedirectionSource::Stdout,
) => Ok(LiteRedirection::Separate { out: target, err }),
(
Some(LiteRedirection::Single {
source,
target: first,
}),
_,
) => Err(ParseError::MultipleRedirections(
source,
first.connector(),
target.connector(),
)),
(
Some(LiteRedirection::Separate { out, .. }),
RedirectionSource::Stdout | RedirectionSource::StdoutAndStderr,
) => Err(ParseError::MultipleRedirections(
RedirectionSource::Stdout,
out.connector(),
target.connector(),
)),
(Some(LiteRedirection::Separate { err, .. }), RedirectionSource::Stderr) => {
Err(ParseError::MultipleRedirections(
RedirectionSource::Stderr,
err.connector(),
target.connector(),
))
}
}?;
self.redirection = Some(redirection);
Ok(())
}
pub fn parts_including_redirection(&self) -> impl Iterator<Item = Span> + '_ {
self.parts.iter().copied().chain(
self.redirection
.iter()
.flat_map(|redirection| redirection.spans()),
)
}
}
#[derive(Debug, Clone, Default)]
pub struct LitePipeline {
pub commands: Vec<LiteCommand>,
}
impl LitePipeline {
fn push(&mut self, element: &mut LiteCommand) {
if !element.parts.is_empty() || element.redirection.is_some() {
self.commands.push(mem::take(element));
}
}
}
#[derive(Debug, Clone, Default)]
pub struct LiteBlock {
pub block: Vec<LitePipeline>,
}
impl LiteBlock {
fn push(&mut self, pipeline: &mut LitePipeline) {
if !pipeline.commands.is_empty() {
self.block.push(mem::take(pipeline));
}
}
}
fn last_non_comment_token(tokens: &[Token], cur_idx: usize) -> Option<TokenContents> {
let mut expect = TokenContents::Comment;
for token in tokens.iter().take(cur_idx).rev() {
// skip ([Comment]+ [Eol]) pair
match (token.contents, expect) {
(TokenContents::Comment, TokenContents::Comment)
| (TokenContents::Comment, TokenContents::Eol) => expect = TokenContents::Eol,
(TokenContents::Eol, TokenContents::Eol) => expect = TokenContents::Comment,
(token, _) => return Some(token),
}
}
None
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
if tokens.is_empty() {
return (LiteBlock::default(), None);
}
let mut block = LiteBlock::default();
let mut pipeline = LitePipeline::default();
let mut command = LiteCommand::default();
let mut last_token = TokenContents::Eol;
let mut file_redirection = None;
let mut curr_comment: Option<Vec<Span>> = None;
let mut is_assignment = false;
let mut error = None;
for (idx, token) in tokens.iter().enumerate() {
if is_assignment {
match &token.contents {
// Consume until semicolon or terminating EOL. Assignments absorb pipelines and
// redirections.
TokenContents::Eol => {
// Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
//
// `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
// and so `[Comment] | [Eol]` should be ignore to make it work
let actual_token = last_non_comment_token(tokens, idx);
if actual_token != Some(TokenContents::Pipe) {
is_assignment = false;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
}
TokenContents::Semicolon => {
is_assignment = false;
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
command.comments.push(token.span);
curr_comment = None;
}
_ => command.push(token.span),
}
} else if let Some((source, append, span)) = file_redirection.take() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::Item => {
let target = LiteRedirectionTarget::File {
connector: span,
file: token.span,
append,
};
if let Err(err) = command.try_add_redirection(source, target) {
error = error.or(Some(err));
command.push(span);
command.push(token.span)
}
}
TokenContents::AssignmentOperator => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::OutGreaterThan
| TokenContents::OutGreaterGreaterThan
| TokenContents::ErrGreaterThan
| TokenContents::ErrGreaterGreaterThan
| TokenContents::OutErrGreaterThan
| TokenContents::OutErrGreaterGreaterThan => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
command.push(token.span);
}
TokenContents::Pipe
| TokenContents::ErrGreaterPipe
| TokenContents::OutErrGreaterPipe => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Eol => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
pipeline.push(&mut command);
}
TokenContents::Semicolon => {
error = error.or(Some(ParseError::Expected("redirection target", token.span)));
command.push(span);
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
error = error.or(Some(ParseError::Expected("redirection target", span)));
command.push(span);
command.comments.push(token.span);
curr_comment = None;
}
}
} else {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
command.push(token.span);
}
TokenContents::Item => {
// This is commented out to preserve old parser behavior,
// but we should probably error here.
//
// if element.redirection.is_some() {
// error = error.or(Some(ParseError::LabeledError(
// "Unexpected positional".into(),
// "cannot add positional arguments after output redirection".into(),
// token.span,
// )));
// }
//
// For example, this is currently allowed: ^echo thing o> out.txt extra_arg
// If we have a comment, go ahead and attach it
if let Some(curr_comment) = curr_comment.take() {
command.comments = curr_comment;
}
command.push(token.span);
}
TokenContents::AssignmentOperator => {
// When in assignment mode, we'll just consume pipes or redirections as part of
// the command.
is_assignment = true;
if let Some(curr_comment) = curr_comment.take() {
command.comments = curr_comment;
}
command.push(token.span);
}
TokenContents::OutGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stdout, false, token.span));
}
TokenContents::OutGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stdout, true, token.span));
}
TokenContents::ErrGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stderr, false, token.span));
}
TokenContents::ErrGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::Stderr, true, token.span));
}
TokenContents::OutErrGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection =
Some((RedirectionSource::StdoutAndStderr, false, token.span));
}
TokenContents::OutErrGreaterGreaterThan => {
error = error.or(command.check_accepts_redirection(token.span));
file_redirection = Some((RedirectionSource::StdoutAndStderr, true, token.span));
}
TokenContents::ErrGreaterPipe => {
let target = LiteRedirectionTarget::Pipe {
connector: token.span,
};
if let Err(err) = command.try_add_redirection(RedirectionSource::Stderr, target)
{
error = error.or(Some(err));
}
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::OutErrGreaterPipe => {
let target = LiteRedirectionTarget::Pipe {
connector: token.span,
};
if let Err(err) =
command.try_add_redirection(RedirectionSource::StdoutAndStderr, target)
{
error = error.or(Some(err));
}
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Pipe => {
pipeline.push(&mut command);
command.pipe = Some(token.span);
}
TokenContents::Eol => {
// Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
//
// `[Eol]` branch checks if previous token is `[Pipe]` to construct pipeline
// and so `[Comment] | [Eol]` should be ignore to make it work
let actual_token = last_non_comment_token(tokens, idx);
if actual_token != Some(TokenContents::Pipe) {
pipeline.push(&mut command);
block.push(&mut pipeline);
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
}
TokenContents::Semicolon => {
pipeline.push(&mut command);
block.push(&mut pipeline);
}
TokenContents::Comment => {
// Comment is beside something
if last_token != TokenContents::Eol {
command.comments.push(token.span);
curr_comment = None;
} else {
// Comment precedes something
if let Some(curr_comment) = &mut curr_comment {
curr_comment.push(token.span);
} else {
curr_comment = Some(vec![token.span]);
}
}
}
}
}
last_token = token.contents;
}
if let Some((_, _, span)) = file_redirection {
command.push(span);
error = error.or(Some(ParseError::Expected("redirection target", span)));
}
pipeline.push(&mut command);
block.push(&mut pipeline);
if last_non_comment_token(tokens, tokens.len()) == Some(TokenContents::Pipe) {
(
block,
Some(ParseError::UnexpectedEof(
"pipeline missing end".into(),
tokens[tokens.len() - 1].span,
)),
)
} else {
(block, error)
}
}