nushell/crates/nu-parser/src/lite_parser.rs
WindSoilder b150f9f5d8
Avoid blocking when o+e> redirects too much stderr message (#8784)
# Description

Fixes: #8565

Here is another pr #7240 tried to address the issue, but it works in a
wrong way.

After this change `o+e>` won't redirect all stdout message then stderr
message and it works more like how bash does.

# User-Facing Changes

For the given python code:
```python
# test.py
import sys

print('aa'*300, flush=True)
print('bb'*999999, file=sys.stderr, flush=True)
print('cc'*300, flush=True)
```

Running `python test.py out+err> a.txt` shoudn't hang nushell, and
`a.txt` keeps output in the same order

## About the change
The core idea is that when doing lite-parsing, introduce a new variant
`LiteElement::SameTargetRedirection` if we meet `out+err>` redirection
token(which is generated by lex function),

During converting from lite block to block,
LiteElement::SameTargetRedirection will be converted to
PipelineElement::SameTargetRedirection.

Then in the block eval process, if we get
PipelineElement::SameTargetRedirection, we'll invoke `run-external` with
`--redirect-combine` flag, then pipe the result into save command

## What happened internally?

Take the following command as example:
`^ls o+e> log.txt`

lex parsing result(`Tokens`) are not changed, but `LiteBlock` and
`Block` is changed after this pr.
### LiteBlock before
```rust
LiteBlock {
    block: [
        LitePipeline { commands: [
            Command(None, LiteCommand { comments: [], parts: [Span { start: 39041, end: 39044 }] }),
            // actually the span of first Redirection is wrong too..
            Redirection(Span { start: 39058, end: 39062 }, StdoutAndStderr, LiteCommand { comments: [], parts: [Span { start: 39050, end: 39057 }] }),
        ]
    }]
}
```
### LiteBlock after
```rust
LiteBlock { 
    block: [
        LitePipeline {
            commands: [
                SameTargetRedirection {
                    cmd: (None, LiteCommand { comments: [], parts: [Span { start: 147945, end: 147948}]}),
                    redirection: (Span { start: 147949, end: 147957 }, LiteCommand { comments: [], parts: [Span { start: 147958, end: 147965 }]})
                }
            ]
        }
    ]
}
```
### Block before
```rust
Pipeline {
    elements: [
        Expression(None, Expression {
            expr: ExternalCall(Expression { expr: String("ls"), span: Span { start: 39042, end: 39044 }, ty: String, custom_completion: None }, [], false),
            span: Span { start: 39041, end: 39044 },
            ty: Any, custom_completion: None 
        }),
        Redirection(Span { start: 39058, end: 39062 }, StdoutAndStderr, Expression { expr: String("out.txt"), span: Span { start: 39050, end: 39057 }, ty: String, custom_completion: None })] }
```
### Block after
```rust
Pipeline {
    elements: [
        SameTargetRedirection { 
            cmd: (None, Expression {
                expr: ExternalCall(Expression { expr: String("ls"), span: Span { start: 147946, end: 147948 }, ty: String, custom_completion: None}, [], false),
                span: Span { start: 147945, end: 147948},
                ty: Any, custom_completion: None
            }),
            redirection: (Span { start: 147949, end: 147957}, Expression {expr: String("log.txt"), span: Span { start: 147958, end: 147965 },ty: String,custom_completion: None}
        }
    ]
}
```

# Tests + Formatting

Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- crates/nu-utils/standard_library/tests.nu` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```

# After Submitting

If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
2023-05-17 17:47:03 -05:00

472 lines
18 KiB
Rust

/// Lite parsing converts a flat stream of tokens from the lexer to a syntax element structure that
/// can be parsed.
use crate::{Token, TokenContents};
use nu_protocol::{ast::Redirection, ParseError, Span};
#[derive(Debug)]
pub struct LiteCommand {
pub comments: Vec<Span>,
pub parts: Vec<Span>,
}
impl Default for LiteCommand {
fn default() -> Self {
Self::new()
}
}
impl LiteCommand {
pub fn new() -> Self {
Self {
comments: vec![],
parts: vec![],
}
}
pub fn push(&mut self, span: Span) {
self.parts.push(span);
}
pub fn is_empty(&self) -> bool {
self.parts.is_empty()
}
}
// Note: the Span is the span of the connector not the whole element
#[derive(Debug)]
pub enum LiteElement {
Command(Option<Span>, LiteCommand),
Redirection(Span, Redirection, LiteCommand),
// SeparateRedirection variant can only be generated by two different Redirection variant
SeparateRedirection {
out: (Span, LiteCommand),
err: (Span, LiteCommand),
},
// SameTargetRedirection variant can only be generated by Command with Redirection::OutAndErr
SameTargetRedirection {
cmd: (Option<Span>, LiteCommand),
redirection: (Span, LiteCommand),
},
}
#[derive(Debug)]
pub struct LitePipeline {
pub commands: Vec<LiteElement>,
}
impl Default for LitePipeline {
fn default() -> Self {
Self::new()
}
}
impl LitePipeline {
pub fn new() -> Self {
Self { commands: vec![] }
}
pub fn push(&mut self, element: LiteElement) {
self.commands.push(element);
}
pub fn insert(&mut self, index: usize, element: LiteElement) {
self.commands.insert(index, element);
}
pub fn is_empty(&self) -> bool {
self.commands.is_empty()
}
}
#[derive(Debug)]
pub struct LiteBlock {
pub block: Vec<LitePipeline>,
}
impl Default for LiteBlock {
fn default() -> Self {
Self::new()
}
}
impl LiteBlock {
pub fn new() -> Self {
Self { block: vec![] }
}
pub fn push(&mut self, mut pipeline: LitePipeline) {
// once we push `pipeline` to our block
// the block takes ownership of `pipeline`, which means that
// our `pipeline` is complete on collecting commands.
self.merge_redirections(&mut pipeline);
self.merge_cmd_with_outerr_redirection(&mut pipeline);
self.block.push(pipeline);
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
fn merge_cmd_with_outerr_redirection(&self, pipeline: &mut LitePipeline) {
let mut cmd_index = None;
let mut outerr_index = None;
for (index, cmd) in pipeline.commands.iter().enumerate() {
if let LiteElement::Command(..) = cmd {
cmd_index = Some(index);
}
if let LiteElement::Redirection(_span, Redirection::StdoutAndStderr, _target_cmd) = cmd
{
outerr_index = Some(index);
break;
}
}
if let (Some(cmd_index), Some(outerr_index)) = (cmd_index, outerr_index) {
// we can make sure that cmd_index is less than outerr_index.
let outerr_redirect = pipeline.commands.remove(outerr_index);
let cmd = pipeline.commands.remove(cmd_index);
// `outerr_redirect` and `cmd` should always be `LiteElement::Command` and `LiteElement::Redirection`
if let (
LiteElement::Command(cmd_span, lite_cmd),
LiteElement::Redirection(span, _, outerr_cmd),
) = (cmd, outerr_redirect)
{
pipeline.insert(
cmd_index,
LiteElement::SameTargetRedirection {
cmd: (cmd_span, lite_cmd),
redirection: (span, outerr_cmd),
},
)
}
}
}
fn merge_redirections(&self, pipeline: &mut LitePipeline) {
// In case our command may contains both stdout and stderr redirection.
// We pick them out and Combine them into one LiteElement::SeparateRedirection variant.
let mut stdout_index = None;
let mut stderr_index = None;
for (index, cmd) in pipeline.commands.iter().enumerate() {
if let LiteElement::Redirection(_span, redirection, _target_cmd) = cmd {
match *redirection {
Redirection::Stderr => stderr_index = Some(index),
Redirection::Stdout => stdout_index = Some(index),
Redirection::StdoutAndStderr => {}
}
}
}
if let (Some(out_indx), Some(err_indx)) = (stdout_index, stderr_index) {
let (out_redirect, err_redirect, new_indx) = {
// to avoid panic, we need to remove commands which have larger index first.
if out_indx > err_indx {
let out_redirect = pipeline.commands.remove(out_indx);
let err_redirect = pipeline.commands.remove(err_indx);
(out_redirect, err_redirect, err_indx)
} else {
let err_redirect = pipeline.commands.remove(err_indx);
let out_redirect = pipeline.commands.remove(out_indx);
(out_redirect, err_redirect, out_indx)
}
};
// `out_redirect` and `err_redirect` should always be `LiteElement::Redirection`
if let (
LiteElement::Redirection(out_span, _, out_command),
LiteElement::Redirection(err_span, _, err_command),
) = (out_redirect, err_redirect)
{
// using insert with specific index to keep original
// pipeline commands order.
pipeline.insert(
new_indx,
LiteElement::SeparateRedirection {
out: (out_span, out_command),
err: (err_span, err_command),
},
)
}
}
}
}
pub fn lite_parse(tokens: &[Token]) -> (LiteBlock, Option<ParseError>) {
let mut block = LiteBlock::new();
let mut curr_pipeline = LitePipeline::new();
let mut curr_command = LiteCommand::new();
let mut last_token = TokenContents::Eol;
let mut last_connector = TokenContents::Pipe;
let mut last_connector_span: Option<Span> = None;
if tokens.is_empty() {
return (LiteBlock::new(), None);
}
let mut curr_comment: Option<Vec<Span>> = None;
let mut error = None;
for token in tokens.iter() {
match &token.contents {
TokenContents::PipePipe => {
error = error.or(Some(ParseError::ShellOrOr(token.span)));
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::Item => {
// If we have a comment, go ahead and attach it
if let Some(curr_comment) = curr_comment.take() {
curr_command.comments = curr_comment;
}
curr_command.push(token.span);
last_token = TokenContents::Item;
}
TokenContents::OutGreaterThan
| TokenContents::ErrGreaterThan
| TokenContents::OutErrGreaterThan => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = token.contents;
last_connector = token.contents;
last_connector_span = Some(token.span);
}
TokenContents::Pipe => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
token.span,
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
last_token = TokenContents::Pipe;
last_connector = TokenContents::Pipe;
last_connector_span = Some(token.span);
}
TokenContents::Eol => {
if last_token != TokenContents::Pipe && last_token != TokenContents::OutGreaterThan
{
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span.expect(
"internal error: redirection missing span information",
),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
last_connector = TokenContents::Pipe;
last_connector_span = None;
}
}
if last_token == TokenContents::Eol {
// Clear out the comment as we're entering a new comment
curr_comment = None;
}
last_token = TokenContents::Eol;
}
TokenContents::Semicolon => {
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline
.push(LiteElement::Command(last_connector_span, curr_command));
}
}
curr_command = LiteCommand::new();
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
curr_pipeline = LitePipeline::new();
last_connector = TokenContents::Pipe;
last_connector_span = None;
}
last_token = TokenContents::Semicolon;
}
TokenContents::Comment => {
// Comment is beside something
if last_token != TokenContents::Eol {
curr_command.comments.push(token.span);
curr_comment = None;
} else {
// Comment precedes something
if let Some(curr_comment) = &mut curr_comment {
curr_comment.push(token.span);
} else {
curr_comment = Some(vec![token.span]);
}
}
last_token = TokenContents::Comment;
}
}
}
if !curr_command.is_empty() {
match last_connector {
TokenContents::OutGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stdout,
curr_command,
));
}
TokenContents::ErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::Stderr,
curr_command,
));
}
TokenContents::OutErrGreaterThan => {
curr_pipeline.push(LiteElement::Redirection(
last_connector_span
.expect("internal error: redirection missing span information"),
Redirection::StdoutAndStderr,
curr_command,
));
}
_ => {
curr_pipeline.push(LiteElement::Command(last_connector_span, curr_command));
}
}
}
if !curr_pipeline.is_empty() {
block.push(curr_pipeline);
}
if last_token == TokenContents::Pipe {
(
block,
Some(ParseError::UnexpectedEof(
"pipeline missing end".into(),
tokens[tokens.len() - 1].span,
)),
)
} else {
(block, error)
}
}