Replace ExternalStream with new ByteStream type (#12774)

# Description
This PR introduces a `ByteStream` type which is a `Read`-able stream of
bytes. Internally, it has an enum over three different byte stream
sources:
```rust
pub enum ByteStreamSource {
    Read(Box<dyn Read + Send + 'static>),
    File(File),
    Child(ChildProcess),
}
```

This is in comparison to the current `RawStream` type, which is an
`Iterator<Item = Vec<u8>>` and has to allocate for each read chunk.

Currently, `PipelineData::ExternalStream` serves a weird dual role where
it is either external command output or a wrapper around `RawStream`.
`ByteStream` makes this distinction more clear (via `ByteStreamSource`)
and replaces `PipelineData::ExternalStream` in this PR:
```rust
pub enum PipelineData {
    Empty,
    Value(Value, Option<PipelineMetadata>),
    ListStream(ListStream, Option<PipelineMetadata>),
    ByteStream(ByteStream, Option<PipelineMetadata>),
}
```

The PR is relatively large, but a decent amount of it is just repetitive
changes.

This PR fixes #7017, fixes #10763, and fixes #12369.

This PR also improves performance when piping external commands. Nushell
should, in most cases, have competitive pipeline throughput compared to,
e.g., bash.
| Command | Before (MB/s) | After (MB/s) | Bash (MB/s) |
| -------------------------------------------------- | -------------:|
------------:| -----------:|
| `throughput \| rg 'x'` | 3059 | 3744 | 3739 |
| `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 |

# User-Facing Changes
- This is a breaking change for the plugin communication protocol,
because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`.
Plugins now only have to deal with a single input stream, as opposed to
the previous three streams: stdout, stderr, and exit code.
- The output of `describe` has been changed for external/byte streams.
- Temporary breaking change: `bytes starts-with` no longer works with
byte streams. This is to keep the PR smaller, and `bytes ends-with`
already does not work on byte streams.
- If a process core dumped, then instead of having a `Value::Error` in
the `exit_code` column of the output returned from `complete`, it now is
a `Value::Int` with the negation of the signal number.

# After Submitting
- Update docs and book as necessary
- Release notes (e.g., plugin protocol changes)
- Adapt/convert commands to work with byte streams (high priority is
`str length`, `bytes starts-with`, and maybe `bytes ends-with`).
- Refactor the `tee` code, Devyn has already done some work on this.

---------

Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
This commit is contained in:
Ian Manske
2024-05-16 14:11:18 +00:00
committed by GitHub
parent 1b8eb23785
commit 6fd854ed9f
210 changed files with 3955 additions and 4012 deletions

View File

@ -43,7 +43,7 @@ impl Command for Collect {
stack.captures_to_stack_preserve_out_dest(closure.captures.clone());
let metadata = input.metadata();
let input = input.into_value(call.head);
let input = input.into_value(call.head)?;
let mut saved_positional = None;
if let Some(var) = block.signature.get_positional(0) {

View File

@ -1,5 +1,5 @@
use nu_engine::command_prelude::*;
use nu_protocol::{engine::StateWorkingSet, PipelineMetadata};
use nu_protocol::{engine::StateWorkingSet, ByteStreamSource, PipelineMetadata};
#[derive(Clone)]
pub struct Describe;
@ -162,73 +162,38 @@ fn run(
let metadata = input.metadata();
let description = match input {
PipelineData::ExternalStream {
ref stdout,
ref stderr,
ref exit_code,
..
} => {
if options.detailed {
let stdout = if stdout.is_some() {
Value::record(
record! {
"type" => Value::string("stream", head),
"origin" => Value::string("external", head),
"subtype" => Value::string("any", head),
},
head,
)
} else {
Value::nothing(head)
};
let stderr = if stderr.is_some() {
Value::record(
record! {
"type" => Value::string("stream", head),
"origin" => Value::string("external", head),
"subtype" => Value::string("any", head),
},
head,
)
} else {
Value::nothing(head)
};
let exit_code = if exit_code.is_some() {
Value::record(
record! {
"type" => Value::string("stream", head),
"origin" => Value::string("external", head),
"subtype" => Value::string("int", head),
},
head,
)
} else {
Value::nothing(head)
PipelineData::ByteStream(stream, ..) => {
let description = if options.detailed {
let origin = match stream.source() {
ByteStreamSource::Read(_) => "unknown",
ByteStreamSource::File(_) => "file",
ByteStreamSource::Child(_) => "external",
};
Value::record(
record! {
"type" => Value::string("stream", head),
"origin" => Value::string("external", head),
"stdout" => stdout,
"stderr" => stderr,
"exit_code" => exit_code,
"type" => Value::string("byte stream", head),
"origin" => Value::string(origin, head),
"metadata" => metadata_to_value(metadata, head),
},
head,
)
} else {
Value::string("raw input", head)
Value::string("byte stream", head)
};
if !options.no_collect {
stream.drain()?;
}
description
}
PipelineData::ListStream(_, _) => {
PipelineData::ListStream(stream, ..) => {
if options.detailed {
let subtype = if options.no_collect {
Value::string("any", head)
} else {
describe_value(input.into_value(head), head, engine_state)
describe_value(stream.into_value(), head, engine_state)
};
Value::record(
record! {
@ -242,19 +207,19 @@ fn run(
} else if options.no_collect {
Value::string("stream", head)
} else {
let value = input.into_value(head);
let value = stream.into_value();
let base_description = value.get_type().to_string();
Value::string(format!("{} (stream)", base_description), head)
}
}
_ => {
let value = input.into_value(head);
PipelineData::Value(value, ..) => {
if !options.detailed {
Value::string(value.get_type().to_string(), head)
} else {
describe_value(value, head, engine_state)
}
}
PipelineData::Empty => Value::string(Type::Nothing.to_string(), head),
};
Ok(description.into_pipeline_data())

View File

@ -1,6 +1,13 @@
use nu_engine::{command_prelude::*, get_eval_block_with_early_return, redirect_env};
use nu_protocol::{engine::Closure, ListStream, OutDest, RawStream};
use std::thread;
use nu_protocol::{
engine::Closure,
process::{ChildPipe, ChildProcess, ExitStatus},
ByteStream, ByteStreamSource, OutDest,
};
use std::{
io::{Cursor, Read},
thread,
};
#[derive(Clone)]
pub struct Do;
@ -86,115 +93,91 @@ impl Command for Do {
}
match result {
Ok(PipelineData::ExternalStream {
stdout,
stderr,
exit_code,
span,
metadata,
trim_end_newline,
}) if capture_errors => {
// Use a thread to receive stdout message.
// Or we may get a deadlock if child process sends out too much bytes to stderr.
//
// For example: in normal linux system, stderr pipe's limit is 65535 bytes.
// if child process sends out 65536 bytes, the process will be hanged because no consumer
// consumes the first 65535 bytes
// So we need a thread to receive stdout message, then the current thread can continue to consume
// stderr messages.
let stdout_handler = stdout
.map(|stdout_stream| {
thread::Builder::new()
.name("stderr redirector".to_string())
.spawn(move || {
let ctrlc = stdout_stream.ctrlc.clone();
let span = stdout_stream.span;
RawStream::new(
Box::new(std::iter::once(
stdout_stream.into_bytes().map(|s| s.item),
)),
ctrlc,
span,
None,
)
Ok(PipelineData::ByteStream(stream, metadata)) if capture_errors => {
let span = stream.span();
match stream.into_child() {
Ok(mut child) => {
// Use a thread to receive stdout message.
// Or we may get a deadlock if child process sends out too much bytes to stderr.
//
// For example: in normal linux system, stderr pipe's limit is 65535 bytes.
// if child process sends out 65536 bytes, the process will be hanged because no consumer
// consumes the first 65535 bytes
// So we need a thread to receive stdout message, then the current thread can continue to consume
// stderr messages.
let stdout_handler = child
.stdout
.take()
.map(|mut stdout| {
thread::Builder::new()
.name("stdout consumer".to_string())
.spawn(move || {
let mut buf = Vec::new();
stdout.read_to_end(&mut buf)?;
Ok::<_, ShellError>(buf)
})
.err_span(head)
})
.err_span(head)
})
.transpose()?;
.transpose()?;
// Intercept stderr so we can return it in the error if the exit code is non-zero.
// The threading issues mentioned above dictate why we also need to intercept stdout.
let mut stderr_ctrlc = None;
let stderr_msg = match stderr {
None => "".to_string(),
Some(stderr_stream) => {
stderr_ctrlc.clone_from(&stderr_stream.ctrlc);
stderr_stream.into_string().map(|s| s.item)?
}
};
// Intercept stderr so we can return it in the error if the exit code is non-zero.
// The threading issues mentioned above dictate why we also need to intercept stdout.
let stderr_msg = match child.stderr.take() {
None => String::new(),
Some(mut stderr) => {
let mut buf = String::new();
stderr.read_to_string(&mut buf).err_span(span)?;
buf
}
};
let stdout = if let Some(handle) = stdout_handler {
match handle.join() {
Err(err) => {
let stdout = if let Some(handle) = stdout_handler {
match handle.join() {
Err(err) => {
return Err(ShellError::ExternalCommand {
label: "Fail to receive external commands stdout message"
.to_string(),
help: format!("{err:?}"),
span,
});
}
Ok(res) => Some(res?),
}
} else {
None
};
if child.wait()? != ExitStatus::Exited(0) {
return Err(ShellError::ExternalCommand {
label: "Fail to receive external commands stdout message"
.to_string(),
help: format!("{err:?}"),
label: "External command failed".to_string(),
help: stderr_msg,
span,
});
}
Ok(res) => Some(res),
}
} else {
None
};
let exit_code: Vec<Value> = match exit_code {
None => vec![],
Some(exit_code_stream) => exit_code_stream.into_iter().collect(),
};
if let Some(Value::Int { val: code, .. }) = exit_code.last() {
if *code != 0 {
return Err(ShellError::ExternalCommand {
label: "External command failed".to_string(),
help: stderr_msg,
span,
});
let mut child = ChildProcess::from_raw(None, None, None, span);
if let Some(stdout) = stdout {
child.stdout = Some(ChildPipe::Tee(Box::new(Cursor::new(stdout))));
}
if !stderr_msg.is_empty() {
child.stderr = Some(ChildPipe::Tee(Box::new(Cursor::new(stderr_msg))));
}
Ok(PipelineData::ByteStream(
ByteStream::child(child, span),
metadata,
))
}
Err(stream) => Ok(PipelineData::ByteStream(stream, metadata)),
}
Ok(PipelineData::ExternalStream {
stdout,
stderr: Some(RawStream::new(
Box::new(std::iter::once(Ok(stderr_msg.into_bytes()))),
stderr_ctrlc,
span,
None,
)),
exit_code: Some(ListStream::new(exit_code.into_iter(), span, None)),
span,
metadata,
trim_end_newline,
})
}
Ok(PipelineData::ExternalStream {
stdout,
stderr,
exit_code: _,
span,
metadata,
trim_end_newline,
}) if ignore_program_errors
&& !matches!(caller_stack.stdout(), OutDest::Pipe | OutDest::Capture) =>
Ok(PipelineData::ByteStream(mut stream, metadata))
if ignore_program_errors
&& !matches!(caller_stack.stdout(), OutDest::Pipe | OutDest::Capture) =>
{
Ok(PipelineData::ExternalStream {
stdout,
stderr,
exit_code: None,
span,
metadata,
trim_end_newline,
})
if let ByteStreamSource::Child(child) = stream.source_mut() {
child.set_exit_code(0)
}
Ok(PipelineData::ByteStream(stream, metadata))
}
Ok(PipelineData::Value(Value::Error { .. }, ..)) | Err(_) if ignore_shell_errors => {
Ok(PipelineData::empty())

View File

@ -121,12 +121,14 @@ impl Command for For {
Err(err) => {
return Err(err);
}
Ok(pipeline) => {
let exit_code = pipeline.drain_with_exit_code()?;
if exit_code != 0 {
return Ok(PipelineData::new_external_stream_with_only_exit_code(
exit_code,
));
Ok(data) => {
if let Some(status) = data.drain()? {
let code = status.code();
if code != 0 {
return Ok(
PipelineData::new_external_stream_with_only_exit_code(code),
);
}
}
}
}
@ -159,12 +161,14 @@ impl Command for For {
Err(err) => {
return Err(err);
}
Ok(pipeline) => {
let exit_code = pipeline.drain_with_exit_code()?;
if exit_code != 0 {
return Ok(PipelineData::new_external_stream_with_only_exit_code(
exit_code,
));
Ok(data) => {
if let Some(status) = data.drain()? {
let code = status.code();
if code != 0 {
return Ok(
PipelineData::new_external_stream_with_only_exit_code(code),
);
}
}
}
}
@ -173,7 +177,7 @@ impl Command for For {
x => {
stack.add_var(var_id, x);
eval_block(&engine_state, stack, block, PipelineData::empty())?.into_value(head);
eval_block(&engine_state, stack, block, PipelineData::empty())?.into_value(head)?;
}
}
Ok(PipelineData::empty())

View File

@ -61,7 +61,7 @@ impl Command for Let {
let eval_block = get_eval_block(engine_state);
let stack = &mut stack.start_capture();
let pipeline_data = eval_block(engine_state, stack, block, input)?;
let value = pipeline_data.into_value(call.head);
let value = pipeline_data.into_value(call.head)?;
// if given variable type is Glob, and our result is string
// then nushell need to convert from Value::String to Value::Glob

View File

@ -53,12 +53,12 @@ impl Command for Loop {
Err(err) => {
return Err(err);
}
Ok(pipeline) => {
let exit_code = pipeline.drain_with_exit_code()?;
if exit_code != 0 {
return Ok(PipelineData::new_external_stream_with_only_exit_code(
exit_code,
));
Ok(data) => {
if let Some(status) = data.drain()? {
let code = status.code();
if code != 0 {
return Ok(PipelineData::new_external_stream_with_only_exit_code(code));
}
}
}
}

View File

@ -61,7 +61,7 @@ impl Command for Mut {
let eval_block = get_eval_block(engine_state);
let stack = &mut stack.start_capture();
let pipeline_data = eval_block(engine_state, stack, block, input)?;
let value = pipeline_data.into_value(call.head);
let value = pipeline_data.into_value(call.head)?;
// if given variable type is Glob, and our result is string
// then nushell need to convert from Value::String to Value::Glob

View File

@ -62,10 +62,11 @@ impl Command for Try {
}
// external command may fail to run
Ok(pipeline) => {
let (pipeline, external_failed) = pipeline.check_external_failed();
let (pipeline, external_failed) = pipeline.check_external_failed()?;
if external_failed {
let exit_code = pipeline.drain_with_exit_code()?;
stack.add_env_var("LAST_EXIT_CODE".into(), Value::int(exit_code, call.head));
let status = pipeline.drain()?;
let code = status.map(|status| status.code()).unwrap_or(0);
stack.add_env_var("LAST_EXIT_CODE".into(), Value::int(code.into(), call.head));
let err_value = Value::nothing(call.head);
handle_catch(err_value, catch_block, engine_state, stack, eval_block)
} else {

View File

@ -70,14 +70,16 @@ impl Command for While {
Err(err) => {
return Err(err);
}
Ok(pipeline) => {
let exit_code = pipeline.drain_with_exit_code()?;
if exit_code != 0 {
return Ok(
PipelineData::new_external_stream_with_only_exit_code(
exit_code,
),
);
Ok(data) => {
if let Some(status) = data.drain()? {
let code = status.code();
if code != 0 {
return Ok(
PipelineData::new_external_stream_with_only_exit_code(
code,
),
);
}
}
}
}

View File

@ -122,10 +122,9 @@ pub fn eval_block(
stack.add_env_var("PWD".to_string(), Value::test_string(cwd.to_string_lossy()));
match nu_engine::eval_block::<WithoutDebug>(engine_state, &mut stack, &block, input) {
Err(err) => panic!("test eval error in `{}`: {:?}", "TODO", err),
Ok(result) => result.into_value(Span::test_data()),
}
nu_engine::eval_block::<WithoutDebug>(engine_state, &mut stack, &block, input)
.and_then(|data| data.into_value(Span::test_data()))
.unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", "TODO", err))
}
pub fn check_example_evaluates_to_expected_output(