Replace ExternalStream with new ByteStream type (#12774)

# Description
This PR introduces a `ByteStream` type which is a `Read`-able stream of
bytes. Internally, it has an enum over three different byte stream
sources:
```rust
pub enum ByteStreamSource {
    Read(Box<dyn Read + Send + 'static>),
    File(File),
    Child(ChildProcess),
}
```

This is in comparison to the current `RawStream` type, which is an
`Iterator<Item = Vec<u8>>` and has to allocate for each read chunk.

Currently, `PipelineData::ExternalStream` serves a weird dual role where
it is either external command output or a wrapper around `RawStream`.
`ByteStream` makes this distinction clearer (via `ByteStreamSource`)
and replaces `PipelineData::ExternalStream` in this PR:
```rust
pub enum PipelineData {
    Empty,
    Value(Value, Option<PipelineMetadata>),
    ListStream(ListStream, Option<PipelineMetadata>),
    ByteStream(ByteStream, Option<PipelineMetadata>),
}
```

The PR is relatively large, but a decent amount of it is just repetitive
changes.

This PR fixes #7017, fixes #10763, and fixes #12369.

This PR also improves performance when piping external commands. Nushell
should, in most cases, have competitive pipeline throughput compared to,
e.g., bash.

| Command | Before (MB/s) | After (MB/s) | Bash (MB/s) |
| -------------------------------------------------- | -------------:| ------------:| -----------:|
| `throughput \| rg 'x'` | 3059 | 3744 | 3739 |
| `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 |

# User-Facing Changes
- This is a breaking change for the plugin communication protocol,
because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`.
Plugins now only have to deal with a single input stream, as opposed to
the previous three streams: stdout, stderr, and exit code.
- The output of `describe` has been changed for external/byte streams.
- Temporary breaking change: `bytes starts-with` no longer works with
byte streams. This is to keep the PR smaller, and `bytes ends-with`
already does not work on byte streams.
- If a process core dumped, the `exit_code` column of the output returned
from `complete` is now a `Value::Int` holding the negation of the signal
number, instead of a `Value::Error`.

# After Submitting
- Update docs and book as necessary
- Release notes (e.g., plugin protocol changes)
- Adapt/convert commands to work with byte streams (high priority is
`str length`, `bytes starts-with`, and maybe `bytes ends-with`).
- Refactor the `tee` code; Devyn has already done some work on this.

---------

Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
This commit is contained in:
Ian Manske
2024-05-16 14:11:18 +00:00
committed by GitHub
parent 1b8eb23785
commit 6fd854ed9f
210 changed files with 3955 additions and 4012 deletions

View File

@ -150,7 +150,7 @@ pub fn to_delimited_data(
span: Span,
config: &Config,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(span);
let value = input.into_value(span)?;
let output = match from_value_to_delimited_string(&value, sep, config, span) {
Ok(mut x) => {
if noheaders {

View File

@ -46,7 +46,7 @@ impl Command for ToJson {
let span = call.head;
// allow ranges to expand and turn into array
let input = input.try_expand_range()?;
let value = input.into_value(span);
let value = input.into_value(span)?;
let json_value = value_to_json_value(&value)?;
let json_result = if raw {

View File

@ -75,7 +75,7 @@ MessagePack: https://msgpack.org/
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let value_span = input.span().unwrap_or(call.head);
let value = input.into_value(value_span);
let value = input.into_value(value_span)?;
let mut out = vec![];
write_value(&mut out, &value, 0)?;

View File

@ -70,7 +70,7 @@ impl Command for ToMsgpackz {
.transpose()?;
let value_span = input.span().unwrap_or(call.head);
let value = input.into_value(value_span);
let value = input.into_value(value_span)?;
let mut out_buf = vec![];
let mut out = brotli::CompressorWriter::new(
&mut out_buf,

View File

@ -53,7 +53,7 @@ impl Command for ToNuon {
};
let span = call.head;
let value = input.into_value(span);
let value = input.into_value(span)?;
match nuon::to_nuon(&value, style, Some(span)) {
Ok(serde_nuon_string) => {

View File

@ -1,6 +1,12 @@
use chrono_humanize::HumanTime;
use nu_engine::command_prelude::*;
use nu_protocol::{format_duration, format_filesize_from_conf, Config, RawStream, ValueIterator};
use nu_protocol::{format_duration, format_filesize_from_conf, ByteStream, Config};
const LINE_ENDING: &str = if cfg!(target_os = "windows") {
"\r\n"
} else {
"\n"
};
#[derive(Clone)]
pub struct ToText;
@ -28,39 +34,28 @@ impl Command for ToText {
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let span = call.head;
let config = engine_state.get_config();
let line_ending = if cfg!(target_os = "windows") {
"\r\n"
} else {
"\n"
};
let input = input.try_expand_range()?;
if let PipelineData::ListStream(stream, _) = input {
Ok(PipelineData::ExternalStream {
stdout: Some(RawStream::new(
Box::new(ListStreamIterator {
stream: stream.into_inner(),
separator: line_ending.into(),
config: config.clone(),
}),
engine_state.ctrlc.clone(),
span,
None,
)),
stderr: None,
exit_code: None,
span,
metadata: None,
trim_end_newline: false,
})
} else {
// FIXME: don't collect! stream the output wherever possible!
// Even if the data is collected when it arrives at `to text`, we should be able to stream it out
let collected_input = local_into_string(input.into_value(span), line_ending, config);
Ok(Value::string(collected_input, span).into_pipeline_data())
match input {
PipelineData::Empty => Ok(Value::string(String::new(), span).into_pipeline_data()),
PipelineData::Value(value, ..) => {
let str = local_into_string(value, LINE_ENDING, engine_state.get_config());
Ok(Value::string(str, span).into_pipeline_data())
}
PipelineData::ListStream(stream, meta) => {
let span = stream.span();
let config = engine_state.get_config().clone();
let iter = stream.into_inner().map(move |value| {
let mut str = local_into_string(value, LINE_ENDING, &config);
str.push_str(LINE_ENDING);
str
});
Ok(PipelineData::ByteStream(
ByteStream::from_iter(iter, span, engine_state.ctrlc.clone()),
meta,
))
}
PipelineData::ByteStream(stream, meta) => Ok(PipelineData::ByteStream(stream, meta)),
}
}
@ -85,26 +80,6 @@ impl Command for ToText {
}
}
struct ListStreamIterator {
stream: ValueIterator,
separator: String,
config: Config,
}
impl Iterator for ListStreamIterator {
type Item = Result<Vec<u8>, ShellError>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(item) = self.stream.next() {
let mut string = local_into_string(item, &self.separator, &self.config);
string.push_str(&self.separator);
Some(Ok(string.as_bytes().to_vec()))
} else {
None
}
}
}
fn local_into_string(value: Value, separator: &str, config: &Config) -> String {
let span = value.span();
match value {

View File

@ -141,7 +141,7 @@ fn to_toml(
input: PipelineData,
span: Span,
) -> Result<PipelineData, ShellError> {
let value = input.into_value(span);
let value = input.into_value(span)?;
let toml_value = value_to_toml_value(engine_state, &value, span)?;
match toml_value {

View File

@ -132,7 +132,7 @@ impl Job {
}
fn run(mut self, input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
let value = input.into_value(head);
let value = input.into_value(head)?;
self.write_xml_entry(value, true).and_then(|_| {
let b = self.writer.into_inner().into_inner();

View File

@ -95,7 +95,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
}
fn to_yaml(input: PipelineData, head: Span) -> Result<PipelineData, ShellError> {
let value = input.into_value(head);
let value = input.into_value(head)?;
let yaml_value = value_to_yaml_value(&value)?;
match serde_yaml::to_string(&yaml_value) {