Replace ExternalStream with new ByteStream type (#12774)

# Description
This PR introduces a `ByteStream` type which is a `Read`-able stream of
bytes. Internally, it has an enum over three different byte stream
sources:
```rust
pub enum ByteStreamSource {
    Read(Box<dyn Read + Send + 'static>),
    File(File),
    Child(ChildProcess),
}
```

This is in comparison to the current `RawStream` type, which is an
`Iterator<Item = Vec<u8>>` and has to allocate for each read chunk.

Currently, `PipelineData::ExternalStream` serves a weird dual role where
it is either external command output or a wrapper around `RawStream`.
`ByteStream` makes this distinction more clear (via `ByteStreamSource`)
and replaces `PipelineData::ExternalStream` in this PR:
```rust
pub enum PipelineData {
    Empty,
    Value(Value, Option<PipelineMetadata>),
    ListStream(ListStream, Option<PipelineMetadata>),
    ByteStream(ByteStream, Option<PipelineMetadata>),
}
```

The PR is relatively large, but a decent amount of it is just repetitive
changes.

This PR fixes #7017, fixes #10763, and fixes #12369.

This PR also improves performance when piping external commands. Nushell
should, in most cases, have competitive pipeline throughput compared to,
e.g., bash.
| Command | Before (MB/s) | After (MB/s) | Bash (MB/s) |
| -------------------------------------------------- | -------------:|
------------:| -----------:|
| `throughput \| rg 'x'` | 3059 | 3744 | 3739 |
| `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 |

# User-Facing Changes
- This is a breaking change for the plugin communication protocol,
because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`.
Plugins now only have to deal with a single input stream, as opposed to
the previous three streams: stdout, stderr, and exit code.
- The output of `describe` has been changed for external/byte streams.
- Temporary breaking change: `bytes starts-with` no longer works with
byte streams. This is to keep the PR smaller, and `bytes ends-with`
already does not work on byte streams.
- If a process core dumped, then instead of having a `Value::Error` in
the `exit_code` column of the output returned from `complete`, it now is
a `Value::Int` with the negation of the signal number.

# After Submitting
- Update docs and book as necessary
- Release notes (e.g., plugin protocol changes)
- Adapt/convert commands to work with byte streams (high priority is
`str length`, `bytes starts-with`, and maybe `bytes ends-with`).
- Refactor the `tee` code, Devyn has already done some work on this.

---------

Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
This commit is contained in:
Ian Manske
2024-05-16 14:11:18 +00:00
committed by GitHub
parent 1b8eb23785
commit 6fd854ed9f
210 changed files with 3955 additions and 4012 deletions

View File

@ -108,7 +108,7 @@ impl<'a> PluginExecutionContext for PluginExecutionCommandContext<'a> {
Value::Closure { val, .. } => {
ClosureEvalOnce::new(&self.engine_state, &self.stack, *val)
.run_with_input(PipelineData::Empty)
.map(|data| data.into_value(span))
.and_then(|data| data.into_value(span))
.unwrap_or_else(|err| Value::error(err, self.call.head))
}
_ => value.clone(),

View File

@ -26,7 +26,7 @@ use crate::{
/// This should be larger than the largest commonly sent message to avoid excessive fragmentation.
///
/// The buffers coming from external streams are typically each 8192 bytes, so double that.
/// The buffers coming from byte streams are typically each 8192 bytes, so double that.
pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384;
/// Spawn the command for a plugin, in the given `mode`. After spawning, it can be passed to

View File

@ -519,8 +519,8 @@ impl InterfaceManager for PluginInterfaceManager {
.map_data(|data| {
let ctrlc = self.get_ctrlc(id)?;
// Register the streams in the response
for stream_id in data.stream_ids() {
// Register the stream in the response
if let Some(stream_id) = data.stream_id() {
self.recv_stream_started(id, stream_id);
}
@ -602,7 +602,7 @@ impl InterfaceManager for PluginInterfaceManager {
meta,
))
}
PipelineData::Empty | PipelineData::ExternalStream { .. } => Ok(data),
PipelineData::Empty | PipelineData::ByteStream(..) => Ok(data),
}
}
@ -953,7 +953,7 @@ impl PluginInterface {
let call = PluginCall::CustomValueOp(value.map(|cv| cv.without_source()), op);
match self.plugin_call(call, None)? {
PluginCallResponse::PipelineData(out_data) => Ok(out_data.into_value(span)),
PluginCallResponse::PipelineData(out_data) => out_data.into_value(span),
PluginCallResponse::Error(err) => Err(err.into()),
_ => Err(ShellError::PluginFailedToDecode {
msg: format!("Received unexpected response to custom value {op_name}() call"),
@ -1091,7 +1091,7 @@ impl Interface for PluginInterface {
meta,
))
}
PipelineData::Empty | PipelineData::ExternalStream { .. } => Ok(data),
PipelineData::Empty | PipelineData::ByteStream(..) => Ok(data),
}
}
}

View File

@ -9,10 +9,10 @@ use crate::{
use nu_plugin_core::{interface_test_util::TestCase, Interface, InterfaceManager};
use nu_plugin_protocol::{
test_util::{expected_test_custom_value, test_plugin_custom_value},
CallInfo, CustomValueOp, EngineCall, EngineCallResponse, EvaluatedCall, ExternalStreamInfo,
ByteStreamInfo, CallInfo, CustomValueOp, EngineCall, EngineCallResponse, EvaluatedCall,
ListStreamInfo, PipelineDataHeader, PluginCall, PluginCallId, PluginCallResponse,
PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, RawStreamInfo,
StreamData, StreamMessage,
PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, StreamData,
StreamMessage,
};
use nu_protocol::{
ast::{Math, Operator},
@ -154,16 +154,9 @@ fn manager_consume_all_propagates_message_error_to_readers() -> Result<(), Shell
test.add(invalid_output());
let stream = manager.read_pipeline_data(
PipelineDataHeader::ExternalStream(ExternalStreamInfo {
PipelineDataHeader::ByteStream(ByteStreamInfo {
id: 0,
span: Span::test_data(),
stdout: Some(RawStreamInfo {
id: 0,
is_binary: false,
known_size: None,
}),
stderr: None,
exit_code: None,
trim_end_newline: false,
}),
None,
)?;
@ -378,7 +371,7 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> {
fake_plugin_call(&mut manager, n);
}
// Check list streams, external streams
// Check list streams, byte streams
manager.consume(PluginOutput::CallResponse(
0,
PluginCallResponse::PipelineData(PipelineDataHeader::ListStream(ListStreamInfo {
@ -388,23 +381,9 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> {
))?;
manager.consume(PluginOutput::CallResponse(
1,
PluginCallResponse::PipelineData(PipelineDataHeader::ExternalStream(ExternalStreamInfo {
PluginCallResponse::PipelineData(PipelineDataHeader::ByteStream(ByteStreamInfo {
id: 1,
span: Span::test_data(),
stdout: Some(RawStreamInfo {
id: 1,
is_binary: false,
known_size: None,
}),
stderr: Some(RawStreamInfo {
id: 2,
is_binary: false,
known_size: None,
}),
exit_code: Some(ListStreamInfo {
id: 3,
span: Span::test_data(),
}),
trim_end_newline: false,
})),
))?;
@ -423,22 +402,20 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> {
"plugin_call_input_streams[0] should be Some(0)"
);
// ExternalStream should have three
// ByteStream should have one
if let Some(sub) = manager.plugin_call_states.get(&1) {
assert_eq!(
3, sub.remaining_streams_to_read,
"ExternalStream remaining_streams_to_read should be 3"
1, sub.remaining_streams_to_read,
"ByteStream remaining_streams_to_read should be 1"
);
} else {
panic!("failed to find subscription for ExternalStream (1), maybe it was removed");
}
for n in [1, 2, 3] {
assert_eq!(
Some(&1),
manager.plugin_call_input_streams.get(&n),
"plugin_call_input_streams[{n}] should be Some(1)"
);
panic!("failed to find subscription for ByteStream (1), maybe it was removed");
}
assert_eq!(
Some(&1),
manager.plugin_call_input_streams.get(&1),
"plugin_call_input_streams[1] should be Some(1)"
);
Ok(())
}
@ -1087,7 +1064,7 @@ fn interface_run() -> Result<(), ShellError> {
assert_eq!(
Value::test_int(number),
result.into_value(Span::test_data())
result.into_value(Span::test_data())?,
);
assert!(test.has_unconsumed_write());
Ok(())
@ -1136,7 +1113,7 @@ fn interface_prepare_pipeline_data_accepts_normal_values() -> Result<(), ShellEr
match interface.prepare_pipeline_data(PipelineData::Value(value.clone(), None), &state) {
Ok(data) => assert_eq!(
value.get_type(),
data.into_value(Span::test_data()).get_type()
data.into_value(Span::test_data())?.get_type(),
),
Err(err) => panic!("failed to accept {value:?}: {err}"),
}