Replace ExternalStream with new ByteStream type (#12774)

# Description
This PR introduces a `ByteStream` type which is a `Read`-able stream of
bytes. Internally, it has an enum over three different byte stream
sources:
```rust
pub enum ByteStreamSource {
    Read(Box<dyn Read + Send + 'static>),
    File(File),
    Child(ChildProcess),
}
```

This is in comparison to the current `RawStream` type, which is an
`Iterator<Item = Vec<u8>>` and has to allocate for each read chunk.

Currently, `PipelineData::ExternalStream` serves a weird dual role where
it is either external command output or a wrapper around `RawStream`.
`ByteStream` makes this distinction clearer (via `ByteStreamSource`)
and replaces `PipelineData::ExternalStream` in this PR:
```rust
pub enum PipelineData {
    Empty,
    Value(Value, Option<PipelineMetadata>),
    ListStream(ListStream, Option<PipelineMetadata>),
    ByteStream(ByteStream, Option<PipelineMetadata>),
}
```

The PR is relatively large, but a decent amount of it is just repetitive
changes.

This PR fixes #7017, fixes #10763, and fixes #12369.

This PR also improves performance when piping external commands. Nushell
should, in most cases, have competitive pipeline throughput compared to,
e.g., bash.
| Command | Before (MB/s) | After (MB/s) | Bash (MB/s) |
| -------------------------------------------------- | -------------:| ------------:| -----------:|
| `throughput \| rg 'x'` | 3059 | 3744 | 3739 |
| `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 |

# User-Facing Changes
- This is a breaking change for the plugin communication protocol,
because `ExternalStreamInfo` was replaced with `ByteStreamInfo`.
Plugins now only have to deal with a single input stream, as opposed to
the previous three streams: stdout, stderr, and exit code.
- The output of `describe` has been changed for external/byte streams.
- Temporary breaking change: `bytes starts-with` no longer works with
byte streams. This is to keep the PR smaller, and `bytes ends-with`
already does not work on byte streams.
- If a process dumped core, then instead of a `Value::Error` in the
`exit_code` column of the output returned from `complete`, there is now
a `Value::Int` holding the negation of the signal number.

# After Submitting
- Update docs and book as necessary
- Release notes (e.g., plugin protocol changes)
- Adapt/convert commands to work with byte streams (high priority is
`str length`, `bytes starts-with`, and maybe `bytes ends-with`).
- Refactor the `tee` code; Devyn has already done some work on this.

---------

Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
This commit is contained in:
Ian Manske
2024-05-16 14:11:18 +00:00
committed by GitHub
parent 1b8eb23785
commit 6fd854ed9f
210 changed files with 3955 additions and 4012 deletions

View File

@ -53,7 +53,7 @@ fn nu_highlight_string(code_string: &str, engine_state: &EngineState, stack: &mu
Value::string(code_string, Span::unknown()).into_pipeline_data(),
) {
let result = output.into_value(Span::unknown());
if let Ok(s) = result.coerce_into_string() {
if let Ok(s) = result.and_then(Value::coerce_into_string) {
return s; // successfully highlighted string
}
}
@ -280,7 +280,7 @@ fn get_documentation(
) {
Ok(output) => {
let result = output.into_value(Span::unknown());
match result.coerce_into_string() {
match result.and_then(Value::coerce_into_string) {
Ok(s) => {
let _ = write!(long_desc, "\n > {s}\n");
}

View File

@ -350,14 +350,15 @@ fn get_converted_value(
.and_then(|record| record.get(direction));
if let Some(conversion) = conversion {
match conversion.as_closure() {
Ok(closure) => ClosureEvalOnce::new(engine_state, stack, closure.clone())
.debug(false)
.run_with_value(orig_val.clone())
.map(|data| ConversionResult::Ok(data.into_value(orig_val.span())))
.unwrap_or_else(ConversionResult::ConversionError),
Err(e) => ConversionResult::ConversionError(e),
}
conversion
.as_closure()
.and_then(|closure| {
ClosureEvalOnce::new(engine_state, stack, closure.clone())
.debug(false)
.run_with_value(orig_val.clone())
})
.and_then(|data| data.into_value(orig_val.span()))
.map_or_else(ConversionResult::ConversionError, ConversionResult::Ok)
} else {
ConversionResult::CellPathError
}

View File

@ -9,8 +9,8 @@ use nu_protocol::{
debugger::DebugContext,
engine::{Closure, EngineState, Redirection, Stack},
eval_base::Eval,
Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span, Spanned, Type,
Value, VarId, ENV_VARIABLE_ID,
ByteStreamSource, Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span,
Spanned, Type, Value, VarId, ENV_VARIABLE_ID,
};
use nu_utils::IgnoreCaseExt;
use std::{borrow::Cow, fs::OpenOptions, path::PathBuf};
@ -209,7 +209,6 @@ pub fn redirect_env(engine_state: &EngineState, caller_stack: &mut Stack, callee
}
}
#[allow(clippy::too_many_arguments)]
fn eval_external(
engine_state: &EngineState,
stack: &mut Stack,
@ -284,7 +283,7 @@ pub fn eval_expression_with_input<D: DebugContext>(
let stack = &mut stack.start_capture();
// FIXME: protect this collect with ctrl-c
input = eval_subexpression::<D>(engine_state, stack, block, input)?
.into_value(*span)
.into_value(*span)?
.follow_cell_path(&full_cell_path.tail, false)?
.into_pipeline_data()
} else {
@ -301,7 +300,7 @@ pub fn eval_expression_with_input<D: DebugContext>(
}
};
// If input is PipelineData::ExternalStream,
// If input is an external command,
// then `check_external_failed` will consume `stderr` if `stdout` is `None`.
// This should not happen if the user wants to capture stderr.
if !matches!(stack.stdout(), OutDest::Pipe | OutDest::Capture)
@ -309,15 +308,10 @@ pub fn eval_expression_with_input<D: DebugContext>(
{
Ok((input, false))
} else {
Ok(might_consume_external_result(input))
input.check_external_failed()
}
}
// Try to catch and detect if external command runs to failed.
fn might_consume_external_result(input: PipelineData) -> (PipelineData, bool) {
input.check_external_failed()
}
fn eval_redirection<D: DebugContext>(
engine_state: &EngineState,
stack: &mut Stack,
@ -410,10 +404,17 @@ fn eval_element_with_input_inner<D: DebugContext>(
element: &PipelineElement,
input: PipelineData,
) -> Result<(PipelineData, bool), ShellError> {
let (data, ok) = eval_expression_with_input::<D>(engine_state, stack, &element.expr, input)?;
let (data, failed) =
eval_expression_with_input::<D>(engine_state, stack, &element.expr, input)?;
if !matches!(data, PipelineData::ExternalStream { .. }) {
if let Some(redirection) = element.redirection.as_ref() {
if let Some(redirection) = element.redirection.as_ref() {
let is_external = if let PipelineData::ByteStream(stream, ..) = &data {
matches!(stream.source(), ByteStreamSource::Child(..))
} else {
false
};
if !is_external {
match redirection {
&PipelineRedirection::Single {
source: RedirectionSource::Stderr,
@ -424,8 +425,8 @@ fn eval_element_with_input_inner<D: DebugContext>(
..
} => {
return Err(ShellError::GenericError {
error: "`e>|` only works with external streams".into(),
msg: "`e>|` only works on external streams".into(),
error: "`e>|` only works on external commands".into(),
msg: "`e>|` only works on external commands".into(),
span: Some(span),
help: None,
inner: vec![],
@ -436,8 +437,8 @@ fn eval_element_with_input_inner<D: DebugContext>(
target: RedirectionTarget::Pipe { span },
} => {
return Err(ShellError::GenericError {
error: "`o+e>|` only works with external streams".into(),
msg: "`o+e>|` only works on external streams".into(),
error: "`o+e>|` only works on external commands".into(),
msg: "`o+e>|` only works on external commands".into(),
span: Some(span),
help: None,
inner: vec![],
@ -448,15 +449,33 @@ fn eval_element_with_input_inner<D: DebugContext>(
}
}
let data = if matches!(stack.pipe_stdout(), Some(OutDest::File(_)))
&& !matches!(stack.pipe_stderr(), Some(OutDest::Pipe))
{
data.write_to_out_dests(engine_state, stack)?
} else {
data
let has_stdout_file = matches!(stack.pipe_stdout(), Some(OutDest::File(_)));
let data = match &data {
PipelineData::Value(..) | PipelineData::ListStream(..) => {
if has_stdout_file {
data.write_to_out_dests(engine_state, stack)?;
PipelineData::Empty
} else {
data
}
}
PipelineData::ByteStream(stream, ..) => {
let write = match stream.source() {
ByteStreamSource::Read(_) | ByteStreamSource::File(_) => has_stdout_file,
ByteStreamSource::Child(_) => false,
};
if write {
data.write_to_out_dests(engine_state, stack)?;
PipelineData::Empty
} else {
data
}
}
PipelineData::Empty => PipelineData::Empty,
};
Ok((data, ok))
Ok((data, failed))
}
fn eval_element_with_input<D: DebugContext>(
@ -466,12 +485,18 @@ fn eval_element_with_input<D: DebugContext>(
input: PipelineData,
) -> Result<(PipelineData, bool), ShellError> {
D::enter_element(engine_state, element);
let result = eval_element_with_input_inner::<D>(engine_state, stack, element, input);
D::leave_element(engine_state, element, &result);
result
match eval_element_with_input_inner::<D>(engine_state, stack, element, input) {
Ok((data, failed)) => {
let res = Ok(data);
D::leave_element(engine_state, element, &res);
res.map(|data| (data, failed))
}
Err(err) => {
let res = Err(err);
D::leave_element(engine_state, element, &res);
res.map(|data| (data, false))
}
}
}
pub fn eval_block_with_early_return<D: DebugContext>(
@ -555,17 +580,20 @@ pub fn eval_block<D: DebugContext>(
}
input = PipelineData::Empty;
match output {
stream @ PipelineData::ExternalStream { .. } => {
let exit_code = stream.drain_with_exit_code()?;
stack.add_env_var(
"LAST_EXIT_CODE".into(),
Value::int(exit_code, last.expr.span),
);
if exit_code != 0 {
break;
PipelineData::ByteStream(stream, ..) => {
let span = stream.span();
let status = stream.drain()?;
if let Some(status) = status {
stack.add_env_var(
"LAST_EXIT_CODE".into(),
Value::int(status.code().into(), span),
);
if status.code() != 0 {
break;
}
}
}
PipelineData::ListStream(stream, _) => {
PipelineData::ListStream(stream, ..) => {
stream.drain()?;
}
PipelineData::Value(..) | PipelineData::Empty => {}
@ -684,7 +712,7 @@ impl Eval for EvalRuntime {
_: Span,
) -> Result<Value, ShellError> {
// FIXME: protect this collect with ctrl-c
Ok(eval_call::<D>(engine_state, stack, call, PipelineData::empty())?.into_value(call.head))
eval_call::<D>(engine_state, stack, call, PipelineData::empty())?.into_value(call.head)
}
fn eval_external_call(
@ -696,7 +724,7 @@ impl Eval for EvalRuntime {
) -> Result<Value, ShellError> {
let span = head.span;
// FIXME: protect this collect with ctrl-c
Ok(eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span))
eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span)
}
fn eval_subexpression<D: DebugContext>(
@ -706,12 +734,8 @@ impl Eval for EvalRuntime {
span: Span,
) -> Result<Value, ShellError> {
let block = engine_state.get_block(block_id);
// FIXME: protect this collect with ctrl-c
Ok(
eval_subexpression::<D>(engine_state, stack, block, PipelineData::empty())?
.into_value(span),
)
eval_subexpression::<D>(engine_state, stack, block, PipelineData::empty())?.into_value(span)
}
fn regex_match(