Replace ExternalStream with new ByteStream type (#12774)

# Description This PR introduces a `ByteStream` type which is a `Read`-able stream of bytes. Internally, it has an enum over three different byte stream sources: ```rust pub enum ByteStreamSource { Read(Box<dyn Read + Send + 'static>), File(File), Child(ChildProcess), } ``` This is in comparison to the current `RawStream` type, which is an `Iterator<Item = Vec<u8>>` and has to allocate for each read chunk. Currently, `PipelineData::ExternalStream` serves a weird dual role where it is either external command output or a wrapper around `RawStream`. `ByteStream` makes this distinction more clear (via `ByteStreamSource`) and replaces `PipelineData::ExternalStream` in this PR: ```rust pub enum PipelineData { Empty, Value(Value, Option<PipelineMetadata>), ListStream(ListStream, Option<PipelineMetadata>), ByteStream(ByteStream, Option<PipelineMetadata>), } ``` The PR is relatively large, but a decent amount of it is just repetitive changes. This PR fixes #7017, fixes #10763, and fixes #12369. This PR also improves performance when piping external commands. Nushell should, in most cases, have competitive pipeline throughput compared to, e.g., bash. | Command | Before (MB/s) | After (MB/s) | Bash (MB/s) | | -------------------------------------------------- | -------------:| ------------:| -----------:| | `throughput \| rg 'x'` | 3059 | 3744 | 3739 | | `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 | # User-Facing Changes - This is a breaking change for the plugin communication protocol, because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`. Plugins now only have to deal with a single input stream, as opposed to the previous three streams: stdout, stderr, and exit code. - The output of `describe` has been changed for external/byte streams. - Temporary breaking change: `bytes starts-with` no longer works with byte streams. This is to keep the PR smaller, and `bytes ends-with` already does not work on byte streams. - If a process core dumped, then instead of having a `Value::Error` in the `exit_code` column of the output returned from `complete`, it now is a `Value::Int` with the negation of the signal number. # After Submitting - Update docs and book as necessary - Release notes (e.g., plugin protocol changes) - Adapt/convert commands to work with byte streams (high priority is `str length`, `bytes starts-with`, and maybe `bytes ends-with`). - Refactor the `tee` code, Devyn has already done some work on this. --------- Co-authored-by: Devyn Cairns <devyn.cairns@gmail.com>
2025-08-10 00:18:28 +02:00 · 2024-05-16 14:11:18 +00:00
parent 1b8eb23785
commit 6fd854ed9f
210 changed files with 3955 additions and 4012 deletions
--- a/crates/nu-engine/src/documentation.rs
+++ b/crates/nu-engine/src/documentation.rs
@ -53,7 +53,7 @@ fn nu_highlight_string(code_string: &str, engine_state: &EngineState, stack: &mu
            Value::string(code_string, Span::unknown()).into_pipeline_data(),
        ) {
            let result = output.into_value(Span::unknown());
-            if let Ok(s) = result.coerce_into_string() {
+            if let Ok(s) = result.and_then(Value::coerce_into_string) {
                return s; // successfully highlighted string
            }
        }
@ -280,7 +280,7 @@ fn get_documentation(
            ) {
                Ok(output) => {
                    let result = output.into_value(Span::unknown());
-                    match result.coerce_into_string() {
+                    match result.and_then(Value::coerce_into_string) {
                        Ok(s) => {
                            let _ = write!(long_desc, "\n  > {s}\n");
                        }
--- a/crates/nu-engine/src/env.rs
+++ b/crates/nu-engine/src/env.rs
@ -350,14 +350,15 @@ fn get_converted_value(
        .and_then(|record| record.get(direction));

    if let Some(conversion) = conversion {
-        match conversion.as_closure() {
-            Ok(closure) => ClosureEvalOnce::new(engine_state, stack, closure.clone())
-                .debug(false)
-                .run_with_value(orig_val.clone())
-                .map(|data| ConversionResult::Ok(data.into_value(orig_val.span())))
-                .unwrap_or_else(ConversionResult::ConversionError),
-            Err(e) => ConversionResult::ConversionError(e),
-        }
+        conversion
+            .as_closure()
+            .and_then(|closure| {
+                ClosureEvalOnce::new(engine_state, stack, closure.clone())
+                    .debug(false)
+                    .run_with_value(orig_val.clone())
+            })
+            .and_then(|data| data.into_value(orig_val.span()))
+            .map_or_else(ConversionResult::ConversionError, ConversionResult::Ok)
    } else {
        ConversionResult::CellPathError
    }
--- a/crates/nu-engine/src/eval.rs
+++ b/crates/nu-engine/src/eval.rs
@ -9,8 +9,8 @@ use nu_protocol::{
    debugger::DebugContext,
    engine::{Closure, EngineState, Redirection, Stack},
    eval_base::Eval,
-    Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span, Spanned, Type,
-    Value, VarId, ENV_VARIABLE_ID,
+    ByteStreamSource, Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span,
+    Spanned, Type, Value, VarId, ENV_VARIABLE_ID,
 };
 use nu_utils::IgnoreCaseExt;
 use std::{borrow::Cow, fs::OpenOptions, path::PathBuf};
@ -209,7 +209,6 @@ pub fn redirect_env(engine_state: &EngineState, caller_stack: &mut Stack, callee
    }
 }

-#[allow(clippy::too_many_arguments)]
 fn eval_external(
    engine_state: &EngineState,
    stack: &mut Stack,
@ -284,7 +283,7 @@ pub fn eval_expression_with_input<D: DebugContext>(
                    let stack = &mut stack.start_capture();
                    // FIXME: protect this collect with ctrl-c
                    input = eval_subexpression::<D>(engine_state, stack, block, input)?
-                        .into_value(*span)
+                        .into_value(*span)?
                        .follow_cell_path(&full_cell_path.tail, false)?
                        .into_pipeline_data()
                } else {
@ -301,7 +300,7 @@ pub fn eval_expression_with_input<D: DebugContext>(
        }
    };

-    // If input is PipelineData::ExternalStream,
+    // If input an external command,
    // then `might_consume_external_result` will consume `stderr` if `stdout` is `None`.
    // This should not happen if the user wants to capture stderr.
    if !matches!(stack.stdout(), OutDest::Pipe | OutDest::Capture)
@ -309,15 +308,10 @@ pub fn eval_expression_with_input<D: DebugContext>(
    {
        Ok((input, false))
    } else {
-        Ok(might_consume_external_result(input))
+        input.check_external_failed()
    }
 }

-// Try to catch and detect if external command runs to failed.
-fn might_consume_external_result(input: PipelineData) -> (PipelineData, bool) {
-    input.check_external_failed()
-}
-
 fn eval_redirection<D: DebugContext>(
    engine_state: &EngineState,
    stack: &mut Stack,
@ -410,10 +404,17 @@ fn eval_element_with_input_inner<D: DebugContext>(
    element: &PipelineElement,
    input: PipelineData,
 ) -> Result<(PipelineData, bool), ShellError> {
-    let (data, ok) = eval_expression_with_input::<D>(engine_state, stack, &element.expr, input)?;
+    let (data, failed) =
+        eval_expression_with_input::<D>(engine_state, stack, &element.expr, input)?;

-    if !matches!(data, PipelineData::ExternalStream { .. }) {
-        if let Some(redirection) = element.redirection.as_ref() {
+    if let Some(redirection) = element.redirection.as_ref() {
+        let is_external = if let PipelineData::ByteStream(stream, ..) = &data {
+            matches!(stream.source(), ByteStreamSource::Child(..))
+        } else {
+            false
+        };
+
+        if !is_external {
            match redirection {
                &PipelineRedirection::Single {
                    source: RedirectionSource::Stderr,
@ -424,8 +425,8 @@ fn eval_element_with_input_inner<D: DebugContext>(
                    ..
                } => {
                    return Err(ShellError::GenericError {
-                        error: "`e>|` only works with external streams".into(),
-                        msg: "`e>|` only works on external streams".into(),
+                        error: "`e>|` only works on external commands".into(),
+                        msg: "`e>|` only works on external commands".into(),
                        span: Some(span),
                        help: None,
                        inner: vec![],
@ -436,8 +437,8 @@ fn eval_element_with_input_inner<D: DebugContext>(
                    target: RedirectionTarget::Pipe { span },
                } => {
                    return Err(ShellError::GenericError {
-                        error: "`o+e>|` only works with external streams".into(),
-                        msg: "`o+e>|` only works on external streams".into(),
+                        error: "`o+e>|` only works on external commands".into(),
+                        msg: "`o+e>|` only works on external commands".into(),
                        span: Some(span),
                        help: None,
                        inner: vec![],
@ -448,15 +449,33 @@ fn eval_element_with_input_inner<D: DebugContext>(
        }
    }

-    let data = if matches!(stack.pipe_stdout(), Some(OutDest::File(_)))
-        && !matches!(stack.pipe_stderr(), Some(OutDest::Pipe))
-    {
-        data.write_to_out_dests(engine_state, stack)?
-    } else {
-        data
+    let has_stdout_file = matches!(stack.pipe_stdout(), Some(OutDest::File(_)));
+
+    let data = match &data {
+        PipelineData::Value(..) | PipelineData::ListStream(..) => {
+            if has_stdout_file {
+                data.write_to_out_dests(engine_state, stack)?;
+                PipelineData::Empty
+            } else {
+                data
+            }
+        }
+        PipelineData::ByteStream(stream, ..) => {
+            let write = match stream.source() {
+                ByteStreamSource::Read(_) | ByteStreamSource::File(_) => has_stdout_file,
+                ByteStreamSource::Child(_) => false,
+            };
+            if write {
+                data.write_to_out_dests(engine_state, stack)?;
+                PipelineData::Empty
+            } else {
+                data
+            }
+        }
+        PipelineData::Empty => PipelineData::Empty,
    };

-    Ok((data, ok))
+    Ok((data, failed))
 }

 fn eval_element_with_input<D: DebugContext>(
@ -466,12 +485,18 @@ fn eval_element_with_input<D: DebugContext>(
    input: PipelineData,
 ) -> Result<(PipelineData, bool), ShellError> {
    D::enter_element(engine_state, element);
-
-    let result = eval_element_with_input_inner::<D>(engine_state, stack, element, input);
-
-    D::leave_element(engine_state, element, &result);
-
-    result
+    match eval_element_with_input_inner::<D>(engine_state, stack, element, input) {
+        Ok((data, failed)) => {
+            let res = Ok(data);
+            D::leave_element(engine_state, element, &res);
+            res.map(|data| (data, failed))
+        }
+        Err(err) => {
+            let res = Err(err);
+            D::leave_element(engine_state, element, &res);
+            res.map(|data| (data, false))
+        }
+    }
 }

 pub fn eval_block_with_early_return<D: DebugContext>(
@ -555,17 +580,20 @@ pub fn eval_block<D: DebugContext>(
            }
            input = PipelineData::Empty;
            match output {
-                stream @ PipelineData::ExternalStream { .. } => {
-                    let exit_code = stream.drain_with_exit_code()?;
-                    stack.add_env_var(
-                        "LAST_EXIT_CODE".into(),
-                        Value::int(exit_code, last.expr.span),
-                    );
-                    if exit_code != 0 {
-                        break;
+                PipelineData::ByteStream(stream, ..) => {
+                    let span = stream.span();
+                    let status = stream.drain()?;
+                    if let Some(status) = status {
+                        stack.add_env_var(
+                            "LAST_EXIT_CODE".into(),
+                            Value::int(status.code().into(), span),
+                        );
+                        if status.code() != 0 {
+                            break;
+                        }
                    }
                }
-                PipelineData::ListStream(stream, _) => {
+                PipelineData::ListStream(stream, ..) => {
                    stream.drain()?;
                }
                PipelineData::Value(..) | PipelineData::Empty => {}
@ -684,7 +712,7 @@ impl Eval for EvalRuntime {
        _: Span,
    ) -> Result<Value, ShellError> {
        // FIXME: protect this collect with ctrl-c
-        Ok(eval_call::<D>(engine_state, stack, call, PipelineData::empty())?.into_value(call.head))
+        eval_call::<D>(engine_state, stack, call, PipelineData::empty())?.into_value(call.head)
    }

    fn eval_external_call(
@ -696,7 +724,7 @@ impl Eval for EvalRuntime {
    ) -> Result<Value, ShellError> {
        let span = head.span;
        // FIXME: protect this collect with ctrl-c
-        Ok(eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span))
+        eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span)
    }

    fn eval_subexpression<D: DebugContext>(
@ -706,12 +734,8 @@ impl Eval for EvalRuntime {
        span: Span,
    ) -> Result<Value, ShellError> {
        let block = engine_state.get_block(block_id);
-
        // FIXME: protect this collect with ctrl-c
-        Ok(
-            eval_subexpression::<D>(engine_state, stack, block, PipelineData::empty())?
-                .into_value(span),
-        )
+        eval_subexpression::<D>(engine_state, stack, block, PipelineData::empty())?.into_value(span)
    }

    fn regex_match(