Support ByteStreams in bytes starts-with and bytes ends-with (#12887)

# Description

Restores `bytes starts-with` so that it is able to work with byte streams once again. For parity/consistency, this PR also adds byte stream support to `bytes ends-with`.

# User-Facing Changes

- `bytes ends-with` now supports byte streams.

# Tests + Formatting

Re-enabled tests for `bytes starts-with` and added tests for `bytes ends-with`.
2025-02-22 21:41:26 +01:00 · 2024-05-16 23:59:08 +00:00 · 2024-05-16 23:59:08 +00:00 · 6891267b53
commit 6891267b53
parent aec41f3df0
5 changed files with 275 additions and 96 deletions
--- a/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs
+++ b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs
@ -0,0 +1,120 @@
+use nu_test_support::nu;
+
+#[test]
+fn basic_binary_end_with() {
+    let actual = nu!(r#"
+            "hello world" | into binary | bytes ends-with 0x[77 6f 72 6c 64]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn basic_string_fails() {
+    let actual = nu!(r#"
+            "hello world" | bytes ends-with 0x[77 6f 72 6c 64]
+        "#);
+
+    assert!(actual.err.contains("command doesn't support"));
+    assert_eq!(actual.out, "");
+}
+
+#[test]
+fn short_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn short_stream_mismatch() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[010203]) 5 | bytes ends-with 0x[010204]
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn short_stream_binary_overflow() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101010101]
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn long_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes ends-with 0x[010101]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_binary_overflow() {
+    // `0..32768` includes both endpoints, so it yields 32769 bytes (one more than the stream) without an explicit +1
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes ends-with (0..32768 | each {|| 0x[01] } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn long_stream_binary_exact() {
+    // `0..<8192` excludes its upper bound, so the pattern has exactly 8192 repetitions, the same as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01020304]) 8192 | bytes ends-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_string_exact() {
+    // `0..<8192` excludes its upper bound, so the pattern has exactly 8192 repetitions, the same as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater hell 8192 | bytes ends-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_mixed_exact() {
+    // `0..<2048` excludes its upper bound, so each segment has exactly 2048 repetitions, the same as the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build $binseg $strseg)
+        "#);
+
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_mixed_overflow() {
+    // `0..<2048` yields exactly 2048 repetitions per segment; the leading 0x[01] makes the pattern one byte longer than the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build 0x[01] $binseg $strseg)
+        "#);
+
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "false");
+}
--- a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs
+++ b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs
@ -1 +1,2 @@
+mod ends_with;
 mod starts_with;
--- a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs
+++ b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs
@ -19,102 +19,102 @@ fn basic_string_fails() {
    assert_eq!(actual.out, "");
 }

-// #[test]
-// fn short_stream_binary() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101]
-//         "#);
+#[test]
+fn short_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101]
+        "#);

-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}

-// #[test]
-// fn short_stream_mismatch() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204]
-//         "#);
+#[test]
+fn short_stream_mismatch() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204]
+        "#);

-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}

-// #[test]
-// fn short_stream_binary_overflow() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101]
-//         "#);
+#[test]
+fn short_stream_binary_overflow() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101]
+        "#);

-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}

-// #[test]
-// fn long_stream_binary() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101]
-//         "#);
+#[test]
+fn long_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101]
+        "#);

-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}

-// #[test]
-// fn long_stream_binary_overflow() {
-//     // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_binary_overflow() {
+    // `0..32768` includes both endpoints, so it yields 32769 bytes (one more than the stream) without an explicit +1
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect)
+        "#);

-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}

-// #[test]
-// fn long_stream_binary_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_binary_exact() {
+    // `0..<8192` excludes its upper bound, so the pattern has exactly 8192 repetitions, the same as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
+        "#);

-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}

-// #[test]
-// fn long_stream_string_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_string_exact() {
+    // `0..<8192` excludes its upper bound, so the pattern has exactly 8192 repetitions, the same as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
+        "#);

-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}

-// #[test]
-// fn long_stream_mixed_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
-//             let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+#[test]
+fn long_stream_mixed_exact() {
+    // `0..<2048` excludes its upper bound, so each segment has exactly 2048 repetitions, the same as the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)

-//             nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg)
-//         "#);
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg)
+        "#);

-//     assert_eq!(
-//         actual.err, "",
-//         "invocation failed. command line limit likely reached"
-//     );
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "true");
+}

-// #[test]
-// fn long_stream_mixed_overflow() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
-//             let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+#[test]
+fn long_stream_mixed_overflow() {
+    // `0..<2048` yields exactly 2048 repetitions per segment; the trailing 0x[01] makes the pattern one byte longer than the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)

-//             nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01])
-//         "#);
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01])
+        "#);

-//     assert_eq!(
-//         actual.err, "",
-//         "invocation failed. command line limit likely reached"
-//     );
-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "false");
+}
--- a/crates/nu-command/src/bytes/ends_with.rs
+++ b/crates/nu-command/src/bytes/ends_with.rs
@ -1,5 +1,9 @@
 use nu_cmd_base::input_handler::{operate, CmdArgument};
 use nu_engine::command_prelude::*;
+use std::{
+    collections::VecDeque,
+    io::{self, BufRead},
+};

 struct Arguments {
    pattern: Vec<u8>,
@ -52,14 +56,54 @@ impl Command for BytesEndsWith {
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
        let pattern: Vec<u8> = call.req(engine_state, stack, 0)?;
        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
-        let arg = Arguments {
-            pattern,
-            cell_paths,
-        };
-        operate(ends_with, arg, input, call.head, engine_state.ctrlc.clone())
+
+        if let PipelineData::ByteStream(stream, ..) = input {
+            let span = stream.span();
+            if pattern.is_empty() {
+                return Ok(Value::bool(true, head).into_pipeline_data());
+            }
+            let Some(mut reader) = stream.reader() else {
+                return Ok(Value::bool(false, head).into_pipeline_data());
+            };
+            let cap = pattern.len();
+            let mut end = VecDeque::<u8>::with_capacity(cap);
+            loop {
+                let buf = match reader.fill_buf() {
+                    Ok(&[]) => break,
+                    Ok(buf) => buf,
+                    Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                    Err(e) => return Err(e.into_spanned(span).into()),
+                };
+                let len = buf.len();
+                if len >= cap {
+                    end.clear();
+                    end.extend(&buf[(len - cap)..])
+                } else {
+                    let new_len = len + end.len();
+                    if new_len > cap {
+                        // The `drain` below will panic if `(new_len - cap) > end.len()`.
+                        // But this cannot happen since we know `len < cap` (as checked above):
+                        //   (len + end.len() - cap) > end.len()
+                        //   => (len - cap) > 0
+                        //   => len > cap
+                        end.drain(..(new_len - cap));
+                    }
+                    end.extend(buf);
+                }
+                reader.consume(len);
+            }
+            Ok(Value::bool(end == pattern, head).into_pipeline_data())
+        } else {
+            let arg = Arguments {
+                pattern,
+                cell_paths,
+            };
+            operate(ends_with, arg, input, head, engine_state.ctrlc.clone())
+        }
    }

    fn examples(&self) -> Vec<Example> {
--- a/crates/nu-command/src/bytes/starts_with.rs
+++ b/crates/nu-command/src/bytes/starts_with.rs
@ -1,5 +1,6 @@
 use nu_cmd_base::input_handler::{operate, CmdArgument};
 use nu_engine::command_prelude::*;
+use std::io::Read;

 struct Arguments {
    pattern: Vec<u8>,
@ -53,20 +54,33 @@ impl Command for BytesStartsWith {
        call: &Call,
        input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
        let pattern: Vec<u8> = call.req(engine_state, stack, 0)?;
        let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
        let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
-        let arg = Arguments {
-            pattern,
-            cell_paths,
-        };
-        operate(
-            starts_with,
-            arg,
-            input,
-            call.head,
-            engine_state.ctrlc.clone(),
-        )
+
+        if let PipelineData::ByteStream(stream, ..) = input {
+            let span = stream.span();
+            if pattern.is_empty() {
+                return Ok(Value::bool(true, head).into_pipeline_data());
+            }
+            let Some(reader) = stream.reader() else {
+                return Ok(Value::bool(false, head).into_pipeline_data());
+            };
+            let mut start = Vec::with_capacity(pattern.len());
+            reader
+                .take(pattern.len() as u64)
+                .read_to_end(&mut start)
+                .err_span(span)?;
+
+            Ok(Value::bool(start == pattern, head).into_pipeline_data())
+        } else {
+            let arg = Arguments {
+                pattern,
+                cell_paths,
+            };
+            operate(starts_with, arg, input, head, engine_state.ctrlc.clone())
+        }
    }

    fn examples(&self) -> Vec<Example> {