Implementing ByteStream interuption on infinite stream (#13552)

# Description

This PR should address #13530 by explicitly handling ByteStreams. 

The issue can be replicated easily on linux by running:

```nushell
open /dev/urandom | into binary | bytes at ..10
```

Would leave the output hanging and with no way to cancel it, this was
likely because it was trying to collect the input stream and would not
complete.

I have also put in an error to say that using negative offsets for a
bytestream without a length cannot be used.

```nushell
~/git/nushell> open /dev/urandom | into binary | bytes at (-1)..
Error: nu:🐚:incorrect_value

  × Incorrect value.
   ╭─[entry #3:1:35]
 1 │ open /dev/urandom | into binary | bytes at (-1)..
   ·                                   ────┬─── ───┬──
   ·                                       │       ╰── encountered here
   ·                                       ╰── Negative range values cannot be used with streams that don't specify a length
   ╰────
   ```

# User-Facing Changes

No operation changes, only the warning you get back for negative offsets

# Tests + Formatting

Ran `toolkit check pr ` with no errors or warnings

Manual testing of the example commands above

---------

Co-authored-by: Ian Manske <ian.manske@pm.me>
Co-authored-by: Simon Curtis <simon.curtis@candc-uk.com>
This commit is contained in:
Simon Curtis
2025-01-11 21:28:08 +00:00
committed by GitHub
parent 0b71eb201c
commit f05162811c
15 changed files with 612 additions and 103 deletions

View File

@ -1,15 +1,14 @@
use nu_cmd_base::{
input_handler::{operate, CmdArgument},
util,
};
use std::ops::Bound;
use nu_cmd_base::input_handler::{operate, CmdArgument};
use nu_engine::command_prelude::*;
use nu_protocol::Range;
use nu_protocol::{IntRange, Range};
#[derive(Clone)]
pub struct BytesAt;
struct Arguments {
indexes: Subbytes,
range: IntRange,
cell_paths: Option<Vec<CellPath>>,
}
@ -19,15 +18,6 @@ impl CmdArgument for Arguments {
}
}
impl From<(isize, isize)> for Subbytes {
fn from(input: (isize, isize)) -> Self {
Self(input.0, input.1)
}
}
#[derive(Clone, Copy)]
struct Subbytes(isize, isize);
impl Command for BytesAt {
fn name(&self) -> &str {
"bytes at"
@ -44,6 +34,7 @@ impl Command for BytesAt {
(Type::table(), Type::table()),
(Type::record(), Type::record()),
])
.allow_variants_without_examples(true)
.required("range", SyntaxShape::Range, "The range to get bytes.")
.rest(
"rest",
@ -68,86 +59,115 @@ impl Command for BytesAt {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let range: Range = call.req(engine_state, stack, 0)?;
let indexes = match util::process_range(&range) {
Ok(idxs) => idxs.into(),
Err(processing_error) => {
return Err(processing_error("could not perform subbytes", call.head));
let range = match call.req(engine_state, stack, 0)? {
Range::IntRange(range) => range,
_ => {
return Err(ShellError::UnsupportedInput {
msg: "Float ranges are not supported for byte streams".into(),
input: "value originates from here".into(),
msg_span: call.head,
input_span: call.head,
})
}
};
let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
let args = Arguments {
indexes,
cell_paths,
};
operate(action, args, input, call.head, engine_state.signals())
if let PipelineData::ByteStream(stream, metadata) = input {
let stream = stream.slice(call.head, call.arguments_span(), range)?;
Ok(PipelineData::ByteStream(stream, metadata))
} else {
operate(
map_value,
Arguments { range, cell_paths },
input,
call.head,
engine_state.signals(),
)
}
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Get a subbytes `0x[10 01]` from the bytes `0x[33 44 55 10 01 13]`",
example: " 0x[33 44 55 10 01 13] | bytes at 3..<4",
result: Some(Value::test_binary(vec![0x10])),
},
Example {
description: "Get a subbytes `0x[10 01 13]` from the bytes `0x[33 44 55 10 01 13]`",
example: " 0x[33 44 55 10 01 13] | bytes at 3..6",
result: Some(Value::test_binary(vec![0x10, 0x01, 0x13])),
},
Example {
description: "Get the remaining characters from a starting index",
example: " { data: 0x[33 44 55 10 01 13] } | bytes at 3.. data",
description: "Extract bytes starting from a specific index",
example: "{ data: 0x[33 44 55 10 01 13 10] } | bytes at 3.. data",
result: Some(Value::test_record(record! {
"data" => Value::test_binary(vec![0x10, 0x01, 0x13]),
"data" => Value::test_binary(vec![0x10, 0x01, 0x13, 0x10]),
})),
},
Example {
description: "Get the characters from the beginning until ending index",
example: " 0x[33 44 55 10 01 13] | bytes at ..<4",
description: "Slice out `0x[10 01 13]` from `0x[33 44 55 10 01 13]`",
example: "0x[33 44 55 10 01 13] | bytes at 3..5",
result: Some(Value::test_binary(vec![0x10, 0x01, 0x13])),
},
Example {
description: "Extract bytes from the start up to a specific index",
example: "0x[33 44 55 10 01 13 10] | bytes at ..4",
result: Some(Value::test_binary(vec![0x33, 0x44, 0x55, 0x10, 0x01])),
},
Example {
description: "Extract byte `0x[10]` using an exclusive end index",
example: "0x[33 44 55 10 01 13 10] | bytes at 3..<4",
result: Some(Value::test_binary(vec![0x10])),
},
Example {
description: "Extract bytes up to a negative index (inclusive)",
example: "0x[33 44 55 10 01 13 10] | bytes at ..-4",
result: Some(Value::test_binary(vec![0x33, 0x44, 0x55, 0x10])),
},
Example {
description:
"Or the characters from the beginning until ending index inside a table",
example: r#" [[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes at 1.. ColB ColC"#,
description: "Slice bytes across multiple table columns",
example: r#"[[ColA ColB ColC]; [0x[11 12 13] 0x[14 15 16] 0x[17 18 19]]] | bytes at 1.. ColB ColC"#,
result: Some(Value::test_list(vec![Value::test_record(record! {
"ColA" => Value::test_binary(vec![0x11, 0x12, 0x13]),
"ColB" => Value::test_binary(vec![0x15, 0x16]),
"ColC" => Value::test_binary(vec![0x18, 0x19]),
})])),
},
Example {
description: "Extract the last three bytes using a negative start index",
example: "0x[33 44 55 10 01 13 10] | bytes at (-3)..",
result: Some(Value::test_binary(vec![0x01, 0x13, 0x10])),
},
]
}
}
fn action(input: &Value, args: &Arguments, head: Span) -> Value {
let range = &args.indexes;
fn map_value(input: &Value, args: &Arguments, head: Span) -> Value {
let range = &args.range;
match input {
Value::Binary { val, .. } => {
let len = val.len() as isize;
let start = if range.0 < 0 { range.0 + len } else { range.0 };
let end = if range.1 < 0 { range.1 + len } else { range.1 };
let len = val.len() as u64;
let start: u64 = range.absolute_start(len);
let start: usize = match start.try_into() {
Ok(start) => start,
Err(_) => {
let span = input.span();
return Value::error(
ShellError::UnsupportedInput {
msg: format!(
"Absolute start position {start} was too large for your system arch."
),
input: args.range.to_string(),
msg_span: span,
input_span: span,
},
head,
);
}
};
if start > end {
Value::binary(vec![], head)
} else {
let val_iter = val.iter().skip(start as usize);
Value::binary(
if end == isize::MAX {
val_iter.copied().collect::<Vec<u8>>()
} else {
val_iter.take((end - start + 1) as usize).copied().collect()
},
head,
)
}
let bytes: Vec<u8> = match range.absolute_end(len) {
Bound::Unbounded => val[start..].into(),
Bound::Included(end) => val[start..=end as usize].into(),
Bound::Excluded(end) => val[start..end as usize].into(),
};
Value::binary(bytes, head)
}
Value::Error { .. } => input.clone(),
other => Value::error(
ShellError::UnsupportedInput {
msg: "Only binary values are supported".into(),
@ -159,3 +179,14 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
),
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_examples() {
use crate::test_examples;
test_examples(BytesAt {})
}
}

View File

@ -61,7 +61,12 @@ mod test_examples {
signature.operates_on_cell_paths(),
),
);
check_example_evaluates_to_expected_output(&example, cwd.as_path(), &mut engine_state);
check_example_evaluates_to_expected_output(
cmd.name(),
&example,
cwd.as_path(),
&mut engine_state,
);
}
check_all_signature_input_output_types_entries_have_examples(

View File

@ -1,6 +1,4 @@
use nu_engine::command_prelude::*;
use nu_protocol::Signals;
use std::io::{self, Read};
#[derive(Clone)]
pub struct Skip;
@ -96,22 +94,10 @@ impl Command for Skip {
PipelineData::ByteStream(stream, metadata) => {
if stream.type_().is_binary_coercible() {
let span = stream.span();
if let Some(mut reader) = stream.reader() {
// Copy the number of skipped bytes into the sink before proceeding
io::copy(&mut (&mut reader).take(n as u64), &mut io::sink())
.err_span(span)?;
Ok(PipelineData::ByteStream(
ByteStream::read(
reader,
call.head,
Signals::empty(),
ByteStreamType::Binary,
),
metadata,
))
} else {
Ok(PipelineData::Empty)
}
Ok(PipelineData::ByteStream(
stream.skip(span, n as u64)?,
metadata,
))
} else {
Err(ShellError::OnlySupportsThisInputType {
exp_input_type: "list, binary or range".into(),

View File

@ -1,6 +1,5 @@
use nu_engine::command_prelude::*;
use nu_protocol::Signals;
use std::io::Read;
#[derive(Clone)]
pub struct Take;
@ -89,20 +88,11 @@ impl Command for Take {
)),
PipelineData::ByteStream(stream, metadata) => {
if stream.type_().is_binary_coercible() {
if let Some(reader) = stream.reader() {
// Just take 'rows' bytes off the stream, mimicking the binary behavior
Ok(PipelineData::ByteStream(
ByteStream::read(
reader.take(rows_desired as u64),
head,
Signals::empty(),
ByteStreamType::Binary,
),
metadata,
))
} else {
Ok(PipelineData::Empty)
}
let span = stream.span();
Ok(PipelineData::ByteStream(
stream.take(span, rows_desired as u64)?,
metadata,
))
} else {
Err(ShellError::OnlySupportsThisInputType {
exp_input_type: "list, binary or range".into(),

View File

@ -0,0 +1,72 @@
use nu_test_support::nu;
#[test]
pub fn returns_error_for_relative_range_on_infinite_stream() {
let actual = nu!("nu --testbin iecho 3 | bytes at ..-3");
assert!(
actual.err.contains(
"Relative range values cannot be used with streams that don't have a known length"
),
"Expected error message for negative range with infinite stream"
);
}
#[test]
pub fn returns_bytes_for_fixed_range_on_infinite_stream_including_end() {
let actual = nu!("nu --testbin iecho 3 | bytes at ..10 | decode");
assert_eq!(
actual.out, "33333",
"Expected bytes from index 0 to 10, but got different output"
);
}
#[test]
pub fn returns_bytes_for_fixed_range_on_infinite_stream_excluding_end() {
let actual = nu!("nu --testbin iecho 3 | bytes at ..<9 | decode");
assert_eq!(
actual.out, "3333",
"Expected bytes from index 0 to 8, but got different output"
);
}
#[test]
pub fn test_string_returns_correct_slice_for_simple_positive_slice() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at ..4 | decode");
assert_eq!(actual.out, "Hello");
}
#[test]
pub fn test_string_returns_correct_slice_for_negative_start() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at (-5)..10 | decode");
assert_eq!(actual.out, "World");
}
#[test]
pub fn test_string_returns_correct_slice_for_negative_end() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at ..-7 | decode");
assert_eq!(actual.out, "Hello");
}
#[test]
pub fn test_string_returns_correct_slice_for_empty_slice() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at 5..<5 | decode");
assert_eq!(actual.out, "");
}
#[test]
pub fn test_string_returns_correct_slice_for_out_of_bounds() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at ..20 | decode");
assert_eq!(actual.out, "Hello World");
}
#[test]
pub fn test_string_returns_correct_slice_for_invalid_range() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at 11..5 | decode");
assert_eq!(actual.out, "");
}
#[test]
pub fn test_string_returns_correct_slice_for_max_end() {
let actual = nu!("\"Hello World\" | encode utf8 | bytes at 6..<11 | decode");
assert_eq!(actual.out, "World");
}

View File

@ -1 +1,2 @@
mod at;
mod collect;