From b55ed69c92b0eecdf4c33b7ee65790f0f57802e9 Mon Sep 17 00:00:00 2001 From: Bahex Date: Thu, 30 Jan 2025 15:50:01 +0300 Subject: [PATCH] fix range bugs in `str substring`, `str index-of`, `slice`, `bytes at` (#14863) - fixes #14769 # Description ## Bugs - `str substring 0..<0` When passed a range containing no elements, for non-zero cases `str substring` behaves correctly: ```nushell ("hello world" | str substring 1..<1) == "" # => true ``` but if the range is `0..<0`, it returns the whole string instead ```nushell "hello world" | str substring 0..<0 # => hello world ``` - `[0 1 2] | range 0..<0` Similar behavior to `str substring` - `str index-of` - off-by-one on end bounds - underflow on negative start bounds - `bytes at` has inconsistent behavior, works correctly when the size is known, returns one byte less when it's not known (streaming) This can be demonstrated by comparing the outputs of following snippets ```nushell "hello world" | into binary | bytes at ..<5 | decode # => hello "hello world" | into binary | chunks 1 | bytes collect | bytes at ..<5 | decode # => hell ``` - `bytes at` panics on decreasing (`5..3`) ranges if the input size is known. Does not panic with streaming input. ## Changes - implement `FromValue` for `IntRange`, as it is very common to use integer ranges as arguments - `IntRange::absolute_start` can now point one-past-end - `IntRange::absolute_end` converts relative `Included` bounds to absolute `Excluded` bounds - `IntRange::absolute_bounds` is a convenience method that calls the other `absolute_*` methods and transforms reverse ranges to empty at `start` (`5..3` => `5..<5`) - refactored `str substring` tests to allow empty exclusive range tests - fix the `0..<0` case for `str substring` and `str index-of` - `IntRange::distance` never returns `Included(0)` As a general rule `Included(n) == Excluded(n + 1)`. This makes returning `Included(0)` bug prone as users of the function will likely rely on this general rule and cause bugs. - `ByteStream::slice` no longer has an off-by-one on inputs without a known size. This affected `bytes at`. - `bytes at` no longer panics on reverse ranges - `bytes at` is now consistent between streaming and non streaming inputs. # User-Facing Changes There should be no noticeable changes other than the bugfix. # Tests + Formatting - :green_circle: toolkit fmt - :green_circle: toolkit clippy - :green_circle: toolkit test - :green_circle: toolkit test stdlib # After Submitting N/A --- crates/nu-command/src/bytes/at.rs | 9 +- crates/nu-command/src/filters/slice.rs | 91 +++---- .../nu-command/src/strings/str_/index_of.rs | 91 ++++--- .../nu-command/src/strings/str_/substring.rs | 228 +++++++++--------- crates/nu-command/tests/commands/bytes/at.rs | 9 +- crates/nu-command/tests/commands/slice.rs | 14 ++ .../nu-protocol/src/pipeline/byte_stream.rs | 4 +- crates/nu-protocol/src/value/range.rs | 50 +++- .../nu-protocol/tests/pipeline/byte_stream.rs | 17 ++ 9 files changed, 281 insertions(+), 232 deletions(-) diff --git a/crates/nu-command/src/bytes/at.rs b/crates/nu-command/src/bytes/at.rs index 9663e545a4..b89be18d1e 100644 --- a/crates/nu-command/src/bytes/at.rs +++ b/crates/nu-command/src/bytes/at.rs @@ -141,7 +141,7 @@ fn map_value(input: &Value, args: &Arguments, head: Span) -> Value { Value::Binary { val, .. } => { let len = val.len() as u64; let start: u64 = range.absolute_start(len); - let start: usize = match start.try_into() { + let _start: usize = match start.try_into() { Ok(start) => start, Err(_) => { let span = input.span(); @@ -159,10 +159,11 @@ fn map_value(input: &Value, args: &Arguments, head: Span) -> Value { } }; - let bytes: Vec = match range.absolute_end(len) { + let (start, end) = range.absolute_bounds(val.len()); + let bytes: Vec = match end { Bound::Unbounded => val[start..].into(), - Bound::Included(end) => val[start..=end as usize].into(), - Bound::Excluded(end) => val[start..end as usize].into(), + Bound::Included(end) => val[start..=end].into(), + Bound::Excluded(end) => val[start..end].into(), }; Value::binary(bytes, head) diff --git a/crates/nu-command/src/filters/slice.rs b/crates/nu-command/src/filters/slice.rs index e3f49a4bb3..638d6dfca3 100644 --- a/crates/nu-command/src/filters/slice.rs +++ b/crates/nu-command/src/filters/slice.rs @@ -1,5 +1,5 @@ use nu_engine::command_prelude::*; -use nu_protocol::Range; +use nu_protocol::IntRange; use std::ops::Bound; #[derive(Clone)] @@ -66,68 +66,49 @@ impl Command for Slice { ) -> Result { let head = call.head; let metadata = input.metadata(); - let rows: Spanned = call.req(engine_state, stack, 0)?; + let range: IntRange = call.req(engine_state, stack, 0)?; - match rows.item { - Range::IntRange(range) => { - let start = range.start(); - let end = match range.end() { - Bound::Included(end) => end, - Bound::Excluded(end) => end - 1, - Bound::Unbounded => { - if range.step() < 0 { - i64::MIN - } else { - i64::MAX - } - } - }; + // only collect the input if we have any negative indices + if range.is_relative() { + let v: Vec<_> = input.into_iter().collect(); - // only collect the input if we have any negative indices - if start < 0 || end < 0 { - let v: Vec<_> = input.into_iter().collect(); - let vlen: i64 = v.len() as i64; + let (from, to) = range.absolute_bounds(v.len()); - let from = if start < 0 { - (vlen + start) as usize + let count = match to { + Bound::Excluded(to) => to.saturating_sub(from), + Bound::Included(to) => to.saturating_sub(from) + 1, + Bound::Unbounded => usize::MAX, + }; + + if count == 0 { + Ok(PipelineData::Value(Value::list(vec![], head), None)) + } else { + let iter = v.into_iter().skip(from).take(count); + Ok(iter.into_pipeline_data(head, engine_state.signals().clone())) + } + } else { + let from = range.start() as usize; + let count = match range.end() { + Bound::Excluded(to) | Bound::Included(to) if range.start() > to => 0, + Bound::Excluded(to) => (to as usize).saturating_sub(from), + Bound::Included(to) => (to as usize).saturating_sub(from) + 1, + Bound::Unbounded => { + if range.step() < 0 { + 0 } else { - start as usize - }; - - let to = if end < 0 { - (vlen + end) as usize - } else if end > v.len() as i64 { - v.len() - } else { - end as usize - }; - - if from > to { - Ok(PipelineData::Value(Value::nothing(head), None)) - } else { - let iter = v.into_iter().skip(from).take(to - from + 1); - Ok(iter.into_pipeline_data(head, engine_state.signals().clone())) - } - } else { - let from = start as usize; - let to = end as usize; - - if from > to { - Ok(PipelineData::Value(Value::nothing(head), None)) - } else { - let iter = input.into_iter().skip(from).take(to - from + 1); - Ok(iter.into_pipeline_data(head, engine_state.signals().clone())) + usize::MAX } } - .map(|x| x.set_metadata(metadata)) + }; + + if count == 0 { + Ok(PipelineData::Value(Value::list(vec![], head), None)) + } else { + let iter = input.into_iter().skip(from).take(count); + Ok(iter.into_pipeline_data(head, engine_state.signals().clone())) } - Range::FloatRange(_) => Err(ShellError::UnsupportedInput { - msg: "float range".into(), - input: "value originates from here".into(), - msg_span: call.head, - input_span: rows.span, - }), } + .map(|x| x.set_metadata(metadata)) } } diff --git a/crates/nu-command/src/strings/str_/index_of.rs b/crates/nu-command/src/strings/str_/index_of.rs index eace6c0494..efd9540158 100644 --- a/crates/nu-command/src/strings/str_/index_of.rs +++ b/crates/nu-command/src/strings/str_/index_of.rs @@ -1,16 +1,15 @@ +use std::ops::Bound; + use crate::{grapheme_flags, grapheme_flags_const}; -use nu_cmd_base::{ - input_handler::{operate, CmdArgument}, - util, -}; +use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; -use nu_protocol::{engine::StateWorkingSet, Range}; +use nu_protocol::{engine::StateWorkingSet, IntRange}; use unicode_segmentation::UnicodeSegmentation; struct Arguments { end: bool, substring: String, - range: Option>, + range: Option>, cell_paths: Option>, graphemes: bool, } @@ -170,46 +169,44 @@ fn action( ) -> Value { match input { Value::String { val: s, .. } => { - let mut range_span = head; - let (start_index, end_index) = if let Some(spanned_range) = range { - range_span = spanned_range.span; + let (search_str, start_index) = if let Some(spanned_range) = range { + let range_span = spanned_range.span; let range = &spanned_range.item; - match util::process_range(range) { - Ok(r) => { - // `process_range()` returns `isize::MAX` if the range is open-ended, - // which is not ideal for us - let end = if r.1 as usize > s.len() { - s.len() - } else { - r.1 as usize - }; - (r.0 as usize, end) - } - Err(processing_error) => { - let err = processing_error("could not find `index-of`", head); - return Value::error(err, head); - } - } - } else { - (0usize, s.len()) - }; - if s.get(start_index..end_index).is_none() { - return Value::error( - ShellError::OutOfBounds { - left_flank: start_index.to_string(), - right_flank: end_index.to_string(), - span: range_span, - }, - head, - ); - } + let (start, end) = range.absolute_bounds(s.len()); + let s = match end { + Bound::Excluded(end) => s.get(start..end), + Bound::Included(end) => s.get(start..=end), + Bound::Unbounded => s.get(start..), + }; + + let s = match s { + Some(s) => s, + None => { + return Value::error( + ShellError::OutOfBounds { + left_flank: start.to_string(), + right_flank: match range.end() { + Bound::Unbounded => "".to_string(), + Bound::Included(end) => format!("={end}"), + Bound::Excluded(end) => format!("<{end}"), + }, + span: range_span, + }, + head, + ) + } + }; + (s, start) + } else { + (s.as_str(), 0) + }; // When the -e flag is present, search using rfind instead of find.s if let Some(result) = if *end { - s[start_index..end_index].rfind(&**substring) + search_str.rfind(&**substring) } else { - s[start_index..end_index].find(&**substring) + search_str.find(&**substring) } { let result = result + start_index; Value::int( @@ -294,7 +291,7 @@ mod tests { #[test] fn returns_index_of_next_substring() { let word = Value::test_string("Cargo.Cargo"); - let range = Range::new( + let range = IntRange::new( Value::int(1, Span::test_data()), Value::nothing(Span::test_data()), Value::nothing(Span::test_data()), @@ -324,7 +321,7 @@ mod tests { #[test] fn index_does_not_exist_due_to_end_index() { let word = Value::test_string("Cargo.Banana"); - let range = Range::new( + let range = IntRange::new( Value::nothing(Span::test_data()), Value::nothing(Span::test_data()), Value::int(5, Span::test_data()), @@ -354,7 +351,7 @@ mod tests { #[test] fn returns_index_of_nums_in_middle_due_to_index_limit_from_both_ends() { let word = Value::test_string("123123123"); - let range = Range::new( + let range = IntRange::new( Value::int(2, Span::test_data()), Value::nothing(Span::test_data()), Value::int(6, Span::test_data()), @@ -384,7 +381,7 @@ mod tests { #[test] fn index_does_not_exists_due_to_strict_bounds() { let word = Value::test_string("123456"); - let range = Range::new( + let range = IntRange::new( Value::int(2, Span::test_data()), Value::nothing(Span::test_data()), Value::int(5, Span::test_data()), @@ -431,7 +428,7 @@ mod tests { fn index_is_not_a_char_boundary() { let word = Value::string(String::from("๐Ÿ’›"), Span::test_data()); - let range = Range::new( + let range = IntRange::new( Value::int(0, Span::test_data()), Value::int(1, Span::test_data()), Value::int(2, Span::test_data()), @@ -462,7 +459,7 @@ mod tests { fn index_is_out_of_bounds() { let word = Value::string(String::from("hello"), Span::test_data()); - let range = Range::new( + let range = IntRange::new( Value::int(-1, Span::test_data()), Value::int(1, Span::test_data()), Value::int(3, Span::test_data()), @@ -486,6 +483,6 @@ mod tests { }; let actual = action(&word, &options, Span::test_data()); - assert!(actual.is_error()); + assert_eq!(actual, Value::test_int(-1)); } } diff --git a/crates/nu-command/src/strings/str_/substring.rs b/crates/nu-command/src/strings/str_/substring.rs index 58f32391fa..57e4426f49 100644 --- a/crates/nu-command/src/strings/str_/substring.rs +++ b/crates/nu-command/src/strings/str_/substring.rs @@ -1,17 +1,16 @@ +use std::ops::Bound; + use crate::{grapheme_flags, grapheme_flags_const}; -use nu_cmd_base::{ - input_handler::{operate, CmdArgument}, - util, -}; +use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; -use nu_protocol::{engine::StateWorkingSet, Range}; +use nu_protocol::{engine::StateWorkingSet, IntRange}; use unicode_segmentation::UnicodeSegmentation; #[derive(Clone)] pub struct SubCommand; struct Arguments { - indexes: Substring, + range: IntRange, cell_paths: Option>, graphemes: bool, } @@ -22,15 +21,6 @@ impl CmdArgument for Arguments { } } -#[derive(Clone)] -struct Substring(isize, isize); - -impl From<(isize, isize)> for Substring { - fn from(input: (isize, isize)) -> Substring { - Substring(input.0, input.1) - } -} - impl Command for SubCommand { fn name(&self) -> &str { "str substring" @@ -87,19 +77,12 @@ impl Command for SubCommand { call: &Call, input: PipelineData, ) -> Result { - let range: Range = call.req(engine_state, stack, 0)?; - - let indexes = match util::process_range(&range) { - Ok(idxs) => idxs.into(), - Err(processing_error) => { - return Err(processing_error("could not perform substring", call.head)) - } - }; + let range: IntRange = call.req(engine_state, stack, 0)?; let cell_paths: Vec = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let args = Arguments { - indexes, + range, cell_paths, graphemes: grapheme_flags(engine_state, stack, call)?, }; @@ -112,19 +95,12 @@ impl Command for SubCommand { call: &Call, input: PipelineData, ) -> Result { - let range: Range = call.req_const(working_set, 0)?; - - let indexes = match util::process_range(&range) { - Ok(idxs) => idxs.into(), - Err(processing_error) => { - return Err(processing_error("could not perform substring", call.head)) - } - }; + let range: IntRange = call.req_const(working_set, 0)?; let cell_paths: Vec = call.rest_const(working_set, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let args = Arguments { - indexes, + range, cell_paths, graphemes: grapheme_flags_const(working_set, call)?, }; @@ -160,58 +136,38 @@ impl Command for SubCommand { } fn action(input: &Value, args: &Arguments, head: Span) -> Value { - let options = &args.indexes; match input { Value::String { val: s, .. } => { - let len: isize = s.len() as isize; + let s = if args.graphemes { + let indices = s + .grapheme_indices(true) + .map(|(idx, s)| (idx, s.len())) + .collect::>(); - let start: isize = if options.0 < 0 { - options.0 + len - } else { - options.0 - }; - let end: isize = if options.1 < 0 { - options.1 + len - } else { - options.1 - }; + let (idx_start, idx_end) = args.range.absolute_bounds(indices.len()); + let idx_range = match idx_end { + Bound::Excluded(end) => &indices[idx_start..end], + Bound::Included(end) => &indices[idx_start..=end], + Bound::Unbounded => &indices[idx_start..], + }; - if start > end { - Value::string("", head) + if let Some((start, end)) = idx_range.first().zip(idx_range.last()) { + let start = start.0; + let end = end.0 + end.1; + s[start..end].to_owned() + } else { + String::new() + } } else { - Value::string( - { - if end == isize::MAX { - if args.graphemes { - s.graphemes(true) - .skip(start as usize) - .collect::>() - .join("") - } else { - String::from_utf8_lossy( - &s.bytes().skip(start as usize).collect::>(), - ) - .to_string() - } - } else if args.graphemes { - s.graphemes(true) - .skip(start as usize) - .take((end - start + 1) as usize) - .collect::>() - .join("") - } else { - String::from_utf8_lossy( - &s.bytes() - .skip(start as usize) - .take((end - start + 1) as usize) - .collect::>(), - ) - .to_string() - } - }, - head, - ) - } + let (start, end) = args.range.absolute_bounds(s.len()); + let s = match end { + Bound::Excluded(end) => &s.as_bytes()[start..end], + Bound::Included(end) => &s.as_bytes()[start..=end], + Bound::Unbounded => &s.as_bytes()[start..], + }; + String::from_utf8_lossy(s).into_owned() + }; + Value::string(s, head) } // Propagate errors by explicitly matching them before the final case. Value::Error { .. } => input.clone(), @@ -228,8 +184,11 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value { } #[cfg(test)] +#[allow(clippy::reversed_empty_ranges)] mod tests { - use super::{action, Arguments, Span, SubCommand, Substring, Value}; + use nu_protocol::IntRange; + + use super::{action, Arguments, Span, SubCommand, Value}; #[test] fn test_examples() { @@ -237,21 +196,66 @@ mod tests { test_examples(SubCommand {}) } + + #[derive(Clone, Copy, Debug)] + struct RangeHelper { + start: i64, + end: i64, + inclusion: nu_protocol::ast::RangeInclusion, + } + #[derive(Debug)] struct Expectation<'a> { - options: (isize, isize), + range: RangeHelper, expected: &'a str, } - impl Expectation<'_> { - fn options(&self) -> Substring { - Substring(self.options.0, self.options.1) + impl From> for RangeHelper { + fn from(value: std::ops::RangeInclusive) -> Self { + RangeHelper { + start: *value.start(), + end: *value.end(), + inclusion: nu_protocol::ast::RangeInclusion::Inclusive, + } } } - fn expectation(word: &str, indexes: (isize, isize)) -> Expectation { + impl From> for RangeHelper { + fn from(value: std::ops::Range) -> Self { + RangeHelper { + start: value.start, + end: value.end, + inclusion: nu_protocol::ast::RangeInclusion::RightExclusive, + } + } + } + + impl From for IntRange { + fn from(value: RangeHelper) -> Self { + match IntRange::new( + Value::test_int(value.start), + Value::test_int(value.start + (if value.start <= value.end { 1 } else { -1 })), + Value::test_int(value.end), + value.inclusion, + Span::test_data(), + ) { + Ok(val) => val, + Err(e) => { + panic!("{value:?}: {e:?}") + } + } + } + } + + impl Expectation<'_> { + fn range(&self) -> IntRange { + self.range.into() + } + } + + fn expectation(word: &str, range: impl Into) -> Expectation { Expectation { - options: indexes, + range: range.into(), expected: word, } } @@ -261,30 +265,31 @@ mod tests { let word = Value::test_string("andres"); let cases = vec![ - expectation("a", (0, 0)), - expectation("an", (0, 1)), - expectation("and", (0, 2)), - expectation("andr", (0, 3)), - expectation("andre", (0, 4)), - expectation("andres", (0, 5)), - expectation("andres", (0, 6)), - expectation("a", (0, -6)), - expectation("an", (0, -5)), - expectation("and", (0, -4)), - expectation("andr", (0, -3)), - expectation("andre", (0, -2)), - expectation("andres", (0, -1)), + expectation("", 0..0), + expectation("a", 0..=0), + expectation("an", 0..=1), + expectation("and", 0..=2), + expectation("andr", 0..=3), + expectation("andre", 0..=4), + expectation("andres", 0..=5), + expectation("andres", 0..=6), + expectation("a", 0..=-6), + expectation("an", 0..=-5), + expectation("and", 0..=-4), + expectation("andr", 0..=-3), + expectation("andre", 0..=-2), + expectation("andres", 0..=-1), // str substring [ -4 , _ ] // str substring -4 , - expectation("dres", (-4, isize::MAX)), - expectation("", (0, -110)), - expectation("", (6, 0)), - expectation("", (6, -1)), - expectation("", (6, -2)), - expectation("", (6, -3)), - expectation("", (6, -4)), - expectation("", (6, -5)), - expectation("", (6, -6)), + expectation("dres", -4..=i64::MAX), + expectation("", 0..=-110), + expectation("", 6..=0), + expectation("", 6..=-1), + expectation("", 6..=-2), + expectation("", 6..=-3), + expectation("", 6..=-4), + expectation("", 6..=-5), + expectation("", 6..=-6), ]; for expectation in &cases { @@ -293,7 +298,7 @@ mod tests { let actual = action( &word, &Arguments { - indexes: expectation.options(), + range: expectation.range(), cell_paths: None, graphemes: false, }, @@ -308,9 +313,10 @@ mod tests { fn use_utf8_bytes() { let word = Value::string(String::from("๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ"), Span::test_data()); + let range: RangeHelper = (4..=5).into(); let options = Arguments { cell_paths: None, - indexes: Substring(4, 5), + range: range.into(), graphemes: false, }; diff --git a/crates/nu-command/tests/commands/bytes/at.rs b/crates/nu-command/tests/commands/bytes/at.rs index 4a8d034b3f..c01beeb6f0 100644 --- a/crates/nu-command/tests/commands/bytes/at.rs +++ b/crates/nu-command/tests/commands/bytes/at.rs @@ -15,7 +15,12 @@ pub fn returns_error_for_relative_range_on_infinite_stream() { pub fn returns_bytes_for_fixed_range_on_infinite_stream_including_end() { let actual = nu!("nu --testbin iecho 3 | bytes at ..10 | decode"); assert_eq!( - actual.out, "33333", + actual.out, "333333", + "Expected bytes from index 0 to 10, but got different output" + ); + let actual = nu!("nu --testbin iecho 3 | bytes at ..10 | decode"); + assert_eq!( + actual.out, "333333", "Expected bytes from index 0 to 10, but got different output" ); } @@ -24,7 +29,7 @@ pub fn returns_bytes_for_fixed_range_on_infinite_stream_including_end() { pub fn returns_bytes_for_fixed_range_on_infinite_stream_excluding_end() { let actual = nu!("nu --testbin iecho 3 | bytes at ..<9 | decode"); assert_eq!( - actual.out, "3333", + actual.out, "33333", "Expected bytes from index 0 to 8, but got different output" ); } diff --git a/crates/nu-command/tests/commands/slice.rs b/crates/nu-command/tests/commands/slice.rs index d7639ad8dc..d6a3e0b888 100644 --- a/crates/nu-command/tests/commands/slice.rs +++ b/crates/nu-command/tests/commands/slice.rs @@ -66,3 +66,17 @@ fn negative_indices() { assert_eq!(actual.out, "1"); }); } + +#[test] +fn zero_to_zero_exclusive() { + let actual = nu!(r#"[0 1 2 3] | slice 0..<0 | to nuon"#); + + assert_eq!(actual.out, "[]"); +} + +#[test] +fn to_negative_one_inclusive() { + let actual = nu!(r#"[0 1 2 3] | slice 2..-1 | to nuon"#); + + assert_eq!(actual.out, "[2, 3]"); +} diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs index e06a651088..adefbdf9f0 100644 --- a/crates/nu-protocol/src/pipeline/byte_stream.rs +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -295,8 +295,8 @@ impl ByteStream { match range.distance() { Bound::Unbounded => stream, - Bound::Included(distance) => stream.and_then(|s| s.take(val_span, distance)), - Bound::Excluded(distance) => stream.and_then(|s| s.take(val_span, distance - 1)), + Bound::Included(distance) => stream.and_then(|s| s.take(val_span, distance + 1)), + Bound::Excluded(distance) => stream.and_then(|s| s.take(val_span, distance)), } } } diff --git a/crates/nu-protocol/src/value/range.rs b/crates/nu-protocol/src/value/range.rs index 268b24b623..9ce9c6fa54 100644 --- a/crates/nu-protocol/src/value/range.rs +++ b/crates/nu-protocol/src/value/range.rs @@ -5,10 +5,12 @@ use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, fmt::Display}; mod int_range { - use crate::{ast::RangeInclusion, ShellError, Signals, Span, Value}; + use crate::{ast::RangeInclusion, FromValue, ShellError, Signals, Span, Value}; use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, fmt::Display, ops::Bound}; + use super::Range; + #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct IntRange { pub(crate) start: i64, @@ -83,10 +85,9 @@ mod int_range { // Resolves the absolute start position given the length of the input value pub fn absolute_start(&self, len: u64) -> u64 { - let max_index = len - 1; match self.start { start if start < 0 => len.saturating_sub(start.unsigned_abs()), - start => max_index.min(start as u64), + start => len.min(start.unsigned_abs()), } } @@ -95,8 +96,9 @@ mod int_range { pub fn distance(&self) -> Bound { match self.end { Bound::Unbounded => Bound::Unbounded, - Bound::Included(end) if self.start > end => Bound::Included(0), - Bound::Excluded(end) if self.start > end => Bound::Excluded(0), + Bound::Included(end) | Bound::Excluded(end) if self.start > end => { + Bound::Excluded(0) + } Bound::Included(end) => Bound::Included((end - self.start) as u64), Bound::Excluded(end) => Bound::Excluded((end - self.start) as u64), } @@ -107,20 +109,32 @@ mod int_range { } pub fn absolute_end(&self, len: u64) -> Bound { - let max_index = len - 1; match self.end { Bound::Unbounded => Bound::Unbounded, - Bound::Included(i) => Bound::Included(match i { - i if i < 0 => len.saturating_sub(i.unsigned_abs()), - i => max_index.min(i as u64), - }), + Bound::Included(i) => match i { + i if i < 0 => Bound::Excluded(len.saturating_sub((i + 1).unsigned_abs())), + i => Bound::Included((len - 1).min(i.unsigned_abs())), + }, Bound::Excluded(i) => Bound::Excluded(match i { i if i < 0 => len.saturating_sub(i.unsigned_abs()), - i => len.min(i as u64), + i => len.min(i.unsigned_abs()), }), } } + pub fn absolute_bounds(&self, len: usize) -> (usize, Bound) { + let start = self.absolute_start(len as u64) as usize; + let end = self.absolute_end(len as u64).map(|e| e as usize); + match end { + Bound::Excluded(end) | Bound::Included(end) if end < start => { + (start, Bound::Excluded(start)) + } + Bound::Excluded(end) => (start, Bound::Excluded(end)), + Bound::Included(end) => (start, Bound::Included(end)), + Bound::Unbounded => (start, Bound::Unbounded), + } + } + pub fn step(&self) -> i64 { self.step } @@ -246,6 +260,20 @@ mod int_range { } } + impl FromValue for IntRange { + fn from_value(v: Value) -> Result { + let span = v.span(); + let range = Range::from_value(v)?; + match range { + Range::IntRange(v) => Ok(v), + Range::FloatRange(_) => Err(ShellError::TypeMismatch { + err_message: "expected an int range".into(), + span, + }), + } + } + } + pub struct Iter { current: Option, step: i64, diff --git a/crates/nu-protocol/tests/pipeline/byte_stream.rs b/crates/nu-protocol/tests/pipeline/byte_stream.rs index 3d06573461..7478d8184a 100644 --- a/crates/nu-protocol/tests/pipeline/byte_stream.rs +++ b/crates/nu-protocol/tests/pipeline/byte_stream.rs @@ -16,6 +16,23 @@ pub fn test_simple_positive_slice_exclusive() { assert_eq!(result, "Hello"); } +#[test] +pub fn test_simple_positive_slice_exclusive_streaming() { + let data = b"Hello World".to_vec(); + let stream = ByteStream::read_binary(data, Span::test_data(), Signals::empty()); + let sliced = stream + .with_known_size(None) + .slice( + Span::test_data(), + Span::test_data(), + create_range(0, 5, RangeInclusion::RightExclusive), + ) + .unwrap(); + let result = sliced.into_bytes().unwrap(); + let result = String::from_utf8(result).unwrap(); + assert_eq!(result, "Hello"); +} + #[test] pub fn test_negative_start_exclusive() { let data = b"Hello World".to_vec();