fix range bugs in str substring, str index-of, slice, bytes at (#14863)

- fixes #14769

# Description

## Bugs

-   `str substring 0..<0`

When passed a range containing no elements, for non-zero cases `str
substring` behaves correctly:
 
    ```nushell
    ("hello world" | str substring 1..<1) == ""
    # => true
    ```

    but if the range is `0..<0`, it returns the whole string instead

    ```nushell
    "hello world" | str substring 0..<0
    # => hello world
    ```
-   `[0 1 2] | range 0..<0`
    Similar behavior to `str substring`
-   `str index-of`
    - off-by-one on end bounds
    - underflow on negative start bounds
- `bytes at` has inconsistent behavior, works correctly when the size is
known, returns one byte less when it's not known (streaming)
This can be demonstrated by comparing the outputs of following snippets
    ```nushell
    "hello world" | into binary | bytes at ..<5 | decode
    # => hello

"hello world" | into binary | chunks 1 | bytes collect | bytes at ..<5 |
decode
    # => hell
    ```
- `bytes at` panics on decreasing (`5..3`) ranges if the input size is
known. Does not panic with streaming input.

## Changes

- implement `FromValue` for `IntRange`, as it is very common to use
integer ranges as arguments
- `IntRange::absolute_start` can now point one-past-end
- `IntRange::absolute_end` converts relative `Included` bounds to
absolute `Excluded` bounds
- `IntRange::absolute_bounds` is a convenience method that calls the
other `absolute_*` methods and transforms reverse ranges to empty at
`start` (`5..3` => `5..<5`)
- refactored `str substring` tests to allow empty exclusive range tests
- fix the `0..<0` case for `str substring` and `str index-of`
- `IntRange::distance` never returns `Included(0)`

  As a general rule `Included(n) == Excluded(n + 1)`.
  
This makes returning `Included(0)` bug prone as users of the function
will likely rely on this general rule and cause bugs.
- `ByteStream::slice` no longer has an off-by-one on inputs without a
known size. This affected `bytes at`.
- `bytes at` no longer panics on reverse ranges
- `bytes at` is now consistent between streaming and non streaming
inputs.

# User-Facing Changes
There should be no noticeable changes other than the bugfix.

# Tests + Formatting

- 🟢 toolkit fmt
- 🟢 toolkit clippy
- 🟢 toolkit test
- 🟢 toolkit test stdlib

# After Submitting
N/A
This commit is contained in:
Bahex
2025-01-30 15:50:01 +03:00
committed by GitHub
parent 948965d42f
commit b55ed69c92
9 changed files with 281 additions and 232 deletions

View File

@ -295,8 +295,8 @@ impl ByteStream {
match range.distance() {
Bound::Unbounded => stream,
Bound::Included(distance) => stream.and_then(|s| s.take(val_span, distance)),
Bound::Excluded(distance) => stream.and_then(|s| s.take(val_span, distance - 1)),
Bound::Included(distance) => stream.and_then(|s| s.take(val_span, distance + 1)),
Bound::Excluded(distance) => stream.and_then(|s| s.take(val_span, distance)),
}
}
}

View File

@ -5,10 +5,12 @@ use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display};
mod int_range {
use crate::{ast::RangeInclusion, ShellError, Signals, Span, Value};
use crate::{ast::RangeInclusion, FromValue, ShellError, Signals, Span, Value};
use serde::{Deserialize, Serialize};
use std::{cmp::Ordering, fmt::Display, ops::Bound};
use super::Range;
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct IntRange {
pub(crate) start: i64,
@ -83,10 +85,9 @@ mod int_range {
// Resolves the absolute start position given the length of the input value
pub fn absolute_start(&self, len: u64) -> u64 {
let max_index = len - 1;
match self.start {
start if start < 0 => len.saturating_sub(start.unsigned_abs()),
start => max_index.min(start as u64),
start => len.min(start.unsigned_abs()),
}
}
@ -95,8 +96,9 @@ mod int_range {
pub fn distance(&self) -> Bound<u64> {
match self.end {
Bound::Unbounded => Bound::Unbounded,
Bound::Included(end) if self.start > end => Bound::Included(0),
Bound::Excluded(end) if self.start > end => Bound::Excluded(0),
Bound::Included(end) | Bound::Excluded(end) if self.start > end => {
Bound::Excluded(0)
}
Bound::Included(end) => Bound::Included((end - self.start) as u64),
Bound::Excluded(end) => Bound::Excluded((end - self.start) as u64),
}
@ -107,20 +109,32 @@ mod int_range {
}
pub fn absolute_end(&self, len: u64) -> Bound<u64> {
let max_index = len - 1;
match self.end {
Bound::Unbounded => Bound::Unbounded,
Bound::Included(i) => Bound::Included(match i {
i if i < 0 => len.saturating_sub(i.unsigned_abs()),
i => max_index.min(i as u64),
}),
Bound::Included(i) => match i {
i if i < 0 => Bound::Excluded(len.saturating_sub((i + 1).unsigned_abs())),
i => Bound::Included((len - 1).min(i.unsigned_abs())),
},
Bound::Excluded(i) => Bound::Excluded(match i {
i if i < 0 => len.saturating_sub(i.unsigned_abs()),
i => len.min(i as u64),
i => len.min(i.unsigned_abs()),
}),
}
}
pub fn absolute_bounds(&self, len: usize) -> (usize, Bound<usize>) {
let start = self.absolute_start(len as u64) as usize;
let end = self.absolute_end(len as u64).map(|e| e as usize);
match end {
Bound::Excluded(end) | Bound::Included(end) if end < start => {
(start, Bound::Excluded(start))
}
Bound::Excluded(end) => (start, Bound::Excluded(end)),
Bound::Included(end) => (start, Bound::Included(end)),
Bound::Unbounded => (start, Bound::Unbounded),
}
}
pub fn step(&self) -> i64 {
self.step
}
@ -246,6 +260,20 @@ mod int_range {
}
}
impl FromValue for IntRange {
fn from_value(v: Value) -> Result<Self, ShellError> {
let span = v.span();
let range = Range::from_value(v)?;
match range {
Range::IntRange(v) => Ok(v),
Range::FloatRange(_) => Err(ShellError::TypeMismatch {
err_message: "expected an int range".into(),
span,
}),
}
}
}
pub struct Iter {
current: Option<i64>,
step: i64,