Improves commands that support range input (#13113)

# Description Fixes: #13105 Fixes: #13077 This PR makes `str substring` and `bytes at` work better with negative indexes. It also fixes the incorrect range semantics of `detect columns -c` in some cases. # User-Facing Changes `str substring` and `bytes at` no longer return an error if the start index is larger than the end index. It makes sense to return an empty string or empty bytes directly. ### Before ```nushell # str substring ❯ ("aaa" | str substring 2..-3) == "" Error: nu::shell::type_mismatch × Type mismatch. ╭─[entry #23:1:10] 1 │ ("aaa" | str substring 2..-3) == "" · ──────┬────── · ╰── End must be greater than or equal to Start 2 │ true ╰──── # bytes at ❯ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) Error: nu::shell::type_mismatch × Type mismatch. ╭─[entry #27:1:25] 1 │ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) · ────┬─── · ╰── End must be greater than or equal to Start ╰──── ``` ### After ```nushell # str substring ❯ ("aaa" | str substring 2..-3) == "" true # bytes at ❯ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) true ``` # Tests + Formatting Added some tests and adjusted existing tests.
2025-07-25 21:01:51 +02:00 · 2024-06-18 20:19:13 +08:00
parent ae6489f04b
commit 28ed0fe700
5 changed files with 85 additions and 98 deletions
--- a/crates/nu-cmd-base/src/util.rs
+++ b/crates/nu-cmd-base/src/util.rs
@ -20,13 +20,14 @@ pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf
 type MakeRangeError = fn(&str, Span) -> ShellError;
 /// Returns an inclusive pair of boundaries for the given `range`.
 pub fn process_range(range: &Range) -> Result<(isize, isize), MakeRangeError> {
    match range {
        Range::IntRange(range) => {
            let start = range.start().try_into().unwrap_or(0);
            let end = match range.end() {
-                Bound::Included(v) => (v + 1) as isize,
+                Bound::Included(v) => v as isize,
-                Bound::Excluded(v) => v as isize,
+                Bound::Excluded(v) => (v - 1) as isize,
                Bound::Unbounded => isize::MAX,
            };
            Ok((start, end))
--- a/crates/nu-command/src/bytes/at.rs
+++ b/crates/nu-command/src/bytes/at.rs
@ -128,48 +128,24 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
    let range = &args.indexes;
    match input {
        Value::Binary { val, .. } => {
            use std::cmp::{self, Ordering};
            let len = val.len() as isize;
            let start = if range.0 < 0 { range.0 + len } else { range.0 };
            let end = if range.1 < 0 { range.1 + len } else { range.1 };
-            let end = if range.1 < 0 {
+            if start > end {
                cmp::max(range.1 + len, 0)
            } else {
                range.1
            };
            if start < len && end >= 0 {
                match start.cmp(&end) {
                    Ordering::Equal => Value::binary(vec![], head),
                    Ordering::Greater => Value::error(
                        ShellError::TypeMismatch {
                            err_message: "End must be greater than or equal to Start".to_string(),
                            span: head,
                        },
                        head,
                    ),
                    Ordering::Less => Value::binary(
                        if end == isize::MAX {
                            val.iter()
                                .skip(start as usize)
                                .copied()
                                .collect::<Vec<u8>>()
                        } else {
                            val.iter()
                                .skip(start as usize)
                                .take((end - start) as usize)
                                .copied()
                                .collect()
                        },
                        head,
                    ),
                }
            } else {
                Value::binary(vec![], head)
            } else {
                let val_iter = val.iter().skip(start as usize);
                Value::binary(
                    if end == isize::MAX {
                        val_iter.copied().collect::<Vec<u8>>()
                    } else {
                        val_iter.take((end - start + 1) as usize).copied().collect()
                    },
                    head,
                )
            }
        }
        Value::Error { .. } => input.clone(),
        other => Value::error(
--- a/crates/nu-command/src/strings/str_/substring.rs
+++ b/crates/nu-command/src/strings/str_/substring.rs
@ -5,7 +5,6 @@ use nu_cmd_base::{
 };
 use nu_engine::command_prelude::*;
 use nu_protocol::{engine::StateWorkingSet, Range};
 use std::cmp::Ordering;
 use unicode_segmentation::UnicodeSegmentation;
 #[derive(Clone)]
@ -151,6 +150,11 @@ impl Command for SubCommand {
                example: " '🇯🇵ほげ ふが ぴよ' | str substring --grapheme-clusters 4..5",
                result: Some(Value::test_string("ふが")),
            },
            Example {
                description: "sub string by negative index",
                example: " 'good nushell' | str substring 5..-2",
                result: Some(Value::test_string("nushel")),
            },
        ]
    }
 }
@ -167,22 +171,15 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
                options.0
            };
            let end: isize = if options.1 < 0 {
-                std::cmp::max(len + options.1, 0)
+                options.1 + len
            } else {
                options.1
            };
-            if start < len && end >= 0 {
+            if start > end {
-                match start.cmp(&end) {
+                Value::string("", head)
-                    Ordering::Equal => Value::string("", head),
+            } else {
-                    Ordering::Greater => Value::error(
+                Value::string(
                        ShellError::TypeMismatch {
                            err_message: "End must be greater than or equal to Start".to_string(),
                            span: head,
                        },
                        head,
                    ),
                    Ordering::Less => Value::string(
                    {
                        if end == isize::MAX {
                            if args.graphemes {
@ -199,24 +196,21 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
                        } else if args.graphemes {
                            s.graphemes(true)
                                .skip(start as usize)
-                                    .take((end - start) as usize)
+                                .take((end - start + 1) as usize)
                                .collect::<Vec<&str>>()
                                .join("")
                        } else {
                            String::from_utf8_lossy(
                                &s.bytes()
                                    .skip(start as usize)
-                                        .take((end - start) as usize)
+                                    .take((end - start + 1) as usize)
                                    .collect::<Vec<_>>(),
                            )
                            .to_string()
                        }
                    },
                    head,
-                    ),
+                )
                }
            } else {
                Value::string("", head)
            }
        }
        // Propagate errors by explicitly matching them before the final case.
@ -243,6 +237,7 @@ mod tests {
        test_examples(SubCommand {})
    }
    #[derive(Debug)]
    struct Expectation<'a> {
        options: (isize, isize),
        expected: &'a str,
@ -266,18 +261,19 @@ mod tests {
        let word = Value::test_string("andres");
        let cases = vec![
-            expectation("a", (0, 1)),
+            expectation("a", (0, 0)),
-            expectation("an", (0, 2)),
+            expectation("an", (0, 1)),
-            expectation("and", (0, 3)),
+            expectation("and", (0, 2)),
-            expectation("andr", (0, 4)),
+            expectation("andr", (0, 3)),
-            expectation("andre", (0, 5)),
+            expectation("andre", (0, 4)),
            expectation("andres", (0, 5)),
            expectation("andres", (0, 6)),
-            expectation("", (0, -6)),
+            expectation("a", (0, -6)),
-            expectation("a", (0, -5)),
+            expectation("an", (0, -5)),
-            expectation("an", (0, -4)),
+            expectation("and", (0, -4)),
-            expectation("and", (0, -3)),
+            expectation("andr", (0, -3)),
-            expectation("andr", (0, -2)),
+            expectation("andre", (0, -2)),
-            expectation("andre", (0, -1)),
+            expectation("andres", (0, -1)),
            // str substring [ -4 , _ ]
            // str substring   -4 ,
            expectation("dres", (-4, isize::MAX)),
@ -292,6 +288,7 @@ mod tests {
        ];
        for expectation in &cases {
            println!("{:?}", expectation);
            let expected = expectation.expected;
            let actual = action(
                &word,
--- a/crates/nu-command/tests/commands/detect_columns.rs
+++ b/crates/nu-command/tests/commands/detect_columns.rs
@ -31,12 +31,12 @@ fn detect_columns_with_legacy_and_flag_c() {
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
            "[[c1,c3,c4,c5]; ['a b',c,d,e]]",
-            "0..0",
+            "0..1",
        ),
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
            "[[c1,c2,c3,c4]; [a,b,c,'d e']]",
-            "(-2)..(-2)",
+            "(-2)..(-1)",
        ),
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
@ -72,10 +72,10 @@ drwxr-xr-x  2 root root 4.0K Mar 20 08:28 =(char nl)
 drwxr-xr-x  4 root root 4.0K Mar 20 08:18 ~(char nl)
 -rw-r--r--  1 root root 3.0K Mar 20 07:23 ~asdf(char nl)\"";
    let expected = "[
-['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column8'];
+['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column7', 'column8'];
-['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20 08:28', '='],
+['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20', '08:28', '='],
-['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20 08:18', '~'],
+['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20', '08:18', '~'],
-['-rw-r--r--',  '1', 'root', 'root', '3.0K', 'Mar 20 07:23', '~asdf']
+['-rw-r--r--',  '1', 'root', 'root', '3.0K', 'Mar 20', '07:23', '~asdf']
 ]";
    let range = "5..6";
    let cmd = format!(
--- a/crates/nu-command/tests/commands/str_/mod.rs
+++ b/crates/nu-command/tests/commands/str_/mod.rs
@ -255,7 +255,7 @@ fn substrings_the_input() {
 }
 #[test]
-fn substring_errors_if_start_index_is_greater_than_end_index() {
+fn substring_empty_if_start_index_is_greater_than_end_index() {
    Playground::setup("str_test_9", |dirs, sandbox| {
        sandbox.with_files(&[FileWithContent(
            "sample.toml",
@ -270,12 +270,10 @@ fn substring_errors_if_start_index_is_greater_than_end_index() {
            r#"
                 open sample.toml
                 | str substring 6..4 fortune.teller.phone
                 | get fortune.teller.phone
             "#
        ));
-
+        assert_eq!(actual.out, "")
        assert!(actual
            .err
            .contains("End must be greater than or equal to Start"))
    })
 }
@ -375,6 +373,21 @@ fn substrings_the_input_and_treats_end_index_as_length_if_blank_end_index_given(
    })
 }
 #[test]
 fn substring_by_negative_index() {
    Playground::setup("str_test_13", |dirs, _| {
        let actual = nu!(
            cwd: dirs.test(), "'apples' | str substring 0..-1",
        );
        assert_eq!(actual.out, "apples");
        let actual = nu!(
            cwd: dirs.test(), "'apples' | str substring 0..<-1",
        );
        assert_eq!(actual.out, "apple");
    })
 }
 #[test]
 fn str_reverse() {
    let actual = nu!(r#"