Improves commands that support range input (#13113)

# Description Fixes: #13105 Fixes: #13077 This pr makes `str substring`, `bytes at` work better with negative index. And it also fixes the false range semantic on `detect columns -c` in some cases. # User-Facing Changes For `str substring`, `bytes at`, it will no-longer return an error if start index is larger than end index. It makes sense to return an empty string of empty bytes directly. ### Before ```nushell # str substring ❯ ("aaa" | str substring 2..-3) == "" Error: nu:🐚:type_mismatch × Type mismatch. ╭─[entry #23:1:10] 1 │ ("aaa" | str substring 2..-3) == "" · ──────┬────── · ╰── End must be greater than or equal to Start 2 │ true ╰──── # bytes at ❯ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) Error: nu:🐚:type_mismatch × Type mismatch. ╭─[entry #27:1:25] 1 │ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) · ────┬─── · ╰── End must be greater than or equal to Start ╰──── ``` ### After ```nushell # str substring ❯ ("aaa" | str substring 2..-3) == "" true # bytes at ❯ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8) true ``` # Tests + Formatting Added some tests, adjust existing tests
2025-08-09 02:05:02 +02:00 · 2024-06-18 20:19:13 +08:00
parent ae6489f04b
commit 28ed0fe700
5 changed files with 85 additions and 98 deletions
--- a/crates/nu-cmd-base/src/util.rs
+++ b/crates/nu-cmd-base/src/util.rs
@ -20,13 +20,14 @@ pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf

 type MakeRangeError = fn(&str, Span) -> ShellError;

+/// Returns a inclusive pair of boundary in given `range`.
 pub fn process_range(range: &Range) -> Result<(isize, isize), MakeRangeError> {
    match range {
        Range::IntRange(range) => {
            let start = range.start().try_into().unwrap_or(0);
            let end = match range.end() {
-                Bound::Included(v) => (v + 1) as isize,
-                Bound::Excluded(v) => v as isize,
+                Bound::Included(v) => v as isize,
+                Bound::Excluded(v) => (v - 1) as isize,
                Bound::Unbounded => isize::MAX,
            };
            Ok((start, end))
--- a/crates/nu-command/src/bytes/at.rs
+++ b/crates/nu-command/src/bytes/at.rs
@ -128,48 +128,24 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
    let range = &args.indexes;
    match input {
        Value::Binary { val, .. } => {
-            use std::cmp::{self, Ordering};
            let len = val.len() as isize;
-
            let start = if range.0 < 0 { range.0 + len } else { range.0 };
+            let end = if range.1 < 0 { range.1 + len } else { range.1 };

-            let end = if range.1 < 0 {
-                cmp::max(range.1 + len, 0)
-            } else {
-                range.1
-            };
-
-            if start < len && end >= 0 {
-                match start.cmp(&end) {
-                    Ordering::Equal => Value::binary(vec![], head),
-                    Ordering::Greater => Value::error(
-                        ShellError::TypeMismatch {
-                            err_message: "End must be greater than or equal to Start".to_string(),
-                            span: head,
-                        },
-                        head,
-                    ),
-                    Ordering::Less => Value::binary(
-                        if end == isize::MAX {
-                            val.iter()
-                                .skip(start as usize)
-                                .copied()
-                                .collect::<Vec<u8>>()
-                        } else {
-                            val.iter()
-                                .skip(start as usize)
-                                .take((end - start) as usize)
-                                .copied()
-                                .collect()
-                        },
-                        head,
-                    ),
-                }
-            } else {
+            if start > end {
                Value::binary(vec![], head)
+            } else {
+                let val_iter = val.iter().skip(start as usize);
+                Value::binary(
+                    if end == isize::MAX {
+                        val_iter.copied().collect::<Vec<u8>>()
+                    } else {
+                        val_iter.take((end - start + 1) as usize).copied().collect()
+                    },
+                    head,
+                )
            }
        }
-
        Value::Error { .. } => input.clone(),

        other => Value::error(
--- a/crates/nu-command/src/strings/str_/substring.rs
+++ b/crates/nu-command/src/strings/str_/substring.rs
@ -5,7 +5,6 @@ use nu_cmd_base::{
 };
 use nu_engine::command_prelude::*;
 use nu_protocol::{engine::StateWorkingSet, Range};
-use std::cmp::Ordering;
 use unicode_segmentation::UnicodeSegmentation;

 #[derive(Clone)]
@ -151,6 +150,11 @@ impl Command for SubCommand {
                example: " '🇯🇵ほげ ふが ぴよ' | str substring --grapheme-clusters 4..5",
                result: Some(Value::test_string("ふが")),
            },
+            Example {
+                description: "sub string by negative index",
+                example: " 'good nushell' | str substring 5..-2",
+                result: Some(Value::test_string("nushel")),
+            },
        ]
    }
 }
@ -167,56 +171,46 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
                options.0
            };
            let end: isize = if options.1 < 0 {
-                std::cmp::max(len + options.1, 0)
+                options.1 + len
            } else {
                options.1
            };

-            if start < len && end >= 0 {
-                match start.cmp(&end) {
-                    Ordering::Equal => Value::string("", head),
-                    Ordering::Greater => Value::error(
-                        ShellError::TypeMismatch {
-                            err_message: "End must be greater than or equal to Start".to_string(),
-                            span: head,
-                        },
-                        head,
-                    ),
-                    Ordering::Less => Value::string(
-                        {
-                            if end == isize::MAX {
-                                if args.graphemes {
-                                    s.graphemes(true)
-                                        .skip(start as usize)
-                                        .collect::<Vec<&str>>()
-                                        .join("")
-                                } else {
-                                    String::from_utf8_lossy(
-                                        &s.bytes().skip(start as usize).collect::<Vec<_>>(),
-                                    )
-                                    .to_string()
-                                }
-                            } else if args.graphemes {
+            if start > end {
+                Value::string("", head)
+            } else {
+                Value::string(
+                    {
+                        if end == isize::MAX {
+                            if args.graphemes {
                                s.graphemes(true)
                                    .skip(start as usize)
-                                    .take((end - start) as usize)
                                    .collect::<Vec<&str>>()
                                    .join("")
                            } else {
                                String::from_utf8_lossy(
-                                    &s.bytes()
-                                        .skip(start as usize)
-                                        .take((end - start) as usize)
-                                        .collect::<Vec<_>>(),
+                                    &s.bytes().skip(start as usize).collect::<Vec<_>>(),
                                )
                                .to_string()
                            }
-                        },
-                        head,
-                    ),
-                }
-            } else {
-                Value::string("", head)
+                        } else if args.graphemes {
+                            s.graphemes(true)
+                                .skip(start as usize)
+                                .take((end - start + 1) as usize)
+                                .collect::<Vec<&str>>()
+                                .join("")
+                        } else {
+                            String::from_utf8_lossy(
+                                &s.bytes()
+                                    .skip(start as usize)
+                                    .take((end - start + 1) as usize)
+                                    .collect::<Vec<_>>(),
+                            )
+                            .to_string()
+                        }
+                    },
+                    head,
+                )
            }
        }
        // Propagate errors by explicitly matching them before the final case.
@ -243,6 +237,7 @@ mod tests {

        test_examples(SubCommand {})
    }
+    #[derive(Debug)]
    struct Expectation<'a> {
        options: (isize, isize),
        expected: &'a str,
@ -266,18 +261,19 @@ mod tests {
        let word = Value::test_string("andres");

        let cases = vec![
-            expectation("a", (0, 1)),
-            expectation("an", (0, 2)),
-            expectation("and", (0, 3)),
-            expectation("andr", (0, 4)),
-            expectation("andre", (0, 5)),
+            expectation("a", (0, 0)),
+            expectation("an", (0, 1)),
+            expectation("and", (0, 2)),
+            expectation("andr", (0, 3)),
+            expectation("andre", (0, 4)),
+            expectation("andres", (0, 5)),
            expectation("andres", (0, 6)),
-            expectation("", (0, -6)),
-            expectation("a", (0, -5)),
-            expectation("an", (0, -4)),
-            expectation("and", (0, -3)),
-            expectation("andr", (0, -2)),
-            expectation("andre", (0, -1)),
+            expectation("a", (0, -6)),
+            expectation("an", (0, -5)),
+            expectation("and", (0, -4)),
+            expectation("andr", (0, -3)),
+            expectation("andre", (0, -2)),
+            expectation("andres", (0, -1)),
            // str substring [ -4 , _ ]
            // str substring   -4 ,
            expectation("dres", (-4, isize::MAX)),
@ -292,6 +288,7 @@ mod tests {
        ];

        for expectation in &cases {
+            println!("{:?}", expectation);
            let expected = expectation.expected;
            let actual = action(
                &word,
--- a/crates/nu-command/tests/commands/detect_columns.rs
+++ b/crates/nu-command/tests/commands/detect_columns.rs
@ -31,12 +31,12 @@ fn detect_columns_with_legacy_and_flag_c() {
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
            "[[c1,c3,c4,c5]; ['a b',c,d,e]]",
-            "0..0",
+            "0..1",
        ),
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
            "[[c1,c2,c3,c4]; [a,b,c,'d e']]",
-            "(-2)..(-2)",
+            "(-2)..(-1)",
        ),
        (
            "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
@ -72,10 +72,10 @@ drwxr-xr-x  2 root root 4.0K Mar 20 08:28 =(char nl)
 drwxr-xr-x  4 root root 4.0K Mar 20 08:18 ~(char nl)
 -rw-r--r--  1 root root 3.0K Mar 20 07:23 ~asdf(char nl)\"";
    let expected = "[
-['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column8'];
-['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20 08:28', '='],
-['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20 08:18', '~'],
-['-rw-r--r--',  '1', 'root', 'root', '3.0K', 'Mar 20 07:23', '~asdf']
+['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column7', 'column8'];
+['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20', '08:28', '='],
+['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20', '08:18', '~'],
+['-rw-r--r--',  '1', 'root', 'root', '3.0K', 'Mar 20', '07:23', '~asdf']
 ]";
    let range = "5..6";
    let cmd = format!(
--- a/crates/nu-command/tests/commands/str_/mod.rs
+++ b/crates/nu-command/tests/commands/str_/mod.rs
@ -255,7 +255,7 @@ fn substrings_the_input() {
 }

 #[test]
-fn substring_errors_if_start_index_is_greater_than_end_index() {
+fn substring_empty_if_start_index_is_greater_than_end_index() {
    Playground::setup("str_test_9", |dirs, sandbox| {
        sandbox.with_files(&[FileWithContent(
            "sample.toml",
@ -270,12 +270,10 @@ fn substring_errors_if_start_index_is_greater_than_end_index() {
            r#"
                 open sample.toml
                 | str substring 6..4 fortune.teller.phone
+                 | get fortune.teller.phone
             "#
        ));
-
-        assert!(actual
-            .err
-            .contains("End must be greater than or equal to Start"))
+        assert_eq!(actual.out, "")
    })
 }

@ -375,6 +373,21 @@ fn substrings_the_input_and_treats_end_index_as_length_if_blank_end_index_given(
    })
 }

+#[test]
+fn substring_by_negative_index() {
+    Playground::setup("str_test_13", |dirs, _| {
+        let actual = nu!(
+            cwd: dirs.test(), "'apples' | str substring 0..-1",
+        );
+        assert_eq!(actual.out, "apples");
+
+        let actual = nu!(
+            cwd: dirs.test(), "'apples' | str substring 0..<-1",
+        );
+        assert_eq!(actual.out, "apple");
+    })
+}
+
 #[test]
 fn str_reverse() {
    let actual = nu!(r#"