Improves commands that support range input (#13113)

# Description
Fixes: #13105
Fixes: #13077

This pr makes `str substring`, `bytes at` work better with negative
index.

And it also fixes the false range semantic on `detect columns -c` in
some cases.

# User-Facing Changes
For `str substring`, `bytes at`, it will no-longer return an error if
start index is larger than end index. It makes sense to return an empty
string of empty bytes directly.

### Before
```nushell
# str substring
❯ ("aaa" | str substring 2..-3) == ""
Error: nu:🐚:type_mismatch

  × Type mismatch.
   ╭─[entry #23:1:10]
 1 │ ("aaa" | str substring 2..-3) == ""
   ·          ──────┬──────
   ·                ╰── End must be greater than or equal to Start
 2 │ true
   ╰────

# bytes at
❯ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8)
Error: nu:🐚:type_mismatch

  × Type mismatch.
   ╭─[entry #27:1:25]
 1 │ ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8)
   ·                         ────┬───
   ·                             ╰── End must be greater than or equal to Start
   ╰────
```
### After
```nushell
# str substring
❯ ("aaa" | str substring 2..-3) == ""
true

# bytes at
❯  ("aaa" | encode utf-8 | bytes at 2..-3) == ("" | encode utf-8)
true
```
# Tests + Formatting
Added some tests, adjust existing tests
This commit is contained in:
Wind 2024-06-18 20:19:13 +08:00 committed by GitHub
parent ae6489f04b
commit 28ed0fe700
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 85 additions and 98 deletions

View File

@ -20,13 +20,14 @@ pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf
type MakeRangeError = fn(&str, Span) -> ShellError;
/// Returns a inclusive pair of boundary in given `range`.
pub fn process_range(range: &Range) -> Result<(isize, isize), MakeRangeError> {
match range {
Range::IntRange(range) => {
let start = range.start().try_into().unwrap_or(0);
let end = match range.end() {
Bound::Included(v) => (v + 1) as isize,
Bound::Excluded(v) => v as isize,
Bound::Included(v) => v as isize,
Bound::Excluded(v) => (v - 1) as isize,
Bound::Unbounded => isize::MAX,
};
Ok((start, end))

View File

@ -128,48 +128,24 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
let range = &args.indexes;
match input {
Value::Binary { val, .. } => {
use std::cmp::{self, Ordering};
let len = val.len() as isize;
let start = if range.0 < 0 { range.0 + len } else { range.0 };
let end = if range.1 < 0 { range.1 + len } else { range.1 };
let end = if range.1 < 0 {
cmp::max(range.1 + len, 0)
} else {
range.1
};
if start < len && end >= 0 {
match start.cmp(&end) {
Ordering::Equal => Value::binary(vec![], head),
Ordering::Greater => Value::error(
ShellError::TypeMismatch {
err_message: "End must be greater than or equal to Start".to_string(),
span: head,
},
head,
),
Ordering::Less => Value::binary(
if end == isize::MAX {
val.iter()
.skip(start as usize)
.copied()
.collect::<Vec<u8>>()
} else {
val.iter()
.skip(start as usize)
.take((end - start) as usize)
.copied()
.collect()
},
head,
),
}
} else {
if start > end {
Value::binary(vec![], head)
} else {
let val_iter = val.iter().skip(start as usize);
Value::binary(
if end == isize::MAX {
val_iter.copied().collect::<Vec<u8>>()
} else {
val_iter.take((end - start + 1) as usize).copied().collect()
},
head,
)
}
}
Value::Error { .. } => input.clone(),
other => Value::error(

View File

@ -5,7 +5,6 @@ use nu_cmd_base::{
};
use nu_engine::command_prelude::*;
use nu_protocol::{engine::StateWorkingSet, Range};
use std::cmp::Ordering;
use unicode_segmentation::UnicodeSegmentation;
#[derive(Clone)]
@ -151,6 +150,11 @@ impl Command for SubCommand {
example: " '🇯🇵ほげ ふが ぴよ' | str substring --grapheme-clusters 4..5",
result: Some(Value::test_string("ふが")),
},
Example {
description: "sub string by negative index",
example: " 'good nushell' | str substring 5..-2",
result: Some(Value::test_string("nushel")),
},
]
}
}
@ -167,56 +171,46 @@ fn action(input: &Value, args: &Arguments, head: Span) -> Value {
options.0
};
let end: isize = if options.1 < 0 {
std::cmp::max(len + options.1, 0)
options.1 + len
} else {
options.1
};
if start < len && end >= 0 {
match start.cmp(&end) {
Ordering::Equal => Value::string("", head),
Ordering::Greater => Value::error(
ShellError::TypeMismatch {
err_message: "End must be greater than or equal to Start".to_string(),
span: head,
},
head,
),
Ordering::Less => Value::string(
{
if end == isize::MAX {
if args.graphemes {
s.graphemes(true)
.skip(start as usize)
.collect::<Vec<&str>>()
.join("")
} else {
String::from_utf8_lossy(
&s.bytes().skip(start as usize).collect::<Vec<_>>(),
)
.to_string()
}
} else if args.graphemes {
if start > end {
Value::string("", head)
} else {
Value::string(
{
if end == isize::MAX {
if args.graphemes {
s.graphemes(true)
.skip(start as usize)
.take((end - start) as usize)
.collect::<Vec<&str>>()
.join("")
} else {
String::from_utf8_lossy(
&s.bytes()
.skip(start as usize)
.take((end - start) as usize)
.collect::<Vec<_>>(),
&s.bytes().skip(start as usize).collect::<Vec<_>>(),
)
.to_string()
}
},
head,
),
}
} else {
Value::string("", head)
} else if args.graphemes {
s.graphemes(true)
.skip(start as usize)
.take((end - start + 1) as usize)
.collect::<Vec<&str>>()
.join("")
} else {
String::from_utf8_lossy(
&s.bytes()
.skip(start as usize)
.take((end - start + 1) as usize)
.collect::<Vec<_>>(),
)
.to_string()
}
},
head,
)
}
}
// Propagate errors by explicitly matching them before the final case.
@ -243,6 +237,7 @@ mod tests {
test_examples(SubCommand {})
}
#[derive(Debug)]
struct Expectation<'a> {
options: (isize, isize),
expected: &'a str,
@ -266,18 +261,19 @@ mod tests {
let word = Value::test_string("andres");
let cases = vec![
expectation("a", (0, 1)),
expectation("an", (0, 2)),
expectation("and", (0, 3)),
expectation("andr", (0, 4)),
expectation("andre", (0, 5)),
expectation("a", (0, 0)),
expectation("an", (0, 1)),
expectation("and", (0, 2)),
expectation("andr", (0, 3)),
expectation("andre", (0, 4)),
expectation("andres", (0, 5)),
expectation("andres", (0, 6)),
expectation("", (0, -6)),
expectation("a", (0, -5)),
expectation("an", (0, -4)),
expectation("and", (0, -3)),
expectation("andr", (0, -2)),
expectation("andre", (0, -1)),
expectation("a", (0, -6)),
expectation("an", (0, -5)),
expectation("and", (0, -4)),
expectation("andr", (0, -3)),
expectation("andre", (0, -2)),
expectation("andres", (0, -1)),
// str substring [ -4 , _ ]
// str substring -4 ,
expectation("dres", (-4, isize::MAX)),
@ -292,6 +288,7 @@ mod tests {
];
for expectation in &cases {
println!("{:?}", expectation);
let expected = expectation.expected;
let actual = action(
&word,

View File

@ -31,12 +31,12 @@ fn detect_columns_with_legacy_and_flag_c() {
(
"$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
"[[c1,c3,c4,c5]; ['a b',c,d,e]]",
"0..0",
"0..1",
),
(
"$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
"[[c1,c2,c3,c4]; [a,b,c,'d e']]",
"(-2)..(-2)",
"(-2)..(-1)",
),
(
"$\"c1 c2 c3 c4 c5(char nl)a b c d e\"",
@ -72,10 +72,10 @@ drwxr-xr-x 2 root root 4.0K Mar 20 08:28 =(char nl)
drwxr-xr-x 4 root root 4.0K Mar 20 08:18 ~(char nl)
-rw-r--r-- 1 root root 3.0K Mar 20 07:23 ~asdf(char nl)\"";
let expected = "[
['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column8'];
['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20 08:28', '='],
['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20 08:18', '~'],
['-rw-r--r--', '1', 'root', 'root', '3.0K', 'Mar 20 07:23', '~asdf']
['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column7', 'column8'];
['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20', '08:28', '='],
['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20', '08:18', '~'],
['-rw-r--r--', '1', 'root', 'root', '3.0K', 'Mar 20', '07:23', '~asdf']
]";
let range = "5..6";
let cmd = format!(

View File

@ -255,7 +255,7 @@ fn substrings_the_input() {
}
#[test]
fn substring_errors_if_start_index_is_greater_than_end_index() {
fn substring_empty_if_start_index_is_greater_than_end_index() {
Playground::setup("str_test_9", |dirs, sandbox| {
sandbox.with_files(&[FileWithContent(
"sample.toml",
@ -270,12 +270,10 @@ fn substring_errors_if_start_index_is_greater_than_end_index() {
r#"
open sample.toml
| str substring 6..4 fortune.teller.phone
| get fortune.teller.phone
"#
));
assert!(actual
.err
.contains("End must be greater than or equal to Start"))
assert_eq!(actual.out, "")
})
}
@ -375,6 +373,21 @@ fn substrings_the_input_and_treats_end_index_as_length_if_blank_end_index_given(
})
}
#[test]
fn substring_by_negative_index() {
Playground::setup("str_test_13", |dirs, _| {
let actual = nu!(
cwd: dirs.test(), "'apples' | str substring 0..-1",
);
assert_eq!(actual.out, "apples");
let actual = nu!(
cwd: dirs.test(), "'apples' | str substring 0..<-1",
);
assert_eq!(actual.out, "apple");
})
}
#[test]
fn str_reverse() {
let actual = nu!(r#"