nushell/crates/nu-command/src/strings/str_/index_of.rs
JT 1e3e034021
Spanned Value step 1: span all value cases (#10042)
# Description

This doesn't really do much that the user could see, but it helps get us
ready to do the steps of the refactor to split the span off of Value, so
that values can be spanless. This allows us to have top-level values
that can hold both a Value and a Span, without requiring that all values
have them.

We expect to see a significant memory reduction by removing so many
unnecessary spans from values. For example, a table of 100,000 rows and
5 columns would save ~8 MB in spans alone, and those spans are almost
always duplicates.

# User-Facing Changes

Nothing yet

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
2023-08-25 08:48:05 +12:00

407 lines
12 KiB
Rust

use crate::grapheme_flags;
use nu_cmd_base::input_handler::{operate, CmdArgument};
use nu_cmd_base::util;
use nu_engine::CallExt;
use nu_protocol::{
ast::{Call, CellPath},
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, Range, ShellError, Signature, Span, Spanned, SyntaxShape,
Type, Value,
};
use unicode_segmentation::UnicodeSegmentation;
/// Parsed arguments of one `str index-of` invocation, handed to `action`
/// for every input value.
struct Arguments {
    /// The text to look for inside each input string.
    substring: String,
    /// Optional `--range` restricting which part of the input is searched.
    range: Option<Range>,
    /// `--end`: search from the end of the input (`rfind`) instead of the start.
    end: bool,
    /// Whether the reported index counts grapheme clusters rather than UTF-8 bytes.
    graphemes: bool,
    /// Cell paths given as rest arguments; `None` when none were supplied.
    cell_paths: Option<Vec<CellPath>>,
}
impl CmdArgument for Arguments {
    /// Moves the stored cell paths out of the arguments, leaving `None` behind.
    fn take_cell_paths(&mut self) -> Option<Vec<CellPath>> {
        std::mem::take(&mut self.cell_paths)
    }
}
/// The `str index-of` command: reports where a substring occurs in the input.
#[derive(Clone)]
pub struct SubCommand;
/// A pair of `i32` values; presumably (start, end) search bounds, judging by the name.
// NOTE(review): nothing in the visible part of this file references this type —
// confirm whether it is still needed by other modules before removing it.
#[derive(Clone)]
pub struct IndexOfOptionalBounds(i32, i32);
impl Command for SubCommand {
fn name(&self) -> &str {
"str index-of"
}
fn signature(&self) -> Signature {
Signature::build("str index-of")
.input_output_types(vec![
(Type::String, Type::Int),
(Type::List(Box::new(Type::String)), Type::List(Box::new(Type::Int))),
(Type::Table(vec![]), Type::Table(vec![])),
(Type::Record(vec![]), Type::Record(vec![])),
])
.allow_variants_without_examples(true)
.required("string", SyntaxShape::String, "the string to find in the input")
.switch(
"grapheme-clusters",
"count indexes using grapheme clusters (all visible chars have length 1)",
Some('g'),
)
.switch(
"utf-8-bytes",
"count indexes using UTF-8 bytes (default; non-ASCII chars have length 2+)",
Some('b'),
)
.rest(
"rest",
SyntaxShape::CellPath,
"For a data structure input, search strings at the given cell paths, and replace with result",
)
.named(
"range",
SyntaxShape::Range,
"optional start and/or end index",
Some('r'),
)
.switch("end", "search from the end of the input", Some('e'))
.category(Category::Strings)
}
fn usage(&self) -> &str {
"Returns start index of first occurrence of string in input, or -1 if no match."
}
fn search_terms(&self) -> Vec<&str> {
vec!["match", "find", "search"]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let substring: Spanned<String> = call.req(engine_state, stack, 0)?;
let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
let args = Arguments {
substring: substring.item,
range: call.get_flag(engine_state, stack, "range")?,
end: call.has_flag("end"),
cell_paths,
graphemes: grapheme_flags(call)?,
};
operate(action, args, input, call.head, engine_state.ctrlc.clone())
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Returns index of string in input",
example: " 'my_library.rb' | str index-of '.rb'",
result: Some(Value::test_int(10)),
},
Example {
description: "Count length using grapheme clusters",
example: "'🇯🇵ほげ ふが ぴよ' | str index-of -g 'ふが'",
result: Some(Value::test_int(4)),
},
Example {
description: "Returns index of string in input within a`rhs open range`",
example: " '.rb.rb' | str index-of '.rb' -r 1..",
result: Some(Value::test_int(3)),
},
Example {
description: "Returns index of string in input within a lhs open range",
example: " '123456' | str index-of '6' -r ..4",
result: Some(Value::test_int(-1)),
},
Example {
description: "Returns index of string in input within a range",
example: " '123456' | str index-of '3' -r 1..4",
result: Some(Value::test_int(2)),
},
Example {
description: "Returns index of string in input",
example: " '/this/is/some/path/file.txt' | str index-of '/' -e",
result: Some(Value::test_int(18)),
},
]
}
}
/// Computes the `str index-of` result for a single input value.
///
/// For a string input, returns an int `Value` holding the index at which
/// `substring` starts, or `-1` when there is no match. The index is a UTF-8
/// byte offset unless `graphemes` is set, in which case it is the number of
/// grapheme clusters that precede the match.
///
/// When a `range` is given, the bounds produced by `util::process_range` are
/// used as a half-open *byte* slice `start..end` of the input (also in
/// grapheme mode), with `end` clamped to the input length. A range that does
/// not describe a valid slice of the input (start past the end, start beyond
/// the input, or a bound that falls inside a multi-byte character) selects
/// nothing, so the result is `-1` rather than a panic.
///
/// With `end` set, the last occurrence inside the searched slice is reported
/// instead of the first.
///
/// Error values are passed through unchanged; any other non-string input
/// yields an `OnlySupportsThisInputType` error value. A range that
/// `process_range` rejects yields an error value as well.
fn action(
    input: &Value,
    Arguments {
        ref substring,
        range,
        end,
        graphemes,
        ..
    }: &Arguments,
    head: Span,
) -> Value {
    match input {
        Value::String { val: s, .. } => {
            let (start_index, end_index) = if let Some(range) = range {
                match util::process_range(range) {
                    Ok(r) => {
                        // `process_range()` returns `isize::MAX` if the range is open-ended,
                        // which is not ideal for us, so clamp the end to the input length.
                        (r.0 as usize, (r.1 as usize).min(s.len()))
                    }
                    Err(processing_error) => {
                        let err = processing_error("could not find `index-of`", head);
                        return Value::Error {
                            error: Box::new(err),
                            span: head,
                        };
                    }
                }
            } else {
                (0usize, s.len())
            };

            // Checked slicing: `s[start..end]` would panic on an inverted or
            // out-of-bounds range, or on a bound inside a multi-byte character.
            let haystack = match s.get(start_index..end_index) {
                Some(haystack) => haystack,
                None => return Value::int(-1, head),
            };

            // When the -e flag is present, search using rfind instead of find.
            let found = if *end {
                haystack.rfind(substring.as_str())
            } else {
                haystack.find(substring.as_str())
            };

            match found {
                Some(offset) => {
                    // `offset` is relative to the searched slice; make it relative to `s`.
                    let byte_index = offset + start_index;
                    let index = if *graphemes {
                        // Convert the byte index to a grapheme index by counting the
                        // graphemes that start before it. Unlike looking up the first
                        // grapheme at or after the byte index, this also works when the
                        // match sits at the very end of the input (e.g. an empty
                        // substring), where no such grapheme exists.
                        s.grapheme_indices(true)
                            .take_while(|(start, _)| *start < byte_index)
                            .count()
                    } else {
                        byte_index
                    };
                    Value::int(index as i64, head)
                }
                None => Value::int(-1, head),
            }
        }
        // Errors flowing through the pipeline are forwarded untouched.
        Value::Error { .. } => input.clone(),
        _ => Value::Error {
            error: Box::new(ShellError::OnlySupportsThisInputType {
                exp_input_type: "string".into(),
                wrong_type: input.get_type().to_string(),
                dst_span: head,
                src_span: input.span(),
            }),
            span: head,
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use nu_protocol::ast::RangeInclusion;

    /// An int value carrying the test span.
    fn int(val: i64) -> Value {
        Value::Int {
            val,
            span: Span::test_data(),
        }
    }

    /// A nothing value carrying the test span; stands for an open range bound.
    fn nothing() -> Value {
        Value::Nothing {
            span: Span::test_data(),
        }
    }

    /// A range with step 1 between the given bounds.
    fn range(from: Value, to: Value, inclusion: RangeInclusion) -> Range {
        Range {
            from,
            incr: int(1),
            to,
            inclusion,
        }
    }

    /// Runs `action` on `haystack`, searching forwards for `needle` and
    /// counting in UTF-8 bytes.
    fn index_of(haystack: &str, needle: &str, range: Option<Range>) -> Value {
        let input = Value::test_string(haystack);
        let args = Arguments {
            substring: String::from(needle),
            range,
            cell_paths: None,
            end: false,
            graphemes: false,
        };
        action(&input, &args, Span::test_data())
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(SubCommand {})
    }

    #[test]
    fn returns_index_of_substring() {
        let found = index_of("Cargo.tomL", ".tomL", None);

        assert_eq!(found, Value::test_int(5));
    }

    #[test]
    fn index_of_does_not_exist_in_string() {
        let found = index_of("Cargo.tomL", "Lm", None);

        assert_eq!(found, Value::test_int(-1));
    }

    #[test]
    fn returns_index_of_next_substring() {
        let from_one = range(int(1), nothing(), RangeInclusion::Inclusive);

        let found = index_of("Cargo.Cargo", "Cargo", Some(from_one));

        assert_eq!(found, Value::test_int(6));
    }

    #[test]
    fn index_does_not_exist_due_to_end_index() {
        let up_to_five = range(nothing(), int(5), RangeInclusion::Inclusive);

        let found = index_of("Cargo.Banana", "Banana", Some(up_to_five));

        assert_eq!(found, Value::test_int(-1));
    }

    #[test]
    fn returns_index_of_nums_in_middle_due_to_index_limit_from_both_ends() {
        let two_to_six = range(int(2), int(6), RangeInclusion::Inclusive);

        let found = index_of("123123123", "123", Some(two_to_six));

        assert_eq!(found, Value::test_int(3));
    }

    #[test]
    fn index_does_not_exists_due_to_strict_bounds() {
        let two_until_five = range(int(2), int(5), RangeInclusion::RightExclusive);

        let found = index_of("123456", "1", Some(two_until_five));

        assert_eq!(found, Value::test_int(-1));
    }

    #[test]
    fn use_utf8_bytes() {
        let found = index_of("🇯🇵ほげ ふが ぴよ", "ふが", None);

        assert_eq!(found, Value::test_int(15));
    }
}