use crate::grapheme_flags; use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_cmd_base::util; use nu_engine::CallExt; use nu_protocol::{ ast::{Call, CellPath}, engine::{Command, EngineState, Stack}, Category, Example, PipelineData, Range, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; use unicode_segmentation::UnicodeSegmentation; struct Arguments { end: bool, substring: String, range: Option, cell_paths: Option>, graphemes: bool, } impl CmdArgument for Arguments { fn take_cell_paths(&mut self) -> Option> { self.cell_paths.take() } } #[derive(Clone)] pub struct SubCommand; #[derive(Clone)] pub struct IndexOfOptionalBounds(i32, i32); impl Command for SubCommand { fn name(&self) -> &str { "str index-of" } fn signature(&self) -> Signature { Signature::build("str index-of") .input_output_types(vec![ (Type::String, Type::Int), (Type::List(Box::new(Type::String)), Type::List(Box::new(Type::Int))), (Type::Table(vec![]), Type::Table(vec![])), (Type::Record(vec![]), Type::Record(vec![])), ]) .allow_variants_without_examples(true) .required("string", SyntaxShape::String, "the string to find in the input") .switch( "grapheme-clusters", "count indexes using grapheme clusters (all visible chars have length 1)", Some('g'), ) .switch( "utf-8-bytes", "count indexes using UTF-8 bytes (default; non-ASCII chars have length 2+)", Some('b'), ) .rest( "rest", SyntaxShape::CellPath, "For a data structure input, search strings at the given cell paths, and replace with result", ) .named( "range", SyntaxShape::Range, "optional start and/or end index", Some('r'), ) .switch("end", "search from the end of the input", Some('e')) .category(Category::Strings) } fn usage(&self) -> &str { "Returns start index of first occurrence of string in input, or -1 if no match." } fn search_terms(&self) -> Vec<&str> { vec!["match", "find", "search"] } fn run( &self, engine_state: &EngineState, stack: &mut Stack, call: &Call, input: PipelineData, ) -> Result { let substring: Spanned = call.req(engine_state, stack, 0)?; let cell_paths: Vec = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let args = Arguments { substring: substring.item, range: call.get_flag(engine_state, stack, "range")?, end: call.has_flag("end"), cell_paths, graphemes: grapheme_flags(call)?, }; operate(action, args, input, call.head, engine_state.ctrlc.clone()) } fn examples(&self) -> Vec { vec![ Example { description: "Returns index of string in input", example: " 'my_library.rb' | str index-of '.rb'", result: Some(Value::test_int(10)), }, Example { description: "Count length using grapheme clusters", example: "'๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ' | str index-of --grapheme-clusters 'ใตใŒ'", result: Some(Value::test_int(4)), }, Example { description: "Returns index of string in input within a`rhs open range`", example: " '.rb.rb' | str index-of '.rb' --range 1..", result: Some(Value::test_int(3)), }, Example { description: "Returns index of string in input within a lhs open range", example: " '123456' | str index-of '6' --range ..4", result: Some(Value::test_int(-1)), }, Example { description: "Returns index of string in input within a range", example: " '123456' | str index-of '3' --range 1..4", result: Some(Value::test_int(2)), }, Example { description: "Returns index of string in input", example: " '/this/is/some/path/file.txt' | str index-of '/' -e", result: Some(Value::test_int(18)), }, ] } } fn action( input: &Value, Arguments { ref substring, range, end, graphemes, .. }: &Arguments, head: Span, ) -> Value { match input { Value::String { val: s, .. } => { let (start_index, end_index) = if let Some(range) = range { match util::process_range(range) { Ok(r) => { // `process_range()` returns `isize::MAX` if the range is open-ended, // which is not ideal for us let end = if r.1 as usize > s.len() { s.len() } else { r.1 as usize }; (r.0 as usize, end) } Err(processing_error) => { let err = processing_error("could not find `index-of`", head); return Value::error(err, head); } } } else { (0usize, s.len()) }; // When the -e flag is present, search using rfind instead of find.s if let Some(result) = if *end { s[start_index..end_index].rfind(&**substring) } else { s[start_index..end_index].find(&**substring) } { let result = result + start_index; Value::int( if *graphemes { // Having found the substring's byte index, convert to grapheme index. // grapheme_indices iterates graphemes alongside their UTF-8 byte indices, so .enumerate() // is used to get the grapheme index alongside it. s.grapheme_indices(true) .enumerate() .find(|e| e.1 .0 >= result) .expect("No grapheme index for substring") .0 } else { result } as i64, head, ) } else { Value::int(-1, head) } } Value::Error { .. } => input.clone(), _ => Value::error( ShellError::OnlySupportsThisInputType { exp_input_type: "string".into(), wrong_type: input.get_type().to_string(), dst_span: head, src_span: input.span(), }, head, ), } } #[cfg(test)] mod tests { use nu_protocol::ast::RangeInclusion; use super::*; use super::{action, Arguments, SubCommand}; #[test] fn test_examples() { use crate::test_examples; test_examples(SubCommand {}) } #[test] fn returns_index_of_substring() { let word = Value::test_string("Cargo.tomL"); let options = Arguments { substring: String::from(".tomL"), range: None, cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(5)); } #[test] fn index_of_does_not_exist_in_string() { let word = Value::test_string("Cargo.tomL"); let options = Arguments { substring: String::from("Lm"), range: None, cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(-1)); } #[test] fn returns_index_of_next_substring() { let word = Value::test_string("Cargo.Cargo"); let range = Range { from: Value::int(1, Span::test_data()), incr: Value::int(1, Span::test_data()), to: Value::nothing(Span::test_data()), inclusion: RangeInclusion::Inclusive, }; let options = Arguments { substring: String::from("Cargo"), range: Some(range), cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(6)); } #[test] fn index_does_not_exist_due_to_end_index() { let word = Value::test_string("Cargo.Banana"); let range = Range { from: Value::nothing(Span::test_data()), inclusion: RangeInclusion::Inclusive, incr: Value::int(1, Span::test_data()), to: Value::int(5, Span::test_data()), }; let options = Arguments { substring: String::from("Banana"), range: Some(range), cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(-1)); } #[test] fn returns_index_of_nums_in_middle_due_to_index_limit_from_both_ends() { let word = Value::test_string("123123123"); let range = Range { from: Value::int(2, Span::test_data()), incr: Value::int(1, Span::test_data()), to: Value::int(6, Span::test_data()), inclusion: RangeInclusion::Inclusive, }; let options = Arguments { substring: String::from("123"), range: Some(range), cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(3)); } #[test] fn index_does_not_exists_due_to_strict_bounds() { let word = Value::test_string("123456"); let range = Range { from: Value::int(2, Span::test_data()), incr: Value::int(1, Span::test_data()), to: Value::int(5, Span::test_data()), inclusion: RangeInclusion::RightExclusive, }; let options = Arguments { substring: String::from("1"), range: Some(range), cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(-1)); } #[test] fn use_utf8_bytes() { let word = Value::string(String::from("๐Ÿ‡ฏ๐Ÿ‡ตใปใ’ ใตใŒ ใดใ‚ˆ"), Span::test_data()); let options = Arguments { substring: String::from("ใตใŒ"), range: None, cell_paths: None, end: false, graphemes: false, }; let actual = action(&word, &options, Span::test_data()); assert_eq!(actual, Value::test_int(15)); } }