From c48c092125bb75d6244029a41dc89908ac04acbb Mon Sep 17 00:00:00 2001 From: k-brk <25877802+k-brk@users.noreply.github.com> Date: Tue, 4 Aug 2020 08:36:51 +0200 Subject: [PATCH] String funcs - Contains and IndexOf (#2298) * Contains and index of string functions * Clippy and fmt --- crates/nu-cli/src/cli.rs | 2 + crates/nu-cli/src/commands.rs | 6 +- crates/nu-cli/src/commands/str_/contains.rs | 190 ++++++++++++ crates/nu-cli/src/commands/str_/index_of.rs | 314 ++++++++++++++++++++ crates/nu-cli/src/commands/str_/mod.rs | 4 + 5 files changed, 513 insertions(+), 3 deletions(-) create mode 100644 crates/nu-cli/src/commands/str_/contains.rs create mode 100644 crates/nu-cli/src/commands/str_/index_of.rs diff --git a/crates/nu-cli/src/cli.rs b/crates/nu-cli/src/cli.rs index bb367c189d..810460f138 100644 --- a/crates/nu-cli/src/cli.rs +++ b/crates/nu-cli/src/cli.rs @@ -310,6 +310,8 @@ pub fn create_default_context( whole_stream_command(StrSubstring), whole_stream_command(StrSet), whole_stream_command(StrToDatetime), + whole_stream_command(StrContains), + whole_stream_command(StrIndexOf), whole_stream_command(StrTrim), whole_stream_command(StrTrimLeft), whole_stream_command(StrTrimRight), diff --git a/crates/nu-cli/src/commands.rs b/crates/nu-cli/src/commands.rs index 23b7bbcc16..e099a1c850 100644 --- a/crates/nu-cli/src/commands.rs +++ b/crates/nu-cli/src/commands.rs @@ -229,9 +229,9 @@ pub(crate) use sort_by::SortBy; pub(crate) use split::{Split, SplitChars, SplitColumn, SplitRow}; pub(crate) use split_by::SplitBy; pub(crate) use str_::{ - Str, StrCapitalize, StrCollect, StrDowncase, StrEndsWith, StrFindReplace, StrFrom, StrLength, - StrReverse, StrSet, StrStartsWith, StrSubstring, StrToDatetime, StrToDecimal, StrToInteger, - StrTrim, StrTrimLeft, StrTrimRight, StrUpcase, + Str, StrCapitalize, StrCollect, StrContains, StrDowncase, StrEndsWith, StrFindReplace, StrFrom, + StrIndexOf, StrLength, StrReverse, StrSet, StrStartsWith, StrSubstring, StrToDatetime, + StrToDecimal, StrToInteger, StrTrim, StrTrimLeft, StrTrimRight, StrUpcase, }; pub(crate) use table::Table; pub(crate) use tags::Tags; diff --git a/crates/nu-cli/src/commands/str_/contains.rs b/crates/nu-cli/src/commands/str_/contains.rs new file mode 100644 index 0000000000..ff1975ba2f --- /dev/null +++ b/crates/nu-cli/src/commands/str_/contains.rs @@ -0,0 +1,190 @@ +use crate::commands::WholeStreamCommand; +use crate::prelude::*; +use nu_errors::ShellError; +use nu_protocol::ShellTypeName; +use nu_protocol::{ + ColumnPath, Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value, +}; +use nu_source::{Tag, Tagged}; +use nu_value_ext::ValueExt; + +#[derive(Deserialize)] +struct Arguments { + pattern: Tagged, + rest: Vec, + insensitive: bool, +} + +pub struct SubCommand; + +#[async_trait] +impl WholeStreamCommand for SubCommand { + fn name(&self) -> &str { + "str contains" + } + + fn signature(&self) -> Signature { + Signature::build("str contains") + .required("pattern", SyntaxShape::String, "the pattern to find") + .rest( + SyntaxShape::ColumnPath, + "optionally check if string contains pattern by column paths", + ) + .switch("insensitive", "search is case insensitive", Some('i')) + } + + fn usage(&self) -> &str { + "Checks if string contains pattern" + } + + async fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + operate(args, registry).await + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Check if string contains pattern", + example: "echo 'my_library.rb' | str contains '.rb'", + result: Some(vec![UntaggedValue::boolean(true).into_untagged_value()]), + }, + Example { + description: "Check if string contains pattern case insensitive", + example: "echo 'my_library.rb' | str contains -i '.RB'", + result: Some(vec![UntaggedValue::boolean(true).into_untagged_value()]), + }, + ] + } +} + +async fn operate( + args: CommandArgs, + registry: &CommandRegistry, +) -> Result { + let registry = registry.clone(); + + let ( + Arguments { + pattern, + rest, + insensitive, + }, + input, + ) = args.process(®istry).await?; + let column_paths: Vec<_> = rest; + + Ok(input + .map(move |v| { + if column_paths.is_empty() { + ReturnSuccess::value(action(&v, &pattern, insensitive, v.tag())?) + } else { + let mut ret = v; + + for path in &column_paths { + let pattern = pattern.clone(); + ret = ret.swap_data_by_column_path( + path, + Box::new(move |old| action(old, &pattern, insensitive, old.tag())), + )?; + } + + ReturnSuccess::value(ret) + } + }) + .to_output_stream()) +} + +fn action( + input: &Value, + pattern: &str, + insensitive: bool, + tag: impl Into, +) -> Result { + match &input.value { + UntaggedValue::Primitive(Primitive::Line(s)) + | UntaggedValue::Primitive(Primitive::String(s)) => { + let contains = if insensitive { + s.to_lowercase().find(&pattern.to_lowercase()).is_some() + } else { + s.find(pattern).is_some() + }; + + Ok(UntaggedValue::boolean(contains).into_value(tag)) + } + other => { + let got = format!("got {}", other.type_name()); + Err(ShellError::labeled_error( + "value is not string", + got, + tag.into().span, + )) + } + } +} + +#[cfg(test)] +mod tests { + use super::{action, SubCommand}; + use nu_plugin::test_helpers::value::string; + use nu_protocol::{Primitive, UntaggedValue}; + use nu_source::Tag; + + #[test] + fn examples_work_as_expected() { + use crate::examples::test as test_examples; + + test_examples(SubCommand {}) + } + + #[test] + fn string_contains_other_string_case_sensitive() { + let word = string("Cargo.tomL"); + let pattern = ".tomL"; + let insensitive = false; + let expected = + UntaggedValue::Primitive(Primitive::Boolean(true.into())).into_untagged_value(); + + let actual = action(&word, &pattern, insensitive, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn string_does_not_contain_other_string_case_sensitive() { + let word = string("Cargo.tomL"); + let pattern = "Lomt."; + let insensitive = false; + let expected = + UntaggedValue::Primitive(Primitive::Boolean(false.into())).into_untagged_value(); + + let actual = action(&word, &pattern, insensitive, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn string_contains_other_string_case_insensitive() { + let word = string("Cargo.ToMl"); + let pattern = ".TOML"; + let insensitive = true; + let expected = + UntaggedValue::Primitive(Primitive::Boolean(true.into())).into_untagged_value(); + + let actual = action(&word, &pattern, insensitive, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn string_does_not_contain_other_string_case_insensitive() { + let word = string("Cargo.tOml"); + let pattern = "lomt."; + let insensitive = true; + let expected = + UntaggedValue::Primitive(Primitive::Boolean(false.into())).into_untagged_value(); + + let actual = action(&word, &pattern, insensitive, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } +} diff --git a/crates/nu-cli/src/commands/str_/index_of.rs b/crates/nu-cli/src/commands/str_/index_of.rs new file mode 100644 index 0000000000..33cd81fc5c --- /dev/null +++ b/crates/nu-cli/src/commands/str_/index_of.rs @@ -0,0 +1,314 @@ +use crate::commands::WholeStreamCommand; +use crate::prelude::*; +use nu_errors::ShellError; +use nu_protocol::ShellTypeName; +use nu_protocol::{ + ColumnPath, Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value, +}; +use nu_source::{Tag, Tagged}; +use nu_value_ext::{as_string, ValueExt}; + +#[derive(Deserialize)] +struct Arguments { + pattern: Tagged, + rest: Vec, + range: Option, +} + +pub struct SubCommand; + +#[derive(Clone)] +pub struct IndexOfOptionalBounds(i32, i32); + +#[async_trait] +impl WholeStreamCommand for SubCommand { + fn name(&self) -> &str { + "str index-of" + } + + fn signature(&self) -> Signature { + Signature::build("str index-of") + .required( + "pattern", + SyntaxShape::String, + "the pattern to find index of", + ) + .rest( + SyntaxShape::ColumnPath, + "optionally returns index of pattern in string by column paths", + ) + .named( + "range", + SyntaxShape::Any, + "optional start and/or end index", + Some('r'), + ) + } + + fn usage(&self) -> &str { + "Returns starting index of given pattern in string counting from 0. Returns -1 when there are no results." + } + + async fn run( + &self, + args: CommandArgs, + registry: &CommandRegistry, + ) -> Result { + operate(args, registry).await + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Returns index of pattern in string", + example: "echo 'my_library.rb' | str index-of '.rb'", + result: Some(vec![UntaggedValue::int(10).into_untagged_value()]), + }, + Example { + description: "Returns index of pattern in string with start index", + example: "echo '.rb.rb' | str index-of '.rb' -r '1,'", + result: Some(vec![UntaggedValue::int(3).into_untagged_value()]), + }, + Example { + description: "Returns index of pattern in string with end index", + example: "echo '123456' | str index-of '6' -r ',4'", + result: Some(vec![UntaggedValue::int(-1).into_untagged_value()]), + }, + Example { + description: "Returns index of pattern in string with start and end index", + example: "echo '123456' | str index-of '3' -r '1,4'", + result: Some(vec![UntaggedValue::int(2).into_untagged_value()]), + }, + Example { + description: "Alternativly you can use this form", + example: "echo '123456' | str index-of '3' -r [1 4]", + result: Some(vec![UntaggedValue::int(2).into_untagged_value()]), + }, + ] + } +} + +async fn operate( + args: CommandArgs, + registry: &CommandRegistry, +) -> Result { + let registry = registry.clone(); + + let ( + Arguments { + pattern, + rest, + range, + }, + input, + ) = args.process(®istry).await?; + let range = range.unwrap_or_else(|| { + UntaggedValue::Primitive(Primitive::String("".to_string())).into_untagged_value() + }); + let column_paths: Vec<_> = rest; + + Ok(input + .map(move |v| { + if column_paths.is_empty() { + ReturnSuccess::value(action(&v, &pattern, &range, v.tag())?) + } else { + let mut ret = v; + + for path in &column_paths { + let range = range.clone(); + let pattern = pattern.clone(); + ret = ret.swap_data_by_column_path( + path, + Box::new(move |old| action(old, &pattern, &range, old.tag())), + )?; + } + + ReturnSuccess::value(ret) + } + }) + .to_output_stream()) +} + +fn action( + input: &Value, + pattern: &str, + range: &Value, + tag: impl Into, +) -> Result { + let r = process_range(&input, &range)?; + match &input.value { + UntaggedValue::Primitive(Primitive::Line(s)) + | UntaggedValue::Primitive(Primitive::String(s)) => { + let start_index = r.0 as usize; + let end_index = r.1 as usize; + + if let Some(result) = s[start_index..end_index].find(pattern) { + Ok(UntaggedValue::int(result + start_index).into_value(tag)) + } else { + let not_found = -1; + Ok(UntaggedValue::int(not_found).into_value(tag)) + } + } + other => { + let got = format!("got {}", other.type_name()); + Err(ShellError::labeled_error( + "value is not string", + got, + tag.into().span, + )) + } + } +} + +fn process_range(input: &Value, range: &Value) -> Result { + let input_len = match &input.value { + UntaggedValue::Primitive(Primitive::Line(s)) + | UntaggedValue::Primitive(Primitive::String(s)) => s.len(), + _ => 0, + }; + let min_index_str = String::from("0"); + let max_index_str = input_len.to_string(); + let r = match &range.value { + UntaggedValue::Primitive(Primitive::Line(s)) + | UntaggedValue::Primitive(Primitive::String(s)) => { + let indexes: Vec<&str> = s.split(',').collect(); + + let start_index = indexes.get(0).unwrap_or(&&min_index_str[..]).to_string(); + + let end_index = indexes.get(1).unwrap_or(&&max_index_str[..]).to_string(); + + Ok((start_index, end_index)) + } + UntaggedValue::Table(indexes) => { + if indexes.len() > 2 { + Err(ShellError::labeled_error( + "there shouldn't be more than two indexes", + "too many indexes", + range.tag(), + )) + } else { + let idx: Vec = indexes + .iter() + .map(|v| as_string(v).unwrap_or_else(|_| String::from(""))) + .collect(); + + let start_index = idx.get(0).unwrap_or(&min_index_str).to_string(); + let end_index = idx.get(1).unwrap_or(&max_index_str).to_string(); + + Ok((start_index, end_index)) + } + } + other => { + let got = format!("got {}", other.type_name()); + Err(ShellError::labeled_error( + "value is not string", + got, + range.tag(), + )) + } + }?; + + let start_index = r.0.parse::().unwrap_or(0); + let end_index = r.1.parse::().unwrap_or(input_len as i32); + + if start_index < 0 || start_index > end_index { + return Err(ShellError::labeled_error( + "start index can't be negative or greater than end index", + "Invalid start index", + range.tag(), + )); + } + + if end_index < 0 || end_index < start_index || end_index > input_len as i32 { + return Err(ShellError::labeled_error( + "end index can't be negative, smaller than start index or greater than input length", + "Invalid end index", + range.tag(), + )); + } + Ok(IndexOfOptionalBounds(start_index, end_index)) +} +#[cfg(test)] +mod tests { + use super::{action, SubCommand}; + use nu_plugin::test_helpers::value::string; + use nu_protocol::{Primitive, UntaggedValue}; + use nu_source::Tag; + + #[test] + fn examples_work_as_expected() { + use crate::examples::test as test_examples; + + test_examples(SubCommand {}) + } + + #[test] + fn returns_index_of_substring() { + let word = string("Cargo.tomL"); + let pattern = ".tomL"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String("".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int(5.into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + #[test] + fn index_of_does_not_exist_in_string() { + let word = string("Cargo.tomL"); + let pattern = "Lm"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String("".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int((-1).into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn returns_index_of_next_substring() { + let word = string("Cargo.Cargo"); + let pattern = "Cargo"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String("1,".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int(6.into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + #[test] + fn index_does_not_exist_due_to_end_index() { + let word = string("Cargo.Banana"); + let pattern = "Banana"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String(",5".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int((-1).into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn returns_index_of_nums_in_middle_due_to_index_limit_from_both_ends() { + let word = string("123123123"); + let pattern = "123"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String("2,6".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int(3.into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn index_does_not_exists_due_to_strict_bounds() { + let word = string("123456"); + let pattern = "1"; + let index_of_bounds = + UntaggedValue::Primitive(Primitive::String("2,4".to_string())).into_untagged_value(); + let expected = UntaggedValue::Primitive(Primitive::Int((-1).into())).into_untagged_value(); + + let actual = action(&word, &pattern, &index_of_bounds, Tag::unknown()).unwrap(); + assert_eq!(actual, expected); + } +} diff --git a/crates/nu-cli/src/commands/str_/mod.rs b/crates/nu-cli/src/commands/str_/mod.rs index 8ecc87ecb7..38b7abaa60 100644 --- a/crates/nu-cli/src/commands/str_/mod.rs +++ b/crates/nu-cli/src/commands/str_/mod.rs @@ -1,10 +1,12 @@ mod capitalize; mod collect; mod command; +mod contains; mod downcase; mod ends_with; mod find_replace; mod from; +mod index_of; mod length; mod reverse; mod set; @@ -19,10 +21,12 @@ mod upcase; pub use capitalize::SubCommand as StrCapitalize; pub use collect::SubCommand as StrCollect; pub use command::Command as Str; +pub use contains::SubCommand as StrContains; pub use downcase::SubCommand as StrDowncase; pub use ends_with::SubCommand as StrEndsWith; pub use find_replace::SubCommand as StrFindReplace; pub use from::SubCommand as StrFrom; +pub use index_of::SubCommand as StrIndexOf; pub use length::SubCommand as StrLength; pub use reverse::SubCommand as StrReverse; pub use set::SubCommand as StrSet;