String funcs - Contains and IndexOf (#2298)

* Contains and index of string functions

* Clippy and fmt
This commit is contained in:
k-brk 2020-08-04 08:36:51 +02:00 committed by GitHub
parent 028fc9b9cd
commit c48c092125
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 513 additions and 3 deletions

View File

@ -310,6 +310,8 @@ pub fn create_default_context(
whole_stream_command(StrSubstring),
whole_stream_command(StrSet),
whole_stream_command(StrToDatetime),
whole_stream_command(StrContains),
whole_stream_command(StrIndexOf),
whole_stream_command(StrTrim),
whole_stream_command(StrTrimLeft),
whole_stream_command(StrTrimRight),

View File

@ -229,9 +229,9 @@ pub(crate) use sort_by::SortBy;
pub(crate) use split::{Split, SplitChars, SplitColumn, SplitRow};
pub(crate) use split_by::SplitBy;
pub(crate) use str_::{
Str, StrCapitalize, StrCollect, StrDowncase, StrEndsWith, StrFindReplace, StrFrom, StrLength,
StrReverse, StrSet, StrStartsWith, StrSubstring, StrToDatetime, StrToDecimal, StrToInteger,
StrTrim, StrTrimLeft, StrTrimRight, StrUpcase,
Str, StrCapitalize, StrCollect, StrContains, StrDowncase, StrEndsWith, StrFindReplace, StrFrom,
StrIndexOf, StrLength, StrReverse, StrSet, StrStartsWith, StrSubstring, StrToDatetime,
StrToDecimal, StrToInteger, StrTrim, StrTrimLeft, StrTrimRight, StrUpcase,
};
pub(crate) use table::Table;
pub(crate) use tags::Tags;

View File

@ -0,0 +1,190 @@
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::ShellTypeName;
use nu_protocol::{
ColumnPath, Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_source::{Tag, Tagged};
use nu_value_ext::ValueExt;
/// Arguments accepted by `str contains`, deserialized from the command invocation.
#[derive(Deserialize)]
struct Arguments {
/// The pattern to look for inside each input string.
pattern: Tagged<String>,
/// Optional column paths; when non-empty, the check is applied to the values at
/// these paths instead of to the input value itself.
rest: Vec<ColumnPath>,
/// When `true`, both the input and the pattern are lowercased before comparing.
insensitive: bool,
}
/// The `str contains` subcommand.
pub struct SubCommand;
#[async_trait]
impl WholeStreamCommand for SubCommand {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "str contains"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Checks if string contains pattern"
    }

    /// Declares the required `pattern`, the optional column paths and the
    /// `--insensitive` / `-i` switch.
    fn signature(&self) -> Signature {
        let signature = Signature::build("str contains");
        let signature =
            signature.required("pattern", SyntaxShape::String, "the pattern to find");
        let signature = signature.rest(
            SyntaxShape::ColumnPath,
            "optionally check if string contains pattern by column paths",
        );
        signature.switch("insensitive", "search is case insensitive", Some('i'))
    }

    /// Runs the command by delegating to `operate`.
    async fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        operate(args, registry).await
    }

    /// Usage examples; both are expected to produce `true`.
    fn examples(&self) -> Vec<Example> {
        // Both examples share the same expected output.
        let found = || Some(vec![UntaggedValue::boolean(true).into_untagged_value()]);

        vec![
            Example {
                description: "Check if string contains pattern",
                example: "echo 'my_library.rb' | str contains '.rb'",
                result: found(),
            },
            Example {
                description: "Check if string contains pattern case insensitive",
                example: "echo 'my_library.rb' | str contains -i '.RB'",
                result: found(),
            },
        ]
    }
}
/// Parses the command arguments and maps every input value through `action`.
///
/// With no column paths the check is applied to the value itself; otherwise the
/// value at each given column path is replaced, in order, by the result of the check.
async fn operate(
    args: CommandArgs,
    registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
    let registry = registry.clone();
    let (
        Arguments {
            pattern,
            rest: column_paths,
            insensitive,
        },
        input,
    ) = args.process(&registry).await?;

    let checked = input.map(move |value| {
        if column_paths.is_empty() {
            return ReturnSuccess::value(action(&value, &pattern, insensitive, value.tag())?);
        }

        // Thread the row through every column path, swapping each targeted cell.
        let swapped = column_paths.iter().try_fold(value, |row, path| {
            let pattern = pattern.clone();
            row.swap_data_by_column_path(
                path,
                Box::new(move |old| action(old, &pattern, insensitive, old.tag())),
            )
        })?;
        ReturnSuccess::value(swapped)
    });

    Ok(checked.to_output_stream())
}
/// Checks whether the string held by `input` contains `pattern`.
///
/// When `insensitive` is set, both sides are lowercased before comparing.
/// The resulting boolean value carries `tag`.
///
/// # Errors
/// Returns a labeled "value is not string" error when `input` is neither a
/// `Line` nor a `String` primitive.
fn action(
    input: &Value,
    pattern: &str,
    insensitive: bool,
    tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
    let haystack = match &input.value {
        UntaggedValue::Primitive(Primitive::Line(text))
        | UntaggedValue::Primitive(Primitive::String(text)) => text,
        other => {
            return Err(ShellError::labeled_error(
                "value is not string",
                format!("got {}", other.type_name()),
                tag.into().span,
            ));
        }
    };

    let found = if insensitive {
        haystack
            .to_lowercase()
            .contains(pattern.to_lowercase().as_str())
    } else {
        haystack.contains(pattern)
    };

    Ok(UntaggedValue::boolean(found).into_value(tag))
}
#[cfg(test)]
mod tests {
    use super::{action, SubCommand};
    use nu_plugin::test_helpers::value::string;
    use nu_protocol::{Primitive, UntaggedValue};
    use nu_source::Tag;

    /// Runs `action` on `word` and asserts that it yields the boolean `expected`.
    fn assert_contains(word: &str, pattern: &str, insensitive: bool, expected: bool) {
        let outcome = action(&string(word), pattern, insensitive, Tag::unknown()).unwrap();
        let wanted = UntaggedValue::Primitive(Primitive::Boolean(expected)).into_untagged_value();
        assert_eq!(outcome, wanted);
    }

    #[test]
    fn examples_work_as_expected() {
        use crate::examples::test as test_examples;

        test_examples(SubCommand)
    }

    #[test]
    fn string_contains_other_string_case_sensitive() {
        assert_contains("Cargo.tomL", ".tomL", false, true);
    }

    #[test]
    fn string_does_not_contain_other_string_case_sensitive() {
        assert_contains("Cargo.tomL", "Lomt.", false, false);
    }

    #[test]
    fn string_contains_other_string_case_insensitive() {
        assert_contains("Cargo.ToMl", ".TOML", true, true);
    }

    #[test]
    fn string_does_not_contain_other_string_case_insensitive() {
        assert_contains("Cargo.tOml", "lomt.", true, false);
    }
}

View File

@ -0,0 +1,314 @@
use crate::commands::WholeStreamCommand;
use crate::prelude::*;
use nu_errors::ShellError;
use nu_protocol::ShellTypeName;
use nu_protocol::{
ColumnPath, Primitive, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value,
};
use nu_source::{Tag, Tagged};
use nu_value_ext::{as_string, ValueExt};
/// Arguments accepted by `str index-of`, deserialized from the command invocation.
#[derive(Deserialize)]
struct Arguments {
/// The pattern whose starting index is searched for.
pattern: Tagged<String>,
/// Optional column paths; when non-empty, the search is applied to the values at
/// these paths instead of to the input value itself.
rest: Vec<ColumnPath>,
/// Optional start and/or end index, given either as a `"start,end"` string or as
/// a table of at most two values (see `process_range`).
range: Option<Value>,
}
/// The `str index-of` subcommand.
pub struct SubCommand;
/// Resolved `(start, end)` search bounds as produced by `process_range`, which only
/// returns them when `0 <= start <= end <= input length` (length measured with `str::len`).
#[derive(Clone)]
pub struct IndexOfOptionalBounds(i32, i32);
#[async_trait]
impl WholeStreamCommand for SubCommand {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "str index-of"
    }

    /// Declares the required `pattern`, the optional column paths and the
    /// `--range` / `-r` named argument.
    fn signature(&self) -> Signature {
        Signature::build("str index-of")
            .required(
                "pattern",
                SyntaxShape::String,
                "the pattern to find index of",
            )
            .rest(
                SyntaxShape::ColumnPath,
                "optionally returns index of pattern in string by column paths",
            )
            .named(
                "range",
                SyntaxShape::Any,
                "optional start and/or end index",
                Some('r'),
            )
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Returns starting index of given pattern in string counting from 0. Returns -1 when there are no results."
    }

    /// Runs the command by delegating to `operate`.
    async fn run(
        &self,
        args: CommandArgs,
        registry: &CommandRegistry,
    ) -> Result<OutputStream, ShellError> {
        operate(args, registry).await
    }

    /// Usage examples covering a plain search and every supported form of `--range`.
    fn examples(&self) -> Vec<Example> {
        vec![
            Example {
                description: "Returns index of pattern in string",
                example: "echo 'my_library.rb' | str index-of '.rb'",
                result: Some(vec![UntaggedValue::int(10).into_untagged_value()]),
            },
            Example {
                description: "Returns index of pattern in string with start index",
                example: "echo '.rb.rb' | str index-of '.rb' -r '1,'",
                result: Some(vec![UntaggedValue::int(3).into_untagged_value()]),
            },
            Example {
                description: "Returns index of pattern in string with end index",
                example: "echo '123456' | str index-of '6' -r ',4'",
                result: Some(vec![UntaggedValue::int(-1).into_untagged_value()]),
            },
            Example {
                description: "Returns index of pattern in string with start and end index",
                example: "echo '123456' | str index-of '3' -r '1,4'",
                result: Some(vec![UntaggedValue::int(2).into_untagged_value()]),
            },
            Example {
                // Fixed typo in the user-facing help text ("Alternativly").
                description: "Alternatively you can use this form",
                example: "echo '123456' | str index-of '3' -r [1 4]",
                result: Some(vec![UntaggedValue::int(2).into_untagged_value()]),
            },
        ]
    }
}
/// Parses the command arguments and maps every input value through `action`.
///
/// A missing `--range` is represented by an empty string value, which `process_range`
/// resolves to the whole input. With no column paths the search is applied to the
/// value itself; otherwise the value at each given column path is replaced, in order,
/// by the search result.
async fn operate(
    args: CommandArgs,
    registry: &CommandRegistry,
) -> Result<OutputStream, ShellError> {
    let registry = registry.clone();
    let (
        Arguments {
            pattern,
            rest: column_paths,
            range,
        },
        input,
    ) = args.process(&registry).await?;

    let range = match range {
        Some(given) => given,
        None => UntaggedValue::Primitive(Primitive::String("".to_string())).into_untagged_value(),
    };

    let indexed = input.map(move |value| {
        if column_paths.is_empty() {
            return ReturnSuccess::value(action(&value, &pattern, &range, value.tag())?);
        }

        // Thread the row through every column path, swapping each targeted cell.
        let swapped = column_paths.iter().try_fold(value, |row, path| {
            let range = range.clone();
            let pattern = pattern.clone();
            row.swap_data_by_column_path(
                path,
                Box::new(move |old| action(old, &pattern, &range, old.tag())),
            )
        })?;
        ReturnSuccess::value(swapped)
    });

    Ok(indexed.to_output_stream())
}
/// Finds the first occurrence of `pattern` in the string held by `input`, searching
/// only the part of the string selected by `range`.
///
/// Returns an integer value tagged with `tag`: the byte offset of the match counted
/// from the start of the whole string, or `-1` when the pattern does not occur inside
/// the selected range.
///
/// # Errors
/// * "value is not string" when `input` is neither a `Line` nor a `String` primitive.
///   This is checked first, so a non-string input is never reported as a range problem.
/// * Any error produced by `process_range` for an invalid `range`.
/// * "range does not fall on a character boundary" when a bound lands inside a
///   multi-byte UTF-8 character (slicing there would otherwise panic).
fn action(
    input: &Value,
    pattern: &str,
    range: &Value,
    tag: impl Into<Tag>,
) -> Result<Value, ShellError> {
    match &input.value {
        UntaggedValue::Primitive(Primitive::Line(s))
        | UntaggedValue::Primitive(Primitive::String(s)) => {
            let bounds = process_range(input, range)?;
            // `process_range` guarantees 0 <= start <= end <= s.len(), so these casts
            // can neither wrap nor exceed the string length.
            let start_index = bounds.0 as usize;
            let end_index = bounds.1 as usize;

            // Checked slicing: the bounds are byte offsets supplied by the user and
            // may split a multi-byte character, which `s[a..b]` would turn into a panic.
            let haystack = s.get(start_index..end_index).ok_or_else(|| {
                ShellError::labeled_error(
                    "range does not fall on a character boundary",
                    "Invalid range",
                    range.tag(),
                )
            })?;

            match haystack.find(pattern) {
                // `find` is relative to the slice; shift back to whole-string offsets.
                Some(offset) => Ok(UntaggedValue::int(offset + start_index).into_value(tag)),
                None => Ok(UntaggedValue::int(-1).into_value(tag)),
            }
        }
        other => {
            let got = format!("got {}", other.type_name());
            Err(ShellError::labeled_error(
                "value is not string",
                got,
                tag.into().span,
            ))
        }
    }
}
/// Resolves the user-supplied `range` into validated `(start, end)` bounds for `input`.
///
/// `range` may be either
/// * a string of the form `"start,end"`, where either side may be left out, or
/// * a table of at most two values, each converted with `as_string`.
///
/// A side that is missing or does not parse as an `i32` silently falls back to its
/// default: `0` for the start, and the length of `input` (as given by `str::len`, or
/// `0` when `input` is not a string) for the end.
///
/// # Errors
/// * "too many indexes" when a table range has more than two entries.
/// * "value is not string" when `range` is neither a string nor a table.
/// * "Invalid start index" when the start is negative or greater than the end.
/// * "Invalid end index" when the end is negative or greater than the input length.
fn process_range(input: &Value, range: &Value) -> Result<IndexOfOptionalBounds, ShellError> {
    let haystack_len = match &input.value {
        UntaggedValue::Primitive(Primitive::Line(text))
        | UntaggedValue::Primitive(Primitive::String(text)) => text.len(),
        _ => 0,
    };
    let default_end = haystack_len as i32;

    // Pull out the (at most) two raw bounds; `None` means "not supplied".
    let (raw_start, raw_end): (Option<String>, Option<String>) = match &range.value {
        UntaggedValue::Primitive(Primitive::Line(spec))
        | UntaggedValue::Primitive(Primitive::String(spec)) => {
            let mut parts = spec.split(',').map(String::from);
            (parts.next(), parts.next())
        }
        UntaggedValue::Table(items) => {
            if items.len() > 2 {
                return Err(ShellError::labeled_error(
                    "there shouldn't be more than two indexes",
                    "too many indexes",
                    range.tag(),
                ));
            }
            let mut parts = items
                .iter()
                .map(|item| as_string(item).unwrap_or_default());
            (parts.next(), parts.next())
        }
        other => {
            return Err(ShellError::labeled_error(
                "value is not string",
                format!("got {}", other.type_name()),
                range.tag(),
            ));
        }
    };

    // Unparseable (including empty) bounds fall back to the defaults.
    let start_index = raw_start
        .and_then(|raw| raw.parse::<i32>().ok())
        .unwrap_or(0);
    let end_index = raw_end
        .and_then(|raw| raw.parse::<i32>().ok())
        .unwrap_or(default_end);

    if start_index < 0 || start_index > end_index {
        Err(ShellError::labeled_error(
            "start index can't be negative or greater than end index",
            "Invalid start index",
            range.tag(),
        ))
    } else if end_index < 0 || end_index < start_index || end_index > default_end {
        Err(ShellError::labeled_error(
            "end index can't be negative, smaller than start index or greater than input length",
            "Invalid end index",
            range.tag(),
        ))
    } else {
        Ok(IndexOfOptionalBounds(start_index, end_index))
    }
}
#[cfg(test)]
mod tests {
    use super::{action, SubCommand};
    use nu_plugin::test_helpers::value::string;
    use nu_protocol::{Primitive, UntaggedValue};
    use nu_source::Tag;

    /// Runs `action` on `word` with the string range `range` and asserts that the
    /// resulting index equals `expected`.
    fn assert_index_of(word: &str, pattern: &str, range: &str, expected: i32) {
        let bounds =
            UntaggedValue::Primitive(Primitive::String(range.to_string())).into_untagged_value();
        let outcome = action(&string(word), pattern, &bounds, Tag::unknown()).unwrap();
        let wanted =
            UntaggedValue::Primitive(Primitive::Int(expected.into())).into_untagged_value();
        assert_eq!(outcome, wanted);
    }

    #[test]
    fn examples_work_as_expected() {
        use crate::examples::test as test_examples;

        test_examples(SubCommand)
    }

    #[test]
    fn returns_index_of_substring() {
        assert_index_of("Cargo.tomL", ".tomL", "", 5);
    }

    #[test]
    fn index_of_does_not_exist_in_string() {
        assert_index_of("Cargo.tomL", "Lm", "", -1);
    }

    #[test]
    fn returns_index_of_next_substring() {
        assert_index_of("Cargo.Cargo", "Cargo", "1,", 6);
    }

    #[test]
    fn index_does_not_exist_due_to_end_index() {
        assert_index_of("Cargo.Banana", "Banana", ",5", -1);
    }

    #[test]
    fn returns_index_of_nums_in_middle_due_to_index_limit_from_both_ends() {
        assert_index_of("123123123", "123", "2,6", 3);
    }

    #[test]
    fn index_does_not_exists_due_to_strict_bounds() {
        assert_index_of("123456", "1", "2,4", -1);
    }
}

View File

@ -1,10 +1,12 @@
mod capitalize;
mod collect;
mod command;
mod contains;
mod downcase;
mod ends_with;
mod find_replace;
mod from;
mod index_of;
mod length;
mod reverse;
mod set;
@ -19,10 +21,12 @@ mod upcase;
pub use capitalize::SubCommand as StrCapitalize;
pub use collect::SubCommand as StrCollect;
pub use command::Command as Str;
pub use contains::SubCommand as StrContains;
pub use downcase::SubCommand as StrDowncase;
pub use ends_with::SubCommand as StrEndsWith;
pub use find_replace::SubCommand as StrFindReplace;
pub use from::SubCommand as StrFrom;
pub use index_of::SubCommand as StrIndexOf;
pub use length::SubCommand as StrLength;
pub use reverse::SubCommand as StrReverse;
pub use set::SubCommand as StrSet;