Change string contains operators to regex (#5117)

This commit is contained in:
Reilly Wood
2022-04-06 23:23:14 -07:00
committed by GitHub
parent 888369022f
commit b2c52b51b7
11 changed files with 130 additions and 43 deletions

View File

@ -22,6 +22,7 @@ nu-json = { path = "../nu-json", version = "0.60.1" }
typetag = "0.1.8"
num-format = "0.4.0"
sys-locale = "0.2.0"
regex = "1.5.4"
[features]
plugin = ["serde_json"]

View File

@ -32,8 +32,8 @@ impl Expression {
Operator::Pow => 100,
Operator::Multiply | Operator::Divide | Operator::Modulo => 95,
Operator::Plus | Operator::Minus => 90,
Operator::NotContains
| Operator::Contains
Operator::NotRegexMatch
| Operator::RegexMatch
| Operator::StartsWith
| Operator::LessThan
| Operator::LessThanOrEqual

View File

@ -11,8 +11,8 @@ pub enum Operator {
GreaterThan,
LessThanOrEqual,
GreaterThanOrEqual,
Contains,
NotContains,
RegexMatch,
NotRegexMatch,
Plus,
Minus,
Multiply,
@ -33,8 +33,8 @@ impl Display for Operator {
Operator::NotEqual => write!(f, "!="),
Operator::LessThan => write!(f, "<"),
Operator::GreaterThan => write!(f, ">"),
Operator::Contains => write!(f, "=~"),
Operator::NotContains => write!(f, "!~"),
Operator::RegexMatch => write!(f, "=~"),
Operator::NotRegexMatch => write!(f, "!~"),
Operator::Plus => write!(f, "+"),
Operator::Minus => write!(f, "-"),
Operator::Multiply => write!(f, "*"),

View File

@ -12,6 +12,7 @@ pub use from_value::FromValue;
use indexmap::map::IndexMap;
use num_format::{Locale, ToFormattedString};
pub use range::*;
use regex::Regex;
use serde::{Deserialize, Serialize};
pub use stream::*;
use sys_locale::get_locale;
@ -2029,17 +2030,38 @@ impl Value {
}
}
pub fn contains(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> {
pub fn regex_match(&self, op: Span, rhs: &Value, invert: bool) -> Result<Value, ShellError> {
let span = span(&[self.span()?, rhs.span()?]);
match (self, rhs) {
(Value::String { val: lhs, .. }, Value::String { val: rhs, .. }) => Ok(Value::Bool {
val: lhs.contains(rhs),
span,
}),
(Value::CustomValue { val: lhs, span }, rhs) => {
lhs.operation(*span, Operator::Contains, op, rhs)
(
Value::String { val: lhs, .. },
Value::String {
val: rhs,
span: rhs_span,
},
) => {
// We are leaving some performance on the table by compiling the regex every time.
// Small regexes compile in microseconds, and the simplicity of this approach currently
// outweighs the performance costs. Revisit this if it ever becomes a bottleneck.
let regex = Regex::new(rhs)
.map_err(|e| ShellError::UnsupportedInput(format!("{e}"), *rhs_span))?;
let is_match = regex.is_match(lhs);
Ok(Value::Bool {
val: if invert { !is_match } else { is_match },
span,
})
}
(Value::CustomValue { val: lhs, span }, rhs) => lhs.operation(
*span,
if invert {
Operator::NotRegexMatch
} else {
Operator::RegexMatch
},
op,
rhs,
),
_ => Err(ShellError::OperatorMismatch {
op_span: op,
lhs_ty: self.get_type(),
@ -2071,27 +2093,6 @@ impl Value {
}
}
/// Evaluates the "does not contain" operator (`Operator::NotContains`, displayed as `!~`)
/// with `self` as the left-hand operand and `rhs` as the right-hand operand.
///
/// * String on both sides: returns `Value::Bool` whose `val` is the negation of
///   `str::contains`, i.e. a plain substring test with no pattern syntax.
/// * `Value::CustomValue` on the left: forwards to the custom value's `operation`,
///   passing `Operator::NotContains`, the operator span `op`, and `rhs` unchanged.
/// * Any other combination: returns `ShellError::OperatorMismatch` carrying the type
///   and span of each operand plus the operator span.
///
/// # Errors
///
/// Besides `OperatorMismatch`, any error returned by `self.span()` or `rhs.span()` is
/// propagated via `?`, as is whatever error the custom value's `operation` returns.
pub fn not_contains(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> {
// Span attached to the boolean result, built from both operand spans.
// NOTE(review): `span(..)` is a helper defined elsewhere; presumably it merges the
// two spans into one covering range -- confirm.
let span = span(&[self.span()?, rhs.span()?]);
match (self, rhs) {
(Value::String { val: lhs, .. }, Value::String { val: rhs, .. }) => Ok(Value::Bool {
val: !lhs.contains(rhs),
span,
}),
// The `span` bound by this pattern shadows the combined span above, so the custom
// value receives its own span rather than the combined one.
(Value::CustomValue { val: lhs, span }, rhs) => {
lhs.operation(*span, Operator::NotContains, op, rhs)
}
// Operand types this operator does not support.
_ => Err(ShellError::OperatorMismatch {
op_span: op,
lhs_ty: self.get_type(),
lhs_span: self.span()?,
rhs_ty: rhs.get_type(),
rhs_span: rhs.span()?,
}),
}
}
pub fn modulo(&self, op: Span, rhs: &Value) -> Result<Value, ShellError> {
let span = span(&[self.span()?, rhs.span()?]);