Refactor find to handle regex search and non-regex search the same way (#15839)

# Description

Regex search and search with directly provided search terms used to
follow two different code paths. Now all possible search options get
turned into a regex, with optional additional search options, and are
handled using a unified code path which mostly follows the logic of the
existing term-search code path.

# User-Facing Changes

Regex search will now behave in the same way as non-regex search:
- split multiline strings into lists of lines, and filter out the lines
that don't match
- highlight matching string sections (unless the --no-highlight flag is
used)
- search through the specified record columns if the --columns flag is
used

The behavior of non-regex search should be unaffected by this commit.
This commit is contained in:
new-years-eve 2025-05-28 23:32:36 +02:00 committed by GitHub
parent a8c49857d9
commit 13452a7aa2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 281 additions and 304 deletions

View File

@ -1,10 +1,8 @@
use crate::help::highlight_search_string;
use fancy_regex::Regex;
use fancy_regex::{Regex, escape};
use nu_ansi_term::Style;
use nu_color_config::StyleComputer;
use nu_engine::command_prelude::*;
use nu_protocol::Config;
use nu_utils::IgnoreCaseExt;
#[derive(Clone)]
pub struct Find;
@ -54,7 +52,7 @@ impl Command for Find {
)
.switch(
"no-highlight",
"no-highlight mode: find without marking with ascii code",
"no-highlight mode: find without marking with ansi code",
Some('n'),
)
.switch("invert", "invert the match", Some('v'))
@ -109,8 +107,14 @@ impl Command for Find {
example: r#"[abc bde arc abf] | find --regex "ab""#,
result: Some(Value::list(
vec![
Value::test_string("abc".to_string()),
Value::test_string("abf".to_string()),
Value::test_string(
"\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
.to_string(),
),
Value::test_string(
"\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
.to_string(),
),
],
Span::test_data(),
)),
@ -120,8 +124,14 @@ impl Command for Find {
example: r#"[aBc bde Arc abf] | find --regex "ab" -i"#,
result: Some(Value::list(
vec![
Value::test_string("aBc".to_string()),
Value::test_string("abf".to_string()),
Value::test_string(
"\u{1b}[37m\u{1b}[0m\u{1b}[41;37maB\u{1b}[0m\u{1b}[37mc\u{1b}[0m"
.to_string(),
),
Value::test_string(
"\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m"
.to_string(),
),
],
Span::test_data(),
)),
@ -131,7 +141,7 @@ impl Command for Find {
example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu""#,
result: Some(Value::test_list(vec![Value::test_record(record! {
"version" => Value::test_string("0.1.0"),
"name" => Value::test_string("nushell".to_string()),
"name" => Value::test_string("\u{1b}[37m\u{1b}[0m\u{1b}[41;37mnu\u{1b}[0m\u{1b}[37mshell\u{1b}[0m".to_string()),
})])),
},
Example {
@ -210,31 +220,72 @@ impl Command for Find {
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let regex = call.get_flag::<String>(engine_state, stack, "regex")?;
let pattern = get_match_pattern_from_arguments(engine_state, stack, call)?;
let columns_to_search: Vec<_> = call
.get_flag(engine_state, stack, "columns")?
.unwrap_or_default();
if let Some(regex) = regex {
find_with_regex(regex, engine_state, stack, call, input)
} else {
let input = split_string_if_multiline(input, call.head);
find_with_rest_and_highlight(engine_state, stack, call, input)
}
find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input)
}
}
fn find_with_regex(
regex: String,
/// Search settings for a single `find` invocation.
///
/// Both a `--regex` argument and plain search terms are represented by one compiled
/// regex, accompanied by the flags and styles that control filtering and highlighting.
#[derive(Clone)]
struct MatchPattern {
/// the regex to be used for matching in text
regex: Regex,
/// the list of match terms converted to lowercase strings, or empty if a regex was provided
lower_terms: Vec<String>,
/// return a modified version of the value where matching parts are highlighted
highlight: bool,
/// return the values that aren't a match instead
invert: bool,
/// style of the non-highlighted string sections
string_style: Style,
/// style of the highlighted string sections
highlight_style: Style,
}
fn get_match_pattern_from_arguments(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let span = call.head;
) -> Result<MatchPattern, ShellError> {
let config = stack.get_config(engine_state);
let span = call.head;
let regex = call.get_flag::<String>(engine_state, stack, "regex")?;
let terms = call.rest::<Value>(engine_state, stack, 0)?;
let invert = call.has_flag(engine_state, stack, "invert")?;
let highlight = !call.has_flag(engine_state, stack, "no-highlight")?;
let style_computer = StyleComputer::from_config(engine_state, stack);
// Currently, search results all use the same style.
// Also note that this sample string is passed into user-written code (the closure that may or may not be
// defined for "string").
let string_style = style_computer.compute("string", &Value::string("search result", span));
let highlight_style =
style_computer.compute("search_result", &Value::string("search result", span));
let (regex_str, lower_terms) = if let Some(regex) = regex {
if !terms.is_empty() {
return Err(ShellError::IncompatibleParametersSingle {
msg: "Cannot use a `--regex` parameter with additional search terms".into(),
span: call.get_flag_span(stack, "regex").expect("has flag"),
});
}
let insensitive = call.has_flag(engine_state, stack, "ignore-case")?;
let multiline = call.has_flag(engine_state, stack, "multiline")?;
let dotall = call.has_flag(engine_state, stack, "dotall")?;
let invert = call.has_flag(engine_state, stack, "invert")?;
let flags = match (insensitive, multiline, dotall) {
(false, false, false) => "",
@ -247,221 +298,169 @@ fn find_with_regex(
(true, true, true) => "(?ims)", // case insensitive, multi-line mode and allow . to match \n
};
let regex = flags.to_string() + regex.as_str();
(flags.to_string() + regex.as_str(), Vec::new())
} else {
let mut regex = String::new();
let re = Regex::new(regex.as_str()).map_err(|e| ShellError::TypeMismatch {
regex += "(?i)";
let lower_terms = terms
.iter()
.map(|v| escape(&v.to_expanded_string("", &config).to_lowercase()).into())
.collect::<Vec<String>>();
if let Some(term) = lower_terms.first() {
regex += term;
}
for term in lower_terms.iter().skip(1) {
regex += "|";
regex += term;
}
let lower_terms = terms
.iter()
.map(|v| v.to_expanded_string("", &config).to_lowercase())
.collect::<Vec<String>>();
(regex, lower_terms)
};
let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch {
err_message: format!("invalid regex: {e}"),
span,
})?;
input.filter(
move |value| match value {
Value::String { val, .. } => re.is_match(val.as_str()).unwrap_or(false) != invert,
Value::Record { val, .. } => values_match_find(val.values(), &re, &config, invert),
Value::List { vals, .. } => values_match_find(vals, &re, &config, invert),
_ => false,
},
engine_state.signals(),
)
}
fn values_match_find<'a, I>(values: I, re: &Regex, config: &Config, invert: bool) -> bool
where
I: IntoIterator<Item = &'a Value>,
{
match invert {
true => !record_matches_regex(values, re, config),
false => record_matches_regex(values, re, config),
}
}
fn record_matches_regex<'a, I>(values: I, re: &Regex, config: &Config) -> bool
where
I: IntoIterator<Item = &'a Value>,
{
values.into_iter().any(|v| {
re.is_match(v.to_expanded_string(" ", config).as_str())
.unwrap_or(false)
Ok(MatchPattern {
regex,
lower_terms,
invert,
highlight,
string_style,
highlight_style,
})
}
fn highlight_terms_in_string(
val: &Value,
span: Span,
config: &Config,
terms: &[Value],
string_style: Style,
highlight_style: Style,
) -> Value {
let val_str = val.to_expanded_string("", config);
// map functions
if let Some(term) = terms
.iter()
.find(|term| contains_ignore_case(&val_str, &term.to_expanded_string("", config)))
{
let term_str = term.to_expanded_string("", config);
let highlighted_str =
highlight_search_string(&val_str, &term_str, &string_style, &highlight_style)
.unwrap_or_else(|_| string_style.paint(&term_str).to_string());
fn highlight_matches_in_string(pattern: &MatchPattern, val: String) -> String {
// strip haystack to remove existing ansi style
let stripped_val = nu_utils::strip_ansi_string_unlikely(val);
let mut last_match_end = 0;
let mut highlighted = String::new();
return Value::string(highlighted_str, span);
for cap in pattern.regex.captures_iter(stripped_val.as_ref()) {
match cap {
Ok(capture) => {
let start = match capture.get(0) {
Some(acap) => acap.start(),
None => 0,
};
let end = match capture.get(0) {
Some(acap) => acap.end(),
None => 0,
};
highlighted.push_str(
&pattern
.string_style
.paint(&stripped_val[last_match_end..start])
.to_string(),
);
highlighted.push_str(
&pattern
.highlight_style
.paint(&stripped_val[start..end])
.to_string(),
);
last_match_end = end;
}
Err(_e) => {
// in case of error, return the string with no highlight
return pattern.string_style.paint(&stripped_val).to_string();
}
}
}
val.clone()
highlighted.push_str(
&pattern
.string_style
.paint(&stripped_val[last_match_end..])
.to_string(),
);
highlighted
}
#[allow(clippy::too_many_arguments)]
fn highlight_terms_in_record_with_search_columns(
search_cols: &[String],
record: &Record,
span: Span,
config: &Config,
terms: &[Value],
string_style: Style,
highlight_style: Style,
fn highlight_matches_in_record_or_value(
pattern: &MatchPattern,
value: Value,
columns_to_search: &[String],
) -> Value {
let col_select = !search_cols.is_empty();
let term_strs: Vec<_> = terms
.iter()
.map(|v| v.to_expanded_string("", config))
.collect();
if !pattern.highlight || pattern.invert {
return value;
}
let span = value.span();
match value {
Value::Record { val: record, .. } => {
let col_select = !columns_to_search.is_empty();
// TODO: change API to mutate in place
let mut record = record.clone();
// iterator of Ok((val_str, term_str)) pairs if the value should be highlighted, otherwise Err(val)
let mut record = record.into_owned();
for (col, val) in record.iter_mut() {
if col_select && !search_cols.contains(col) {
if col_select && !columns_to_search.contains(col) {
continue;
}
let val_str = val.to_expanded_string("", config);
let Some(term_str) = term_strs
.iter()
.find(|term_str| contains_ignore_case(&val_str, term_str))
else {
continue;
};
let highlighted_str =
highlight_search_string(&val_str, term_str, &string_style, &highlight_style)
.unwrap_or_else(|_| string_style.paint(term_str).to_string());
*val = Value::string(highlighted_str, span);
if let Value::String { val: val_str, .. } = val {
if pattern.regex.is_match(val_str).unwrap_or(false) {
let val_str = std::mem::take(val_str);
*val = highlight_matches_in_string(pattern, val_str).into_value(span)
}
}
}
Value::record(record, span)
}
fn contains_ignore_case(string: &str, substring: &str) -> bool {
string
.to_folded_case()
.contains(&substring.to_folded_case())
Value::String { val, .. } => highlight_matches_in_string(pattern, val).into_value(span),
_ => value,
}
}
fn find_with_rest_and_highlight(
fn find_in_pipelinedata(
pattern: MatchPattern,
columns_to_search: Vec<String>,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let span = call.head;
let config = stack.get_config(engine_state);
let filter_config = config.clone();
let no_highlight = call.has_flag(engine_state, stack, "no-highlight")?;
let invert = call.has_flag(engine_state, stack, "invert")?;
let terms = call.rest::<Value>(engine_state, stack, 0)?;
let lower_terms = terms
.iter()
.map(|v| Value::string(v.to_expanded_string("", &config).to_lowercase(), span))
.collect::<Vec<Value>>();
let style_computer = StyleComputer::from_config(engine_state, stack);
// Currently, search results all use the same style.
// Also note that this sample string is passed into user-written code (the closure that may or may not be
// defined for "string").
let string_style = style_computer.compute("string", &Value::string("search result", span));
let highlight_style =
style_computer.compute("search_result", &Value::string("search result", span));
let cols_to_search_in_map: Vec<_> = call
.get_flag(engine_state, stack, "columns")?
.unwrap_or_default();
let cols_to_search_in_filter = cols_to_search_in_map.clone();
let map_pattern = pattern.clone();
let map_columns_to_search = columns_to_search.clone();
match input {
PipelineData::Empty => Ok(PipelineData::Empty),
PipelineData::Value(_, _) => input
.map(
move |mut x| {
let span = x.span();
if no_highlight {
return x;
};
match &mut x {
Value::Record { val, .. } => highlight_terms_in_record_with_search_columns(
&cols_to_search_in_map,
val,
span,
&config,
&terms,
string_style,
highlight_style,
),
Value::String { .. } => highlight_terms_in_string(
&x,
span,
&config,
&terms,
string_style,
highlight_style,
),
_ => x,
}
.filter(
move |value| {
record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config)
},
engine_state.signals(),
)?
.filter(
move |value| {
value_should_be_printed(
value,
&filter_config,
&lower_terms,
span,
&cols_to_search_in_filter,
invert,
)
.map(
move |x| {
highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search)
},
engine_state.signals(),
),
PipelineData::ListStream(stream, metadata) => {
let stream = stream.modify(|iter| {
iter.map(move |mut x| {
let span = x.span();
if no_highlight {
return x;
};
match &mut x {
Value::Record { val, .. } => highlight_terms_in_record_with_search_columns(
&cols_to_search_in_map,
val,
span,
&config,
&terms,
string_style,
highlight_style,
),
_ => x,
}
iter.filter(move |value| {
record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config)
})
.filter(move |value| {
value_should_be_printed(
value,
&filter_config,
&lower_terms,
span,
&cols_to_search_in_filter,
invert,
)
.map(move |x| {
highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search)
})
});
@ -470,30 +469,15 @@ fn find_with_rest_and_highlight(
PipelineData::ByteStream(stream, ..) => {
let span = stream.span();
if let Some(lines) = stream.lines() {
let terms = lower_terms
.into_iter()
.map(|term| term.to_expanded_string("", &filter_config).to_lowercase())
.collect::<Vec<_>>();
let mut output: Vec<Value> = vec![];
for line in lines {
let line = line?;
let lower_val = line.to_lowercase();
for term in &terms {
if lower_val.contains(term) {
if no_highlight {
output.push(Value::string(&line, span))
if string_should_be_printed(&pattern, &line) != pattern.invert {
if pattern.highlight && !pattern.invert {
output
.push(highlight_matches_in_string(&pattern, line).into_value(span))
} else {
output.push(Value::string(
highlight_search_string(
&line,
term,
&string_style,
&highlight_style,
)?,
span,
))
}
output.push(line.into_value(span))
}
}
}
@ -505,20 +489,16 @@ fn find_with_rest_and_highlight(
}
}
fn value_should_be_printed(
value: &Value,
filter_config: &Config,
lower_terms: &[Value],
span: Span,
columns_to_search: &[String],
invert: bool,
) -> bool {
let lower_value = Value::string(
value.to_expanded_string("", filter_config).to_lowercase(),
span,
);
// filter functions
let mut match_found = lower_terms.iter().any(|term| match value {
/// Reports whether `value` contains a match for the pattern's regex.
///
/// An error returned by the regex engine is treated the same as "no match".
/// The pattern's `invert` flag is not applied here.
fn string_should_be_printed(pattern: &MatchPattern, value: &str) -> bool {
    matches!(pattern.regex.is_match(value), Ok(true))
}
fn value_should_be_printed(pattern: &MatchPattern, value: &Value, config: &Config) -> bool {
let lower_value = value.to_expanded_string("", config).to_lowercase();
match value {
Value::Bool { .. }
| Value::Int { .. }
| Value::Filesize { .. }
@ -528,57 +508,51 @@ fn value_should_be_printed(
| Value::Float { .. }
| Value::Closure { .. }
| Value::Nothing { .. }
| Value::Error { .. } => term_equals_value(term, &lower_value, span),
Value::String { .. }
| Value::Glob { .. }
| Value::Error { .. } => {
if !pattern.lower_terms.is_empty() {
// look for exact match when searching with terms
pattern
.lower_terms
.iter()
.any(|term: &String| term == &lower_value)
} else {
string_should_be_printed(pattern, &lower_value)
}
}
Value::Glob { .. }
| Value::List { .. }
| Value::CellPath { .. }
| Value::Custom { .. } => term_contains_value(term, &lower_value, span),
Value::Record { val, .. } => {
record_matches_term(val, columns_to_search, filter_config, term, span)
}
| Value::Record { .. }
| Value::Custom { .. } => string_should_be_printed(pattern, &lower_value),
Value::String { val, .. } => string_should_be_printed(pattern, val),
Value::Binary { .. } => false,
});
if invert {
match_found = !match_found;
}
match_found
}
fn term_contains_value(term: &Value, value: &Value, span: Span) -> bool {
term.r#in(span, value, span)
.is_ok_and(|value| value.is_true())
}
fn term_equals_value(term: &Value, value: &Value, span: Span) -> bool {
term.eq(span, value, span)
.is_ok_and(|value| value.is_true())
}
fn record_matches_term(
record: &Record,
fn record_or_value_should_be_printed(
pattern: &MatchPattern,
value: &Value,
columns_to_search: &[String],
filter_config: &Config,
term: &Value,
span: Span,
config: &Config,
) -> bool {
let match_found = match value {
Value::Record { val: record, .. } => {
// Only perform column selection if given columns.
let col_select = !columns_to_search.is_empty();
record.iter().any(|(col, val)| {
if col_select && !columns_to_search.contains(col) {
return false;
}
let lower_val = if !val.is_error() {
Value::string(
val.to_expanded_string("", filter_config).to_lowercase(),
Span::test_data(),
)
} else {
(*val).clone()
};
term_contains_value(term, &lower_val, span)
value_should_be_printed(pattern, val, config)
})
}
_ => value_should_be_printed(pattern, value, config),
};
match_found != pattern.invert
}
// utility
fn split_string_if_multiline(input: PipelineData, head_span: Span) -> PipelineData {
let span = input.span().unwrap_or(head_span);

View File

@ -16,7 +16,7 @@ pub use help_modules::HelpModules;
pub use help_operators::HelpOperators;
pub use help_pipe_and_redirect::HelpPipeAndRedirect;
pub(crate) use help_::{highlight_search_in_table, highlight_search_string};
pub(crate) use help_::highlight_search_in_table;
pub(crate) use help_aliases::help_aliases;
pub(crate) use help_commands::help_commands;
pub(crate) use help_modules::help_modules;

View File

@ -118,7 +118,10 @@ fn find_with_regex_in_table_keeps_row_if_one_column_matches() {
"[[name nickname]; [Maurice moe] [Laurence larry]] | find --no-highlight --regex ce | get name | to json -r"
);
assert_eq!(actual.out, r#"["Maurice","Laurence"]"#);
assert_eq!(
actual.out,
r#"["\u001b[37mMauri\u001b[0m\u001b[41;37mce\u001b[0m\u001b[37m\u001b[0m","\u001b[37mLauren\u001b[0m\u001b[41;37mce\u001b[0m\u001b[37m\u001b[0m"]"#
);
assert_eq!(actual_no_highlight.out, r#"["Maurice","Laurence"]"#);
}