diff --git a/crates/nu-command/src/filters/find.rs b/crates/nu-command/src/filters/find.rs index 28eac810fe..3b4a0219f3 100644 --- a/crates/nu-command/src/filters/find.rs +++ b/crates/nu-command/src/filters/find.rs @@ -1,10 +1,8 @@ -use crate::help::highlight_search_string; -use fancy_regex::Regex; +use fancy_regex::{Regex, escape}; use nu_ansi_term::Style; use nu_color_config::StyleComputer; use nu_engine::command_prelude::*; use nu_protocol::Config; -use nu_utils::IgnoreCaseExt; #[derive(Clone)] pub struct Find; @@ -54,7 +52,7 @@ impl Command for Find { ) .switch( "no-highlight", - "no-highlight mode: find without marking with ascii code", + "no-highlight mode: find without marking with ansi code", Some('n'), ) .switch("invert", "invert the match", Some('v')) @@ -109,8 +107,14 @@ impl Command for Find { example: r#"[abc bde arc abf] | find --regex "ab""#, result: Some(Value::list( vec![ - Value::test_string("abc".to_string()), - Value::test_string("abf".to_string()), + Value::test_string( + "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mc\u{1b}[0m" + .to_string(), + ), + Value::test_string( + "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m" + .to_string(), + ), ], Span::test_data(), )), @@ -120,8 +124,14 @@ impl Command for Find { example: r#"[aBc bde Arc abf] | find --regex "ab" -i"#, result: Some(Value::list( vec![ - Value::test_string("aBc".to_string()), - Value::test_string("abf".to_string()), + Value::test_string( + "\u{1b}[37m\u{1b}[0m\u{1b}[41;37maB\u{1b}[0m\u{1b}[37mc\u{1b}[0m" + .to_string(), + ), + Value::test_string( + "\u{1b}[37m\u{1b}[0m\u{1b}[41;37mab\u{1b}[0m\u{1b}[37mf\u{1b}[0m" + .to_string(), + ), ], Span::test_data(), )), @@ -131,7 +141,7 @@ impl Command for Find { example: r#"[[version name]; ['0.1.0' nushell] ['0.1.1' fish] ['0.2.0' zsh]] | find --regex "nu""#, result: Some(Value::test_list(vec![Value::test_record(record! { "version" => Value::test_string("0.1.0"), - "name" => Value::test_string("nushell".to_string()), + "name" => Value::test_string("\u{1b}[37m\u{1b}[0m\u{1b}[41;37mnu\u{1b}[0m\u{1b}[37mshell\u{1b}[0m".to_string()), })])), }, Example { @@ -210,169 +220,52 @@ impl Command for Find { call: &Call, input: PipelineData, ) -> Result { - let regex = call.get_flag::(engine_state, stack, "regex")?; + let pattern = get_match_pattern_from_arguments(engine_state, stack, call)?; - if let Some(regex) = regex { - find_with_regex(regex, engine_state, stack, call, input) - } else { - let input = split_string_if_multiline(input, call.head); - find_with_rest_and_highlight(engine_state, stack, call, input) - } + let columns_to_search: Vec<_> = call + .get_flag(engine_state, stack, "columns")? + .unwrap_or_default(); + + let input = split_string_if_multiline(input, call.head); + + find_in_pipelinedata(pattern, columns_to_search, engine_state, stack, input) } } -fn find_with_regex( - regex: String, +#[derive(Clone)] +struct MatchPattern { + /// the regex to be used for matching in text + regex: Regex, + + /// the list of match terms converted to lowercase strings, or empty if a regex was provided + lower_terms: Vec, + + /// return a modified version of the value where matching parts are highlighted + highlight: bool, + + /// return the values that aren't a match instead + invert: bool, + + /// style of the non-highlighted string sections + string_style: Style, + + /// style of the highlighted string sections + highlight_style: Style, +} + +fn get_match_pattern_from_arguments( engine_state: &EngineState, stack: &mut Stack, call: &Call, - input: PipelineData, -) -> Result { - let span = call.head; +) -> Result { let config = stack.get_config(engine_state); - let insensitive = call.has_flag(engine_state, stack, "ignore-case")?; - let multiline = call.has_flag(engine_state, stack, "multiline")?; - let dotall = call.has_flag(engine_state, stack, "dotall")?; - let invert = call.has_flag(engine_state, stack, "invert")?; - - let flags = match (insensitive, multiline, dotall) { - (false, false, false) => "", - (true, false, false) => "(?i)", // case insensitive - (false, true, false) => "(?m)", // multi-line mode - (false, false, true) => "(?s)", // allow . to match \n - (true, true, false) => "(?im)", // case insensitive and multi-line mode - (true, false, true) => "(?is)", // case insensitive and allow . to match \n - (false, true, true) => "(?ms)", // multi-line mode and allow . to match \n - (true, true, true) => "(?ims)", // case insensitive, multi-line mode and allow . to match \n - }; - - let regex = flags.to_string() + regex.as_str(); - - let re = Regex::new(regex.as_str()).map_err(|e| ShellError::TypeMismatch { - err_message: format!("invalid regex: {e}"), - span, - })?; - - input.filter( - move |value| match value { - Value::String { val, .. } => re.is_match(val.as_str()).unwrap_or(false) != invert, - Value::Record { val, .. } => values_match_find(val.values(), &re, &config, invert), - Value::List { vals, .. } => values_match_find(vals, &re, &config, invert), - _ => false, - }, - engine_state.signals(), - ) -} - -fn values_match_find<'a, I>(values: I, re: &Regex, config: &Config, invert: bool) -> bool -where - I: IntoIterator, -{ - match invert { - true => !record_matches_regex(values, re, config), - false => record_matches_regex(values, re, config), - } -} - -fn record_matches_regex<'a, I>(values: I, re: &Regex, config: &Config) -> bool -where - I: IntoIterator, -{ - values.into_iter().any(|v| { - re.is_match(v.to_expanded_string(" ", config).as_str()) - .unwrap_or(false) - }) -} - -fn highlight_terms_in_string( - val: &Value, - span: Span, - config: &Config, - terms: &[Value], - string_style: Style, - highlight_style: Style, -) -> Value { - let val_str = val.to_expanded_string("", config); - - if let Some(term) = terms - .iter() - .find(|term| contains_ignore_case(&val_str, &term.to_expanded_string("", config))) - { - let term_str = term.to_expanded_string("", config); - let highlighted_str = - highlight_search_string(&val_str, &term_str, &string_style, &highlight_style) - .unwrap_or_else(|_| string_style.paint(&term_str).to_string()); - - return Value::string(highlighted_str, span); - } - - val.clone() -} - -#[allow(clippy::too_many_arguments)] -fn highlight_terms_in_record_with_search_columns( - search_cols: &[String], - record: &Record, - span: Span, - config: &Config, - terms: &[Value], - string_style: Style, - highlight_style: Style, -) -> Value { - let col_select = !search_cols.is_empty(); - let term_strs: Vec<_> = terms - .iter() - .map(|v| v.to_expanded_string("", config)) - .collect(); - - // TODO: change API to mutate in place - let mut record = record.clone(); - // iterator of Ok((val_str, term_str)) pairs if the value should be highlighted, otherwise Err(val) - for (col, val) in record.iter_mut() { - if col_select && !search_cols.contains(col) { - continue; - } - let val_str = val.to_expanded_string("", config); - let Some(term_str) = term_strs - .iter() - .find(|term_str| contains_ignore_case(&val_str, term_str)) - else { - continue; - }; - - let highlighted_str = - highlight_search_string(&val_str, term_str, &string_style, &highlight_style) - .unwrap_or_else(|_| string_style.paint(term_str).to_string()); - - *val = Value::string(highlighted_str, span); - } - - Value::record(record, span) -} - -fn contains_ignore_case(string: &str, substring: &str) -> bool { - string - .to_folded_case() - .contains(&substring.to_folded_case()) -} - -fn find_with_rest_and_highlight( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { let span = call.head; - let config = stack.get_config(engine_state); - let filter_config = config.clone(); - let no_highlight = call.has_flag(engine_state, stack, "no-highlight")?; - let invert = call.has_flag(engine_state, stack, "invert")?; + let regex = call.get_flag::(engine_state, stack, "regex")?; let terms = call.rest::(engine_state, stack, 0)?; - let lower_terms = terms - .iter() - .map(|v| Value::string(v.to_expanded_string("", &config).to_lowercase(), span)) - .collect::>(); + + let invert = call.has_flag(engine_state, stack, "invert")?; + let highlight = !call.has_flag(engine_state, stack, "no-highlight")?; let style_computer = StyleComputer::from_config(engine_state, stack); // Currently, search results all use the same style. @@ -382,86 +275,192 @@ fn find_with_rest_and_highlight( let highlight_style = style_computer.compute("search_result", &Value::string("search result", span)); - let cols_to_search_in_map: Vec<_> = call - .get_flag(engine_state, stack, "columns")? - .unwrap_or_default(); + let (regex_str, lower_terms) = if let Some(regex) = regex { + if !terms.is_empty() { + return Err(ShellError::IncompatibleParametersSingle { + msg: "Cannot use a `--regex` parameter with additional search terms".into(), + span: call.get_flag_span(stack, "regex").expect("has flag"), + }); + } - let cols_to_search_in_filter = cols_to_search_in_map.clone(); + let insensitive = call.has_flag(engine_state, stack, "ignore-case")?; + let multiline = call.has_flag(engine_state, stack, "multiline")?; + let dotall = call.has_flag(engine_state, stack, "dotall")?; + + let flags = match (insensitive, multiline, dotall) { + (false, false, false) => "", + (true, false, false) => "(?i)", // case insensitive + (false, true, false) => "(?m)", // multi-line mode + (false, false, true) => "(?s)", // allow . to match \n + (true, true, false) => "(?im)", // case insensitive and multi-line mode + (true, false, true) => "(?is)", // case insensitive and allow . to match \n + (false, true, true) => "(?ms)", // multi-line mode and allow . to match \n + (true, true, true) => "(?ims)", // case insensitive, multi-line mode and allow . to match \n + }; + + (flags.to_string() + regex.as_str(), Vec::new()) + } else { + let mut regex = String::new(); + + regex += "(?i)"; + + let lower_terms = terms + .iter() + .map(|v| escape(&v.to_expanded_string("", &config).to_lowercase()).into()) + .collect::>(); + + if let Some(term) = lower_terms.first() { + regex += term; + } + + for term in lower_terms.iter().skip(1) { + regex += "|"; + regex += term; + } + + let lower_terms = terms + .iter() + .map(|v| v.to_expanded_string("", &config).to_lowercase()) + .collect::>(); + + (regex, lower_terms) + }; + + let regex = Regex::new(regex_str.as_str()).map_err(|e| ShellError::TypeMismatch { + err_message: format!("invalid regex: {e}"), + span, + })?; + + Ok(MatchPattern { + regex, + lower_terms, + invert, + highlight, + string_style, + highlight_style, + }) +} + +// map functions + +fn highlight_matches_in_string(pattern: &MatchPattern, val: String) -> String { + // strip haystack to remove existing ansi style + let stripped_val = nu_utils::strip_ansi_string_unlikely(val); + let mut last_match_end = 0; + let mut highlighted = String::new(); + + for cap in pattern.regex.captures_iter(stripped_val.as_ref()) { + match cap { + Ok(capture) => { + let start = match capture.get(0) { + Some(acap) => acap.start(), + None => 0, + }; + let end = match capture.get(0) { + Some(acap) => acap.end(), + None => 0, + }; + highlighted.push_str( + &pattern + .string_style + .paint(&stripped_val[last_match_end..start]) + .to_string(), + ); + highlighted.push_str( + &pattern + .highlight_style + .paint(&stripped_val[start..end]) + .to_string(), + ); + last_match_end = end; + } + Err(_e) => { + // in case of error, return the string with no highlight + return pattern.string_style.paint(&stripped_val).to_string(); + } + } + } + + highlighted.push_str( + &pattern + .string_style + .paint(&stripped_val[last_match_end..]) + .to_string(), + ); + highlighted +} + +fn highlight_matches_in_record_or_value( + pattern: &MatchPattern, + value: Value, + columns_to_search: &[String], +) -> Value { + if !pattern.highlight || pattern.invert { + return value; + } + let span = value.span(); + + match value { + Value::Record { val: record, .. } => { + let col_select = !columns_to_search.is_empty(); + + // TODO: change API to mutate in place + let mut record = record.into_owned(); + + for (col, val) in record.iter_mut() { + if col_select && !columns_to_search.contains(col) { + continue; + } + + if let Value::String { val: val_str, .. } = val { + if pattern.regex.is_match(val_str).unwrap_or(false) { + let val_str = std::mem::take(val_str); + *val = highlight_matches_in_string(pattern, val_str).into_value(span) + } + } + } + + Value::record(record, span) + } + Value::String { val, .. } => highlight_matches_in_string(pattern, val).into_value(span), + _ => value, + } +} + +fn find_in_pipelinedata( + pattern: MatchPattern, + columns_to_search: Vec, + engine_state: &EngineState, + stack: &mut Stack, + input: PipelineData, +) -> Result { + let config = stack.get_config(engine_state); + + let map_pattern = pattern.clone(); + let map_columns_to_search = columns_to_search.clone(); match input { PipelineData::Empty => Ok(PipelineData::Empty), PipelineData::Value(_, _) => input - .map( - move |mut x| { - let span = x.span(); - if no_highlight { - return x; - }; - match &mut x { - Value::Record { val, .. } => highlight_terms_in_record_with_search_columns( - &cols_to_search_in_map, - val, - span, - &config, - &terms, - string_style, - highlight_style, - ), - Value::String { .. } => highlight_terms_in_string( - &x, - span, - &config, - &terms, - string_style, - highlight_style, - ), - _ => x, - } + .filter( + move |value| { + record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config) }, engine_state.signals(), )? - .filter( - move |value| { - value_should_be_printed( - value, - &filter_config, - &lower_terms, - span, - &cols_to_search_in_filter, - invert, - ) + .map( + move |x| { + highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search) }, engine_state.signals(), ), PipelineData::ListStream(stream, metadata) => { let stream = stream.modify(|iter| { - iter.map(move |mut x| { - let span = x.span(); - if no_highlight { - return x; - }; - match &mut x { - Value::Record { val, .. } => highlight_terms_in_record_with_search_columns( - &cols_to_search_in_map, - val, - span, - &config, - &terms, - string_style, - highlight_style, - ), - _ => x, - } + iter.filter(move |value| { + record_or_value_should_be_printed(&pattern, value, &columns_to_search, &config) }) - .filter(move |value| { - value_should_be_printed( - value, - &filter_config, - &lower_terms, - span, - &cols_to_search_in_filter, - invert, - ) + .map(move |x| { + highlight_matches_in_record_or_value(&map_pattern, x, &map_columns_to_search) }) }); @@ -470,30 +469,15 @@ fn find_with_rest_and_highlight( PipelineData::ByteStream(stream, ..) => { let span = stream.span(); if let Some(lines) = stream.lines() { - let terms = lower_terms - .into_iter() - .map(|term| term.to_expanded_string("", &filter_config).to_lowercase()) - .collect::>(); - let mut output: Vec = vec![]; for line in lines { let line = line?; - let lower_val = line.to_lowercase(); - for term in &terms { - if lower_val.contains(term) { - if no_highlight { - output.push(Value::string(&line, span)) - } else { - output.push(Value::string( - highlight_search_string( - &line, - term, - &string_style, - &highlight_style, - )?, - span, - )) - } + if string_should_be_printed(&pattern, &line) != pattern.invert { + if pattern.highlight && !pattern.invert { + output + .push(highlight_matches_in_string(&pattern, line).into_value(span)) + } else { + output.push(line.into_value(span)) } } } @@ -505,20 +489,16 @@ fn find_with_rest_and_highlight( } } -fn value_should_be_printed( - value: &Value, - filter_config: &Config, - lower_terms: &[Value], - span: Span, - columns_to_search: &[String], - invert: bool, -) -> bool { - let lower_value = Value::string( - value.to_expanded_string("", filter_config).to_lowercase(), - span, - ); +// filter functions - let mut match_found = lower_terms.iter().any(|term| match value { +fn string_should_be_printed(pattern: &MatchPattern, value: &str) -> bool { + pattern.regex.is_match(value).unwrap_or(false) +} + +fn value_should_be_printed(pattern: &MatchPattern, value: &Value, config: &Config) -> bool { + let lower_value = value.to_expanded_string("", config).to_lowercase(); + + match value { Value::Bool { .. } | Value::Int { .. } | Value::Filesize { .. } @@ -528,58 +508,52 @@ fn value_should_be_printed( | Value::Float { .. } | Value::Closure { .. } | Value::Nothing { .. } - | Value::Error { .. } => term_equals_value(term, &lower_value, span), - Value::String { .. } - | Value::Glob { .. } + | Value::Error { .. } => { + if !pattern.lower_terms.is_empty() { + // look for exact match when searching with terms + pattern + .lower_terms + .iter() + .any(|term: &String| term == &lower_value) + } else { + string_should_be_printed(pattern, &lower_value) + } + } + Value::Glob { .. } | Value::List { .. } | Value::CellPath { .. } - | Value::Custom { .. } => term_contains_value(term, &lower_value, span), - Value::Record { val, .. } => { - record_matches_term(val, columns_to_search, filter_config, term, span) - } + | Value::Record { .. } + | Value::Custom { .. } => string_should_be_printed(pattern, &lower_value), + Value::String { val, .. } => string_should_be_printed(pattern, val), Value::Binary { .. } => false, - }); - if invert { - match_found = !match_found; } - match_found } -fn term_contains_value(term: &Value, value: &Value, span: Span) -> bool { - term.r#in(span, value, span) - .is_ok_and(|value| value.is_true()) -} - -fn term_equals_value(term: &Value, value: &Value, span: Span) -> bool { - term.eq(span, value, span) - .is_ok_and(|value| value.is_true()) -} - -fn record_matches_term( - record: &Record, +fn record_or_value_should_be_printed( + pattern: &MatchPattern, + value: &Value, columns_to_search: &[String], - filter_config: &Config, - term: &Value, - span: Span, + config: &Config, ) -> bool { - // Only perform column selection if given columns. - let col_select = !columns_to_search.is_empty(); - record.iter().any(|(col, val)| { - if col_select && !columns_to_search.contains(col) { - return false; + let match_found = match value { + Value::Record { val: record, .. } => { + // Only perform column selection if given columns. + let col_select = !columns_to_search.is_empty(); + record.iter().any(|(col, val)| { + if col_select && !columns_to_search.contains(col) { + return false; + } + value_should_be_printed(pattern, val, config) + }) } - let lower_val = if !val.is_error() { - Value::string( - val.to_expanded_string("", filter_config).to_lowercase(), - Span::test_data(), - ) - } else { - (*val).clone() - }; - term_contains_value(term, &lower_val, span) - }) + _ => value_should_be_printed(pattern, value, config), + }; + + match_found != pattern.invert } +// utility + fn split_string_if_multiline(input: PipelineData, head_span: Span) -> PipelineData { let span = input.span().unwrap_or(head_span); match input { diff --git a/crates/nu-command/src/help/mod.rs b/crates/nu-command/src/help/mod.rs index 9cbd187867..6c8a9d1842 100644 --- a/crates/nu-command/src/help/mod.rs +++ b/crates/nu-command/src/help/mod.rs @@ -16,7 +16,7 @@ pub use help_modules::HelpModules; pub use help_operators::HelpOperators; pub use help_pipe_and_redirect::HelpPipeAndRedirect; -pub(crate) use help_::{highlight_search_in_table, highlight_search_string}; +pub(crate) use help_::highlight_search_in_table; pub(crate) use help_aliases::help_aliases; pub(crate) use help_commands::help_commands; pub(crate) use help_modules::help_modules; diff --git a/crates/nu-command/tests/commands/find.rs b/crates/nu-command/tests/commands/find.rs index 9aab5d945b..d428e61c00 100644 --- a/crates/nu-command/tests/commands/find.rs +++ b/crates/nu-command/tests/commands/find.rs @@ -118,7 +118,10 @@ fn find_with_regex_in_table_keeps_row_if_one_column_matches() { "[[name nickname]; [Maurice moe] [Laurence larry]] | find --no-highlight --regex ce | get name | to json -r" ); - assert_eq!(actual.out, r#"["Maurice","Laurence"]"#); + assert_eq!( + actual.out, + r#"["\u001b[37mMauri\u001b[0m\u001b[41;37mce\u001b[0m\u001b[37m\u001b[0m","\u001b[37mLauren\u001b[0m\u001b[41;37mce\u001b[0m\u001b[37m\u001b[0m"]"# + ); assert_eq!(actual_no_highlight.out, r#"["Maurice","Laurence"]"#); }