Add LRU regex cache (#7587)

Closes #7572 by adding a cache for compiled regexes, of type
`Arc<Mutex<LruCache<String, Regex>>>`, to `EngineState`.

The cache is limited to 100 entries (limit chosen arbitrarily) and
evicts least-recently-used items first.

This PR makes a noticeable difference when using regexes for
`color_config`, e.g.:
```bash
# first set string formatting in config.nu like:
string: { if $in =~ '^#\w{6}$' { $in } else { 'white' } }

# then try displaying and exploring a table with many strings
# this is instant after the PR, but took hundreds of milliseconds before
['#ff0033', '#0025ee', '#0087aa', 'string', '#4101ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff']
```

## New dependency (`lru`)
This uses [the popular `lru` crate](https://lib.rs/crates/lru). The new
dependency adds 19.8KB to a Linux release build of Nushell. I think this
is OK, especially since the crate can be useful elsewhere in Nu.
This commit is contained in:
Reilly Wood
2022-12-23 14:30:04 -08:00
committed by GitHub
parent 3be7996e79
commit a43e66ef92
5 changed files with 58 additions and 14 deletions

View File

@ -7,6 +7,7 @@ mod unit;
use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember};
use crate::ast::{Math, Operator};
use crate::engine::EngineState;
use crate::ShellError;
use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId};
use byte_unit::ByteUnit;
@ -2627,6 +2628,7 @@ impl Value {
pub fn regex_match(
&self,
engine_state: &EngineState,
op: Span,
rhs: &Value,
invert: bool,
@ -2640,18 +2642,36 @@ impl Value {
span: rhs_span,
},
) => {
// We are leaving some performance on the table by compiling the regex every time.
// Small regexes compile in microseconds, and the simplicity of this approach currently
// outweighs the performance costs. Revisit this if it ever becomes a bottleneck.
let regex = Regex::new(rhs).map_err(|e| {
ShellError::UnsupportedInput(
format!("{e}"),
"value originated from here".into(),
span,
*rhs_span,
)
})?;
let is_match = regex.is_match(lhs);
let is_match = match engine_state.regex_cache.try_lock() {
Ok(mut cache) => match cache.get(rhs) {
Some(regex) => regex.is_match(lhs),
None => {
let regex = Regex::new(rhs).map_err(|e| {
ShellError::UnsupportedInput(
format!("{e}"),
"value originated from here".into(),
span,
*rhs_span,
)
})?;
let ret = regex.is_match(lhs);
cache.put(rhs.clone(), regex);
ret
}
},
Err(_) => {
let regex = Regex::new(rhs).map_err(|e| {
ShellError::UnsupportedInput(
format!("{e}"),
"value originated from here".into(),
span,
*rhs_span,
)
})?;
regex.is_match(lhs)
}
};
Ok(Value::Bool {
val: if invert {
!is_match.unwrap_or(false)