forked from extern/nushell
Add LRU regex cache (#7587)
Closes #7572 by adding a cache for compiled regexes of type `Arc<Mutex<LruCache<String, Regex>>>` to `EngineState`. The cache is limited to 100 entries (limit chosen arbitrarily) and evicts least-recently-used items first.

This PR makes a noticeable difference when using regexes for `color_config`, e.g.:

```bash
# first set string formatting in config.nu like:
string: { if $in =~ '^#\w{6}$' { $in } else { 'white' } }

# then try displaying and exploring a table with many strings
# this is instant after the PR, but takes hundreds of milliseconds before
['#ff0033', '#0025ee', '#0087aa', 'string', '#4101ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff']
```

## New dependency (`lru`)

This uses [the popular `lru` crate](https://lib.rs/crates/lru). The new dependency adds 19.8KB to a Linux release build of Nushell. I think this is OK, especially since the crate can be useful elsewhere in Nu.
This commit is contained in:
parent
3be7996e79
commit
a43e66ef92
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -2110,6 +2110,15 @@ dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lscolors"
|
||||
version = "0.12.0"
|
||||
@ -2778,6 +2787,7 @@ dependencies = [
|
||||
"chrono-humanize",
|
||||
"fancy-regex",
|
||||
"indexmap",
|
||||
"lru",
|
||||
"miette",
|
||||
"nu-json",
|
||||
"nu-test-support",
|
||||
|
@ -402,9 +402,11 @@ pub fn eval_expression(
|
||||
Comparison::NotEqual => lhs.ne(op_span, &rhs, expr.span),
|
||||
Comparison::In => lhs.r#in(op_span, &rhs, expr.span),
|
||||
Comparison::NotIn => lhs.not_in(op_span, &rhs, expr.span),
|
||||
Comparison::RegexMatch => lhs.regex_match(op_span, &rhs, false, expr.span),
|
||||
Comparison::RegexMatch => {
|
||||
lhs.regex_match(engine_state, op_span, &rhs, false, expr.span)
|
||||
}
|
||||
Comparison::NotRegexMatch => {
|
||||
lhs.regex_match(op_span, &rhs, true, expr.span)
|
||||
lhs.regex_match(engine_state, op_span, &rhs, true, expr.span)
|
||||
}
|
||||
Comparison::StartsWith => lhs.starts_with(op_span, &rhs, expr.span),
|
||||
Comparison::EndsWith => lhs.ends_with(op_span, &rhs, expr.span),
|
||||
|
@ -18,6 +18,7 @@ chrono = { version="0.4.23", features= ["serde", "std"], default-features = fals
|
||||
chrono-humanize = "0.2.1"
|
||||
fancy-regex = "0.10.0"
|
||||
indexmap = { version="1.7" }
|
||||
lru = "0.8.1"
|
||||
miette = { version = "5.1.0", features = ["fancy-no-backtrace"] }
|
||||
num-format = "0.4.3"
|
||||
serde = {version = "1.0.143", default-features = false }
|
||||
|
@ -1,3 +1,6 @@
|
||||
use fancy_regex::Regex;
|
||||
use lru::LruCache;
|
||||
|
||||
use super::{Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Visibility, DEFAULT_OVERLAY_NAME};
|
||||
use crate::Value;
|
||||
use crate::{
|
||||
@ -6,6 +9,7 @@ use crate::{
|
||||
};
|
||||
use core::panic;
|
||||
use std::borrow::Borrow;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::{
|
||||
@ -94,8 +98,12 @@ pub struct EngineState {
|
||||
pub history_session_id: i64,
|
||||
// If Nushell was started, e.g., with `nu spam.nu`, the file's parent is stored here
|
||||
pub currently_parsed_cwd: Option<PathBuf>,
|
||||
pub regex_cache: Arc<Mutex<LruCache<String, Regex>>>,
|
||||
}
|
||||
|
||||
// The max number of compiled regexes to keep around in an LRU cache, arbitrarily chosen
|
||||
const REGEX_CACHE_SIZE: usize = 100; // must be nonzero, otherwise will panic
|
||||
|
||||
pub const NU_VARIABLE_ID: usize = 0;
|
||||
pub const IN_VARIABLE_ID: usize = 1;
|
||||
pub const ENV_VARIABLE_ID: usize = 2;
|
||||
@ -137,6 +145,9 @@ impl EngineState {
|
||||
config_path: HashMap::new(),
|
||||
history_session_id: 0,
|
||||
currently_parsed_cwd: None,
|
||||
regex_cache: Arc::new(Mutex::new(LruCache::new(
|
||||
NonZeroUsize::new(REGEX_CACHE_SIZE).expect("tried to create cache of size zero"),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ mod unit;
|
||||
|
||||
use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember};
|
||||
use crate::ast::{Math, Operator};
|
||||
use crate::engine::EngineState;
|
||||
use crate::ShellError;
|
||||
use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId};
|
||||
use byte_unit::ByteUnit;
|
||||
@ -2627,6 +2628,7 @@ impl Value {
|
||||
|
||||
pub fn regex_match(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
op: Span,
|
||||
rhs: &Value,
|
||||
invert: bool,
|
||||
@ -2640,18 +2642,36 @@ impl Value {
|
||||
span: rhs_span,
|
||||
},
|
||||
) => {
|
||||
// We are leaving some performance on the table by compiling the regex every time.
|
||||
// Small regexes compile in microseconds, and the simplicity of this approach currently
|
||||
// outweighs the performance costs. Revisit this if it ever becomes a bottleneck.
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
let is_match = regex.is_match(lhs);
|
||||
let is_match = match engine_state.regex_cache.try_lock() {
|
||||
Ok(mut cache) => match cache.get(rhs) {
|
||||
Some(regex) => regex.is_match(lhs),
|
||||
None => {
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
let ret = regex.is_match(lhs);
|
||||
cache.put(rhs.clone(), regex);
|
||||
ret
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
regex.is_match(lhs)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Value::Bool {
|
||||
val: if invert {
|
||||
!is_match.unwrap_or(false)
|
||||
|
Loading…
Reference in New Issue
Block a user