mirror of
https://github.com/nushell/nushell.git
synced 2025-08-09 09:25:38 +02:00
Add LRU regex cache (#7587)
Closes #7572 by adding a cache for compiled regexes, of type `Arc<Mutex<LruCache<String, Regex>>>`, to `EngineState`. The cache is limited to 100 entries (limit chosen arbitrarily) and evicts least-recently-used items first.

This PR makes a noticeable difference when using regexes for `color_config`, e.g.:

```bash
# first set string formatting in config.nu like:
string: { if $in =~ '^#\w{6}$' { $in } else { 'white' } }

# then try displaying and exploring a table with many strings
# this is instant after the PR, but takes hundreds of milliseconds before
['#ff0033', '#0025ee', '#0087aa', 'string', '#4101ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff']
```

## New dependency (`lru`)

This uses [the popular `lru` crate](https://lib.rs/crates/lru). The new dependency adds 19.8KB to a Linux release build of Nushell. I think this is OK, especially since the crate can be useful elsewhere in Nu.
This commit is contained in:
@ -1,3 +1,6 @@
|
||||
use fancy_regex::Regex;
|
||||
use lru::LruCache;
|
||||
|
||||
use super::{Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Visibility, DEFAULT_OVERLAY_NAME};
|
||||
use crate::Value;
|
||||
use crate::{
|
||||
@ -6,6 +9,7 @@ use crate::{
|
||||
};
|
||||
use core::panic;
|
||||
use std::borrow::Borrow;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::{
|
||||
@ -94,8 +98,12 @@ pub struct EngineState {
|
||||
pub history_session_id: i64,
|
||||
// If Nushell was started, e.g., with `nu spam.nu`, the file's parent is stored here
|
||||
pub currently_parsed_cwd: Option<PathBuf>,
|
||||
pub regex_cache: Arc<Mutex<LruCache<String, Regex>>>,
|
||||
}
|
||||
|
||||
// The max number of compiled regexes to keep around in a LRU cache, arbitrarily chosen
|
||||
const REGEX_CACHE_SIZE: usize = 100; // must be nonzero, otherwise will panic
|
||||
|
||||
pub const NU_VARIABLE_ID: usize = 0;
|
||||
pub const IN_VARIABLE_ID: usize = 1;
|
||||
pub const ENV_VARIABLE_ID: usize = 2;
|
||||
@ -137,6 +145,9 @@ impl EngineState {
|
||||
config_path: HashMap::new(),
|
||||
history_session_id: 0,
|
||||
currently_parsed_cwd: None,
|
||||
regex_cache: Arc::new(Mutex::new(LruCache::new(
|
||||
NonZeroUsize::new(REGEX_CACHE_SIZE).expect("tried to create cache of size zero"),
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ mod unit;
|
||||
|
||||
use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember};
|
||||
use crate::ast::{Math, Operator};
|
||||
use crate::engine::EngineState;
|
||||
use crate::ShellError;
|
||||
use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId};
|
||||
use byte_unit::ByteUnit;
|
||||
@ -2627,6 +2628,7 @@ impl Value {
|
||||
|
||||
pub fn regex_match(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
op: Span,
|
||||
rhs: &Value,
|
||||
invert: bool,
|
||||
@ -2640,18 +2642,36 @@ impl Value {
|
||||
span: rhs_span,
|
||||
},
|
||||
) => {
|
||||
// We are leaving some performance on the table by compiling the regex every time.
|
||||
// Small regexes compile in microseconds, and the simplicity of this approach currently
|
||||
// outweighs the performance costs. Revisit this if it ever becomes a bottleneck.
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
let is_match = regex.is_match(lhs);
|
||||
let is_match = match engine_state.regex_cache.try_lock() {
|
||||
Ok(mut cache) => match cache.get(rhs) {
|
||||
Some(regex) => regex.is_match(lhs),
|
||||
None => {
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
let ret = regex.is_match(lhs);
|
||||
cache.put(rhs.clone(), regex);
|
||||
ret
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
let regex = Regex::new(rhs).map_err(|e| {
|
||||
ShellError::UnsupportedInput(
|
||||
format!("{e}"),
|
||||
"value originated from here".into(),
|
||||
span,
|
||||
*rhs_span,
|
||||
)
|
||||
})?;
|
||||
regex.is_match(lhs)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Value::Bool {
|
||||
val: if invert {
|
||||
!is_match.unwrap_or(false)
|
||||
|
Reference in New Issue
Block a user