forked from extern/nushell
Add LRU regex cache (#7587)
Closes #7572 by adding a cache for compiled regexes to `EngineState`. The cache has type `Arc<Mutex<LruCache<String, Regex>>>`, is limited to 100 entries (limit chosen arbitrarily), and evicts least-recently-used items first.

This PR makes a noticeable difference when using regexes for `color_config`, e.g.:

```bash
# first set string formatting in config.nu like:
string: { if $in =~ '^#\w{6}$' { $in } else { 'white' } }
# then try displaying and exploring a table with many strings
# this is instant after the PR, but takes hundreds of milliseconds before
['#ff0033', '#0025ee', '#0087aa', 'string', '#4101ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff', '#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff','#ff0033', '#0025ee', '#0087aa', 'string', '#6103ff']
```

## New dependency (`lru`)

This uses [the popular `lru` crate](https://lib.rs/crates/lru). The new dependency adds 19.8KB to a Linux release build of Nushell. I think this is OK, especially since the crate can be useful elsewhere in Nu.
This commit is contained in:
parent
3be7996e79
commit
a43e66ef92
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -2110,6 +2110,15 @@ dependencies = [
|
|||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lru"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909"
|
||||||
|
dependencies = [
|
||||||
|
"hashbrown",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lscolors"
|
name = "lscolors"
|
||||||
version = "0.12.0"
|
version = "0.12.0"
|
||||||
@ -2778,6 +2787,7 @@ dependencies = [
|
|||||||
"chrono-humanize",
|
"chrono-humanize",
|
||||||
"fancy-regex",
|
"fancy-regex",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
|
"lru",
|
||||||
"miette",
|
"miette",
|
||||||
"nu-json",
|
"nu-json",
|
||||||
"nu-test-support",
|
"nu-test-support",
|
||||||
|
@ -402,9 +402,11 @@ pub fn eval_expression(
|
|||||||
Comparison::NotEqual => lhs.ne(op_span, &rhs, expr.span),
|
Comparison::NotEqual => lhs.ne(op_span, &rhs, expr.span),
|
||||||
Comparison::In => lhs.r#in(op_span, &rhs, expr.span),
|
Comparison::In => lhs.r#in(op_span, &rhs, expr.span),
|
||||||
Comparison::NotIn => lhs.not_in(op_span, &rhs, expr.span),
|
Comparison::NotIn => lhs.not_in(op_span, &rhs, expr.span),
|
||||||
Comparison::RegexMatch => lhs.regex_match(op_span, &rhs, false, expr.span),
|
Comparison::RegexMatch => {
|
||||||
|
lhs.regex_match(engine_state, op_span, &rhs, false, expr.span)
|
||||||
|
}
|
||||||
Comparison::NotRegexMatch => {
|
Comparison::NotRegexMatch => {
|
||||||
lhs.regex_match(op_span, &rhs, true, expr.span)
|
lhs.regex_match(engine_state, op_span, &rhs, true, expr.span)
|
||||||
}
|
}
|
||||||
Comparison::StartsWith => lhs.starts_with(op_span, &rhs, expr.span),
|
Comparison::StartsWith => lhs.starts_with(op_span, &rhs, expr.span),
|
||||||
Comparison::EndsWith => lhs.ends_with(op_span, &rhs, expr.span),
|
Comparison::EndsWith => lhs.ends_with(op_span, &rhs, expr.span),
|
||||||
|
@ -18,6 +18,7 @@ chrono = { version="0.4.23", features= ["serde", "std"], default-features = fals
|
|||||||
chrono-humanize = "0.2.1"
|
chrono-humanize = "0.2.1"
|
||||||
fancy-regex = "0.10.0"
|
fancy-regex = "0.10.0"
|
||||||
indexmap = { version="1.7" }
|
indexmap = { version="1.7" }
|
||||||
|
lru = "0.8.1"
|
||||||
miette = { version = "5.1.0", features = ["fancy-no-backtrace"] }
|
miette = { version = "5.1.0", features = ["fancy-no-backtrace"] }
|
||||||
num-format = "0.4.3"
|
num-format = "0.4.3"
|
||||||
serde = {version = "1.0.143", default-features = false }
|
serde = {version = "1.0.143", default-features = false }
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
use fancy_regex::Regex;
|
||||||
|
use lru::LruCache;
|
||||||
|
|
||||||
use super::{Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Visibility, DEFAULT_OVERLAY_NAME};
|
use super::{Command, EnvVars, OverlayFrame, ScopeFrame, Stack, Visibility, DEFAULT_OVERLAY_NAME};
|
||||||
use crate::Value;
|
use crate::Value;
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -6,6 +9,7 @@ use crate::{
|
|||||||
};
|
};
|
||||||
use core::panic;
|
use core::panic;
|
||||||
use std::borrow::Borrow;
|
use std::borrow::Borrow;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::{
|
use std::{
|
||||||
@ -94,8 +98,12 @@ pub struct EngineState {
|
|||||||
pub history_session_id: i64,
|
pub history_session_id: i64,
|
||||||
// If Nushell was started, e.g., with `nu spam.nu`, the file's parent is stored here
|
// If Nushell was started, e.g., with `nu spam.nu`, the file's parent is stored here
|
||||||
pub currently_parsed_cwd: Option<PathBuf>,
|
pub currently_parsed_cwd: Option<PathBuf>,
|
||||||
|
pub regex_cache: Arc<Mutex<LruCache<String, Regex>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The max number of compiled regexes to keep around in a LRU cache, arbitrarily chosen
|
||||||
|
const REGEX_CACHE_SIZE: usize = 100; // must be nonzero, otherwise will panic
|
||||||
|
|
||||||
pub const NU_VARIABLE_ID: usize = 0;
|
pub const NU_VARIABLE_ID: usize = 0;
|
||||||
pub const IN_VARIABLE_ID: usize = 1;
|
pub const IN_VARIABLE_ID: usize = 1;
|
||||||
pub const ENV_VARIABLE_ID: usize = 2;
|
pub const ENV_VARIABLE_ID: usize = 2;
|
||||||
@ -137,6 +145,9 @@ impl EngineState {
|
|||||||
config_path: HashMap::new(),
|
config_path: HashMap::new(),
|
||||||
history_session_id: 0,
|
history_session_id: 0,
|
||||||
currently_parsed_cwd: None,
|
currently_parsed_cwd: None,
|
||||||
|
regex_cache: Arc::new(Mutex::new(LruCache::new(
|
||||||
|
NonZeroUsize::new(REGEX_CACHE_SIZE).expect("tried to create cache of size zero"),
|
||||||
|
))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ mod unit;
|
|||||||
|
|
||||||
use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember};
|
use crate::ast::{Bits, Boolean, CellPath, Comparison, PathMember};
|
||||||
use crate::ast::{Math, Operator};
|
use crate::ast::{Math, Operator};
|
||||||
|
use crate::engine::EngineState;
|
||||||
use crate::ShellError;
|
use crate::ShellError;
|
||||||
use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId};
|
use crate::{did_you_mean, BlockId, Config, Span, Spanned, Type, VarId};
|
||||||
use byte_unit::ByteUnit;
|
use byte_unit::ByteUnit;
|
||||||
@ -2627,6 +2628,7 @@ impl Value {
|
|||||||
|
|
||||||
pub fn regex_match(
|
pub fn regex_match(
|
||||||
&self,
|
&self,
|
||||||
|
engine_state: &EngineState,
|
||||||
op: Span,
|
op: Span,
|
||||||
rhs: &Value,
|
rhs: &Value,
|
||||||
invert: bool,
|
invert: bool,
|
||||||
@ -2640,18 +2642,36 @@ impl Value {
|
|||||||
span: rhs_span,
|
span: rhs_span,
|
||||||
},
|
},
|
||||||
) => {
|
) => {
|
||||||
// We are leaving some performance on the table by compiling the regex every time.
|
let is_match = match engine_state.regex_cache.try_lock() {
|
||||||
// Small regexes compile in microseconds, and the simplicity of this approach currently
|
Ok(mut cache) => match cache.get(rhs) {
|
||||||
// outweighs the performance costs. Revisit this if it ever becomes a bottleneck.
|
Some(regex) => regex.is_match(lhs),
|
||||||
let regex = Regex::new(rhs).map_err(|e| {
|
None => {
|
||||||
ShellError::UnsupportedInput(
|
let regex = Regex::new(rhs).map_err(|e| {
|
||||||
format!("{e}"),
|
ShellError::UnsupportedInput(
|
||||||
"value originated from here".into(),
|
format!("{e}"),
|
||||||
span,
|
"value originated from here".into(),
|
||||||
*rhs_span,
|
span,
|
||||||
)
|
*rhs_span,
|
||||||
})?;
|
)
|
||||||
let is_match = regex.is_match(lhs);
|
})?;
|
||||||
|
let ret = regex.is_match(lhs);
|
||||||
|
cache.put(rhs.clone(), regex);
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
let regex = Regex::new(rhs).map_err(|e| {
|
||||||
|
ShellError::UnsupportedInput(
|
||||||
|
format!("{e}"),
|
||||||
|
"value originated from here".into(),
|
||||||
|
span,
|
||||||
|
*rhs_span,
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
regex.is_match(lhs)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Value::Bool {
|
Ok(Value::Bool {
|
||||||
val: if invert {
|
val: if invert {
|
||||||
!is_match.unwrap_or(false)
|
!is_match.unwrap_or(false)
|
||||||
|
Loading…
Reference in New Issue
Block a user