Use nucleo instead of skim for completions (#14846)

# Description

This PR replaces `SkimMatcherV2` from the
[fuzzy-matcher](https://docs.rs/fuzzy-matcher/latest/fuzzy_matcher/)
crate with the
[nucleo-matcher](https://docs.rs/nucleo-matcher/latest/nucleo_matcher/)
crate for doing fuzzy matching. This touches both our completion code in
`nu-cli` and symbol filtering in `nu-lsp`.

Nucleo should give us better performance than Skim. In the event that we
decide to use the Nucleo frontend ([crate
docs](https://docs.rs/nucleo/latest/nucleo/)) too, it also works on
Windows, unlike [Skim](https://github.com/skim-rs/skim), which appears
to only support Linux and macOS.

Unfortunately, we still have an indirect dependency on `fuzzy-matcher`,
because the [`dialoguer`](https://github.com/console-rs/dialoguer) crate
uses it.

# User-Facing Changes

No breaking changes. Suggestions will be sorted differently, because
Nucleo uses a different matching/scoring algorithm than Skim.
Hopefully, the new sorting will generally make more sense.

# Tests + Formatting

In `nu-cli`, I modified an existing test, but didn't test performance. I
haven't tested `nu-lsp` manually, but existing tests pass.

I did manually do `ls /nix/store/<TAB>`, `ls /nix/store/d<TAB>`, etc.,
but didn't notice Nucleo being faster (my `/nix/store` folder has 34136
items at the time of writing).
This commit is contained in:
Yash Thakur 2025-01-17 07:24:00 -05:00 committed by GitHub
parent 8759936636
commit 75105033b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 90 additions and 64 deletions

14
Cargo.lock generated
View File

@ -3666,7 +3666,6 @@ dependencies = [
"chrono",
"crossterm 0.28.1",
"fancy-regex",
"fuzzy-matcher",
"is_executable",
"log",
"lscolors",
@ -3684,6 +3683,7 @@ dependencies = [
"nu-protocol",
"nu-test-support",
"nu-utils",
"nucleo-matcher",
"percent-encoding",
"reedline",
"rstest",
@ -3952,7 +3952,6 @@ version = "0.101.1"
dependencies = [
"assert-json-diff",
"crossbeam-channel",
"fuzzy-matcher",
"lsp-server",
"lsp-textdocument",
"lsp-types",
@ -3964,6 +3963,7 @@ dependencies = [
"nu-parser",
"nu-protocol",
"nu-test-support",
"nucleo-matcher",
"serde",
"serde_json",
"url",
@ -4318,6 +4318,16 @@ dependencies = [
"serde_json",
]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
dependencies = [
"memchr",
"unicode-segmentation",
]
[[package]]
name = "num"
version = "0.4.3"

View File

@ -90,7 +90,6 @@ encoding_rs = "0.8"
fancy-regex = "0.14"
filesize = "0.2"
filetime = "0.2"
fuzzy-matcher = "0.3"
heck = "0.5.0"
human-date-parser = "0.2.0"
indexmap = "2.7"
@ -117,6 +116,7 @@ native-tls = "0.2"
nix = { version = "0.29", default-features = false }
notify-debouncer-full = { version = "0.3", default-features = false }
nu-ansi-term = "0.50.1"
nucleo-matcher = "0.3"
num-format = "0.4"
num-traits = "0.2"
oem_cp = "2.0.0"

View File

@ -33,11 +33,11 @@ reedline = { workspace = true, features = ["bashisms", "sqlite"] }
chrono = { default-features = false, features = ["std"], workspace = true }
crossterm = { workspace = true }
fancy-regex = { workspace = true }
fuzzy-matcher = { workspace = true }
is_executable = { workspace = true }
log = { workspace = true }
miette = { workspace = true, features = ["fancy-no-backtrace"] }
lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] }
miette = { workspace = true, features = ["fancy-no-backtrace"] }
nucleo-matcher = { workspace = true }
percent-encoding = { workspace = true }
sysinfo = { workspace = true }
unicode-segmentation = { workspace = true }

View File

@ -1,7 +1,10 @@
use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
use nu_parser::trim_quotes_str;
use nu_protocol::{CompletionAlgorithm, CompletionSort};
use nu_utils::IgnoreCaseExt;
use nucleo_matcher::{
pattern::{AtomKind, CaseMatching, Normalization, Pattern},
Config, Matcher, Utf32Str,
};
use std::{borrow::Cow, fmt::Display};
use super::SemanticSuggestion;
@ -34,9 +37,10 @@ enum State<T> {
items: Vec<(String, T)>,
},
Fuzzy {
matcher: Box<SkimMatcherV2>,
matcher: Matcher,
pat: Pattern,
/// Holds (haystack, item, score)
items: Vec<(String, T, i64)>,
items: Vec<(String, T, u32)>,
},
}
@ -46,30 +50,37 @@ impl<T> NuMatcher<T> {
///
/// * `needle` - The text to search for
pub fn new(needle: impl AsRef<str>, options: CompletionOptions) -> NuMatcher<T> {
let orig_needle = trim_quotes_str(needle.as_ref());
let lowercase_needle = if options.case_sensitive {
orig_needle.to_owned()
} else {
orig_needle.to_folded_case()
};
let needle = trim_quotes_str(needle.as_ref());
match options.match_algorithm {
MatchAlgorithm::Prefix => NuMatcher {
options,
needle: lowercase_needle,
state: State::Prefix { items: Vec::new() },
},
MatchAlgorithm::Fuzzy => {
let mut matcher = SkimMatcherV2::default();
if options.case_sensitive {
matcher = matcher.respect_case();
MatchAlgorithm::Prefix => {
let lowercase_needle = if options.case_sensitive {
needle.to_owned()
} else {
matcher = matcher.ignore_case();
needle.to_folded_case()
};
NuMatcher {
options,
needle: orig_needle.to_owned(),
needle: lowercase_needle,
state: State::Prefix { items: Vec::new() },
}
}
MatchAlgorithm::Fuzzy => {
let pat = Pattern::new(
needle,
if options.case_sensitive {
CaseMatching::Respect
} else {
CaseMatching::Ignore
},
Normalization::Smart,
AtomKind::Fuzzy,
);
NuMatcher {
options,
needle: needle.to_owned(),
state: State::Fuzzy {
matcher: Box::new(matcher),
matcher: Matcher::new(Config::DEFAULT),
pat,
items: Vec::new(),
},
}
@ -102,8 +113,15 @@ impl<T> NuMatcher<T> {
}
matches
}
State::Fuzzy { items, matcher } => {
let Some(score) = matcher.fuzzy_match(haystack, &self.needle) else {
State::Fuzzy {
matcher,
pat,
items,
} => {
let mut haystack_buf = Vec::new();
let haystack_utf32 = Utf32Str::new(trim_quotes_str(haystack), &mut haystack_buf);
let mut indices = Vec::new();
let Some(score) = pat.indices(haystack_utf32, matcher, &mut indices) else {
return false;
};
if let Some(item) = item {

View File

@ -111,25 +111,6 @@ fn custom_completer() -> NuCompleter {
NuCompleter::new(Arc::new(engine), Arc::new(stack))
}
#[fixture]
fn subcommand_completer() -> NuCompleter {
// Create a new engine
let (_, _, mut engine, mut stack) = new_engine();
let commands = r#"
$env.config.completions.algorithm = "fuzzy"
def foo [] {}
def "foo bar" [] {}
def "foo abaz" [] {}
def "foo aabcrr" [] {}
def food [] {}
"#;
assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok());
// Instantiate a new completer
NuCompleter::new(Arc::new(engine), Arc::new(stack))
}
/// Use fuzzy completions but sort in alphabetical order
#[fixture]
fn fuzzy_alpha_sort_completer() -> NuCompleter {
@ -1040,24 +1021,32 @@ fn command_watch_with_filecompletion() {
}
#[rstest]
fn subcommand_completions(mut subcommand_completer: NuCompleter) {
let prefix = "foo br";
let suggestions = subcommand_completer.complete(prefix, prefix.len());
match_suggestions(
&vec!["foo bar".to_string(), "foo aabcrr".to_string()],
&suggestions,
);
fn subcommand_completions() {
let (_, _, mut engine, mut stack) = new_engine();
let commands = r#"
$env.config.completions.algorithm = "fuzzy"
def foo-test-command [] {}
def "foo-test-command bar" [] {}
def "foo-test-command aagap bcr" [] {}
def "food bar" [] {}
"#;
assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok());
let mut subcommand_completer = NuCompleter::new(Arc::new(engine), Arc::new(stack));
let prefix = "foo b";
let prefix = "fod br";
let suggestions = subcommand_completer.complete(prefix, prefix.len());
match_suggestions(
&vec![
"foo bar".to_string(),
"foo abaz".to_string(),
"foo aabcrr".to_string(),
"food bar".to_string(),
"foo-test-command aagap bcr".to_string(),
"foo-test-command bar".to_string(),
],
&suggestions,
);
let prefix = "foot bar";
let suggestions = subcommand_completer.complete(prefix, prefix.len());
match_suggestions(&vec!["foo-test-command bar".to_string()], &suggestions);
}
#[test]

View File

@ -14,11 +14,11 @@ nu-parser = { path = "../nu-parser", version = "0.101.1" }
nu-protocol = { path = "../nu-protocol", version = "0.101.1" }
crossbeam-channel = { workspace = true }
fuzzy-matcher = { workspace = true }
lsp-server = { workspace = true }
lsp-textdocument = { workspace = true }
lsp-types = { workspace = true }
miette = { workspace = true }
nucleo-matcher = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
url = { workspace = true }

View File

@ -2,7 +2,6 @@ use std::collections::{BTreeMap, HashSet};
use std::hash::{Hash, Hasher};
use crate::{path_to_uri, span_to_range, uri_to_path, Id, LanguageServer};
use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
use lsp_textdocument::{FullTextDocument, TextDocuments};
use lsp_types::{
DocumentSymbolParams, DocumentSymbolResponse, Location, Range, SymbolInformation, SymbolKind,
@ -14,6 +13,8 @@ use nu_protocol::{
engine::{CachedFile, EngineState, StateWorkingSet},
DeclId, Span, VarId,
};
use nucleo_matcher::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
use nucleo_matcher::{Config, Matcher, Utf32Str};
use std::{cmp::Ordering, path::Path};
/// Struct stored in cache, uri not included
@ -70,7 +71,7 @@ impl Symbol {
/// Cache symbols for each opened file to avoid repeated parsing
pub struct SymbolCache {
/// Fuzzy matcher for symbol names
matcher: SkimMatcherV2,
matcher: Matcher,
/// File Uri --> Symbols
cache: BTreeMap<Uri, Vec<Symbol>>,
/// If marked as dirty, parse on next request
@ -80,7 +81,7 @@ pub struct SymbolCache {
impl SymbolCache {
pub fn new() -> Self {
SymbolCache {
matcher: SkimMatcherV2::default(),
matcher: Matcher::new(Config::DEFAULT),
cache: BTreeMap::new(),
dirty_flags: BTreeMap::new(),
}
@ -240,12 +241,20 @@ impl SymbolCache {
)
}
pub fn get_fuzzy_matched_symbols(&self, query: &str) -> Vec<SymbolInformation> {
pub fn get_fuzzy_matched_symbols(&mut self, query: &str) -> Vec<SymbolInformation> {
let pat = Pattern::new(
query,
CaseMatching::Smart,
Normalization::Smart,
AtomKind::Fuzzy,
);
self.cache
.iter()
.flat_map(|(uri, symbols)| symbols.iter().map(|s| s.clone().to_symbol_information(uri)))
.filter_map(|s| {
self.matcher.fuzzy_match(&s.name, query)?;
let mut buf = Vec::new();
let name = Utf32Str::new(&s.name, &mut buf);
pat.score(name, &mut self.matcher)?;
Some(s)
})
.collect()