Use nucleo instead of skim for completions (#14846)

# Description

This PR replaces `SkimMatcherV2` from the
[fuzzy-matcher](https://docs.rs/fuzzy-matcher/latest/fuzzy_matcher/)
crate with the
[nucleo-matcher](https://docs.rs/nucleo-matcher/latest/nucleo_matcher/)
crate for doing fuzzy matching. This touches both our completion code in
`nu-cli` and symbol filtering in `nu-lsp`.

Nucleo should give us better performance than Skim. If we later decide
to adopt the Nucleo frontend as well ([crate
docs](https://docs.rs/nucleo/latest/nucleo/)), it also works on
Windows, unlike [Skim](https://github.com/skim-rs/skim), which appears
to support only Linux and macOS.

Unfortunately, we still have an indirect dependency on `fuzzy-matcher`,
because the [`dialoguer`](https://github.com/console-rs/dialoguer) crate
uses it.

# User-Facing Changes

No breaking changes. Suggestions will be sorted differently, because
Nucleo uses a different algorithm from Skim for matching/scoring.
Hopefully, the new sorting will generally make more sense.

# Tests + Formatting

In `nu-cli`, I modified an existing test, but I didn't benchmark
performance. I haven't tested `nu-lsp` manually, but the existing tests pass.

I did manually run `ls /nix/store/<TAB>`, `ls /nix/store/d<TAB>`, etc.,
but didn't notice Nucleo being faster (my `/nix/store` folder had 34136
items at the time of writing).
This commit is contained in:
Yash Thakur 2025-01-17 07:24:00 -05:00 committed by GitHub
parent 8759936636
commit 75105033b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 90 additions and 64 deletions

14
Cargo.lock generated
View File

@ -3666,7 +3666,6 @@ dependencies = [
"chrono", "chrono",
"crossterm 0.28.1", "crossterm 0.28.1",
"fancy-regex", "fancy-regex",
"fuzzy-matcher",
"is_executable", "is_executable",
"log", "log",
"lscolors", "lscolors",
@ -3684,6 +3683,7 @@ dependencies = [
"nu-protocol", "nu-protocol",
"nu-test-support", "nu-test-support",
"nu-utils", "nu-utils",
"nucleo-matcher",
"percent-encoding", "percent-encoding",
"reedline", "reedline",
"rstest", "rstest",
@ -3952,7 +3952,6 @@ version = "0.101.1"
dependencies = [ dependencies = [
"assert-json-diff", "assert-json-diff",
"crossbeam-channel", "crossbeam-channel",
"fuzzy-matcher",
"lsp-server", "lsp-server",
"lsp-textdocument", "lsp-textdocument",
"lsp-types", "lsp-types",
@ -3964,6 +3963,7 @@ dependencies = [
"nu-parser", "nu-parser",
"nu-protocol", "nu-protocol",
"nu-test-support", "nu-test-support",
"nucleo-matcher",
"serde", "serde",
"serde_json", "serde_json",
"url", "url",
@ -4318,6 +4318,16 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
dependencies = [
"memchr",
"unicode-segmentation",
]
[[package]] [[package]]
name = "num" name = "num"
version = "0.4.3" version = "0.4.3"

View File

@ -90,7 +90,6 @@ encoding_rs = "0.8"
fancy-regex = "0.14" fancy-regex = "0.14"
filesize = "0.2" filesize = "0.2"
filetime = "0.2" filetime = "0.2"
fuzzy-matcher = "0.3"
heck = "0.5.0" heck = "0.5.0"
human-date-parser = "0.2.0" human-date-parser = "0.2.0"
indexmap = "2.7" indexmap = "2.7"
@ -117,6 +116,7 @@ native-tls = "0.2"
nix = { version = "0.29", default-features = false } nix = { version = "0.29", default-features = false }
notify-debouncer-full = { version = "0.3", default-features = false } notify-debouncer-full = { version = "0.3", default-features = false }
nu-ansi-term = "0.50.1" nu-ansi-term = "0.50.1"
nucleo-matcher = "0.3"
num-format = "0.4" num-format = "0.4"
num-traits = "0.2" num-traits = "0.2"
oem_cp = "2.0.0" oem_cp = "2.0.0"

View File

@ -33,11 +33,11 @@ reedline = { workspace = true, features = ["bashisms", "sqlite"] }
chrono = { default-features = false, features = ["std"], workspace = true } chrono = { default-features = false, features = ["std"], workspace = true }
crossterm = { workspace = true } crossterm = { workspace = true }
fancy-regex = { workspace = true } fancy-regex = { workspace = true }
fuzzy-matcher = { workspace = true }
is_executable = { workspace = true } is_executable = { workspace = true }
log = { workspace = true } log = { workspace = true }
miette = { workspace = true, features = ["fancy-no-backtrace"] }
lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] } lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] }
miette = { workspace = true, features = ["fancy-no-backtrace"] }
nucleo-matcher = { workspace = true }
percent-encoding = { workspace = true } percent-encoding = { workspace = true }
sysinfo = { workspace = true } sysinfo = { workspace = true }
unicode-segmentation = { workspace = true } unicode-segmentation = { workspace = true }

View File

@ -1,7 +1,10 @@
use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
use nu_parser::trim_quotes_str; use nu_parser::trim_quotes_str;
use nu_protocol::{CompletionAlgorithm, CompletionSort}; use nu_protocol::{CompletionAlgorithm, CompletionSort};
use nu_utils::IgnoreCaseExt; use nu_utils::IgnoreCaseExt;
use nucleo_matcher::{
pattern::{AtomKind, CaseMatching, Normalization, Pattern},
Config, Matcher, Utf32Str,
};
use std::{borrow::Cow, fmt::Display}; use std::{borrow::Cow, fmt::Display};
use super::SemanticSuggestion; use super::SemanticSuggestion;
@ -34,9 +37,10 @@ enum State<T> {
items: Vec<(String, T)>, items: Vec<(String, T)>,
}, },
Fuzzy { Fuzzy {
matcher: Box<SkimMatcherV2>, matcher: Matcher,
pat: Pattern,
/// Holds (haystack, item, score) /// Holds (haystack, item, score)
items: Vec<(String, T, i64)>, items: Vec<(String, T, u32)>,
}, },
} }
@ -46,30 +50,37 @@ impl<T> NuMatcher<T> {
/// ///
/// * `needle` - The text to search for /// * `needle` - The text to search for
pub fn new(needle: impl AsRef<str>, options: CompletionOptions) -> NuMatcher<T> { pub fn new(needle: impl AsRef<str>, options: CompletionOptions) -> NuMatcher<T> {
let orig_needle = trim_quotes_str(needle.as_ref()); let needle = trim_quotes_str(needle.as_ref());
let lowercase_needle = if options.case_sensitive {
orig_needle.to_owned()
} else {
orig_needle.to_folded_case()
};
match options.match_algorithm { match options.match_algorithm {
MatchAlgorithm::Prefix => NuMatcher { MatchAlgorithm::Prefix => {
options, let lowercase_needle = if options.case_sensitive {
needle: lowercase_needle, needle.to_owned()
state: State::Prefix { items: Vec::new() },
},
MatchAlgorithm::Fuzzy => {
let mut matcher = SkimMatcherV2::default();
if options.case_sensitive {
matcher = matcher.respect_case();
} else { } else {
matcher = matcher.ignore_case(); needle.to_folded_case()
}; };
NuMatcher { NuMatcher {
options, options,
needle: orig_needle.to_owned(), needle: lowercase_needle,
state: State::Prefix { items: Vec::new() },
}
}
MatchAlgorithm::Fuzzy => {
let pat = Pattern::new(
needle,
if options.case_sensitive {
CaseMatching::Respect
} else {
CaseMatching::Ignore
},
Normalization::Smart,
AtomKind::Fuzzy,
);
NuMatcher {
options,
needle: needle.to_owned(),
state: State::Fuzzy { state: State::Fuzzy {
matcher: Box::new(matcher), matcher: Matcher::new(Config::DEFAULT),
pat,
items: Vec::new(), items: Vec::new(),
}, },
} }
@ -102,8 +113,15 @@ impl<T> NuMatcher<T> {
} }
matches matches
} }
State::Fuzzy { items, matcher } => { State::Fuzzy {
let Some(score) = matcher.fuzzy_match(haystack, &self.needle) else { matcher,
pat,
items,
} => {
let mut haystack_buf = Vec::new();
let haystack_utf32 = Utf32Str::new(trim_quotes_str(haystack), &mut haystack_buf);
let mut indices = Vec::new();
let Some(score) = pat.indices(haystack_utf32, matcher, &mut indices) else {
return false; return false;
}; };
if let Some(item) = item { if let Some(item) = item {

View File

@ -111,25 +111,6 @@ fn custom_completer() -> NuCompleter {
NuCompleter::new(Arc::new(engine), Arc::new(stack)) NuCompleter::new(Arc::new(engine), Arc::new(stack))
} }
#[fixture]
fn subcommand_completer() -> NuCompleter {
// Create a new engine
let (_, _, mut engine, mut stack) = new_engine();
let commands = r#"
$env.config.completions.algorithm = "fuzzy"
def foo [] {}
def "foo bar" [] {}
def "foo abaz" [] {}
def "foo aabcrr" [] {}
def food [] {}
"#;
assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok());
// Instantiate a new completer
NuCompleter::new(Arc::new(engine), Arc::new(stack))
}
/// Use fuzzy completions but sort in alphabetical order /// Use fuzzy completions but sort in alphabetical order
#[fixture] #[fixture]
fn fuzzy_alpha_sort_completer() -> NuCompleter { fn fuzzy_alpha_sort_completer() -> NuCompleter {
@ -1040,24 +1021,32 @@ fn command_watch_with_filecompletion() {
} }
#[rstest] #[rstest]
fn subcommand_completions(mut subcommand_completer: NuCompleter) { fn subcommand_completions() {
let prefix = "foo br"; let (_, _, mut engine, mut stack) = new_engine();
let suggestions = subcommand_completer.complete(prefix, prefix.len()); let commands = r#"
match_suggestions( $env.config.completions.algorithm = "fuzzy"
&vec!["foo bar".to_string(), "foo aabcrr".to_string()], def foo-test-command [] {}
&suggestions, def "foo-test-command bar" [] {}
); def "foo-test-command aagap bcr" [] {}
def "food bar" [] {}
"#;
assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok());
let mut subcommand_completer = NuCompleter::new(Arc::new(engine), Arc::new(stack));
let prefix = "foo b"; let prefix = "fod br";
let suggestions = subcommand_completer.complete(prefix, prefix.len()); let suggestions = subcommand_completer.complete(prefix, prefix.len());
match_suggestions( match_suggestions(
&vec![ &vec![
"foo bar".to_string(), "food bar".to_string(),
"foo abaz".to_string(), "foo-test-command aagap bcr".to_string(),
"foo aabcrr".to_string(), "foo-test-command bar".to_string(),
], ],
&suggestions, &suggestions,
); );
let prefix = "foot bar";
let suggestions = subcommand_completer.complete(prefix, prefix.len());
match_suggestions(&vec!["foo-test-command bar".to_string()], &suggestions);
} }
#[test] #[test]

View File

@ -14,11 +14,11 @@ nu-parser = { path = "../nu-parser", version = "0.101.1" }
nu-protocol = { path = "../nu-protocol", version = "0.101.1" } nu-protocol = { path = "../nu-protocol", version = "0.101.1" }
crossbeam-channel = { workspace = true } crossbeam-channel = { workspace = true }
fuzzy-matcher = { workspace = true }
lsp-server = { workspace = true } lsp-server = { workspace = true }
lsp-textdocument = { workspace = true } lsp-textdocument = { workspace = true }
lsp-types = { workspace = true } lsp-types = { workspace = true }
miette = { workspace = true } miette = { workspace = true }
nucleo-matcher = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
url = { workspace = true } url = { workspace = true }

View File

@ -2,7 +2,6 @@ use std::collections::{BTreeMap, HashSet};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use crate::{path_to_uri, span_to_range, uri_to_path, Id, LanguageServer}; use crate::{path_to_uri, span_to_range, uri_to_path, Id, LanguageServer};
use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher};
use lsp_textdocument::{FullTextDocument, TextDocuments}; use lsp_textdocument::{FullTextDocument, TextDocuments};
use lsp_types::{ use lsp_types::{
DocumentSymbolParams, DocumentSymbolResponse, Location, Range, SymbolInformation, SymbolKind, DocumentSymbolParams, DocumentSymbolResponse, Location, Range, SymbolInformation, SymbolKind,
@ -14,6 +13,8 @@ use nu_protocol::{
engine::{CachedFile, EngineState, StateWorkingSet}, engine::{CachedFile, EngineState, StateWorkingSet},
DeclId, Span, VarId, DeclId, Span, VarId,
}; };
use nucleo_matcher::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
use nucleo_matcher::{Config, Matcher, Utf32Str};
use std::{cmp::Ordering, path::Path}; use std::{cmp::Ordering, path::Path};
/// Struct stored in cache, uri not included /// Struct stored in cache, uri not included
@ -70,7 +71,7 @@ impl Symbol {
/// Cache symbols for each opened file to avoid repeated parsing /// Cache symbols for each opened file to avoid repeated parsing
pub struct SymbolCache { pub struct SymbolCache {
/// Fuzzy matcher for symbol names /// Fuzzy matcher for symbol names
matcher: SkimMatcherV2, matcher: Matcher,
/// File Uri --> Symbols /// File Uri --> Symbols
cache: BTreeMap<Uri, Vec<Symbol>>, cache: BTreeMap<Uri, Vec<Symbol>>,
/// If marked as dirty, parse on next request /// If marked as dirty, parse on next request
@ -80,7 +81,7 @@ pub struct SymbolCache {
impl SymbolCache { impl SymbolCache {
pub fn new() -> Self { pub fn new() -> Self {
SymbolCache { SymbolCache {
matcher: SkimMatcherV2::default(), matcher: Matcher::new(Config::DEFAULT),
cache: BTreeMap::new(), cache: BTreeMap::new(),
dirty_flags: BTreeMap::new(), dirty_flags: BTreeMap::new(),
} }
@ -240,12 +241,20 @@ impl SymbolCache {
) )
} }
pub fn get_fuzzy_matched_symbols(&self, query: &str) -> Vec<SymbolInformation> { pub fn get_fuzzy_matched_symbols(&mut self, query: &str) -> Vec<SymbolInformation> {
let pat = Pattern::new(
query,
CaseMatching::Smart,
Normalization::Smart,
AtomKind::Fuzzy,
);
self.cache self.cache
.iter() .iter()
.flat_map(|(uri, symbols)| symbols.iter().map(|s| s.clone().to_symbol_information(uri))) .flat_map(|(uri, symbols)| symbols.iter().map(|s| s.clone().to_symbol_information(uri)))
.filter_map(|s| { .filter_map(|s| {
self.matcher.fuzzy_match(&s.name, query)?; let mut buf = Vec::new();
let name = Utf32Str::new(&s.name, &mut buf);
pat.score(name, &mut self.matcher)?;
Some(s) Some(s)
}) })
.collect() .collect()