From 75105033b25a03b70b839dd707c951eb978f1082 Mon Sep 17 00:00:00 2001 From: Yash Thakur <45539777+ysthakur@users.noreply.github.com> Date: Fri, 17 Jan 2025 07:24:00 -0500 Subject: [PATCH] Use nucleo instead of skim for completions (#14846) # Description This PR replaces `SkimMatcherV2` from the [fuzzy-matcher](https://docs.rs/fuzzy-matcher/latest/fuzzy_matcher/) crate with the [nucleo-matcher](https://docs.rs/nucleo-matcher/latest/nucleo_matcher/) crate for doing fuzzy matching. This touches both our completion code in `nu-cli` and symbol filtering in `nu-lsp`. Nucleo should give us better performance than Skim. In the event that we decide to use the Nucleo frontend ([crate docs](https://docs.rs/nucleo/latest/nucleo/)) too, it also works on Windows, unlike [Skim](https://github.com/skim-rs/skim), which appears to only support Linux and MacOS. Unfortunately, we still have an indirect dependency on `fuzzy-matcher`, because the [`dialoguer`](https://github.com/console-rs/dialoguer) crate uses it. # User-Facing Changes No breaking changes. Suggestions will be sorted differently, because Nucleo uses a different algorithm from Skim for matching/scoring. Hopefully, the new sorting will generally make more sense. # Tests + Formatting In `nu-cli`, modified an existing test, but didn't test performance. I haven't tested `nu-lsp` manually, but existing tests pass. I did manually do `ls /nix/store/`, `ls /nix/store/d`, etc., but didn't notice Nucleo being faster (my `/nix/store` folder has 34136 items at the time of writing). --- Cargo.lock | 14 +++- Cargo.toml | 2 +- crates/nu-cli/Cargo.toml | 4 +- .../src/completions/completion_options.rs | 64 ++++++++++++------- crates/nu-cli/tests/completions/mod.rs | 49 ++++++-------- crates/nu-lsp/Cargo.toml | 2 +- crates/nu-lsp/src/symbols.rs | 19 ++++-- 7 files changed, 90 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b598c5dfe5..8bbc01b159 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3666,7 +3666,6 @@ dependencies = [ "chrono", "crossterm 0.28.1", "fancy-regex", - "fuzzy-matcher", "is_executable", "log", "lscolors", @@ -3684,6 +3683,7 @@ dependencies = [ "nu-protocol", "nu-test-support", "nu-utils", + "nucleo-matcher", "percent-encoding", "reedline", "rstest", @@ -3952,7 +3952,6 @@ version = "0.101.1" dependencies = [ "assert-json-diff", "crossbeam-channel", - "fuzzy-matcher", "lsp-server", "lsp-textdocument", "lsp-types", @@ -3964,6 +3963,7 @@ dependencies = [ "nu-parser", "nu-protocol", "nu-test-support", + "nucleo-matcher", "serde", "serde_json", "url", @@ -4318,6 +4318,16 @@ dependencies = [ "serde_json", ] +[[package]] +name = "nucleo-matcher" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85" +dependencies = [ + "memchr", + "unicode-segmentation", +] + [[package]] name = "num" version = "0.4.3" diff --git a/Cargo.toml b/Cargo.toml index 70b3fb369f..c9614ba5b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -90,7 +90,6 @@ encoding_rs = "0.8" fancy-regex = "0.14" filesize = "0.2" filetime = "0.2" -fuzzy-matcher = "0.3" heck = "0.5.0" human-date-parser = "0.2.0" indexmap = "2.7" @@ -117,6 +116,7 @@ native-tls = "0.2" nix = { version = "0.29", default-features = false } notify-debouncer-full = { version = "0.3", default-features = false } nu-ansi-term = "0.50.1" +nucleo-matcher = "0.3" num-format = "0.4" num-traits = "0.2" oem_cp = "2.0.0" diff --git a/crates/nu-cli/Cargo.toml b/crates/nu-cli/Cargo.toml index fc9b838af7..01f749b322 100644 --- a/crates/nu-cli/Cargo.toml +++ b/crates/nu-cli/Cargo.toml @@ -33,11 +33,11 @@ reedline = { workspace = true, features = ["bashisms", "sqlite"] } chrono = { default-features = false, features = ["std"], workspace = true } crossterm = { workspace = true } fancy-regex = { workspace = true } -fuzzy-matcher = { workspace = true } is_executable = { workspace = true } log = { workspace = true } -miette = { workspace = true, features = ["fancy-no-backtrace"] } lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] } +miette = { workspace = true, features = ["fancy-no-backtrace"] } +nucleo-matcher = { workspace = true } percent-encoding = { workspace = true } sysinfo = { workspace = true } unicode-segmentation = { workspace = true } diff --git a/crates/nu-cli/src/completions/completion_options.rs b/crates/nu-cli/src/completions/completion_options.rs index 9165bd32f9..58fea194ec 100644 --- a/crates/nu-cli/src/completions/completion_options.rs +++ b/crates/nu-cli/src/completions/completion_options.rs @@ -1,7 +1,10 @@ -use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher}; use nu_parser::trim_quotes_str; use nu_protocol::{CompletionAlgorithm, CompletionSort}; use nu_utils::IgnoreCaseExt; +use nucleo_matcher::{ + pattern::{AtomKind, CaseMatching, Normalization, Pattern}, + Config, Matcher, Utf32Str, +}; use std::{borrow::Cow, fmt::Display}; use super::SemanticSuggestion; @@ -34,9 +37,10 @@ enum State { items: Vec<(String, T)>, }, Fuzzy { - matcher: Box, + matcher: Matcher, + pat: Pattern, /// Holds (haystack, item, score) - items: Vec<(String, T, i64)>, + items: Vec<(String, T, u32)>, }, } @@ -46,30 +50,37 @@ impl NuMatcher { /// /// * `needle` - The text to search for pub fn new(needle: impl AsRef, options: CompletionOptions) -> NuMatcher { - let orig_needle = trim_quotes_str(needle.as_ref()); - let lowercase_needle = if options.case_sensitive { - orig_needle.to_owned() - } else { - orig_needle.to_folded_case() - }; + let needle = trim_quotes_str(needle.as_ref()); match options.match_algorithm { - MatchAlgorithm::Prefix => NuMatcher { - options, - needle: lowercase_needle, - state: State::Prefix { items: Vec::new() }, - }, - MatchAlgorithm::Fuzzy => { - let mut matcher = SkimMatcherV2::default(); - if options.case_sensitive { - matcher = matcher.respect_case(); + MatchAlgorithm::Prefix => { + let lowercase_needle = if options.case_sensitive { + needle.to_owned() } else { - matcher = matcher.ignore_case(); + needle.to_folded_case() }; NuMatcher { options, - needle: orig_needle.to_owned(), + needle: lowercase_needle, + state: State::Prefix { items: Vec::new() }, + } + } + MatchAlgorithm::Fuzzy => { + let pat = Pattern::new( + needle, + if options.case_sensitive { + CaseMatching::Respect + } else { + CaseMatching::Ignore + }, + Normalization::Smart, + AtomKind::Fuzzy, + ); + NuMatcher { + options, + needle: needle.to_owned(), state: State::Fuzzy { - matcher: Box::new(matcher), + matcher: Matcher::new(Config::DEFAULT), + pat, items: Vec::new(), }, } @@ -102,8 +113,15 @@ impl NuMatcher { } matches } - State::Fuzzy { items, matcher } => { - let Some(score) = matcher.fuzzy_match(haystack, &self.needle) else { + State::Fuzzy { + matcher, + pat, + items, + } => { + let mut haystack_buf = Vec::new(); + let haystack_utf32 = Utf32Str::new(trim_quotes_str(haystack), &mut haystack_buf); + let mut indices = Vec::new(); + let Some(score) = pat.indices(haystack_utf32, matcher, &mut indices) else { return false; }; if let Some(item) = item { diff --git a/crates/nu-cli/tests/completions/mod.rs b/crates/nu-cli/tests/completions/mod.rs index a75ffe1d4c..fc2cc160b3 100644 --- a/crates/nu-cli/tests/completions/mod.rs +++ b/crates/nu-cli/tests/completions/mod.rs @@ -111,25 +111,6 @@ fn custom_completer() -> NuCompleter { NuCompleter::new(Arc::new(engine), Arc::new(stack)) } -#[fixture] -fn subcommand_completer() -> NuCompleter { - // Create a new engine - let (_, _, mut engine, mut stack) = new_engine(); - - let commands = r#" - $env.config.completions.algorithm = "fuzzy" - def foo [] {} - def "foo bar" [] {} - def "foo abaz" [] {} - def "foo aabcrr" [] {} - def food [] {} - "#; - assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok()); - - // Instantiate a new completer - NuCompleter::new(Arc::new(engine), Arc::new(stack)) -} - /// Use fuzzy completions but sort in alphabetical order #[fixture] fn fuzzy_alpha_sort_completer() -> NuCompleter { @@ -1040,24 +1021,32 @@ fn command_watch_with_filecompletion() { } #[rstest] -fn subcommand_completions(mut subcommand_completer: NuCompleter) { - let prefix = "foo br"; - let suggestions = subcommand_completer.complete(prefix, prefix.len()); - match_suggestions( - &vec!["foo bar".to_string(), "foo aabcrr".to_string()], - &suggestions, - ); +fn subcommand_completions() { + let (_, _, mut engine, mut stack) = new_engine(); + let commands = r#" + $env.config.completions.algorithm = "fuzzy" + def foo-test-command [] {} + def "foo-test-command bar" [] {} + def "foo-test-command aagap bcr" [] {} + def "food bar" [] {} + "#; + assert!(support::merge_input(commands.as_bytes(), &mut engine, &mut stack).is_ok()); + let mut subcommand_completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); - let prefix = "foo b"; + let prefix = "fod br"; let suggestions = subcommand_completer.complete(prefix, prefix.len()); match_suggestions( &vec![ - "foo bar".to_string(), - "foo abaz".to_string(), - "foo aabcrr".to_string(), + "food bar".to_string(), + "foo-test-command aagap bcr".to_string(), + "foo-test-command bar".to_string(), ], &suggestions, ); + + let prefix = "foot bar"; + let suggestions = subcommand_completer.complete(prefix, prefix.len()); + match_suggestions(&vec!["foo-test-command bar".to_string()], &suggestions); } #[test] diff --git a/crates/nu-lsp/Cargo.toml b/crates/nu-lsp/Cargo.toml index 5775fd8049..33a15c8ebb 100644 --- a/crates/nu-lsp/Cargo.toml +++ b/crates/nu-lsp/Cargo.toml @@ -14,11 +14,11 @@ nu-parser = { path = "../nu-parser", version = "0.101.1" } nu-protocol = { path = "../nu-protocol", version = "0.101.1" } crossbeam-channel = { workspace = true } -fuzzy-matcher = { workspace = true } lsp-server = { workspace = true } lsp-textdocument = { workspace = true } lsp-types = { workspace = true } miette = { workspace = true } +nucleo-matcher = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } url = { workspace = true } diff --git a/crates/nu-lsp/src/symbols.rs b/crates/nu-lsp/src/symbols.rs index 301def0a5c..604c867e01 100644 --- a/crates/nu-lsp/src/symbols.rs +++ b/crates/nu-lsp/src/symbols.rs @@ -2,7 +2,6 @@ use std::collections::{BTreeMap, HashSet}; use std::hash::{Hash, Hasher}; use crate::{path_to_uri, span_to_range, uri_to_path, Id, LanguageServer}; -use fuzzy_matcher::{skim::SkimMatcherV2, FuzzyMatcher}; use lsp_textdocument::{FullTextDocument, TextDocuments}; use lsp_types::{ DocumentSymbolParams, DocumentSymbolResponse, Location, Range, SymbolInformation, SymbolKind, @@ -14,6 +13,8 @@ use nu_protocol::{ engine::{CachedFile, EngineState, StateWorkingSet}, DeclId, Span, VarId, }; +use nucleo_matcher::pattern::{AtomKind, CaseMatching, Normalization, Pattern}; +use nucleo_matcher::{Config, Matcher, Utf32Str}; use std::{cmp::Ordering, path::Path}; /// Struct stored in cache, uri not included @@ -70,7 +71,7 @@ impl Symbol { /// Cache symbols for each opened file to avoid repeated parsing pub struct SymbolCache { /// Fuzzy matcher for symbol names - matcher: SkimMatcherV2, + matcher: Matcher, /// File Uri --> Symbols cache: BTreeMap>, /// If marked as dirty, parse on next request @@ -80,7 +81,7 @@ pub struct SymbolCache { impl SymbolCache { pub fn new() -> Self { SymbolCache { - matcher: SkimMatcherV2::default(), + matcher: Matcher::new(Config::DEFAULT), cache: BTreeMap::new(), dirty_flags: BTreeMap::new(), } @@ -240,12 +241,20 @@ impl SymbolCache { ) } - pub fn get_fuzzy_matched_symbols(&self, query: &str) -> Vec { + pub fn get_fuzzy_matched_symbols(&mut self, query: &str) -> Vec { + let pat = Pattern::new( + query, + CaseMatching::Smart, + Normalization::Smart, + AtomKind::Fuzzy, + ); self.cache .iter() .flat_map(|(uri, symbols)| symbols.iter().map(|s| s.clone().to_symbol_information(uri))) .filter_map(|s| { - self.matcher.fuzzy_match(&s.name, query)?; + let mut buf = Vec::new(); + let name = Utf32Str::new(&s.name, &mut buf); + pat.score(name, &mut self.matcher)?; Some(s) }) .collect()