perf: Optimize files, folder and extension lookups in Context (#880)

This commit is contained in:
Sam Rose 2020-02-03 21:57:48 +00:00 committed by GitHub
parent c30419877c
commit 83337a1a03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 168 additions and 129 deletions

View File

@ -5,9 +5,8 @@ use crate::modules;
use clap::ArgMatches; use clap::ArgMatches;
use git2::{Repository, RepositoryState}; use git2::{Repository, RepositoryState};
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use std::env; use std::env;
use std::ffi::OsStr;
use std::fs; use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::string::String; use std::string::String;
@ -23,8 +22,8 @@ pub struct Context<'a> {
/// The current working directory that starship is being called in. /// The current working directory that starship is being called in.
pub current_dir: PathBuf, pub current_dir: PathBuf,
/// A vector containing the full paths of all the files in `current_dir`. /// A struct containing directory contents in a lookup-optimised format.
dir_files: OnceCell<Vec<PathBuf>>, dir_contents: OnceCell<DirContents>,
/// Properties to provide to modules. /// Properties to provide to modules.
pub properties: HashMap<&'a str, String>, pub properties: HashMap<&'a str, String>,
@ -80,7 +79,7 @@ impl<'a> Context<'a> {
config, config,
properties, properties,
current_dir, current_dir,
dir_files: OnceCell::new(), dir_contents: OnceCell::new(),
repo: OnceCell::new(), repo: OnceCell::new(),
shell, shell,
} }
@ -117,7 +116,7 @@ impl<'a> Context<'a> {
// see ScanDir for methods // see ScanDir for methods
pub fn try_begin_scan(&'a self) -> Option<ScanDir<'a>> { pub fn try_begin_scan(&'a self) -> Option<ScanDir<'a>> {
Some(ScanDir { Some(ScanDir {
dir_files: self.get_dir_files().ok()?, dir_contents: self.dir_contents().ok()?,
files: &[], files: &[],
folders: &[], folders: &[],
extensions: &[], extensions: &[],
@ -145,28 +144,103 @@ impl<'a> Context<'a> {
}) })
} }
pub fn get_dir_files(&self) -> Result<&Vec<PathBuf>, std::io::Error> { pub fn dir_contents(&self) -> Result<&DirContents, std::io::Error> {
let start_time = SystemTime::now(); self.dir_contents.get_or_try_init(|| {
let scan_timeout = Duration::from_millis(self.config.get_root_config().scan_timeout); let timeout = Duration::from_millis(self.config.get_root_config().scan_timeout);
DirContents::from_path_with_timeout(&self.current_dir, timeout)
self.dir_files
.get_or_try_init(|| -> Result<Vec<PathBuf>, std::io::Error> {
let dir_files = fs::read_dir(&self.current_dir)?
.take_while(|_item| {
SystemTime::now().duration_since(start_time).unwrap() < scan_timeout
}) })
}
}
/// Lookup-optimised snapshot of the entries found in one directory.
///
/// Populated by `from_path_with_timeout` from a single `fs::read_dir` pass over
/// the base directory; the scan stops once the timeout has elapsed, so the sets
/// may be incomplete for very large or slow directories.
#[derive(Debug)]
pub struct DirContents {
/// Every entry that is not a directory, as a path relative to the base
/// directory given at construction.
files: HashSet<PathBuf>,
/// The final path component of every file, converted lossily to a `String`.
file_names: HashSet<String>,
/// Every entry that is a directory, as a path relative to the base directory
/// given at construction.
folders: HashSet<PathBuf>,
/// Every file extension found, without the dot (e.g. "js" instead of ".js").
/// Files whose relative path starts with '.' contribute no extension.
extensions: HashSet<String>,
}
impl DirContents {
/// Scans `base` with a fixed 30-second scan timeout.
///
/// Thin convenience wrapper around `from_path_with_timeout`; any I/O error
/// from that call is returned unchanged.
fn from_path(base: &PathBuf) -> Result<Self, std::io::Error> {
    let default_timeout = Duration::from_secs(30);
    Self::from_path_with_timeout(base, default_timeout)
}
fn from_path_with_timeout(base: &PathBuf, timeout: Duration) -> Result<Self, std::io::Error> {
let start = SystemTime::now();
let mut folders: HashSet<PathBuf> = HashSet::new();
let mut files: HashSet<PathBuf> = HashSet::new();
let mut file_names: HashSet<String> = HashSet::new();
let mut extensions: HashSet<String> = HashSet::new();
fs::read_dir(base)?
.take_while(|_| SystemTime::now().duration_since(start).unwrap() < timeout)
.filter_map(Result::ok) .filter_map(Result::ok)
.map(|entry| entry.path()) .for_each(|entry| {
.collect::<Vec<PathBuf>>(); let path = PathBuf::from(entry.path().strip_prefix(base).unwrap());
if entry.path().is_dir() {
folders.insert(path);
} else {
if !path.to_string_lossy().starts_with('.') {
path.extension()
.map(|ext| extensions.insert(ext.to_string_lossy().to_string()));
}
if let Some(file_name) = path.file_name() {
file_names.insert(file_name.to_string_lossy().to_string());
}
files.insert(path);
}
});
log::trace!( log::trace!(
"Building a vector of directory files took {:?}", "Building HashSets of directory files, folders and extensions took {:?}",
SystemTime::now().duration_since(start_time).unwrap() SystemTime::now().duration_since(start).unwrap()
); );
Ok(dir_files)
Ok(DirContents {
folders,
files,
file_names,
extensions,
}) })
} }
/// Returns an iterator over every recorded file path.
///
/// Paths are relative to the scanned base directory and are yielded in the
/// backing `HashSet`'s (unspecified) order.
pub fn files(&self) -> impl Iterator<Item = &PathBuf> {
    let recorded = &self.files;
    recorded.iter()
}
/// Returns `true` when `path` exactly equals one of the recorded file paths
/// (which are stored relative to the scanned base directory).
pub fn has_file(&self, path: &str) -> bool {
    let wanted = Path::new(path);
    self.files.contains(wanted)
}
/// Returns `true` when some recorded file has exactly the file name `name`.
pub fn has_file_name(&self, name: &str) -> bool {
    self.file_names.get(name).is_some()
}
/// Returns `true` when at least one of `names` is the name of a recorded file.
///
/// An empty `names` slice yields `false`.
pub fn has_any_file_name(&self, names: &[&str]) -> bool {
    for candidate in names {
        if self.has_file_name(candidate) {
            return true;
        }
    }
    false
}
/// Returns `true` when `path` exactly equals one of the recorded folder paths
/// (which are stored relative to the scanned base directory).
pub fn has_folder(&self, path: &str) -> bool {
    let wanted = Path::new(path);
    self.folders.contains(wanted)
}
/// Returns `true` when at least one of `paths` is a recorded folder.
///
/// An empty `paths` slice yields `false`.
pub fn has_any_folder(&self, paths: &[&str]) -> bool {
    paths
        .iter()
        .copied()
        .any(|candidate| self.has_folder(candidate))
}
/// Returns `true` when some recorded file has the extension `ext`.
///
/// `ext` is given without the leading dot, e.g. "js" rather than ".js".
pub fn has_extension(&self, ext: &str) -> bool {
    let known = &self.extensions;
    known.contains(ext)
}
/// Returns `true` when at least one of `exts` (given without dots) is a
/// recorded file extension.
///
/// An empty `exts` slice yields `false`.
pub fn has_any_extension(&self, exts: &[&str]) -> bool {
    exts.iter()
        .find(|candidate| self.has_extension(candidate))
        .is_some()
}
fn get_shell() -> Shell { fn get_shell() -> Shell {
let shell = std::env::var("STARSHIP_SHELL").unwrap_or_default(); let shell = std::env::var("STARSHIP_SHELL").unwrap_or_default();
match shell.as_str() { match shell.as_str() {
@ -196,7 +270,7 @@ pub struct Repo {
// A struct of Criteria which will be used to verify current PathBuf is // A struct of Criteria which will be used to verify current PathBuf is
// of X language, criteria can be set via the builder pattern // of X language, criteria can be set via the builder pattern
pub struct ScanDir<'a> { pub struct ScanDir<'a> {
dir_files: &'a Vec<PathBuf>, dir_contents: &'a DirContents,
files: &'a [&'a str], files: &'a [&'a str],
folders: &'a [&'a str], folders: &'a [&'a str],
extensions: &'a [&'a str], extensions: &'a [&'a str],
@ -221,46 +295,10 @@ impl<'a> ScanDir<'a> {
/// based on the current Pathbuf check to see /// based on the current Pathbuf check to see
/// if any of this criteria match or exist and returning a boolean /// if any of this criteria match or exist and returning a boolean
pub fn is_match(&self) -> bool { pub fn is_match(&self) -> bool {
self.dir_files.iter().any(|path| { self.dir_contents.has_any_extension(self.extensions)
if path.is_dir() { || self.dir_contents.has_any_folder(self.folders)
path_has_name(path, self.folders) || self.dir_contents.has_any_file_name(self.files)
} else {
path_has_name(path, self.files) || has_extension(path, self.extensions)
} }
})
}
}
/// Reports whether the final component of `dir_entry` equals one of `names`.
///
/// A path with no final component, or one that is not valid UTF-8, is treated
/// as having an empty name and never matches, even if `names` contains "".
pub fn path_has_name<'a>(dir_entry: &PathBuf, names: &'a [&'a str]) -> bool {
    let entry_name = dir_entry
        .file_name()
        .and_then(OsStr::to_str)
        .unwrap_or_default();

    // An empty entry name can only ever "match" an empty candidate, which is
    // rejected, so it is ruled out up front.
    if entry_name.is_empty() {
        return false;
    }

    names.iter().any(|candidate| *candidate == entry_name)
}
/// checks if pathbuf doesn't start with a dot and matches any provided extension
pub fn has_extension<'a>(dir_entry: &PathBuf, extensions: &'a [&'a str]) -> bool {
if let Some(file_name) = dir_entry.file_name() {
if file_name.to_string_lossy().starts_with('.') {
return false;
}
return extensions.iter().any(|ext| {
dir_entry
.extension()
.and_then(OsStr::to_str)
.map_or(false, |e| e == *ext)
});
}
false
} }
fn get_current_branch(repository: &Repository) -> Option<String> { fn get_current_branch(repository: &Repository) -> Option<String> {
@ -284,69 +322,73 @@ pub enum Shell {
mod tests { mod tests {
use super::*; use super::*;
#[test] fn testdir(paths: &[&str]) -> Result<tempfile::TempDir, std::io::Error> {
fn test_path_has_name() { let dir = tempfile::tempdir()?;
let mut buf = PathBuf::from("/"); for path in paths {
let files = vec!["package.json"]; let p = dir.path().join(Path::new(path));
if let Some(parent) = p.parent() {
assert_eq!(path_has_name(&buf, &files), false); fs::create_dir_all(parent)?;
}
buf.set_file_name("some-file.js"); fs::File::create(p)?.sync_all()?;
assert_eq!(path_has_name(&buf, &files), false); }
Ok(dir)
buf.set_file_name("package.json");
assert_eq!(path_has_name(&buf, &files), true);
} }
#[test] #[test]
fn test_has_extension() { fn test_scan_dir() -> Result<(), Box<dyn std::error::Error>> {
let mut buf = PathBuf::from("/"); let empty = testdir(&[])?;
let extensions = vec!["js"]; let empty_dc = DirContents::from_path(&PathBuf::from(empty.path()))?;
assert_eq!(has_extension(&buf, &extensions), false); assert_eq!(
ScanDir {
buf.set_file_name("some-file.rs"); dir_contents: &empty_dc,
assert_eq!(has_extension(&buf, &extensions), false); files: &["package.json"],
extensions: &["js"],
buf.set_file_name(".some-file.js"); folders: &["node_modules"],
assert_eq!(has_extension(&buf, &extensions), false);
buf.set_file_name("some-file.js");
assert_eq!(has_extension(&buf, &extensions), true)
} }
.is_match(),
false
);
#[test] let rust = testdir(&["README.md", "Cargo.toml", "src/main.rs"])?;
fn test_criteria_scan_fails() { let rust_dc = DirContents::from_path(&PathBuf::from(rust.path()))?;
let failing_criteria = ScanDir { assert_eq!(
dir_files: &vec![PathBuf::new()], ScanDir {
dir_contents: &rust_dc,
files: &["package.json"], files: &["package.json"],
extensions: &["js"], extensions: &["js"],
folders: &["node_modules"], folders: &["node_modules"],
};
// fails if buffer does not match any criteria
assert_eq!(failing_criteria.is_match(), false);
let failing_dir_criteria = ScanDir {
dir_files: &vec![PathBuf::from("/package.js/dog.go")],
files: &["package.json"],
extensions: &["js"],
folders: &["node_modules"],
};
// fails when passed a pathbuf dir matches extension path
assert_eq!(failing_dir_criteria.is_match(), false);
} }
.is_match(),
false
);
#[test] let java = testdir(&["README.md", "src/com/test/Main.java", "pom.xml"])?;
fn test_criteria_scan_passes() { let java_dc = DirContents::from_path(&PathBuf::from(java.path()))?;
let passing_criteria = ScanDir { assert_eq!(
dir_files: &vec![PathBuf::from("package.json")], ScanDir {
dir_contents: &java_dc,
files: &["package.json"], files: &["package.json"],
extensions: &["js"], extensions: &["js"],
folders: &["node_modules"], folders: &["node_modules"],
}; }
.is_match(),
false
);
assert_eq!(passing_criteria.is_match(), true); let node = testdir(&["README.md", "node_modules/lodash/main.js", "package.json"])?;
let node_dc = DirContents::from_path(&PathBuf::from(node.path()))?;
assert_eq!(
ScanDir {
dir_contents: &node_dc,
files: &["package.json"],
extensions: &["js"],
folders: &["node_modules"],
}
.is_match(),
true
);
Ok(())
} }
} }

View File

@ -165,8 +165,8 @@ fn get_pinned_sdk_version(json: &str) -> Option<Version> {
fn get_local_dotnet_files<'a>(context: &'a Context) -> Result<Vec<DotNetFile<'a>>, std::io::Error> { fn get_local_dotnet_files<'a>(context: &'a Context) -> Result<Vec<DotNetFile<'a>>, std::io::Error> {
Ok(context Ok(context
.get_dir_files()? .dir_contents()?
.iter() .files()
.filter_map(|p| { .filter_map(|p| {
get_dotnet_file_type(p).map(|t| DotNetFile { get_dotnet_file_type(p).map(|t| DotNetFile {
path: p.as_ref(), path: p.as_ref(),

View File

@ -1,4 +1,3 @@
use std::ffi::OsStr;
use std::path::Path; use std::path::Path;
use std::process::{Command, Output}; use std::process::{Command, Output};
use std::{env, fs}; use std::{env, fs};
@ -107,13 +106,11 @@ fn find_rust_toolchain_file(context: &Context) -> Option<String> {
Some(line.trim().to_owned()) Some(line.trim().to_owned())
} }
if let Some(path) = context if let Ok(true) = context
.get_dir_files() .dir_contents()
.ok()? .map(|dir| dir.has_file("rust-toolchain"))
.iter()
.find(|p| p.file_name() == Some(OsStr::new("rust-toolchain")))
{ {
if let Some(toolchain) = read_first_line(path) { if let Some(toolchain) = read_first_line(Path::new("rust-toolchain")) {
return Some(toolchain); return Some(toolchain);
} }
} }