Reduce startup time in loop-through mode by 80%-90%

Instead of 50 ms - 100 ms, startup takes 5 ms - 10 ms.

HighlightingAssets::get_syntax_set() is never called when e.g. piping the bat
output to a file (see Config::loop_through), so by loading the SyntaxSet only
when needed, we radically improve startup time when it is not needed.
This commit is contained in:
Martin Nordholts 2021-07-22 10:39:39 +02:00
parent 1bac3750df
commit 6acec2c074
4 changed files with 71 additions and 9 deletions

View File

@ -9,6 +9,7 @@
## Other ## Other
- Load cached assets as fast as integrated assets, see #1753 (@Enselic) - Load cached assets as fast as integrated assets, see #1753 (@Enselic)
- Greatly reduce startup time in loop-through mode, e.g. when redirecting output. Instead of *50 ms* - *100 ms*, startup takes *5 ms* - *10 ms*. See #1747 (@Enselic)
## Syntaxes ## Syntaxes

1
Cargo.lock generated
View File

@ -101,6 +101,7 @@ dependencies = [
"globset", "globset",
"grep-cli", "grep-cli",
"lazy_static", "lazy_static",
"lazycell",
"nix", "nix",
"path_abs", "path_abs",
"predicates", "predicates",

View File

@ -38,6 +38,7 @@ ansi_term = "^0.12.1"
ansi_colours = "^1.0" ansi_colours = "^1.0"
console = "0.14.1" console = "0.14.1"
lazy_static = { version = "1.4", optional = true } lazy_static = { version = "1.4", optional = true }
lazycell = "1.0"
wild = { version = "2.0", optional = true } wild = { version = "2.0", optional = true }
content_inspector = "0.2.4" content_inspector = "0.2.4"
encoding = "0.2" encoding = "0.2"

View File

@ -1,7 +1,9 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fs; use std::fs;
use std::path::Path; use std::path::{Path, PathBuf};
use lazycell::LazyCell;
use syntect::dumps::{dump_to_file, from_binary, from_reader}; use syntect::dumps::{dump_to_file, from_binary, from_reader};
use syntect::highlighting::{Theme, ThemeSet}; use syntect::highlighting::{Theme, ThemeSet};
@ -17,7 +19,8 @@ use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
#[derive(Debug)] #[derive(Debug)]
pub struct HighlightingAssets { pub struct HighlightingAssets {
syntax_set: SyntaxSet, syntax_set_cell: LazyCell<SyntaxSet>,
serialized_syntax_set: Option<SerializedSyntaxSet>,
pub(crate) theme_set: ThemeSet, pub(crate) theme_set: ThemeSet,
fallback_theme: Option<&'static str>, fallback_theme: Option<&'static str>,
} }
@ -40,9 +43,21 @@ const IGNORED_SUFFIXES: [&str; 10] = [
]; ];
impl HighlightingAssets { impl HighlightingAssets {
fn new(syntax_set: SyntaxSet, theme_set: ThemeSet) -> Self { fn new(
syntax_set: Option<SyntaxSet>,
serialized_syntax_set: Option<SerializedSyntaxSet>,
theme_set: ThemeSet,
) -> Self {
assert!(syntax_set.is_some() || serialized_syntax_set.is_some());
let syntax_set_cell = LazyCell::new();
if let Some(syntax_set) = syntax_set {
syntax_set_cell.fill(syntax_set).expect("can never fail");
}
HighlightingAssets { HighlightingAssets {
syntax_set, syntax_set_cell,
serialized_syntax_set,
theme_set, theme_set,
fallback_theme: None, fallback_theme: None,
} }
@ -97,20 +112,30 @@ impl HighlightingAssets {
} }
Ok(HighlightingAssets::new( Ok(HighlightingAssets::new(
syntax_set_builder.build(), Some(syntax_set_builder.build()),
None,
theme_set, theme_set,
)) ))
} }
pub fn from_cache(cache_path: &Path) -> Result<Self> { pub fn from_cache(cache_path: &Path) -> Result<Self> {
Ok(HighlightingAssets::new( Ok(HighlightingAssets::new(
asset_from_cache(&cache_path.join("syntaxes.bin"), "syntax set")?, None,
Some(SerializedSyntaxSet::FromFile(
cache_path.join("syntaxes.bin"),
)),
asset_from_cache(&cache_path.join("themes.bin"), "theme set")?, asset_from_cache(&cache_path.join("themes.bin"), "theme set")?,
)) ))
} }
pub fn from_binary() -> Self { pub fn from_binary() -> Self {
HighlightingAssets::new(get_integrated_syntaxset(), get_integrated_themeset()) HighlightingAssets::new(
None,
Some(SerializedSyntaxSet::FromBinary(
get_serialized_integrated_syntaxset(),
)),
get_integrated_themeset(),
)
} }
pub fn save_to_cache(&self, target_dir: &Path, current_version: &str) -> Result<()> { pub fn save_to_cache(&self, target_dir: &Path, current_version: &str) -> Result<()> {
@ -137,7 +162,17 @@ impl HighlightingAssets {
} }
pub(crate) fn get_syntax_set(&self) -> Result<&SyntaxSet> { pub(crate) fn get_syntax_set(&self) -> Result<&SyntaxSet> {
Ok(&self.syntax_set) if !self.syntax_set_cell.filled() {
self.syntax_set_cell.fill(
self.serialized_syntax_set
.as_ref()
.expect("a dev forgot to setup serialized_syntax_set, please report to https://github.com/sharkdp/bat/issues")
.deserialize()?
).unwrap();
}
// It is safe to .unwrap() because we just made sure it was .filled()
Ok(self.syntax_set_cell.borrow().unwrap())
} }
/// Use [Self::get_syntaxes] instead /// Use [Self::get_syntaxes] instead
@ -316,8 +351,32 @@ impl HighlightingAssets {
} }
} }
/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
/// We keep it in this format since we want to load it lazily: the data is
/// only turned into a real `SyntaxSet` when `deserialize()` is called.
#[derive(Debug)]
enum SerializedSyntaxSet {
/// The data comes from a user-generated cache file. Only the path is
/// stored here; `deserialize()` hands it to `asset_from_cache`.
FromFile(PathBuf),
/// The data to use is embedded into the bat binary. `deserialize()`
/// passes these bytes to `from_binary`.
FromBinary(&'static [u8]),
}
impl SerializedSyntaxSet {
    /// Turns the serialized data into a usable `SyntaxSet`.
    ///
    /// * `FromBinary`: the embedded bytes are decoded with `from_binary`.
    /// * `FromFile`: the cache file is loaded with `asset_from_cache`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `asset_from_cache` reports for the
    /// `FromFile` variant. The `FromBinary` arm always yields `Ok`.
    fn deserialize(&self) -> Result<SyntaxSet> {
        // Matching on `&self` already binds the payloads by reference, so no
        // explicit `ref` or extra `&` is needed.
        match self {
            SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data)),
            SerializedSyntaxSet::FromFile(path) => asset_from_cache(path, "syntax set"),
        }
    }
}
/// Returns the still-serialized syntax set that ships inside the binary.
fn get_serialized_integrated_syntaxset() -> &'static [u8] {
    // `include_bytes!` embeds the file contents at compile time.
    const INTEGRATED_SYNTAXES: &[u8] = include_bytes!("../assets/syntaxes.bin");
    INTEGRATED_SYNTAXES
}
fn get_integrated_syntaxset() -> SyntaxSet { fn get_integrated_syntaxset() -> SyntaxSet {
from_binary(include_bytes!("../assets/syntaxes.bin")) from_binary(get_serialized_integrated_syntaxset())
} }
fn get_integrated_themeset() -> ThemeSet { fn get_integrated_themeset() -> ThemeSet {