mirror of
https://github.com/sharkdp/bat.git
synced 2024-11-22 07:43:39 +01:00
Make asset compression optional at compile time (#1825)
This will be needed to later support zero-copy deserialization of independent syntax sets, but is interesting and useful on its own. Instead of deferring serialization and deserialization to syntect, we implement it ourselves in the same way, but make compression optional.
This commit is contained in:
parent
d935ea1cda
commit
87978e7755
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -89,6 +89,7 @@ dependencies = [
|
||||
"ansi_term 0.12.1",
|
||||
"assert_cmd",
|
||||
"atty",
|
||||
"bincode",
|
||||
"bugreport",
|
||||
"clap",
|
||||
"clircle",
|
||||
@ -96,6 +97,7 @@ dependencies = [
|
||||
"content_inspector",
|
||||
"dirs-next",
|
||||
"encoding",
|
||||
"flate2",
|
||||
"git2",
|
||||
"globset",
|
||||
"grep-cli",
|
||||
|
@ -45,7 +45,9 @@ regex-fancy = ["syntect/regex-fancy"] # Use the rust-only "fancy-regex" engine
|
||||
atty = { version = "0.2.14", optional = true }
|
||||
ansi_term = "^0.12.1"
|
||||
ansi_colours = "^1.0"
|
||||
bincode = "1.0"
|
||||
console = "0.14.1"
|
||||
flate2 = "1.0"
|
||||
lazy_static = { version = "1.4", optional = true }
|
||||
lazycell = "1.0"
|
||||
thiserror = "1.0"
|
||||
@ -72,7 +74,7 @@ default-features = false
|
||||
[dependencies.syntect]
|
||||
version = "4.6.0"
|
||||
default-features = false
|
||||
features = ["parsing", "dump-load"]
|
||||
features = ["parsing"]
|
||||
|
||||
[dependencies.clap]
|
||||
version = "2.33"
|
||||
|
@ -4,7 +4,6 @@ use std::path::{Path, PathBuf};
|
||||
|
||||
use lazycell::LazyCell;
|
||||
|
||||
use syntect::dumps::{from_binary, from_reader};
|
||||
use syntect::highlighting::{Theme, ThemeSet};
|
||||
use syntect::parsing::{SyntaxReference, SyntaxSet};
|
||||
|
||||
@ -29,6 +28,12 @@ pub struct SyntaxReferenceInSet<'a> {
|
||||
pub syntax_set: &'a SyntaxSet,
|
||||
}
|
||||
|
||||
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
|
||||
pub(crate) const COMPRESS_SYNTAXES: bool = true;
|
||||
|
||||
// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time
|
||||
pub(crate) const COMPRESS_THEMES: bool = true;
|
||||
|
||||
const IGNORED_SUFFIXES: [&str; 13] = [
|
||||
// Editor etc backups
|
||||
"~",
|
||||
@ -66,7 +71,7 @@ impl HighlightingAssets {
|
||||
pub fn from_cache(cache_path: &Path) -> Result<Self> {
|
||||
Ok(HighlightingAssets::new(
|
||||
SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
|
||||
asset_from_cache(&cache_path.join("themes.bin"), "theme set")?,
|
||||
asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
|
||||
))
|
||||
}
|
||||
|
||||
@ -299,8 +304,10 @@ enum SerializedSyntaxSet {
|
||||
impl SerializedSyntaxSet {
|
||||
fn deserialize(&self) -> Result<SyntaxSet> {
|
||||
match self {
|
||||
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data)),
|
||||
SerializedSyntaxSet::FromFile(ref path) => asset_from_cache(path, "syntax set"),
|
||||
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
|
||||
SerializedSyntaxSet::FromFile(ref path) => {
|
||||
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -310,10 +317,32 @@ pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
|
||||
}
|
||||
|
||||
pub(crate) fn get_integrated_themeset() -> ThemeSet {
|
||||
from_binary(include_bytes!("../assets/themes.bin"))
|
||||
from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
|
||||
}
|
||||
|
||||
fn asset_from_cache<T: serde::de::DeserializeOwned>(path: &Path, description: &str) -> Result<T> {
|
||||
pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
|
||||
asset_from_contents(v, "n/a", compressed)
|
||||
.expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
|
||||
}
|
||||
|
||||
fn asset_from_contents<T: serde::de::DeserializeOwned>(
|
||||
contents: &[u8],
|
||||
description: &str,
|
||||
compressed: bool,
|
||||
) -> Result<T> {
|
||||
if compressed {
|
||||
bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents))
|
||||
} else {
|
||||
bincode::deserialize_from(contents)
|
||||
}
|
||||
.map_err(|_| format!("Could not parse {}", description).into())
|
||||
}
|
||||
|
||||
fn asset_from_cache<T: serde::de::DeserializeOwned>(
|
||||
path: &Path,
|
||||
description: &str,
|
||||
compressed: bool,
|
||||
) -> Result<T> {
|
||||
let contents = fs::read(path).map_err(|_| {
|
||||
format!(
|
||||
"Could not load cached {} '{}'",
|
||||
@ -321,7 +350,8 @@ fn asset_from_cache<T: serde::de::DeserializeOwned>(path: &Path, description: &s
|
||||
path.to_string_lossy()
|
||||
)
|
||||
})?;
|
||||
from_reader(&contents[..]).map_err(|_| format!("Could not parse cached {}", description).into())
|
||||
asset_from_contents(&contents[..], description, compressed)
|
||||
.map_err(|_| format!("Could not parse cached {}", description).into())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -1,12 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use syntect::dumps::from_binary;
|
||||
use syntect::highlighting::ThemeSet;
|
||||
use syntect::parsing::syntax_definition::{
|
||||
ContextReference, MatchOperation, MatchPattern, Pattern, SyntaxDefinition,
|
||||
};
|
||||
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
|
||||
|
||||
use crate::assets::*;
|
||||
use crate::error::*;
|
||||
|
||||
type SyntaxName = String;
|
||||
@ -86,7 +86,7 @@ fn build_syntax_set_builder(
|
||||
builder.add_plain_text_syntax();
|
||||
builder
|
||||
} else {
|
||||
from_binary::<SyntaxSet>(crate::assets::get_serialized_integrated_syntaxset())
|
||||
from_binary::<SyntaxSet>(get_serialized_integrated_syntaxset(), COMPRESS_SYNTAXES)
|
||||
.into_builder()
|
||||
};
|
||||
|
||||
@ -120,8 +120,18 @@ fn write_assets(
|
||||
current_version: &str,
|
||||
) -> Result<()> {
|
||||
let _ = std::fs::create_dir_all(target_dir);
|
||||
asset_to_cache(theme_set, &target_dir.join("themes.bin"), "theme set")?;
|
||||
asset_to_cache(syntax_set, &target_dir.join("syntaxes.bin"), "syntax set")?;
|
||||
asset_to_cache(
|
||||
theme_set,
|
||||
&target_dir.join("themes.bin"),
|
||||
"theme set",
|
||||
COMPRESS_THEMES,
|
||||
)?;
|
||||
asset_to_cache(
|
||||
syntax_set,
|
||||
&target_dir.join("syntaxes.bin"),
|
||||
"syntax set",
|
||||
COMPRESS_SYNTAXES,
|
||||
)?;
|
||||
|
||||
print!(
|
||||
"Writing metadata to folder {} ... ",
|
||||
@ -294,9 +304,33 @@ impl SyntaxSetDependencyBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
fn asset_to_cache<T: serde::Serialize>(asset: &T, path: &Path, description: &str) -> Result<()> {
|
||||
fn asset_to_contents<T: serde::Serialize>(
|
||||
asset: &T,
|
||||
description: &str,
|
||||
compressed: bool,
|
||||
) -> Result<Vec<u8>> {
|
||||
let mut contents = vec![];
|
||||
if compressed {
|
||||
bincode::serialize_into(
|
||||
flate2::write::ZlibEncoder::new(&mut contents, flate2::Compression::best()),
|
||||
asset,
|
||||
)
|
||||
} else {
|
||||
bincode::serialize_into(&mut contents, asset)
|
||||
}
|
||||
.map_err(|_| format!("Could not serialize {}", description))?;
|
||||
Ok(contents)
|
||||
}
|
||||
|
||||
fn asset_to_cache<T: serde::Serialize>(
|
||||
asset: &T,
|
||||
path: &Path,
|
||||
description: &str,
|
||||
compressed: bool,
|
||||
) -> Result<()> {
|
||||
print!("Writing {} to {} ... ", description, path.to_string_lossy());
|
||||
syntect::dumps::dump_to_file(asset, &path).map_err(|_| {
|
||||
let contents = asset_to_contents(asset, description, compressed)?;
|
||||
std::fs::write(path, &contents[..]).map_err(|_| {
|
||||
format!(
|
||||
"Could not save {} to {}",
|
||||
description,
|
||||
|
Loading…
Reference in New Issue
Block a user