Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
use std::collections::HashMap;
|
2020-04-21 08:19:24 +02:00
|
|
|
use std::ffi::OsStr;
|
2021-07-26 12:59:39 +02:00
|
|
|
use std::fs;
|
2021-07-22 10:39:39 +02:00
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
|
|
|
use lazycell::LazyCell;
|
2018-10-09 21:18:40 +02:00
|
|
|
|
2018-05-10 12:36:09 +02:00
|
|
|
use syntect::highlighting::{Theme, ThemeSet};
|
2021-08-10 22:18:47 +02:00
|
|
|
use syntect::parsing::{SyntaxReference, SyntaxSet};
|
2018-10-09 21:18:40 +02:00
|
|
|
|
2020-05-26 07:50:52 +02:00
|
|
|
use path_abs::PathAbs;
|
2020-05-24 16:09:59 +02:00
|
|
|
|
2020-12-27 22:51:24 +01:00
|
|
|
use crate::bat_warning;
|
2020-04-22 21:45:47 +02:00
|
|
|
use crate::error::*;
|
2020-04-22 18:10:26 +02:00
|
|
|
use crate::input::{InputReader, OpenedInput, OpenedInputKind};
|
2020-03-22 09:55:13 +01:00
|
|
|
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
|
2018-08-28 20:12:45 +02:00
|
|
|
|
2019-10-15 03:25:53 +02:00
|
|
|
/// Syntax definitions and themes used for highlighting.
///
/// The complete [`SyntaxSet`] and the minimal per-syntax sets are stored in
/// serialized form and are only deserialized when they are first requested.
#[derive(Debug)]
pub struct HighlightingAssets {
    /// Cell holding the complete [`SyntaxSet`]; it is filled on first use by
    /// deserializing `serialized_syntax_set`.
    syntax_set_cell: LazyCell<SyntaxSet>,
    /// The complete [`SyntaxSet`] in its serialized form.
    serialized_syntax_set: SerializedSyntaxSet,

    /// Serialized minimal [`SyntaxSet`]s together with their lookup table.
    minimal_syntaxes: MinimalSyntaxes,

    /// Lazily loaded [`SyntaxSet`]s, deserialized from `minimal_syntaxes`.
    /// The index in this vec matches the index in
    /// `minimal_syntaxes.serialized_syntax_sets`.
    deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,

    /// All available themes.
    theme_set: ThemeSet,
    /// Name of the theme to use when a requested theme is unknown. `None`
    /// means that [`Self::default_theme`] is used.
    fallback_theme: Option<&'static str>,
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
/// A [`SyntaxReference`] paired with the [`SyntaxSet`] it was looked up in.
#[derive(Debug)]
pub struct SyntaxReferenceInSet<'a> {
    /// The syntax that was found.
    pub syntax: &'a SyntaxReference,
    /// The set in which `syntax` was found.
    pub syntax_set: &'a SyntaxSet,
}
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
/// Stores and allows lookup of minimal [`SyntaxSet`]s. The [`SyntaxSet`]s are
/// stored in serialized form, and are deserialized on-demand. This gives good
/// startup performance since only the necessary [`SyntaxReference`]s need to
/// be deserialized.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub(crate) struct MinimalSyntaxes {
    /// Lookup the index into `serialized_syntax_sets` of a [`SyntaxSet`] by
    /// the name of any [`SyntaxReference`] inside the [`SyntaxSet`].
    /// Lookups in this file use the ASCII-lowercased name as key.
    /// (We will later add `by_extension`, `by_first_line`, etc.)
    pub(crate) by_name: HashMap<String, usize>,

    /// Serialized [`SyntaxSet`]s. Whether or not this data is compressed is
    /// decided by [`COMPRESS_SERIALIZED_MINIMAL_SYNTAXES`].
    pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
}
|
|
|
|
|
2021-09-07 17:21:48 +02:00
|
|
|
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time.
// NOTE(review): presumably governs the complete serialized syntax set — confirm at the use site.
pub(crate) const COMPRESS_SYNTAXES: bool = true;
|
|
|
|
|
|
|
|
// Compress for size of ~20 kB instead of ~200 kB at the cost of ~30% longer deserialization time.
// Passed along when the theme set is loaded from `themes.bin` in the cache.
pub(crate) const COMPRESS_THEMES: bool = true;
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
// Compress for size of ~400 kB instead of ~2100 kB at the cost of ~30% longer deserialization time.
// Applies to each individual entry of `MinimalSyntaxes::serialized_syntax_sets`.
pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
|
|
|
|
|
|
|
|
// Whether or not to compress the serialized form of `MinimalSyntaxes`. Shall
// always be `false`, because the data in
// `MinimalSyntaxes::serialized_syntax_sets` has already been compressed
// (assuming `COMPRESS_SERIALIZED_MINIMAL_SYNTAXES` is `true`). The "outer" data
// structures like `by_name` are tiny. If we compress, deserialization can't do
// an efficient byte-by-byte copy of `serialized_syntax_sets`.
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
|
|
|
|
|
2021-08-29 18:19:42 +02:00
|
|
|
/// File name suffixes that are stripped before syntax detection is retried.
/// The entries are tried in order and the first one that matches is used.
const IGNORED_SUFFIXES: [&str; 13] = [
    // Backups left behind by editors and similar tools
    "~", ".bak", ".old", ".orig",
    // apt/dpkg/ucf backups (Debian and derivatives)
    ".dpkg-dist", ".dpkg-old", ".ucf-dist", ".ucf-new", ".ucf-old",
    // rpm backups (Red Hat and derivatives)
    ".rpmnew", ".rpmorig", ".rpmsave",
    // Input/template files of build systems
    ".in",
];
|
|
|
|
|
2018-05-10 12:36:09 +02:00
|
|
|
impl HighlightingAssets {
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
fn new(
|
|
|
|
serialized_syntax_set: SerializedSyntaxSet,
|
|
|
|
minimal_syntaxes: MinimalSyntaxes,
|
|
|
|
theme_set: ThemeSet,
|
|
|
|
) -> Self {
|
|
|
|
// Prepare so we can lazily load minimal syntaxes without a mut reference
|
|
|
|
let deserialized_minimal_syntaxes =
|
|
|
|
vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
|
|
|
|
|
2021-07-19 05:41:40 +02:00
|
|
|
HighlightingAssets {
|
2021-08-24 07:58:03 +02:00
|
|
|
syntax_set_cell: LazyCell::new(),
|
2021-07-22 10:39:39 +02:00
|
|
|
serialized_syntax_set,
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
deserialized_minimal_syntaxes,
|
|
|
|
minimal_syntaxes,
|
2021-07-19 05:41:40 +02:00
|
|
|
theme_set,
|
|
|
|
fallback_theme: None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-21 20:31:32 +01:00
|
|
|
/// Name of the theme that is used when no fallback theme has been set.
pub fn default_theme() -> &'static str {
    const DEFAULT_THEME_NAME: &str = "Monokai Extended";
    DEFAULT_THEME_NAME
}
|
|
|
|
|
2020-04-21 15:50:46 +02:00
|
|
|
pub fn from_cache(cache_path: &Path) -> Result<Self> {
|
2021-07-19 05:41:40 +02:00
|
|
|
Ok(HighlightingAssets::new(
|
2021-08-24 07:58:03 +02:00
|
|
|
SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
asset_from_cache(
|
|
|
|
&cache_path.join("minimal_syntaxes.bin"),
|
|
|
|
"minimal syntax sets",
|
|
|
|
COMPRESS_MINIMAL_SYNTAXES,
|
|
|
|
)?,
|
2021-09-07 17:21:48 +02:00
|
|
|
asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
|
2021-07-19 05:41:40 +02:00
|
|
|
))
|
2018-05-10 12:36:09 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 16:48:27 +01:00
|
|
|
pub fn from_binary() -> Self {
|
2021-07-22 10:39:39 +02:00
|
|
|
HighlightingAssets::new(
|
2021-08-24 07:58:03 +02:00
|
|
|
SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
get_integrated_minimal_syntaxes(),
|
2021-07-22 10:39:39 +02:00
|
|
|
get_integrated_themeset(),
|
|
|
|
)
|
2018-05-10 12:36:09 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:31:32 +01:00
|
|
|
pub fn set_fallback_theme(&mut self, theme: &'static str) {
|
|
|
|
self.fallback_theme = Some(theme);
|
|
|
|
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Returns the complete [`SyntaxSet`]. If the cell is still empty, it is
/// first filled by deserializing `serialized_syntax_set`.
///
/// # Errors
///
/// Returns the error of the deserialization if it fails.
pub(crate) fn get_syntax_set(&self) -> Result<&SyntaxSet> {
    let deserialize = || self.serialized_syntax_set.deserialize();
    self.syntax_set_cell.try_borrow_with(deserialize)
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Use [Self::get_syntaxes] instead
|
|
|
|
#[deprecated]
|
2020-03-21 20:01:36 +01:00
|
|
|
pub fn syntaxes(&self) -> &[SyntaxReference] {
|
2021-07-27 09:43:51 +02:00
|
|
|
self.get_syntax_set()
|
|
|
|
.expect(".syntaxes() is deprecated, use .get_syntaxes() instead")
|
|
|
|
.syntaxes()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns all syntaxes of the complete syntax set.
///
/// # Errors
///
/// Returns an error if the complete syntax set cannot be loaded.
pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
    self.get_syntax_set()
        .map(|syntax_set| syntax_set.syntaxes())
}
|
|
|
|
|
2021-07-29 12:47:26 +02:00
|
|
|
fn get_theme_set(&self) -> &ThemeSet {
|
|
|
|
&self.theme_set
|
|
|
|
}
|
|
|
|
|
2020-04-22 22:05:54 +02:00
|
|
|
pub fn themes(&self) -> impl Iterator<Item = &str> {
|
2021-07-29 12:47:26 +02:00
|
|
|
self.get_theme_set().themes.keys().map(|s| s.as_ref())
|
2020-03-21 20:01:36 +01:00
|
|
|
}
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
/// Finds a [SyntaxSet] that contains a [SyntaxReference] by its name. First
|
|
|
|
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
|
|
|
|
/// [SyntaxSet] that contains all syntaxes.
|
|
|
|
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
|
|
|
|
let minimal_syntax_set = self
|
|
|
|
.minimal_syntaxes
|
|
|
|
.by_name
|
|
|
|
.get(&name.to_ascii_lowercase())
|
|
|
|
.and_then(|index| self.get_minimal_syntax_set_with_index(*index));
|
|
|
|
|
|
|
|
match minimal_syntax_set {
|
|
|
|
Some(syntax_set) => Ok(syntax_set),
|
|
|
|
None => self.get_syntax_set(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
|
|
|
|
let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
|
|
|
|
asset_from_contents(
|
|
|
|
&serialized_syntax_set[..],
|
|
|
|
&format!("minimal syntax set {}", index),
|
|
|
|
COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
|
|
|
|
)
|
|
|
|
.map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the minimal syntax set with the given index, loading it into its
/// cell if the cell is still empty. Yields `None` if there is no cell for
/// `index` or if loading fails.
fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
    let cell = self.deserialized_minimal_syntaxes.get(index)?;
    let loaded = cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index));
    loaded.ok()
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Use [Self::get_syntax_for_file_name] instead
|
|
|
|
#[deprecated]
|
2020-09-06 14:08:13 +02:00
|
|
|
pub fn syntax_for_file_name(
|
|
|
|
&self,
|
|
|
|
file_name: impl AsRef<Path>,
|
|
|
|
mapping: &SyntaxMapping,
|
|
|
|
) -> Option<&SyntaxReference> {
|
2021-08-08 08:26:17 +02:00
|
|
|
self.get_syntax_for_file_name(file_name, mapping)
|
|
|
|
.expect(
|
|
|
|
".syntax_for_file_name() is deprecated, use .get_syntax_for_file_name() instead",
|
|
|
|
)
|
|
|
|
.map(|syntax_in_set| syntax_in_set.syntax)
|
2021-07-27 09:43:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_syntax_for_file_name(
|
|
|
|
&self,
|
|
|
|
file_name: impl AsRef<Path>,
|
|
|
|
mapping: &SyntaxMapping,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2020-09-06 14:08:13 +02:00
|
|
|
let file_name = file_name.as_ref();
|
2021-07-27 09:43:51 +02:00
|
|
|
Ok(match mapping.get_syntax_for(file_name) {
|
2020-09-06 14:08:13 +02:00
|
|
|
Some(MappingTarget::MapToUnknown) => None,
|
2021-09-13 17:37:05 +02:00
|
|
|
Some(MappingTarget::MapTo(syntax_name)) => self.find_syntax_by_name(syntax_name)?,
|
2021-07-27 09:43:51 +02:00
|
|
|
None => self.get_extension_syntax(file_name.as_os_str())?,
|
|
|
|
})
|
2020-09-06 14:08:13 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:16:32 +01:00
|
|
|
pub(crate) fn get_theme(&self, theme: &str) -> &Theme {
|
2021-07-29 12:47:26 +02:00
|
|
|
match self.get_theme_set().themes.get(theme) {
|
2018-07-23 21:38:45 +02:00
|
|
|
Some(theme) => theme,
|
|
|
|
None => {
|
2020-11-29 23:16:54 +01:00
|
|
|
if theme == "ansi-light" || theme == "ansi-dark" {
|
2020-12-27 22:51:24 +01:00
|
|
|
bat_warning!("Theme '{}' is deprecated, using 'ansi' instead.", theme);
|
2020-11-29 23:16:54 +01:00
|
|
|
return self.get_theme("ansi");
|
|
|
|
}
|
2021-05-21 14:19:15 +02:00
|
|
|
if !theme.is_empty() {
|
2020-12-27 22:51:24 +01:00
|
|
|
bat_warning!("Unknown theme '{}', using default.", theme)
|
2020-03-21 21:45:03 +01:00
|
|
|
}
|
2021-07-29 12:47:26 +02:00
|
|
|
&self.get_theme_set().themes
|
|
|
|
[self.fallback_theme.unwrap_or_else(|| Self::default_theme())]
|
2018-07-23 21:38:45 +02:00
|
|
|
}
|
|
|
|
}
|
2018-07-21 08:26:24 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:42:10 +01:00
|
|
|
pub(crate) fn get_syntax(
|
2018-10-07 13:26:50 +02:00
|
|
|
&self,
|
|
|
|
language: Option<&str>,
|
2020-04-22 16:27:34 +02:00
|
|
|
input: &mut OpenedInput,
|
2018-10-17 22:30:09 +02:00
|
|
|
mapping: &SyntaxMapping,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<SyntaxReferenceInSet> {
|
2020-05-16 01:19:41 +02:00
|
|
|
if let Some(language) = language {
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
let syntax_set = self.get_syntax_set_by_name(language)?;
|
2021-09-10 21:52:09 +02:00
|
|
|
return syntax_set
|
2020-05-16 02:52:33 +02:00
|
|
|
.find_syntax_by_token(language)
|
2021-08-08 08:26:17 +02:00
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })
|
2021-09-10 21:52:09 +02:00
|
|
|
.ok_or_else(|| Error::UnknownSyntax(language.to_owned()));
|
|
|
|
}
|
|
|
|
|
|
|
|
let line_syntax = self.get_first_line_syntax(&mut input.reader)?;
|
|
|
|
|
|
|
|
// Get the path of the file:
|
|
|
|
// If this was set by the metadata, that will take priority.
|
|
|
|
// If it wasn't, it will use the real file path (if available).
|
|
|
|
let path_str = input
|
|
|
|
.metadata
|
|
|
|
.user_provided_name
|
|
|
|
.as_ref()
|
|
|
|
.or_else(|| match input.kind {
|
|
|
|
OpenedInputKind::OrdinaryFile(ref path) => Some(path),
|
|
|
|
_ => None,
|
|
|
|
});
|
|
|
|
|
|
|
|
if let Some(path_str) = path_str {
|
|
|
|
// If a path was provided, we try and detect the syntax based on extension mappings.
|
|
|
|
let path = Path::new(path_str);
|
|
|
|
let absolute_path = PathAbs::new(path)
|
|
|
|
.ok()
|
|
|
|
.map(|p| p.as_path().to_path_buf())
|
|
|
|
.unwrap_or_else(|| path.to_owned());
|
|
|
|
|
|
|
|
match mapping.get_syntax_for(absolute_path) {
|
|
|
|
Some(MappingTarget::MapToUnknown) => line_syntax
|
|
|
|
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into())),
|
|
|
|
|
2021-09-13 17:37:05 +02:00
|
|
|
Some(MappingTarget::MapTo(syntax_name)) => self
|
|
|
|
.find_syntax_by_name(syntax_name)?
|
|
|
|
.ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())),
|
2021-09-10 21:52:09 +02:00
|
|
|
|
|
|
|
None => {
|
|
|
|
let file_name = path.file_name().unwrap_or_default();
|
|
|
|
self.get_extension_syntax(file_name)?
|
|
|
|
.or(line_syntax)
|
|
|
|
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into()))
|
2020-03-22 09:55:13 +01:00
|
|
|
}
|
2018-05-18 12:30:30 +02:00
|
|
|
}
|
2021-09-10 21:52:09 +02:00
|
|
|
} else {
|
|
|
|
// If a path wasn't provided, we fall back to the detect first-line syntax.
|
|
|
|
line_syntax.ok_or_else(|| Error::UndetectedSyntax("[unknown]".into()))
|
2020-05-16 02:52:33 +02:00
|
|
|
}
|
2018-05-18 12:30:30 +02:00
|
|
|
}
|
2020-04-19 16:56:53 +02:00
|
|
|
|
2021-09-13 17:37:05 +02:00
|
|
|
fn find_syntax_by_name(&self, syntax_name: &str) -> Result<Option<SyntaxReferenceInSet>> {
|
|
|
|
let syntax_set = self.get_syntax_set()?;
|
|
|
|
Ok(syntax_set
|
|
|
|
.find_syntax_by_name(syntax_name)
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
|
|
|
}
|
|
|
|
|
2021-09-13 17:38:54 +02:00
|
|
|
fn find_syntax_by_extension(&self, extension: &str) -> Result<Option<SyntaxReferenceInSet>> {
|
|
|
|
let syntax_set = self.get_syntax_set()?;
|
|
|
|
Ok(syntax_set
|
|
|
|
.find_syntax_by_extension(extension)
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
|
|
|
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
/// Detects the syntax for `file_name` in three steps, stopping at the first
/// hit: the whole file name, then its extension, and finally the file name
/// with an ignored suffix stripped.
fn get_extension_syntax(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
    if let Some(by_name) = self.find_syntax_by_file_name(file_name)? {
        return Ok(Some(by_name));
    }

    if let Some(by_extension) = self.find_syntax_by_file_name_extension(file_name)? {
        return Ok(Some(by_extension));
    }

    self.get_extension_syntax_with_stripped_suffix(file_name)
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
/// Looks up a syntax using the complete file name as the extension key.
///
/// A name that is not valid UTF-8 is looked up as the empty string.
fn find_syntax_by_file_name(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
    let lookup_key = file_name.to_str().unwrap_or_default();
    self.find_syntax_by_extension(lookup_key)
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
fn find_syntax_by_file_name_extension(
|
|
|
|
&self,
|
|
|
|
file_name: &OsStr,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-09-13 17:38:54 +02:00
|
|
|
self.find_syntax_by_extension(
|
|
|
|
Path::new(file_name)
|
|
|
|
.extension()
|
|
|
|
.and_then(|x| x.to_str())
|
|
|
|
.unwrap_or_default(),
|
|
|
|
)
|
2021-07-27 18:53:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// If we find an ignored suffix on the file name, e.g. '~', we strip it and
|
|
|
|
/// then try again to find a syntax without it. Note that we do this recursively.
|
|
|
|
fn get_extension_syntax_with_stripped_suffix(
|
|
|
|
&self,
|
|
|
|
file_name: &OsStr,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-07-27 18:53:28 +02:00
|
|
|
let file_path = Path::new(file_name);
|
2021-07-27 09:43:51 +02:00
|
|
|
let mut syntax = None;
|
2021-07-27 18:53:28 +02:00
|
|
|
if let Some(file_str) = file_path.to_str() {
|
2021-09-10 21:56:40 +02:00
|
|
|
for suffix in &IGNORED_SUFFIXES {
|
2021-07-27 18:53:28 +02:00
|
|
|
if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
|
2021-07-27 09:43:51 +02:00
|
|
|
syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?;
|
|
|
|
break;
|
2021-07-27 18:53:28 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-27 09:43:51 +02:00
|
|
|
Ok(syntax)
|
2020-04-19 16:56:53 +02:00
|
|
|
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
fn get_first_line_syntax(
|
|
|
|
&self,
|
|
|
|
reader: &mut InputReader,
|
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-07-27 09:43:51 +02:00
|
|
|
let syntax_set = self.get_syntax_set()?;
|
|
|
|
Ok(String::from_utf8(reader.first_line.clone())
|
2020-04-19 16:56:53 +02:00
|
|
|
.ok()
|
2021-08-08 08:26:17 +02:00
|
|
|
.and_then(|l| syntax_set.find_syntax_by_first_line(&l))
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
2020-04-19 16:56:53 +02:00
|
|
|
}
|
2020-03-21 19:35:04 +01:00
|
|
|
}
|
2020-03-21 20:42:10 +01:00
|
|
|
|
2021-08-24 07:58:03 +02:00
|
|
|
#[cfg(feature = "build-assets")]
|
|
|
|
pub use crate::build_assets::build_assets as build;
|
|
|
|
|
2021-07-22 10:39:39 +02:00
|
|
|
/// A `SyntaxSet` that has not been deserialized yet.
///
/// The set is held in this form so that loading it can be deferred until it is needed.
#[derive(Debug)]
enum SerializedSyntaxSet {
    /// Location of a user-generated cache file holding the serialized data.
    FromFile(PathBuf),

    /// Serialized data that is embedded into the bat binary.
    FromBinary(&'static [u8]),
}
|
|
|
|
|
|
|
|
impl SerializedSyntaxSet {
|
|
|
|
fn deserialize(&self) -> Result<SyntaxSet> {
|
|
|
|
match self {
|
2021-09-07 17:21:48 +02:00
|
|
|
SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
|
|
|
|
SerializedSyntaxSet::FromFile(ref path) => {
|
|
|
|
asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
|
|
|
|
}
|
2021-07-22 10:39:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-24 07:58:03 +02:00
|
|
|
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
|
2021-07-22 10:39:39 +02:00
|
|
|
include_bytes!("../assets/syntaxes.bin")
|
|
|
|
}
|
|
|
|
|
2021-08-24 07:58:03 +02:00
|
|
|
pub(crate) fn get_integrated_themeset() -> ThemeSet {
|
2021-09-07 17:21:48 +02:00
|
|
|
from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
|
|
|
|
}
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes {
|
|
|
|
from_binary(
|
|
|
|
include_bytes!("../assets/minimal_syntaxes.bin"),
|
|
|
|
COMPRESS_MINIMAL_SYNTAXES,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2021-09-07 17:21:48 +02:00
|
|
|
pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
|
|
|
|
asset_from_contents(v, "n/a", compressed)
|
|
|
|
.expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn asset_from_contents<T: serde::de::DeserializeOwned>(
|
|
|
|
contents: &[u8],
|
|
|
|
description: &str,
|
|
|
|
compressed: bool,
|
|
|
|
) -> Result<T> {
|
|
|
|
if compressed {
|
|
|
|
bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents))
|
|
|
|
} else {
|
|
|
|
bincode::deserialize_from(contents)
|
|
|
|
}
|
|
|
|
.map_err(|_| format!("Could not parse {}", description).into())
|
2021-07-22 10:28:39 +02:00
|
|
|
}
|
|
|
|
|
2021-09-07 17:21:48 +02:00
|
|
|
fn asset_from_cache<T: serde::de::DeserializeOwned>(
|
|
|
|
path: &Path,
|
|
|
|
description: &str,
|
|
|
|
compressed: bool,
|
|
|
|
) -> Result<T> {
|
2021-08-26 13:12:21 +02:00
|
|
|
let contents = fs::read(path).map_err(|_| {
|
2021-07-13 21:53:29 +02:00
|
|
|
format!(
|
|
|
|
"Could not load cached {} '{}'",
|
|
|
|
description,
|
|
|
|
path.to_string_lossy()
|
|
|
|
)
|
|
|
|
})?;
|
2021-09-07 17:21:48 +02:00
|
|
|
asset_from_contents(&contents[..], description, compressed)
|
|
|
|
.map_err(|_| format!("Could not parse cached {}", description).into())
|
2021-07-13 21:53:29 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:42:10 +01:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
2020-04-21 20:06:09 +02:00
|
|
|
use super::*;
|
|
|
|
|
2020-04-22 18:10:26 +02:00
|
|
|
use std::ffi::OsStr;
|
2020-03-21 20:42:10 +01:00
|
|
|
|
2020-05-12 05:10:04 +02:00
|
|
|
use std::fs::File;
|
2021-07-26 12:59:39 +02:00
|
|
|
use std::io::{BufReader, Write};
|
2021-02-16 08:50:41 +01:00
|
|
|
use tempfile::TempDir;
|
2020-03-21 20:42:10 +01:00
|
|
|
|
2020-04-22 18:10:26 +02:00
|
|
|
use crate::input::Input;
|
|
|
|
|
2020-03-22 09:55:13 +01:00
|
|
|
struct SyntaxDetectionTest<'a> {
|
2020-03-21 20:42:10 +01:00
|
|
|
assets: HighlightingAssets,
|
2020-03-22 09:55:13 +01:00
|
|
|
pub syntax_mapping: SyntaxMapping<'a>,
|
2020-05-24 16:09:59 +02:00
|
|
|
pub temp_dir: TempDir,
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|
|
|
|
|
2020-03-22 09:55:13 +01:00
|
|
|
impl<'a> SyntaxDetectionTest<'a> {
|
2020-03-21 20:42:10 +01:00
|
|
|
fn new() -> Self {
|
|
|
|
SyntaxDetectionTest {
|
|
|
|
assets: HighlightingAssets::from_binary(),
|
2020-03-22 09:55:13 +01:00
|
|
|
syntax_mapping: SyntaxMapping::builtin(),
|
2021-02-16 08:50:41 +01:00
|
|
|
temp_dir: TempDir::new().expect("creation of temporary directory"),
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-02 16:20:21 +02:00
|
|
|
fn get_syntax_name(
|
|
|
|
&self,
|
|
|
|
language: Option<&str>,
|
|
|
|
input: &mut OpenedInput,
|
|
|
|
mapping: &SyntaxMapping,
|
|
|
|
) -> String {
|
|
|
|
self.assets
|
|
|
|
.get_syntax(language, input, mapping)
|
2021-08-08 08:26:17 +02:00
|
|
|
.map(|syntax_in_set| syntax_in_set.syntax.name.clone())
|
2021-08-02 16:20:21 +02:00
|
|
|
.unwrap_or_else(|_| "!no syntax!".to_owned())
|
|
|
|
}
|
|
|
|
|
2020-05-12 05:10:04 +02:00
|
|
|
fn syntax_for_real_file_with_content_os(
|
|
|
|
&self,
|
|
|
|
file_name: &OsStr,
|
|
|
|
first_line: &str,
|
|
|
|
) -> String {
|
|
|
|
let file_path = self.temp_dir.path().join(file_name);
|
|
|
|
{
|
|
|
|
let mut temp_file = File::create(&file_path).unwrap();
|
|
|
|
writeln!(temp_file, "{}", first_line).unwrap();
|
|
|
|
}
|
|
|
|
|
2021-03-02 09:15:49 +01:00
|
|
|
let input = Input::ordinary_file(&file_path);
|
2020-05-12 05:10:04 +02:00
|
|
|
let dummy_stdin: &[u8] = &[];
|
2021-02-27 15:32:07 +01:00
|
|
|
let mut opened_input = input.open(dummy_stdin, None).unwrap();
|
2020-05-12 05:10:04 +02:00
|
|
|
|
2021-08-02 16:20:21 +02:00
|
|
|
self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
|
2020-05-12 05:10:04 +02:00
|
|
|
}
|
|
|
|
|
2020-04-21 08:19:24 +02:00
|
|
|
fn syntax_for_file_with_content_os(&self, file_name: &OsStr, first_line: &str) -> String {
|
2020-03-21 20:42:10 +01:00
|
|
|
let file_path = self.temp_dir.path().join(file_name);
|
2020-05-12 04:23:56 +02:00
|
|
|
let input = Input::from_reader(Box::new(BufReader::new(first_line.as_bytes())))
|
2021-03-02 09:15:49 +01:00
|
|
|
.with_name(Some(&file_path));
|
2020-04-22 16:27:34 +02:00
|
|
|
let dummy_stdin: &[u8] = &[];
|
2021-02-27 15:32:07 +01:00
|
|
|
let mut opened_input = input.open(dummy_stdin, None).unwrap();
|
2020-03-21 20:42:10 +01:00
|
|
|
|
2021-08-02 16:20:21 +02:00
|
|
|
self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|
|
|
|
|
2020-05-27 05:37:37 +02:00
|
|
|
#[cfg(unix)]
|
2020-04-21 08:19:24 +02:00
|
|
|
fn syntax_for_file_os(&self, file_name: &OsStr) -> String {
|
|
|
|
self.syntax_for_file_with_content_os(file_name, "")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn syntax_for_file_with_content(&self, file_name: &str, first_line: &str) -> String {
|
|
|
|
self.syntax_for_file_with_content_os(OsStr::new(file_name), first_line)
|
|
|
|
}
|
|
|
|
|
2020-03-22 10:54:37 +01:00
|
|
|
fn syntax_for_file(&self, file_name: &str) -> String {
|
2020-04-05 02:49:55 +02:00
|
|
|
self.syntax_for_file_with_content(file_name, "")
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|
2020-04-18 11:37:12 +02:00
|
|
|
|
|
|
|
fn syntax_for_stdin_with_content(&self, file_name: &str, content: &[u8]) -> String {
|
2021-03-02 09:15:49 +01:00
|
|
|
let input = Input::stdin().with_name(Some(file_name));
|
2021-02-27 15:32:07 +01:00
|
|
|
let mut opened_input = input.open(content, None).unwrap();
|
2020-04-22 16:27:34 +02:00
|
|
|
|
2021-08-02 16:20:21 +02:00
|
|
|
self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
|
2020-04-18 11:37:12 +02:00
|
|
|
}
|
2020-05-12 05:10:04 +02:00
|
|
|
|
2020-05-13 09:47:18 +02:00
|
|
|
fn syntax_is_same_for_inputkinds(&self, file_name: &str, content: &str) -> bool {
|
|
|
|
let as_file = self.syntax_for_real_file_with_content_os(file_name.as_ref(), content);
|
|
|
|
let as_reader = self.syntax_for_file_with_content_os(file_name.as_ref(), content);
|
|
|
|
let consistent = as_file == as_reader;
|
|
|
|
// TODO: Compare StdIn somehow?
|
|
|
|
|
|
|
|
if !consistent {
|
|
|
|
eprintln!(
|
|
|
|
"Inconsistent syntax detection:\nFor File: {}\nFor Reader: {}",
|
|
|
|
as_file, as_reader
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
consistent
|
2020-05-12 05:10:04 +02:00
|
|
|
}
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Detection by extension and by well-known file name, using the builtin mapping.
#[test]
fn syntax_detection_basic() {
    let test = SyntaxDetectionTest::new();

    let expectations = [
        ("test.rs", "Rust"),
        ("test.cpp", "C++"),
        ("test.build", "NAnt Build File"),
        ("PKGBUILD", "Bourne Again Shell (bash)"),
        (".bashrc", "Bourne Again Shell (bash)"),
        ("Makefile", "Makefile"),
    ];
    for &(file_name, expected) in &expectations {
        assert_eq!(test.syntax_for_file(file_name), expected);
    }
}
|
|
|
|
|
2020-04-21 08:19:24 +02:00
|
|
|
/// A file name that is not valid UTF-8 is still detected through its extension.
#[cfg(unix)]
#[test]
fn syntax_detection_invalid_utf8() {
    use std::os::unix::ffi::OsStrExt;

    let test = SyntaxDetectionTest::new();
    let broken_name = OsStr::from_bytes(b"invalid_\xFEutf8_filename.rs");

    assert_eq!(test.syntax_for_file_os(broken_name), "Rust");
}
|
|
|
|
|
2020-05-12 05:10:04 +02:00
|
|
|
/// Real files and in-memory readers must be detected identically, including when custom
/// mapping rules are in effect.
#[test]
fn syntax_detection_same_for_inputkinds() {
    let mut test = SyntaxDetectionTest::new();

    let _ = test
        .syntax_mapping
        .insert("*.myext", MappingTarget::MapTo("C"));
    let _ = test
        .syntax_mapping
        .insert("MY_FILE", MappingTarget::MapTo("Markdown"));

    let inputs = [
        ("Test.md", ""),
        ("Test.txt", "#!/bin/bash"),
        (".bashrc", ""),
        ("test.h", ""),
        ("test.js", "#!/bin/bash"),
        ("test.myext", ""),
        ("MY_FILE", ""),
        ("MY_FILE", "<?php"),
    ];
    for &(file_name, content) in &inputs {
        assert!(test.syntax_is_same_for_inputkinds(file_name, content));
    }
}
|
|
|
|
|
2020-03-21 20:42:10 +01:00
|
|
|
/// Pins the syntax that is chosen for each of these extensions.
#[test]
fn syntax_detection_well_defined_mapping_for_duplicate_extensions() {
    let test = SyntaxDetectionTest::new();

    let expectations = [
        ("test.h", "C++"),
        ("test.sass", "Sass"),
        ("test.js", "JavaScript (Babel)"),
        ("test.fs", "F#"),
        ("test.v", "Verilog"),
    ];
    for &(file_name, expected) in &expectations {
        assert_eq!(test.syntax_for_file(file_name), expected);
    }
}
|
|
|
|
|
|
|
|
/// Files without a telling name are detected from their first line.
#[test]
fn syntax_detection_first_line() {
    let test = SyntaxDetectionTest::new();

    let expectations = [
        ("my_script", "#!/bin/bash", "Bourne Again Shell (bash)"),
        ("build", "#!/bin/bash", "Bourne Again Shell (bash)"),
        ("my_script", "<?php", "PHP"),
    ];
    for &(file_name, first_line, expected) in &expectations {
        assert_eq!(
            test.syntax_for_file_with_content(file_name, first_line),
            expected
        );
    }
}
|
|
|
|
|
|
|
|
/// A rule added to the mapping overrides the builtin choice for the same file.
#[test]
fn syntax_detection_with_custom_mapping() {
    let mut test = SyntaxDetectionTest::new();
    let header = "test.h";

    // Builtin behavior first ...
    assert_eq!(test.syntax_for_file(header), "C++");

    // ... then the same file once "*.h" has been remapped.
    let _ = test.syntax_mapping.insert("*.h", MappingTarget::MapTo("C"));
    assert_eq!(test.syntax_for_file(header), "C");
}
|
2020-03-22 09:55:13 +01:00
|
|
|
|
|
|
|
/// An upper-case extension is not detected as Markdown until a matching rule is added.
#[test]
fn syntax_detection_is_case_sensitive() {
    let mut test = SyntaxDetectionTest::new();
    let readme = "README.MD";

    assert_ne!(test.syntax_for_file(readme), "Markdown");

    let _ = test
        .syntax_mapping
        .insert("*.MD", MappingTarget::MapTo("Markdown"));
    assert_eq!(test.syntax_for_file(readme), "Markdown");
}
|
2020-03-26 23:49:16 +01:00
|
|
|
|
|
|
|
/// Standard input that has been given a name is detected like a file of that name.
#[test]
fn syntax_detection_stdin_filename() {
    let test = SyntaxDetectionTest::new();

    // from file extension
    let by_extension = test.syntax_for_stdin_with_content("test.cpp", b"a");
    assert_eq!(by_extension, "C++");

    // from first line (fallback)
    let by_first_line = test.syntax_for_stdin_with_content("my_script", b"#!/bin/bash");
    assert_eq!(by_first_line, "Bourne Again Shell (bash)");
}
|
2020-05-24 10:22:32 +02:00
|
|
|
|
2020-05-24 16:09:59 +02:00
|
|
|
/// A file opened through the symlink `.ssh/config` is detected as "SSH Config", even though
/// the link target has an unrelated name.
#[cfg(unix)]
#[test]
fn syntax_detection_for_symlinked_file() {
    use std::os::unix::fs::symlink;

    let test = SyntaxDetectionTest::new();

    // The (empty) link target.
    let file_path = test.temp_dir.path().join("my_ssh_config_filename");
    drop(File::create(&file_path).unwrap());

    // The link itself lives at `<temp>/.ssh/config`.
    let ssh_dir = test.temp_dir.path().join(".ssh");
    let file_path_symlink = ssh_dir.join("config");
    std::fs::create_dir(&ssh_dir).expect("creation of directory succeeds");
    symlink(&file_path, &file_path_symlink).expect("creation of symbolic link succeeds");

    let no_stdin: &[u8] = &[];
    let mut opened_input = Input::ordinary_file(&file_path_symlink)
        .open(no_stdin, None)
        .unwrap();

    let detected = test.get_syntax_name(None, &mut opened_input, &test.syntax_mapping);
    assert_eq!(detected, "SSH Config");
}
|
2020-03-21 20:42:10 +01:00
|
|
|
}
|