2020-04-21 08:19:24 +02:00
|
|
|
use std::ffi::OsStr;
|
2021-07-26 12:59:39 +02:00
|
|
|
use std::fs;
|
2021-09-15 07:59:33 +02:00
|
|
|
use std::path::Path;
|
2021-07-22 10:39:39 +02:00
|
|
|
|
|
|
|
use lazycell::LazyCell;
|
2018-10-09 21:18:40 +02:00
|
|
|
|
2018-05-10 12:36:09 +02:00
|
|
|
use syntect::highlighting::{Theme, ThemeSet};
|
2021-08-10 22:18:47 +02:00
|
|
|
use syntect::parsing::{SyntaxReference, SyntaxSet};
|
2018-10-09 21:18:40 +02:00
|
|
|
|
2020-05-26 07:50:52 +02:00
|
|
|
use path_abs::PathAbs;
|
2020-05-24 16:09:59 +02:00
|
|
|
|
2020-12-27 22:51:24 +01:00
|
|
|
use crate::bat_warning;
|
2020-04-22 21:45:47 +02:00
|
|
|
use crate::error::*;
|
2021-09-26 10:00:40 +02:00
|
|
|
use crate::input::{InputReader, OpenedInput};
|
2020-03-22 09:55:13 +01:00
|
|
|
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
|
2018-08-28 20:12:45 +02:00
|
|
|
|
2021-09-15 07:59:33 +02:00
|
|
|
use ignored_suffixes::*;
|
|
|
|
use minimal_assets::*;
|
|
|
|
use serialized_syntax_set::*;
|
|
|
|
|
|
|
|
#[cfg(feature = "build-assets")]
|
|
|
|
pub use crate::assets::build_assets::*;
|
|
|
|
|
|
|
|
pub(crate) mod assets_metadata;
|
|
|
|
#[cfg(feature = "build-assets")]
|
|
|
|
mod build_assets;
|
|
|
|
mod ignored_suffixes;
|
|
|
|
mod minimal_assets;
|
|
|
|
mod serialized_syntax_set;
|
|
|
|
|
2019-10-15 03:25:53 +02:00
|
|
|
#[derive(Debug)]
|
2018-05-10 12:36:09 +02:00
|
|
|
pub struct HighlightingAssets {
|
2021-07-22 10:39:39 +02:00
|
|
|
syntax_set_cell: LazyCell<SyntaxSet>,
|
2021-08-24 07:58:03 +02:00
|
|
|
serialized_syntax_set: SerializedSyntaxSet,
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
|
2021-09-15 07:59:33 +02:00
|
|
|
minimal_assets: MinimalAssets,
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
|
2021-07-29 12:47:26 +02:00
|
|
|
theme_set: ThemeSet,
|
2020-03-21 20:31:32 +01:00
|
|
|
fallback_theme: Option<&'static str>,
|
2018-05-10 12:36:09 +02:00
|
|
|
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct SyntaxReferenceInSet<'a> {
|
|
|
|
pub syntax: &'a SyntaxReference,
|
|
|
|
pub syntax_set: &'a SyntaxSet,
|
|
|
|
}
|
|
|
|
|
2021-09-16 19:37:10 +02:00
|
|
|
/// Compression switch for syntaxes: ~700 kB compressed versus ~4600 kB
/// uncompressed, paid for with ~30% longer deserialization time.
pub(crate) const COMPRESS_SYNTAXES: bool = true;
|
|
|
|
|
2021-09-16 19:37:10 +02:00
|
|
|
/// Compression switch for the theme set: ~20 kB compressed versus ~200 kB
/// uncompressed, paid for with ~30% longer deserialization time.
pub(crate) const COMPRESS_THEMES: bool = true;
|
|
|
|
|
2021-09-16 19:37:10 +02:00
|
|
|
/// Compression switch for the serialized minimal syntaxes: ~400 kB compressed
/// versus ~2100 kB uncompressed, paid for with ~30% longer deserialization
/// time.
pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
|
|
|
|
|
2021-09-16 19:37:10 +02:00
|
|
|
/// Whether the serialized form of [MinimalSyntaxes] itself is compressed.
///
/// Shall always be `false`: the data in
/// [MinimalSyntaxes.serialized_syntax_sets] has already been compressed
/// (assuming [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES] is `true`), and the "outer"
/// data structures like `by_name` are tiny. If we compress, deserialization
/// can't do efficient byte-by-byte copy of `serialized_syntax_sets`.
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
|
|
|
|
|
2018-05-10 12:36:09 +02:00
|
|
|
impl HighlightingAssets {
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
fn new(
|
|
|
|
serialized_syntax_set: SerializedSyntaxSet,
|
|
|
|
minimal_syntaxes: MinimalSyntaxes,
|
|
|
|
theme_set: ThemeSet,
|
|
|
|
) -> Self {
|
2021-07-19 05:41:40 +02:00
|
|
|
HighlightingAssets {
|
2021-08-24 07:58:03 +02:00
|
|
|
syntax_set_cell: LazyCell::new(),
|
2021-07-22 10:39:39 +02:00
|
|
|
serialized_syntax_set,
|
2021-09-15 07:59:33 +02:00
|
|
|
minimal_assets: MinimalAssets::new(minimal_syntaxes),
|
2021-07-19 05:41:40 +02:00
|
|
|
theme_set,
|
|
|
|
fallback_theme: None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-21 20:31:32 +01:00
|
|
|
/// Name of the theme used when neither the requested theme nor a fallback
/// theme is available.
pub fn default_theme() -> &'static str {
    const DEFAULT_THEME_NAME: &str = "Monokai Extended";
    DEFAULT_THEME_NAME
}
|
|
|
|
|
2020-04-21 15:50:46 +02:00
|
|
|
pub fn from_cache(cache_path: &Path) -> Result<Self> {
|
2021-07-19 05:41:40 +02:00
|
|
|
Ok(HighlightingAssets::new(
|
2021-08-24 07:58:03 +02:00
|
|
|
SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
asset_from_cache(
|
|
|
|
&cache_path.join("minimal_syntaxes.bin"),
|
|
|
|
"minimal syntax sets",
|
|
|
|
COMPRESS_MINIMAL_SYNTAXES,
|
|
|
|
)?,
|
2021-09-07 17:21:48 +02:00
|
|
|
asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
|
2021-07-19 05:41:40 +02:00
|
|
|
))
|
2018-05-10 12:36:09 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 16:48:27 +01:00
|
|
|
pub fn from_binary() -> Self {
|
2021-07-22 10:39:39 +02:00
|
|
|
HighlightingAssets::new(
|
2021-08-24 07:58:03 +02:00
|
|
|
SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
get_integrated_minimal_syntaxes(),
|
2021-07-22 10:39:39 +02:00
|
|
|
get_integrated_themeset(),
|
|
|
|
)
|
2018-05-10 12:36:09 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:31:32 +01:00
|
|
|
pub fn set_fallback_theme(&mut self, theme: &'static str) {
|
|
|
|
self.fallback_theme = Some(theme);
|
|
|
|
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Returns the [SyntaxSet] that contains all syntaxes.
///
/// The deserialization of `serialized_syntax_set` is handed to
/// `syntax_set_cell` as the closure that fills the cell.
///
/// # Errors
///
/// Returns the error produced by the deserialization, if any.
pub(crate) fn get_syntax_set(&self) -> Result<&SyntaxSet> {
    let deserialize = || self.serialized_syntax_set.deserialize();
    self.syntax_set_cell.try_borrow_with(deserialize)
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Use [Self::get_syntaxes] instead
|
|
|
|
#[deprecated]
|
2020-03-21 20:01:36 +01:00
|
|
|
pub fn syntaxes(&self) -> &[SyntaxReference] {
|
2021-07-27 09:43:51 +02:00
|
|
|
self.get_syntax_set()
|
|
|
|
.expect(".syntaxes() is deprecated, use .get_syntaxes() instead")
|
|
|
|
.syntaxes()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns all syntaxes of the complete [SyntaxSet].
///
/// # Errors
///
/// Returns an error if the syntax set cannot be obtained.
pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
    let syntax_set = self.get_syntax_set()?;
    Ok(syntax_set.syntaxes())
}
|
|
|
|
|
2021-07-29 12:47:26 +02:00
|
|
|
fn get_theme_set(&self) -> &ThemeSet {
|
|
|
|
&self.theme_set
|
|
|
|
}
|
|
|
|
|
2020-04-22 22:05:54 +02:00
|
|
|
pub fn themes(&self) -> impl Iterator<Item = &str> {
|
2021-07-29 12:47:26 +02:00
|
|
|
self.get_theme_set().themes.keys().map(|s| s.as_ref())
|
2020-03-21 20:01:36 +01:00
|
|
|
}
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
/// Finds a [SyntaxSet] that contains a [SyntaxReference] by its name. First
|
|
|
|
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
|
|
|
|
/// [SyntaxSet] that contains all syntaxes.
|
|
|
|
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
|
2021-09-15 07:59:33 +02:00
|
|
|
match self.minimal_assets.get_syntax_set_by_name(name) {
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
Some(syntax_set) => Ok(syntax_set),
|
|
|
|
None => self.get_syntax_set(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-27 09:43:51 +02:00
|
|
|
/// Use [Self::get_syntax_for_file_name] instead
|
|
|
|
#[deprecated]
|
2020-09-06 14:08:13 +02:00
|
|
|
pub fn syntax_for_file_name(
|
|
|
|
&self,
|
|
|
|
file_name: impl AsRef<Path>,
|
|
|
|
mapping: &SyntaxMapping,
|
|
|
|
) -> Option<&SyntaxReference> {
|
2021-08-08 08:26:17 +02:00
|
|
|
self.get_syntax_for_file_name(file_name, mapping)
|
|
|
|
.expect(
|
|
|
|
".syntax_for_file_name() is deprecated, use .get_syntax_for_file_name() instead",
|
|
|
|
)
|
|
|
|
.map(|syntax_in_set| syntax_in_set.syntax)
|
2021-07-27 09:43:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_syntax_for_file_name(
|
|
|
|
&self,
|
|
|
|
file_name: impl AsRef<Path>,
|
|
|
|
mapping: &SyntaxMapping,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-09-25 09:47:31 +02:00
|
|
|
Ok(self.get_syntax_for_path(file_name.as_ref(), mapping).ok())
|
2020-09-06 14:08:13 +02:00
|
|
|
}
|
|
|
|
|
2021-09-25 09:33:25 +02:00
|
|
|
fn get_syntax_for_path(
|
|
|
|
&self,
|
|
|
|
path: impl AsRef<Path>,
|
|
|
|
mapping: &SyntaxMapping,
|
|
|
|
) -> Result<SyntaxReferenceInSet> {
|
|
|
|
let path = path.as_ref();
|
|
|
|
match mapping.get_syntax_for(path) {
|
|
|
|
Some(MappingTarget::MapToUnknown) => {
|
|
|
|
Err(Error::UndetectedSyntax(path.to_string_lossy().into()))
|
|
|
|
}
|
|
|
|
|
|
|
|
Some(MappingTarget::MapTo(syntax_name)) => self
|
|
|
|
.find_syntax_by_name(syntax_name)?
|
|
|
|
.ok_or_else(|| Error::UnknownSyntax(syntax_name.to_owned())),
|
|
|
|
|
|
|
|
None => {
|
|
|
|
let file_name = path.file_name().unwrap_or_default();
|
|
|
|
self.get_extension_syntax(file_name)?
|
|
|
|
.ok_or_else(|| Error::UndetectedSyntax(path.to_string_lossy().into()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-21 20:16:32 +01:00
|
|
|
pub(crate) fn get_theme(&self, theme: &str) -> &Theme {
|
2021-07-29 12:47:26 +02:00
|
|
|
match self.get_theme_set().themes.get(theme) {
|
2018-07-23 21:38:45 +02:00
|
|
|
Some(theme) => theme,
|
|
|
|
None => {
|
2020-11-29 23:16:54 +01:00
|
|
|
if theme == "ansi-light" || theme == "ansi-dark" {
|
2020-12-27 22:51:24 +01:00
|
|
|
bat_warning!("Theme '{}' is deprecated, using 'ansi' instead.", theme);
|
2020-11-29 23:16:54 +01:00
|
|
|
return self.get_theme("ansi");
|
|
|
|
}
|
2021-05-21 14:19:15 +02:00
|
|
|
if !theme.is_empty() {
|
2020-12-27 22:51:24 +01:00
|
|
|
bat_warning!("Unknown theme '{}', using default.", theme)
|
2020-03-21 21:45:03 +01:00
|
|
|
}
|
2021-07-29 12:47:26 +02:00
|
|
|
&self.get_theme_set().themes
|
|
|
|
[self.fallback_theme.unwrap_or_else(|| Self::default_theme())]
|
2018-07-23 21:38:45 +02:00
|
|
|
}
|
|
|
|
}
|
2018-07-21 08:26:24 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:42:10 +01:00
|
|
|
pub(crate) fn get_syntax(
|
2018-10-07 13:26:50 +02:00
|
|
|
&self,
|
|
|
|
language: Option<&str>,
|
2020-04-22 16:27:34 +02:00
|
|
|
input: &mut OpenedInput,
|
2018-10-17 22:30:09 +02:00
|
|
|
mapping: &SyntaxMapping,
|
2021-08-08 08:26:17 +02:00
|
|
|
) -> Result<SyntaxReferenceInSet> {
|
2020-05-16 01:19:41 +02:00
|
|
|
if let Some(language) = language {
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
let syntax_set = self.get_syntax_set_by_name(language)?;
|
2021-09-10 21:52:09 +02:00
|
|
|
return syntax_set
|
2020-05-16 02:52:33 +02:00
|
|
|
.find_syntax_by_token(language)
|
2021-08-08 08:26:17 +02:00
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set })
|
2021-09-10 21:52:09 +02:00
|
|
|
.ok_or_else(|| Error::UnknownSyntax(language.to_owned()));
|
|
|
|
}
|
|
|
|
|
2021-09-26 10:00:40 +02:00
|
|
|
let path = input.path();
|
2021-09-16 17:02:01 +02:00
|
|
|
let path_syntax = if let Some(path) = path {
|
2021-09-10 21:52:09 +02:00
|
|
|
// If a path was provided, we try and detect the syntax based on extension mappings.
|
2021-09-25 09:33:25 +02:00
|
|
|
self.get_syntax_for_path(
|
2021-09-16 17:02:18 +02:00
|
|
|
PathAbs::new(path).map_or_else(|_| path.to_owned(), |p| p.as_path().to_path_buf()),
|
2021-09-25 09:33:25 +02:00
|
|
|
mapping,
|
|
|
|
)
|
2021-09-10 21:52:09 +02:00
|
|
|
} else {
|
2021-09-16 17:01:12 +02:00
|
|
|
Err(Error::UndetectedSyntax("[unknown]".into()))
|
|
|
|
};
|
|
|
|
|
|
|
|
match path_syntax {
|
|
|
|
// If a path wasn't provided, or if path based syntax detection
|
|
|
|
// above failed, we fall back to first-line syntax detection.
|
|
|
|
Err(Error::UndetectedSyntax(path)) => self
|
|
|
|
.get_first_line_syntax(&mut input.reader)?
|
|
|
|
.ok_or(Error::UndetectedSyntax(path)),
|
|
|
|
_ => path_syntax,
|
2020-05-16 02:52:33 +02:00
|
|
|
}
|
2018-05-18 12:30:30 +02:00
|
|
|
}
|
2020-04-19 16:56:53 +02:00
|
|
|
|
2021-09-13 17:37:05 +02:00
|
|
|
fn find_syntax_by_name(&self, syntax_name: &str) -> Result<Option<SyntaxReferenceInSet>> {
|
|
|
|
let syntax_set = self.get_syntax_set()?;
|
|
|
|
Ok(syntax_set
|
|
|
|
.find_syntax_by_name(syntax_name)
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
|
|
|
}
|
|
|
|
|
2021-09-18 06:50:54 +02:00
|
|
|
fn find_syntax_by_extension(&self, e: Option<&OsStr>) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-09-13 17:38:54 +02:00
|
|
|
let syntax_set = self.get_syntax_set()?;
|
2021-09-18 06:50:54 +02:00
|
|
|
let extension = e.and_then(|x| x.to_str()).unwrap_or_default();
|
2021-09-13 17:38:54 +02:00
|
|
|
Ok(syntax_set
|
|
|
|
.find_syntax_by_extension(extension)
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
|
|
|
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
fn get_extension_syntax(&self, file_name: &OsStr) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-09-18 06:50:54 +02:00
|
|
|
let mut syntax = self.find_syntax_by_extension(Some(file_name))?;
|
2021-07-27 09:43:51 +02:00
|
|
|
if syntax.is_none() {
|
2021-09-18 06:50:54 +02:00
|
|
|
syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
|
2021-07-27 09:43:51 +02:00
|
|
|
}
|
|
|
|
if syntax.is_none() {
|
2021-09-15 07:59:33 +02:00
|
|
|
syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
|
|
|
|
self.get_extension_syntax(stripped_file_name) // Note: recursion
|
|
|
|
})?;
|
2021-07-27 09:43:51 +02:00
|
|
|
}
|
|
|
|
Ok(syntax)
|
2021-07-27 18:53:28 +02:00
|
|
|
}
|
|
|
|
|
2021-08-08 08:26:17 +02:00
|
|
|
fn get_first_line_syntax(
|
|
|
|
&self,
|
|
|
|
reader: &mut InputReader,
|
|
|
|
) -> Result<Option<SyntaxReferenceInSet>> {
|
2021-07-27 09:43:51 +02:00
|
|
|
let syntax_set = self.get_syntax_set()?;
|
|
|
|
Ok(String::from_utf8(reader.first_line.clone())
|
2020-04-19 16:56:53 +02:00
|
|
|
.ok()
|
2021-08-08 08:26:17 +02:00
|
|
|
.and_then(|l| syntax_set.find_syntax_by_first_line(&l))
|
|
|
|
.map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
|
2020-04-19 16:56:53 +02:00
|
|
|
}
|
2020-03-21 19:35:04 +01:00
|
|
|
}
|
2020-03-21 20:42:10 +01:00
|
|
|
|
2021-08-24 07:58:03 +02:00
|
|
|
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
|
2021-07-22 10:39:39 +02:00
|
|
|
include_bytes!("../assets/syntaxes.bin")
|
|
|
|
}
|
|
|
|
|
2021-08-24 07:58:03 +02:00
|
|
|
pub(crate) fn get_integrated_themeset() -> ThemeSet {
|
2021-09-07 17:21:48 +02:00
|
|
|
from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
|
|
|
|
}
|
|
|
|
|
Load independent and minimal syntax sets when using --language (#1787)
This significantly speeds up the startup time of bat, since only a single
linked SyntaxDefinition is loaded for each file. The size increase of the
binary is just ~400 kB.
In order for startup time to be improved, the --language arg must be used, and
it must match one of the following names:
"Plain Text", "ActionScript", "AppleScript", "Batch File", "NAnt Build File",
"C#", "C", "CSS", "D", "Diff", "Erlang", "Go", "Haskell", "JSON", "Java
Properties", "BibTeX", "LaTeX Log", "TeX", "Lisp", "Lua", "MATLAB", "Pascal",
"R", "Regular Expression", "Rust", "SQL", "Scala", "Tcl", "XML", "YAML", "Apache
Conf", "ARM Assembly", "Assembly (x86_64)", "CMakeCache", "Comma Separated
Values", "Cabal", "CoffeeScript", "CpuInfo", "Dart Analysis Output", "Dart",
"Dockerfile", "DotENV", "F#", "Friendly Interactive Shell (fish)", "Fortran
(Fixed Form)", "Fortran (Modern)", "Fortran Namelist", "fstab", "GLSL",
"GraphQL", "Groff/troff", "group", "hosts", "INI", "Jinja2", "jsonnet",
"Kotlin", "Less", "LLVM", "Lean", "MemInfo", "Nim", "Ninja", "Nix", "passwd",
"PowerShell", "Protocol Buffer (TEXT)", "Puppet", "Rego", "resolv", "Robot
Framework", "SML", "Strace", "Stylus", "Solidity", "Vyper", "Swift",
"SystemVerilog", "TOML", "Terraform", "TypeScript", "TypeScriptReact",
"Verilog", "VimL", "Zig", "gnuplot", "log", "requirements.txt", "Highlight
non-printables", "Private Key", "varlink"
Later commits will improve startup time for more code paths.
* fix some typos and misspellings
* CHANGELOG.md: Add Performance section (preliminary)
* Add a CHANGELOG.md entry for this PR
2021-09-09 20:52:33 +02:00
|
|
|
fn get_integrated_minimal_syntaxes() -> MinimalSyntaxes {
|
|
|
|
from_binary(
|
|
|
|
include_bytes!("../assets/minimal_syntaxes.bin"),
|
|
|
|
COMPRESS_MINIMAL_SYNTAXES,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2021-09-07 17:21:48 +02:00
|
|
|
pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
|
|
|
|
asset_from_contents(v, "n/a", compressed)
|
|
|
|
.expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn asset_from_contents<T: serde::de::DeserializeOwned>(
|
|
|
|
contents: &[u8],
|
|
|
|
description: &str,
|
|
|
|
compressed: bool,
|
|
|
|
) -> Result<T> {
|
|
|
|
if compressed {
|
|
|
|
bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents))
|
|
|
|
} else {
|
|
|
|
bincode::deserialize_from(contents)
|
|
|
|
}
|
|
|
|
.map_err(|_| format!("Could not parse {}", description).into())
|
2021-07-22 10:28:39 +02:00
|
|
|
}
|
|
|
|
|
2021-09-07 17:21:48 +02:00
|
|
|
fn asset_from_cache<T: serde::de::DeserializeOwned>(
|
|
|
|
path: &Path,
|
|
|
|
description: &str,
|
|
|
|
compressed: bool,
|
|
|
|
) -> Result<T> {
|
2021-08-26 13:12:21 +02:00
|
|
|
let contents = fs::read(path).map_err(|_| {
|
2021-07-13 21:53:29 +02:00
|
|
|
format!(
|
|
|
|
"Could not load cached {} '{}'",
|
|
|
|
description,
|
|
|
|
path.to_string_lossy()
|
|
|
|
)
|
|
|
|
})?;
|
2021-09-07 17:21:48 +02:00
|
|
|
asset_from_contents(&contents[..], description, compressed)
|
|
|
|
.map_err(|_| format!("Could not parse cached {}", description).into())
|
2021-07-13 21:53:29 +02:00
|
|
|
}
|
|
|
|
|
2020-03-21 20:42:10 +01:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    use std::ffi::OsStr;

    use std::fs::File;
    use std::io::{BufReader, Write};
    use tempfile::TempDir;

    use crate::input::Input;

    /// Fixture for the syntax detection tests: the integrated assets, a
    /// syntax mapping that individual tests may extend, and a temporary
    /// directory in which test files are created.
    struct SyntaxDetectionTest<'a> {
        assets: HighlightingAssets,
        pub syntax_mapping: SyntaxMapping<'a>,
        pub temp_dir: TempDir,
    }

    impl<'a> SyntaxDetectionTest<'a> {
        /// Creates a fixture with the assets from the binary, the builtin
        /// syntax mapping and a fresh temporary directory.
        fn new() -> Self {
            SyntaxDetectionTest {
                assets: HighlightingAssets::from_binary(),
                syntax_mapping: SyntaxMapping::builtin(),
                temp_dir: TempDir::new().expect("creation of temporary directory"),
            }
        }

        /// Returns the name of the syntax detected for `input`, or the
        /// placeholder `"!no syntax!"` when detection returns an error.
        fn get_syntax_name(
            &self,
            language: Option<&str>,
            input: &mut OpenedInput,
            mapping: &SyntaxMapping,
        ) -> String {
            self.assets
                .get_syntax(language, input, mapping)
                .map(|syntax_in_set| syntax_in_set.syntax.name.clone())
                .unwrap_or_else(|_| "!no syntax!".to_owned())
        }

        /// Creates a real file called `file_name` in the temporary directory,
        /// writes `first_line` followed by a newline into it, and returns the
        /// syntax detected when it is opened as an ordinary file.
        fn syntax_for_real_file_with_content_os(
            &self,
            file_name: &OsStr,
            first_line: &str,
        ) -> String {
            let file_path = self.temp_dir.path().join(file_name);
            {
                // The inner scope closes the file before it is opened for reading.
                let mut temp_file = File::create(&file_path).unwrap();
                writeln!(temp_file, "{}", first_line).unwrap();
            }

            let input = Input::ordinary_file(&file_path);
            let dummy_stdin: &[u8] = &[];
            let mut opened_input = input.open(dummy_stdin, None).unwrap();

            self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
        }

        /// Returns the syntax detected for a reader-backed input whose content
        /// is `first_line` and whose name is `file_name` inside the temporary
        /// directory. No file is created on disk.
        fn syntax_for_file_with_content_os(&self, file_name: &OsStr, first_line: &str) -> String {
            let file_path = self.temp_dir.path().join(file_name);
            let input = Input::from_reader(Box::new(BufReader::new(first_line.as_bytes())))
                .with_name(Some(&file_path));
            let dummy_stdin: &[u8] = &[];
            let mut opened_input = input.open(dummy_stdin, None).unwrap();

            self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
        }

        /// Like `syntax_for_file_with_content_os`, with empty content.
        #[cfg(unix)]
        fn syntax_for_file_os(&self, file_name: &OsStr) -> String {
            self.syntax_for_file_with_content_os(file_name, "")
        }

        /// `&str` convenience wrapper around `syntax_for_file_with_content_os`.
        fn syntax_for_file_with_content(&self, file_name: &str, first_line: &str) -> String {
            self.syntax_for_file_with_content_os(OsStr::new(file_name), first_line)
        }

        /// Returns the syntax detected for `file_name` with empty content.
        fn syntax_for_file(&self, file_name: &str) -> String {
            self.syntax_for_file_with_content(file_name, "")
        }

        /// Returns the syntax detected for a stdin input that carries the name
        /// `file_name` and delivers `content`.
        fn syntax_for_stdin_with_content(&self, file_name: &str, content: &[u8]) -> String {
            let input = Input::stdin().with_name(Some(file_name));
            let mut opened_input = input.open(content, None).unwrap();

            self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping)
        }

        /// Checks that a real file and a reader-backed input with the same
        /// name and content are detected as the same syntax. Both results are
        /// printed to stderr when they differ.
        fn syntax_is_same_for_inputkinds(&self, file_name: &str, content: &str) -> bool {
            let as_file = self.syntax_for_real_file_with_content_os(file_name.as_ref(), content);
            let as_reader = self.syntax_for_file_with_content_os(file_name.as_ref(), content);
            let consistent = as_file == as_reader;
            // TODO: Compare StdIn somehow?

            if !consistent {
                eprintln!(
                    "Inconsistent syntax detection:\nFor File: {}\nFor Reader: {}",
                    as_file, as_reader
                )
            }

            consistent
        }
    }

    #[test]
    fn syntax_detection_basic() {
        let test = SyntaxDetectionTest::new();

        assert_eq!(test.syntax_for_file("test.rs"), "Rust");
        assert_eq!(test.syntax_for_file("test.cpp"), "C++");
        assert_eq!(test.syntax_for_file("test.build"), "NAnt Build File");
        assert_eq!(
            test.syntax_for_file("PKGBUILD"),
            "Bourne Again Shell (bash)"
        );
        assert_eq!(test.syntax_for_file(".bashrc"), "Bourne Again Shell (bash)");
        assert_eq!(test.syntax_for_file("Makefile"), "Makefile");
    }

    #[cfg(unix)]
    #[test]
    fn syntax_detection_invalid_utf8() {
        use std::os::unix::ffi::OsStrExt;

        let test = SyntaxDetectionTest::new();

        assert_eq!(
            test.syntax_for_file_os(OsStr::from_bytes(b"invalid_\xFEutf8_filename.rs")),
            "Rust"
        );
    }

    #[test]
    fn syntax_detection_same_for_inputkinds() {
        let mut test = SyntaxDetectionTest::new();

        // Fail loudly if a mapping cannot be registered; otherwise the
        // consistency checks below could pass without exercising it.
        test.syntax_mapping
            .insert("*.myext", MappingTarget::MapTo("C"))
            .expect("inserting the '*.myext' mapping succeeds");
        test.syntax_mapping
            .insert("MY_FILE", MappingTarget::MapTo("Markdown"))
            .expect("inserting the 'MY_FILE' mapping succeeds");

        assert!(test.syntax_is_same_for_inputkinds("Test.md", ""));
        assert!(test.syntax_is_same_for_inputkinds("Test.txt", "#!/bin/bash"));
        assert!(test.syntax_is_same_for_inputkinds(".bashrc", ""));
        assert!(test.syntax_is_same_for_inputkinds("test.h", ""));
        assert!(test.syntax_is_same_for_inputkinds("test.js", "#!/bin/bash"));
        assert!(test.syntax_is_same_for_inputkinds("test.myext", ""));
        assert!(test.syntax_is_same_for_inputkinds("MY_FILE", ""));
        assert!(test.syntax_is_same_for_inputkinds("MY_FILE", "<?php"));
    }

    #[test]
    fn syntax_detection_well_defined_mapping_for_duplicate_extensions() {
        let test = SyntaxDetectionTest::new();

        assert_eq!(test.syntax_for_file("test.h"), "C++");
        assert_eq!(test.syntax_for_file("test.sass"), "Sass");
        assert_eq!(test.syntax_for_file("test.js"), "JavaScript (Babel)");
        assert_eq!(test.syntax_for_file("test.fs"), "F#");
        assert_eq!(test.syntax_for_file("test.v"), "Verilog");
    }

    #[test]
    fn syntax_detection_first_line() {
        let test = SyntaxDetectionTest::new();

        assert_eq!(
            test.syntax_for_file_with_content("my_script", "#!/bin/bash"),
            "Bourne Again Shell (bash)"
        );
        assert_eq!(
            test.syntax_for_file_with_content("build", "#!/bin/bash"),
            "Bourne Again Shell (bash)"
        );
        assert_eq!(
            test.syntax_for_file_with_content("my_script", "<?php"),
            "PHP"
        );
    }

    #[test]
    fn syntax_detection_with_custom_mapping() {
        let mut test = SyntaxDetectionTest::new();

        assert_eq!(test.syntax_for_file("test.h"), "C++");
        test.syntax_mapping
            .insert("*.h", MappingTarget::MapTo("C"))
            .expect("inserting the '*.h' mapping succeeds");
        assert_eq!(test.syntax_for_file("test.h"), "C");
    }

    #[test]
    fn syntax_detection_is_case_sensitive() {
        let mut test = SyntaxDetectionTest::new();

        assert_ne!(test.syntax_for_file("README.MD"), "Markdown");
        test.syntax_mapping
            .insert("*.MD", MappingTarget::MapTo("Markdown"))
            .expect("inserting the '*.MD' mapping succeeds");
        assert_eq!(test.syntax_for_file("README.MD"), "Markdown");
    }

    #[test]
    fn syntax_detection_stdin_filename() {
        let test = SyntaxDetectionTest::new();

        // from file extension
        assert_eq!(test.syntax_for_stdin_with_content("test.cpp", b"a"), "C++");
        // from first line (fallback)
        assert_eq!(
            test.syntax_for_stdin_with_content("my_script", b"#!/bin/bash"),
            "Bourne Again Shell (bash)"
        );
    }

    #[cfg(unix)]
    #[test]
    fn syntax_detection_for_symlinked_file() {
        use std::os::unix::fs::symlink;

        let test = SyntaxDetectionTest::new();
        // The link target has a name that is not expected to match any
        // mapping; only the symlink's own path ends in `.ssh/config`.
        let file_path = test.temp_dir.path().join("my_ssh_config_filename");
        {
            File::create(&file_path).unwrap();
        }
        let file_path_symlink = test.temp_dir.path().join(".ssh").join("config");

        std::fs::create_dir(test.temp_dir.path().join(".ssh"))
            .expect("creation of directory succeeds");
        symlink(&file_path, &file_path_symlink).expect("creation of symbolic link succeeds");

        let input = Input::ordinary_file(&file_path_symlink);
        let dummy_stdin: &[u8] = &[];
        let mut opened_input = input.open(dummy_stdin, None).unwrap();

        assert_eq!(
            test.get_syntax_name(None, &mut opened_input, &test.syntax_mapping),
            "SSH Config"
        );
    }
}
|