mirror of
https://github.com/sharkdp/bat.git
synced 2024-12-27 00:38:52 +01:00
Merge pull request #2755 from cyqsimon/syntax-mapping-refactor
More extensible syntax mapping mechanism
This commit is contained in:
commit
db66e4459b
@ -15,6 +15,7 @@
|
||||
- Minor benchmark script improvements #2768 (@cyqsimon)
|
||||
- Update Arch Linux package URL in README files #2779 (@brunobell)
|
||||
- Update and improve `zsh` completion, see #2772 (@okapia)
|
||||
- More extensible syntax mapping mechanism #2755 (@cyqsimon)
|
||||
- Use proper Architecture for Debian packages built for musl, see #2811 (@Enselic)
|
||||
- Pull in fix for unsafe-libyaml security advisory, see #2812 (@dtolnay)
|
||||
- Update git-version dependency to use Syn v2, see #2816 (@dtolnay)
|
||||
@ -28,6 +29,10 @@
|
||||
|
||||
## `bat` as a library
|
||||
|
||||
- Changes to `syntax_mapping::SyntaxMapping` #2755 (@cyqsimon)
|
||||
- `SyntaxMapping::get_syntax_for` is now correctly public
|
||||
- [BREAKING] `SyntaxMapping::{empty,builtin}` are removed; use `SyntaxMapping::new` instead
|
||||
- [BREAKING] `SyntaxMapping::mappings` is replaced by `SyntaxMapping::{builtin,custom,all}_mappings`
|
||||
- Make `Controller::run_with_error_handler`'s error handler `FnMut`, see #2831 (@rhysd)
|
||||
|
||||
# v0.24.0
|
||||
|
132
Cargo.lock
generated
132
Cargo.lock
generated
@ -129,6 +129,8 @@ dependencies = [
|
||||
"globset",
|
||||
"grep-cli",
|
||||
"home",
|
||||
"indexmap 2.1.0",
|
||||
"itertools",
|
||||
"nix",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
@ -140,12 +142,14 @@ dependencies = [
|
||||
"run_script",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_with",
|
||||
"serde_yaml",
|
||||
"serial_test",
|
||||
"shell-words",
|
||||
"syntect",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"toml",
|
||||
"unicode-width",
|
||||
"wait-timeout",
|
||||
"walkdir",
|
||||
@ -224,11 +228,12 @@ checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -314,6 +319,41 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.20.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"darling_macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_core"
|
||||
version = "0.20.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621"
|
||||
dependencies = [
|
||||
"fnv",
|
||||
"ident_case",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling_macro"
|
||||
version = "0.20.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "5.4.0"
|
||||
@ -578,6 +618,12 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ident_case"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.3.0"
|
||||
@ -600,12 +646,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.2"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897"
|
||||
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.1",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1101,13 +1148,44 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_with"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_with_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_with_macros"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788"
|
||||
dependencies = [
|
||||
"darling",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a15e0ef66bf939a7c890a0bf6d5a733c70202225f9888a89ed5c62298b019129"
|
||||
dependencies = [
|
||||
"indexmap 2.0.2",
|
||||
"indexmap 2.1.0",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
@ -1294,6 +1372,41 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ff9e3abce27ee2c9a37f9ad37238c1bdd4e789c84ba37df76aa4d528f5072cc"
|
||||
dependencies = [
|
||||
"indexmap 2.1.0",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_datetime"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.20.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81"
|
||||
dependencies = [
|
||||
"indexmap 2.1.0",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.8"
|
||||
@ -1613,6 +1726,15 @@ version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.5.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "176b6138793677221d420fd2f0aeeced263f197688b36484660da767bca2fa32"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
|
@ -100,6 +100,14 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.78"
|
||||
indexmap = { version = "2.1.0", features = ["serde"] }
|
||||
itertools = "0.11.0"
|
||||
once_cell = "1.18"
|
||||
regex = "1.10.2"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = { version = "3.4.0", default-features = false, features = ["macros"] }
|
||||
toml = { version = "0.8.6", features = ["preserve_order"] }
|
||||
walkdir = "2.4"
|
||||
|
||||
[build-dependencies.clap]
|
||||
version = "4.4.12"
|
||||
|
@ -1,5 +1,6 @@
|
||||
#[cfg(feature = "application")]
|
||||
mod application;
|
||||
mod syntax_mapping;
|
||||
mod util;
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@ -7,6 +8,8 @@ fn main() -> anyhow::Result<()> {
|
||||
// see: https://doc.rust-lang.org/cargo/reference/build-scripts.html#rerun-if-changed
|
||||
println!("cargo:rerun-if-changed=build/");
|
||||
|
||||
syntax_mapping::build_static_mappings()?;
|
||||
|
||||
#[cfg(feature = "application")]
|
||||
application::gen_man_and_comp()?;
|
||||
|
||||
|
292
build/syntax_mapping.rs
Normal file
292
build/syntax_mapping.rs
Normal file
@ -0,0 +1,292 @@
|
||||
use std::{
|
||||
convert::Infallible,
|
||||
env, fs,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail};
|
||||
use indexmap::IndexMap;
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde::Deserialize;
|
||||
use serde_with::DeserializeFromStr;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Known mapping targets.
|
||||
///
|
||||
/// Corresponds to `syntax_mapping::MappingTarget`.
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Hash, DeserializeFromStr)]
|
||||
pub enum MappingTarget {
|
||||
MapTo(String),
|
||||
MapToUnknown,
|
||||
MapExtensionToUnknown,
|
||||
}
|
||||
impl FromStr for MappingTarget {
|
||||
type Err = Infallible;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
|
||||
"MappingTarget::MapExtensionToUnknown" => Ok(Self::MapExtensionToUnknown),
|
||||
syntax => Ok(Self::MapTo(syntax.into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl MappingTarget {
|
||||
fn codegen(&self) -> String {
|
||||
match self {
|
||||
Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###),
|
||||
Self::MapToUnknown => "MappingTarget::MapToUnknown".into(),
|
||||
Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)]
|
||||
/// A single matcher.
|
||||
///
|
||||
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
|
||||
struct Matcher(Vec<MatcherSegment>);
|
||||
/// Parse a matcher.
|
||||
///
|
||||
/// Note that this implementation is rather strict: it will greedily interpret
|
||||
/// every valid environment variable replacement as such, then immediately
|
||||
/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text
|
||||
/// segments.
|
||||
///
|
||||
/// The reason for this strictness is I currently cannot think of a valid reason
|
||||
/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern.
|
||||
/// Therefore any such occurrences are likely human errors.
|
||||
///
|
||||
/// If we later discover some edge cases, it's okay to make it more permissive.
|
||||
impl FromStr for Matcher {
|
||||
type Err = anyhow::Error;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
use MatcherSegment as Seg;
|
||||
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
|
||||
|
||||
let mut segments = vec![];
|
||||
let mut text_start = 0;
|
||||
for capture in VAR_REGEX.captures_iter(s) {
|
||||
let match_0 = capture.get(0).unwrap();
|
||||
|
||||
// text before this var
|
||||
let text_end = match_0.start();
|
||||
segments.push(Seg::Text(s[text_start..text_end].into()));
|
||||
text_start = match_0.end();
|
||||
|
||||
// this var
|
||||
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
||||
}
|
||||
// possible trailing text
|
||||
segments.push(Seg::Text(s[text_start..].into()));
|
||||
|
||||
// cleanup empty text segments
|
||||
let non_empty_segments = segments
|
||||
.into_iter()
|
||||
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
|
||||
.collect_vec();
|
||||
|
||||
// sanity check
|
||||
if non_empty_segments
|
||||
.windows(2)
|
||||
.any(|segs| segs[0].is_text() && segs[1].is_text())
|
||||
{
|
||||
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
|
||||
}
|
||||
|
||||
// guard empty case
|
||||
if non_empty_segments.is_empty() {
|
||||
bail!(r#"Parsed an empty matcher: "{s}""#);
|
||||
}
|
||||
|
||||
// guard variable syntax leftover fragments
|
||||
if non_empty_segments
|
||||
.iter()
|
||||
.filter_map(Seg::text)
|
||||
.any(|t| t.contains(['$', '{', '}']))
|
||||
{
|
||||
bail!(r#"Invalid matcher: "{s}""#);
|
||||
}
|
||||
|
||||
Ok(Self(non_empty_segments))
|
||||
}
|
||||
}
|
||||
impl Matcher {
|
||||
fn codegen(&self) -> String {
|
||||
match self.0.len() {
|
||||
0 => unreachable!("0-length matcher should never be created"),
|
||||
// if-let guard would be ideal here
|
||||
// see: https://github.com/rust-lang/rust/issues/51114
|
||||
1 if self.0[0].is_text() => {
|
||||
let s = self.0[0].text().unwrap();
|
||||
format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###)
|
||||
}
|
||||
// parser logic ensures that this case can only happen when there are dynamic segments
|
||||
_ => {
|
||||
let segs = self.0.iter().map(MatcherSegment::codegen).join(", ");
|
||||
format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One piece of a matcher: either literal text or the name of an
/// environment variable.
///
/// Corresponds to `syntax_mapping::MatcherSegment`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum MatcherSegment {
    Text(String),
    Env(String),
}

#[allow(dead_code)]
impl MatcherSegment {
    /// Whether this segment is literal text.
    fn is_text(&self) -> bool {
        self.text().is_some()
    }

    /// Whether this segment is an environment variable.
    fn is_env(&self) -> bool {
        self.env().is_some()
    }

    /// The literal text, if this is a text segment.
    fn text(&self) -> Option<&str> {
        if let Self::Text(content) = self {
            Some(content.as_str())
        } else {
            None
        }
    }

    /// The variable name, if this is an environment variable segment.
    fn env(&self) -> Option<&str> {
        if let Self::Env(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }

    /// Renders this segment as a Rust expression for the generated source.
    fn codegen(&self) -> String {
        let (variant, value) = match self {
            Self::Text(s) => ("Text", s),
            Self::Env(s) => ("Env", s),
        };
        format!(r###"MatcherSegment::{variant}(r#"{value}"#)"###)
    }
}
|
||||
|
||||
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
struct MappingDefModel {
|
||||
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
|
||||
}
|
||||
impl MappingDefModel {
|
||||
fn into_mapping_list(self) -> MappingList {
|
||||
let list = self
|
||||
.mappings
|
||||
.into_iter()
|
||||
.flat_map(|(target, matchers)| {
|
||||
matchers
|
||||
.into_iter()
|
||||
.map(|matcher| (matcher, target.clone()))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
MappingList(list)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct MappingList(Vec<(Matcher, MappingTarget)>);
|
||||
impl MappingList {
|
||||
fn codegen(&self) -> String {
|
||||
let array_items: Vec<_> = self
|
||||
.0
|
||||
.iter()
|
||||
.map(|(matcher, target)| {
|
||||
format!("({m}, {t})", m = matcher.codegen(), t = target.codegen())
|
||||
})
|
||||
.collect();
|
||||
let len = array_items.len();
|
||||
|
||||
format!(
|
||||
"/// Generated by build script from /src/syntax_mapping/builtins/.\n\
|
||||
pub(crate) static BUILTIN_MAPPINGS: [(Lazy<Option<GlobMatcher>>, MappingTarget); {len}] = [\n{items}\n];",
|
||||
items = array_items.join(",\n")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the list of paths to all mapping definition files that should be
|
||||
/// included for the current target platform.
|
||||
fn get_def_paths() -> anyhow::Result<Vec<PathBuf>> {
|
||||
let source_subdirs = [
|
||||
"common",
|
||||
#[cfg(target_family = "unix")]
|
||||
"unix-family",
|
||||
#[cfg(any(
|
||||
target_os = "freebsd",
|
||||
target_os = "netbsd",
|
||||
target_os = "openbsd",
|
||||
target_os = "macos"
|
||||
))]
|
||||
"bsd-family",
|
||||
#[cfg(target_os = "linux")]
|
||||
"linux",
|
||||
#[cfg(target_os = "macos")]
|
||||
"macos",
|
||||
#[cfg(target_os = "windows")]
|
||||
"windows",
|
||||
];
|
||||
|
||||
let mut toml_paths = vec![];
|
||||
for subdir in source_subdirs {
|
||||
let wd = WalkDir::new(Path::new("src/syntax_mapping/builtins").join(subdir));
|
||||
let paths = wd
|
||||
.into_iter()
|
||||
.filter_map_ok(|entry| {
|
||||
let path = entry.path();
|
||||
(path.is_file() && path.extension().map(|ext| ext == "toml").unwrap_or(false))
|
||||
.then(|| path.to_owned())
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
toml_paths.extend(paths);
|
||||
}
|
||||
|
||||
toml_paths.sort_by_key(|path| {
|
||||
path.file_name()
|
||||
.expect("file name should not terminate in ..")
|
||||
.to_owned()
|
||||
});
|
||||
|
||||
Ok(toml_paths)
|
||||
}
|
||||
|
||||
fn read_all_mappings() -> anyhow::Result<MappingList> {
|
||||
let mut all_mappings = vec![];
|
||||
|
||||
for path in get_def_paths()? {
|
||||
let toml_string = fs::read_to_string(path)?;
|
||||
let mappings = toml::from_str::<MappingDefModel>(&toml_string)?.into_mapping_list();
|
||||
all_mappings.extend(mappings.0);
|
||||
}
|
||||
|
||||
let duplicates = all_mappings
|
||||
.iter()
|
||||
.duplicates_by(|(matcher, _)| matcher)
|
||||
.collect_vec();
|
||||
if !duplicates.is_empty() {
|
||||
bail!("Rules with duplicate matchers found: {duplicates:?}");
|
||||
}
|
||||
|
||||
Ok(MappingList(all_mappings))
|
||||
}
|
||||
|
||||
/// Build the static syntax mappings defined in /src/syntax_mapping/builtins/
|
||||
/// into a .rs source file, which is to be inserted with `include!`.
|
||||
pub fn build_static_mappings() -> anyhow::Result<()> {
|
||||
println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/");
|
||||
|
||||
let mappings = read_all_mappings()?;
|
||||
|
||||
let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?)
|
||||
.join("codegen_static_syntax_mappings.rs");
|
||||
|
||||
fs::write(codegen_path, mappings.codegen())?;
|
||||
|
||||
Ok(())
|
||||
}
|
@ -441,7 +441,7 @@ mod tests {
|
||||
fn new() -> Self {
|
||||
SyntaxDetectionTest {
|
||||
assets: HighlightingAssets::from_binary(),
|
||||
syntax_mapping: SyntaxMapping::builtin(),
|
||||
syntax_mapping: SyntaxMapping::new(),
|
||||
temp_dir: TempDir::new().expect("creation of temporary directory"),
|
||||
}
|
||||
}
|
||||
|
@ -121,7 +121,7 @@ impl App {
|
||||
_ => unreachable!("other values for --paging are not allowed"),
|
||||
};
|
||||
|
||||
let mut syntax_mapping = SyntaxMapping::builtin();
|
||||
let mut syntax_mapping = SyntaxMapping::new();
|
||||
|
||||
if let Some(values) = self.matches.get_many::<String>("ignored-suffix") {
|
||||
for suffix in values {
|
||||
@ -130,7 +130,9 @@ impl App {
|
||||
}
|
||||
|
||||
if let Some(values) = self.matches.get_many::<String>("map-syntax") {
|
||||
for from_to in values {
|
||||
// later args take precedence over earlier ones, hence `.rev()`
|
||||
// see: https://github.com/sharkdp/bat/pull/2755#discussion_r1456416875
|
||||
for from_to in values.rev() {
|
||||
let parts: Vec<_> = from_to.split(':').collect();
|
||||
|
||||
if parts.len() != 2 {
|
||||
|
@ -78,9 +78,11 @@ fn run_cache_subcommand(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_syntax_mapping_to_paths<'a>(
|
||||
mappings: &[(GlobMatcher, MappingTarget<'a>)],
|
||||
) -> HashMap<&'a str, Vec<String>> {
|
||||
fn get_syntax_mapping_to_paths<'r, 't, I>(mappings: I) -> HashMap<&'t str, Vec<String>>
|
||||
where
|
||||
I: IntoIterator<Item = (&'r GlobMatcher, &'r MappingTarget<'t>)>,
|
||||
't: 'r, // target text outlives rule
|
||||
{
|
||||
let mut map = HashMap::new();
|
||||
for mapping in mappings {
|
||||
if let (matcher, MappingTarget::MapTo(s)) = mapping {
|
||||
@ -123,7 +125,7 @@ pub fn get_languages(config: &Config, cache_dir: &Path) -> Result<String> {
|
||||
|
||||
languages.sort_by_key(|lang| lang.name.to_uppercase());
|
||||
|
||||
let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.mappings());
|
||||
let configured_languages = get_syntax_mapping_to_paths(config.syntax_mapping.all_mappings());
|
||||
|
||||
for lang in &mut languages {
|
||||
if let Some(additional_paths) = configured_languages.get(lang.name.as_str()) {
|
||||
|
@ -1,12 +1,23 @@
|
||||
use std::path::Path;
|
||||
|
||||
use crate::error::Result;
|
||||
use ignored_suffixes::IgnoredSuffixes;
|
||||
|
||||
use globset::{Candidate, GlobBuilder, GlobMatcher};
|
||||
|
||||
use crate::error::Result;
|
||||
use builtin::BUILTIN_MAPPINGS;
|
||||
use ignored_suffixes::IgnoredSuffixes;
|
||||
|
||||
mod builtin;
|
||||
pub mod ignored_suffixes;
|
||||
|
||||
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
|
||||
let matcher = GlobBuilder::new(from)
|
||||
.case_insensitive(true)
|
||||
.literal_separator(true)
|
||||
.build()?
|
||||
.compile_matcher();
|
||||
Ok(matcher)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[non_exhaustive]
|
||||
pub enum MappingTarget<'a> {
|
||||
@ -29,204 +40,72 @@ pub enum MappingTarget<'a> {
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct SyntaxMapping<'a> {
|
||||
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
|
||||
/// User-defined mappings at run time.
|
||||
///
|
||||
/// Rules in front have precedence.
|
||||
custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
|
||||
pub(crate) ignored_suffixes: IgnoredSuffixes<'a>,
|
||||
}
|
||||
|
||||
impl<'a> SyntaxMapping<'a> {
|
||||
pub fn empty() -> SyntaxMapping<'a> {
|
||||
pub fn new() -> SyntaxMapping<'a> {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub fn builtin() -> SyntaxMapping<'a> {
|
||||
let mut mapping = Self::empty();
|
||||
mapping.insert("*.h", MappingTarget::MapTo("C++")).unwrap();
|
||||
mapping
|
||||
.insert(".clang-format", MappingTarget::MapTo("YAML"))
|
||||
.unwrap();
|
||||
mapping.insert("*.fs", MappingTarget::MapTo("F#")).unwrap();
|
||||
mapping
|
||||
.insert("build", MappingTarget::MapToUnknown)
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert("**/.ssh/config", MappingTarget::MapTo("SSH Config"))
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert(
|
||||
"**/bat/config",
|
||||
MappingTarget::MapTo("Bourne Again Shell (bash)"),
|
||||
)
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert(
|
||||
"/etc/profile",
|
||||
MappingTarget::MapTo("Bourne Again Shell (bash)"),
|
||||
)
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert(
|
||||
"os-release",
|
||||
MappingTarget::MapTo("Bourne Again Shell (bash)"),
|
||||
)
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert("*.pac", MappingTarget::MapTo("JavaScript (Babel)"))
|
||||
.unwrap();
|
||||
mapping
|
||||
.insert("fish_history", MappingTarget::MapTo("YAML"))
|
||||
.unwrap();
|
||||
|
||||
for glob in ["*.jsonl", "*.sarif"] {
|
||||
mapping.insert(glob, MappingTarget::MapTo("JSON")).unwrap();
|
||||
}
|
||||
|
||||
// See #2151, https://nmap.org/book/nse-language.html
|
||||
mapping
|
||||
.insert("*.nse", MappingTarget::MapTo("Lua"))
|
||||
.unwrap();
|
||||
|
||||
// See #1008
|
||||
mapping
|
||||
.insert("rails", MappingTarget::MapToUnknown)
|
||||
.unwrap();
|
||||
|
||||
mapping
|
||||
.insert("Containerfile", MappingTarget::MapTo("Dockerfile"))
|
||||
.unwrap();
|
||||
|
||||
mapping
|
||||
.insert("*.ksh", MappingTarget::MapTo("Bourne Again Shell (bash)"))
|
||||
.unwrap();
|
||||
|
||||
// Nginx and Apache syntax files both want to style all ".conf" files
|
||||
// see #1131 and #1137
|
||||
mapping
|
||||
.insert("*.conf", MappingTarget::MapExtensionToUnknown)
|
||||
.unwrap();
|
||||
|
||||
for glob in &[
|
||||
"/etc/nginx/**/*.conf",
|
||||
"/etc/nginx/sites-*/**/*",
|
||||
"nginx.conf",
|
||||
"mime.types",
|
||||
] {
|
||||
mapping.insert(glob, MappingTarget::MapTo("nginx")).unwrap();
|
||||
}
|
||||
|
||||
for glob in &[
|
||||
"/etc/apache2/**/*.conf",
|
||||
"/etc/apache2/sites-*/**/*",
|
||||
"httpd.conf",
|
||||
] {
|
||||
mapping
|
||||
.insert(glob, MappingTarget::MapTo("Apache Conf"))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
for glob in &[
|
||||
"**/systemd/**/*.conf",
|
||||
"**/systemd/**/*.example",
|
||||
"*.automount",
|
||||
"*.device",
|
||||
"*.dnssd",
|
||||
"*.link",
|
||||
"*.mount",
|
||||
"*.netdev",
|
||||
"*.network",
|
||||
"*.nspawn",
|
||||
"*.path",
|
||||
"*.service",
|
||||
"*.scope",
|
||||
"*.slice",
|
||||
"*.socket",
|
||||
"*.swap",
|
||||
"*.target",
|
||||
"*.timer",
|
||||
] {
|
||||
mapping.insert(glob, MappingTarget::MapTo("INI")).unwrap();
|
||||
}
|
||||
|
||||
// unix mail spool
|
||||
for glob in &["/var/spool/mail/*", "/var/mail/*"] {
|
||||
mapping.insert(glob, MappingTarget::MapTo("Email")).unwrap()
|
||||
}
|
||||
|
||||
// pacman hooks
|
||||
mapping
|
||||
.insert("*.hook", MappingTarget::MapTo("INI"))
|
||||
.unwrap();
|
||||
|
||||
mapping
|
||||
.insert("*.ron", MappingTarget::MapTo("Rust"))
|
||||
.unwrap();
|
||||
|
||||
// Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
|
||||
// See e.g. https://git-scm.com/docs/git-config#FILES
|
||||
match (
|
||||
std::env::var_os("XDG_CONFIG_HOME").filter(|val| !val.is_empty()),
|
||||
std::env::var_os("HOME")
|
||||
.filter(|val| !val.is_empty())
|
||||
.map(|home| Path::new(&home).join(".config")),
|
||||
) {
|
||||
(Some(xdg_config_home), Some(default_config_home))
|
||||
if xdg_config_home == default_config_home => {
|
||||
insert_git_config_global(&mut mapping, &xdg_config_home)
|
||||
}
|
||||
(Some(xdg_config_home), Some(default_config_home)) /* else guard */ => {
|
||||
insert_git_config_global(&mut mapping, &xdg_config_home);
|
||||
insert_git_config_global(&mut mapping, &default_config_home)
|
||||
}
|
||||
(Some(config_home), None) => insert_git_config_global(&mut mapping, &config_home),
|
||||
(None, Some(config_home)) => insert_git_config_global(&mut mapping, &config_home),
|
||||
(None, None) => (),
|
||||
};
|
||||
|
||||
fn insert_git_config_global(mapping: &mut SyntaxMapping, config_home: impl AsRef<Path>) {
|
||||
let git_config_path = config_home.as_ref().join("git");
|
||||
|
||||
mapping
|
||||
.insert(
|
||||
&git_config_path.join("config").to_string_lossy(),
|
||||
MappingTarget::MapTo("Git Config"),
|
||||
)
|
||||
.ok();
|
||||
|
||||
mapping
|
||||
.insert(
|
||||
&git_config_path.join("ignore").to_string_lossy(),
|
||||
MappingTarget::MapTo("Git Ignore"),
|
||||
)
|
||||
.ok();
|
||||
|
||||
mapping
|
||||
.insert(
|
||||
&git_config_path.join("attributes").to_string_lossy(),
|
||||
MappingTarget::MapTo("Git Attributes"),
|
||||
)
|
||||
.ok();
|
||||
}
|
||||
|
||||
mapping
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
||||
let glob = GlobBuilder::new(from)
|
||||
.case_insensitive(true)
|
||||
.literal_separator(true)
|
||||
.build()?;
|
||||
self.mappings.push((glob.compile_matcher(), to));
|
||||
let matcher = make_glob_matcher(from)?;
|
||||
self.custom_mappings.push((matcher, to));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
|
||||
&self.mappings
|
||||
/// Returns an iterator over all mappings. User-defined mappings are listed
|
||||
/// before builtin mappings; mappings in front have higher precedence.
|
||||
///
|
||||
/// Builtin mappings' `GlobMatcher`s are lazily compiled.
|
||||
///
|
||||
/// Note that this function only returns mappings that are valid under the
|
||||
/// current environment. For details see [`Self::builtin_mappings`].
|
||||
pub fn all_mappings(&self) -> impl Iterator<Item = (&GlobMatcher, &MappingTarget<'a>)> {
|
||||
self.custom_mappings()
|
||||
.iter()
|
||||
.map(|(matcher, target)| (matcher, target)) // as_ref
|
||||
.chain(
|
||||
// we need a map with a closure to "do" the lifetime variance
|
||||
// see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653
|
||||
// also, clippy false positive:
|
||||
// see: https://github.com/rust-lang/rust-clippy/issues/9280
|
||||
#[allow(clippy::map_identity)]
|
||||
self.builtin_mappings().map(|rule| rule),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
|
||||
/// Returns an iterator over all valid builtin mappings. Mappings in front
|
||||
/// have higher precedence.
|
||||
///
|
||||
/// The `GlobMatcher`s are lazily compiled.
|
||||
///
|
||||
/// Mappings that are invalid under the current environment (i.e. rule
|
||||
/// requires environment variable(s) that is unset, or the joined string
|
||||
/// after variable(s) replacement is not a valid glob expression) are
|
||||
/// ignored.
|
||||
pub fn builtin_mappings(
|
||||
&self,
|
||||
) -> impl Iterator<Item = (&'static GlobMatcher, &'static MappingTarget<'static>)> {
|
||||
BUILTIN_MAPPINGS
|
||||
.iter()
|
||||
.filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target)))
|
||||
}
|
||||
|
||||
/// Returns all user-defined mappings.
|
||||
pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
|
||||
&self.custom_mappings
|
||||
}
|
||||
|
||||
pub fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
|
||||
// Try matching on the file name as-is.
|
||||
let candidate = Candidate::new(&path);
|
||||
let candidate_filename = path.as_ref().file_name().map(Candidate::new);
|
||||
for (ref glob, ref syntax) in self.mappings.iter().rev() {
|
||||
for (glob, syntax) in self.all_mappings() {
|
||||
if glob.is_match_candidate(&candidate)
|
||||
|| candidate_filename
|
||||
.as_ref()
|
||||
@ -252,9 +131,46 @@ impl<'a> SyntaxMapping<'a> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let mut map = SyntaxMapping::empty();
|
||||
fn builtin_mappings_work() {
|
||||
let map = SyntaxMapping::new();
|
||||
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/build"),
|
||||
Some(MappingTarget::MapToUnknown)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn all_fixed_builtin_mappings_can_compile() {
|
||||
let map = SyntaxMapping::new();
|
||||
|
||||
// collect call evaluates all lazy closures
|
||||
// fixed builtin mappings will panic if they fail to compile
|
||||
let _mappings = map.builtin_mappings().collect::<Vec<_>>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn builtin_mappings_matcher_only_compile_once() {
|
||||
let map = SyntaxMapping::new();
|
||||
|
||||
let two_iterations: Vec<_> = (0..2)
|
||||
.map(|_| {
|
||||
// addresses of every matcher
|
||||
map.builtin_mappings()
|
||||
.map(|(matcher, _)| matcher as *const _ as usize)
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
|
||||
// if the matchers are only compiled once, their address should remain the same
|
||||
assert_eq!(two_iterations[0], two_iterations[1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn custom_mappings_work() {
|
||||
let mut map = SyntaxMapping::new();
|
||||
map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
|
||||
.ok();
|
||||
map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
|
||||
@ -273,52 +189,32 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn user_can_override_builtin_mappings() {
|
||||
let mut map = SyntaxMapping::builtin();
|
||||
fn custom_mappings_override_builtin() {
|
||||
let mut map = SyntaxMapping::new();
|
||||
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/etc/profile"),
|
||||
Some(MappingTarget::MapTo("Bourne Again Shell (bash)"))
|
||||
map.get_syntax_for("/path/to/httpd.conf"),
|
||||
Some(MappingTarget::MapTo("Apache Conf"))
|
||||
);
|
||||
map.insert("/etc/profile", MappingTarget::MapTo("My Syntax"))
|
||||
map.insert("httpd.conf", MappingTarget::MapTo("My Syntax"))
|
||||
.ok();
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/etc/profile"),
|
||||
map.get_syntax_for("/path/to/httpd.conf"),
|
||||
Some(MappingTarget::MapTo("My Syntax"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn builtin_mappings() {
|
||||
let map = SyntaxMapping::builtin();
|
||||
fn custom_mappings_precedence() {
|
||||
let mut map = SyntaxMapping::new();
|
||||
|
||||
map.insert("/path/to/foo", MappingTarget::MapTo("alpha"))
|
||||
.ok();
|
||||
map.insert("/path/to/foo", MappingTarget::MapTo("bravo"))
|
||||
.ok();
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/build"),
|
||||
Some(MappingTarget::MapToUnknown)
|
||||
map.get_syntax_for("/path/to/foo"),
|
||||
Some(MappingTarget::MapTo("alpha"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
/// verifies that SyntaxMapping::builtin() doesn't repeat `Glob`-based keys
fn no_duplicate_builtin_keys() {
    let mappings = SyntaxMapping::builtin().mappings;
    for (i, (matcher, target)) in mappings.iter().enumerate() {
        // Every later entry that uses the same glob pattern as entry `i`.
        let dupl: Vec<_> = mappings[i + 1..]
            .iter()
            .filter(|item| item.0.glob() == matcher.glob())
            .collect();

        // emit repeats on failure
        assert_eq!(
            dupl.len(),
            0,
            "Glob pattern `{}` mapped to multiple: {:?}",
            matcher.glob().glob(),
            {
                // The block must evaluate to the vector itself. Ending it
                // with `push(..)` would make the message print `()`.
                let mut dupl_targets: Vec<MappingTarget> =
                    dupl.iter().map(|item| item.1).collect();
                dupl_targets.push(*target);
                dupl_targets
            },
        )
    }
}
|
||||
}
|
||||
|
91
src/syntax_mapping/builtin.rs
Normal file
91
src/syntax_mapping/builtin.rs
Normal file
@ -0,0 +1,91 @@
|
||||
use std::env;
|
||||
|
||||
use globset::GlobMatcher;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::syntax_mapping::{make_glob_matcher, MappingTarget};
|
||||
|
||||
// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the
|
||||
// build script (/build/syntax_mapping.rs).
|
||||
include!(concat!(
|
||||
env!("OUT_DIR"),
|
||||
"/codegen_static_syntax_mappings.rs"
|
||||
));
|
||||
|
||||
// The defined matcher strings are analysed at compile time and converted into
|
||||
// lazily-compiled `GlobMatcher`s. This is so that the string searches are moved
|
||||
// from run time to compile time, thus improving startup performance.
|
||||
//
|
||||
// To any future maintainer (including possibly myself) wondering why there is
|
||||
// not a `BuiltinMatcher` enum that looks like this:
|
||||
//
|
||||
// ```
|
||||
// enum BuiltinMatcher {
|
||||
// Fixed(&'static str),
|
||||
// Dynamic(Lazy<Option<String>>),
|
||||
// }
|
||||
// ```
|
||||
//
|
||||
// Because there was. I tried it and threw it out.
|
||||
//
|
||||
// Naively looking at the problem from a distance, this may seem like a good
|
||||
// design (strongly typed etc. etc.). It would also save on compiled size by
|
||||
// extracting out common behaviour into functions. But while actually
|
||||
// implementing the lazy matcher compilation logic, I realised that it's most
|
||||
// convenient for `BUILTIN_MAPPINGS` to have the following type:
|
||||
//
|
||||
// `[(Lazy<Option<GlobMatcher>>, MappingTarget); N]`
|
||||
//
|
||||
// The benefit for this is that operations like listing all builtin mappings
|
||||
// would be effectively memoised. The caller would not have to compile another
|
||||
// `GlobMatcher` for rules that they have previously visited.
|
||||
//
|
||||
// Unfortunately, this means we are going to have to store a distinct closure
|
||||
// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer
|
||||
// of indirection.
|
||||
//
|
||||
// In the current implementation, the closure within each generated rule simply
|
||||
// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on
|
||||
// whether the defined matcher contains dynamic segments or not.
|
||||
|
||||
/// Compile a fixed glob string into a glob matcher.
|
||||
///
|
||||
/// A failure to compile is a fatal error.
|
||||
///
|
||||
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
|
||||
fn build_matcher_fixed(from: &str) -> GlobMatcher {
|
||||
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
|
||||
}
|
||||
|
||||
/// Join a list of matcher segments to create a glob string, replacing all
|
||||
/// environment variables, then compile to a glob matcher.
|
||||
///
|
||||
/// Returns `None` if any replacement fails, or if the joined glob string fails
|
||||
/// to compile.
|
||||
///
|
||||
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
|
||||
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
|
||||
// join segments
|
||||
let mut buf = String::new();
|
||||
for seg in segs {
|
||||
match seg {
|
||||
MatcherSegment::Text(s) => buf.push_str(s),
|
||||
MatcherSegment::Env(var) => {
|
||||
let replaced = env::var(var).ok()?;
|
||||
buf.push_str(&replaced);
|
||||
}
|
||||
}
|
||||
}
|
||||
// compile glob matcher
|
||||
let matcher = make_glob_matcher(&buf).ok()?;
|
||||
Some(matcher)
|
||||
}
|
||||
|
||||
/// A segment of a dynamic builtin matcher.
|
||||
///
|
||||
/// `build_matcher_dynamic` concatenates a slice of these, in order, to form
/// the glob string that is then compiled.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
|
||||
#[derive(Clone, Debug)]
|
||||
enum MatcherSegment {
|
||||
/// Literal text, appended to the glob string as-is.
Text(&'static str),
|
||||
/// Name of an environment variable; its value is looked up at runtime and
/// appended to the glob string.
Env(&'static str),
|
||||
}
|
116
src/syntax_mapping/builtins/README.md
Normal file
116
src/syntax_mapping/builtins/README.md
Normal file
@ -0,0 +1,116 @@
|
||||
# `/src/syntax_mapping/builtins`
|
||||
|
||||
The files in this directory define path/name-based syntax mappings, which amend
|
||||
and take precedence over the extension/content-based syntax mappings provided by
|
||||
[syntect](https://github.com/trishume/syntect).
|
||||
|
||||
## File organisation
|
||||
|
||||
Each TOML file should describe the syntax mappings of a single application, or
|
||||
otherwise a set of logically-related rules.
|
||||
|
||||
What defines "a single application" here is deliberately vague, since the
|
||||
file-splitting is purely for maintainability reasons. (Technically, we could
|
||||
just as well use a single TOML file.) So just use common sense.
|
||||
|
||||
TOML files should reside in the corresponding subdirectory of the platform(s)
|
||||
that they intend to target. At compile time, the build script will go through
|
||||
each subdirectory that is applicable to the compilation target, collect the
|
||||
syntax mappings defined by all TOML files, and embed them into the binary.
|
||||
|
||||
## File syntax
|
||||
|
||||
Each TOML file should contain a single section named `mappings`, with each of
|
||||
its keys being a language identifier (first column of `bat -L`; also referred to
|
||||
as "target").
|
||||
|
||||
The value of each key should be an array of strings, with each item being a glob
|
||||
matcher. We will call each of these items a "rule".
|
||||
|
||||
For example, if `foo-application` uses both TOML and YAML configuration files,
|
||||
we could write something like this:
|
||||
|
||||
```toml
|
||||
# 30-foo-application.toml
|
||||
[mappings]
|
||||
"TOML" = [
|
||||
# rules for TOML syntax go here
|
||||
"/usr/share/foo-application/toml-config/*.conf",
|
||||
"/etc/foo-application/toml-config/*.conf",
|
||||
]
|
||||
"YAML" = [
|
||||
# rules for YAML syntax go here
|
||||
# ...
|
||||
]
|
||||
```
|
||||
|
||||
### Dynamic environment variable replacement
|
||||
|
||||
In addition to the standard glob matcher syntax, rules also support dynamic
|
||||
replacement of environment variables at runtime. This allows us to concisely
|
||||
handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/).
|
||||
|
||||
All environment variables intended to be replaced at runtime must be enclosed in
|
||||
`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. Note that this is the
|
||||
**only** admissible syntax; other variable substitution syntaxes are not
|
||||
supported and will either cause a compile time error, or be treated as plain
|
||||
text.
|
||||
|
||||
For example, if `foo-application` also supports per-user configuration files, we
|
||||
could write something like this:
|
||||
|
||||
```toml
|
||||
# 30-foo-application.toml
|
||||
[mappings]
|
||||
"TOML" = [
|
||||
# rules for TOML syntax go here
|
||||
"/usr/share/foo-application/toml-config/*.conf",
|
||||
"/etc/foo-application/toml-config/*.conf",
|
||||
"${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf",
|
||||
"${HOME}/.config/foo-application/toml-config/*.conf",
|
||||
]
|
||||
"YAML" = [
|
||||
# rules for YAML syntax go here
|
||||
# ...
|
||||
]
|
||||
```
|
||||
|
||||
If any environment variable replacement in a rule fails (for example when a
|
||||
variable is unset), or if the glob string after replacements is invalid, the
|
||||
entire rule will be ignored.
|
||||
|
||||
### Explicitly mapping to unknown
|
||||
|
||||
Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps
|
||||
a syntax's matching rules are "too greedy", and it is claiming files that it should
|
||||
not. In this case, there are two special identifiers:
|
||||
`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown`
|
||||
(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum).
|
||||
|
||||
An example of this would be `*.conf` files in general. So we may write something
|
||||
like this:
|
||||
|
||||
```toml
|
||||
# 99-unset-ambiguous-extensions.toml
|
||||
[mappings]
|
||||
"MappingTarget::MapExtensionToUnknown" = [
|
||||
"*.conf",
|
||||
]
|
||||
```
|
||||
|
||||
## Ordering
|
||||
|
||||
At compile time, all TOML files applicable to the target are processed in
|
||||
lexicographical filename order. So `00-foo.toml` takes precedence over
|
||||
`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that
|
||||
**only** the filenames of the TOML files are taken into account; the
|
||||
subdirectories they are placed in have no influence on ordering.
|
||||
|
||||
This behaviour can be occasionally useful for creating high/low priority rules,
|
||||
such as in the aforementioned example of explicitly mapping `*.conf` files to
|
||||
unknown. Generally this should not be much of a concern though, since rules
|
||||
should be written as specifically as possible for each application.
|
||||
|
||||
Rules within each TOML file are processed (and therefore matched) in the order
|
||||
in which they are defined. At runtime, the syntax selection algorithm will
|
||||
short-circuit and return the target of the first matching rule.
|
0
src/syntax_mapping/builtins/bsd-family/.gitkeep
Normal file
0
src/syntax_mapping/builtins/bsd-family/.gitkeep
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"]
|
0
src/syntax_mapping/builtins/common/.gitkeep
Normal file
0
src/syntax_mapping/builtins/common/.gitkeep
Normal file
2
src/syntax_mapping/builtins/common/50-apache.toml
Normal file
2
src/syntax_mapping/builtins/common/50-apache.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Apache Conf" = ["httpd.conf"]
|
2
src/syntax_mapping/builtins/common/50-bat.toml
Normal file
2
src/syntax_mapping/builtins/common/50-bat.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Bourne Again Shell (bash)" = ["**/bat/config"]
|
2
src/syntax_mapping/builtins/common/50-container.toml
Normal file
2
src/syntax_mapping/builtins/common/50-container.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Dockerfile" = ["Containerfile"]
|
6
src/syntax_mapping/builtins/common/50-cpp.toml
Normal file
6
src/syntax_mapping/builtins/common/50-cpp.toml
Normal file
@ -0,0 +1,6 @@
|
||||
[mappings]
|
||||
"C++" = [
|
||||
# probably better than the default Objective C mapping #877
|
||||
"*.h",
|
||||
]
|
||||
"YAML" = [".clang-format"]
|
2
src/syntax_mapping/builtins/common/50-f-sharp.toml
Normal file
2
src/syntax_mapping/builtins/common/50-f-sharp.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"F#" = ["*.fs"]
|
10
src/syntax_mapping/builtins/common/50-git.toml
Normal file
10
src/syntax_mapping/builtins/common/50-git.toml
Normal file
@ -0,0 +1,10 @@
|
||||
# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
|
||||
# See e.g. https://git-scm.com/docs/git-config#FILES
|
||||
|
||||
[mappings]
|
||||
"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"]
|
||||
"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"]
|
||||
"Git Attributes" = [
|
||||
"${XDG_CONFIG_HOME}/git/attributes",
|
||||
"${HOME}/.config/git/attributes",
|
||||
]
|
3
src/syntax_mapping/builtins/common/50-jsonl.toml
Normal file
3
src/syntax_mapping/builtins/common/50-jsonl.toml
Normal file
@ -0,0 +1,3 @@
|
||||
# JSON Lines is a simple variation of JSON #2535
|
||||
[mappings]
|
||||
"JSON" = ["*.jsonl"]
|
2
src/syntax_mapping/builtins/common/50-nginx.toml
Normal file
2
src/syntax_mapping/builtins/common/50-nginx.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"nginx" = ["nginx.conf", "mime.types"]
|
3
src/syntax_mapping/builtins/common/50-nmap.toml
Normal file
3
src/syntax_mapping/builtins/common/50-nmap.toml
Normal file
@ -0,0 +1,3 @@
|
||||
[mappings]
|
||||
# See #2151, https://nmap.org/book/nse-language.html
|
||||
"Lua" = ["*.nse"]
|
@ -0,0 +1,3 @@
|
||||
# 1515
|
||||
[mappings]
|
||||
"JavaScript (Babel)" = ["*.pac"]
|
3
src/syntax_mapping/builtins/common/50-ron.toml
Normal file
3
src/syntax_mapping/builtins/common/50-ron.toml
Normal file
@ -0,0 +1,3 @@
|
||||
# Rusty Object Notation #2427
|
||||
[mappings]
|
||||
"Rust" = ["*.ron"]
|
3
src/syntax_mapping/builtins/common/50-sarif.toml
Normal file
3
src/syntax_mapping/builtins/common/50-sarif.toml
Normal file
@ -0,0 +1,3 @@
|
||||
# SARIF is a format for reporting static analysis results #2695
|
||||
[mappings]
|
||||
"JSON" = ["*.sarif"]
|
2
src/syntax_mapping/builtins/common/50-ssh.toml
Normal file
2
src/syntax_mapping/builtins/common/50-ssh.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"SSH Config" = ["**/.ssh/config"]
|
@ -0,0 +1,5 @@
|
||||
[mappings]
|
||||
"MappingTarget::MapExtensionToUnknown" = [
|
||||
# common extension used for all kinds of formats
|
||||
"*.conf",
|
||||
]
|
@ -0,0 +1,7 @@
|
||||
[mappings]
|
||||
"MappingTarget::MapToUnknown" = [
|
||||
# "NAnt Build File" should only match *.build files, not files named "build"
|
||||
"build",
|
||||
# "bin/rails" scripts in a Ruby project misidentified as HTML (Rails) #1008
|
||||
"rails",
|
||||
]
|
0
src/syntax_mapping/builtins/linux/.gitkeep
Normal file
0
src/syntax_mapping/builtins/linux/.gitkeep
Normal file
7
src/syntax_mapping/builtins/linux/50-os-release.toml
Normal file
7
src/syntax_mapping/builtins/linux/50-os-release.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[mappings]
|
||||
"Bourne Again Shell (bash)" = [
|
||||
"/etc/os-release",
|
||||
"/usr/lib/os-release",
|
||||
"/etc/initrd-release",
|
||||
"/usr/lib/extension-release.d/extension-release.*",
|
||||
]
|
3
src/syntax_mapping/builtins/linux/50-pacman.toml
Normal file
3
src/syntax_mapping/builtins/linux/50-pacman.toml
Normal file
@ -0,0 +1,3 @@
|
||||
[mappings]
|
||||
# pacman hooks
|
||||
"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"]
|
21
src/syntax_mapping/builtins/linux/50-systemd.toml
Normal file
21
src/syntax_mapping/builtins/linux/50-systemd.toml
Normal file
@ -0,0 +1,21 @@
|
||||
[mappings]
|
||||
"INI" = [
|
||||
"**/systemd/**/*.conf",
|
||||
"**/systemd/**/*.example",
|
||||
"*.automount",
|
||||
"*.device",
|
||||
"*.dnssd",
|
||||
"*.link",
|
||||
"*.mount",
|
||||
"*.netdev",
|
||||
"*.network",
|
||||
"*.nspawn",
|
||||
"*.path",
|
||||
"*.service",
|
||||
"*.scope",
|
||||
"*.slice",
|
||||
"*.socket",
|
||||
"*.swap",
|
||||
"*.target",
|
||||
"*.timer",
|
||||
]
|
0
src/syntax_mapping/builtins/macos/.gitkeep
Normal file
0
src/syntax_mapping/builtins/macos/.gitkeep
Normal file
0
src/syntax_mapping/builtins/unix-family/.gitkeep
Normal file
0
src/syntax_mapping/builtins/unix-family/.gitkeep
Normal file
2
src/syntax_mapping/builtins/unix-family/50-apache.toml
Normal file
2
src/syntax_mapping/builtins/unix-family/50-apache.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"]
|
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"YAML" = ["fish_history"]
|
@ -0,0 +1,3 @@
|
||||
# KornShell is backward-compatible with the Bourne shell #2633
|
||||
[mappings]
|
||||
"Bourne Again Shell (bash)" = ["*.ksh"]
|
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"Email" = ["/var/spool/mail/*", "/var/mail/*"]
|
2
src/syntax_mapping/builtins/unix-family/50-nginx.toml
Normal file
2
src/syntax_mapping/builtins/unix-family/50-nginx.toml
Normal file
@ -0,0 +1,2 @@
|
||||
[mappings]
|
||||
"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"]
|
5
src/syntax_mapping/builtins/unix-family/50-shell.toml
Normal file
5
src/syntax_mapping/builtins/unix-family/50-shell.toml
Normal file
@ -0,0 +1,5 @@
|
||||
[mappings]
|
||||
"Bourne Again Shell (bash)" = [
|
||||
# used by lots of shells
|
||||
"/etc/profile",
|
||||
]
|
0
src/syntax_mapping/builtins/windows/.gitkeep
Normal file
0
src/syntax_mapping/builtins/windows/.gitkeep
Normal file
53
tests/benchmarks/run-benchmarks.sh
vendored
53
tests/benchmarks/run-benchmarks.sh
vendored
@ -9,6 +9,13 @@ if ! command -v hyperfine > /dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check that jq is installed.
|
||||
if ! command -v jq > /dev/null 2>&1; then
|
||||
echo "'jq' does not seem to be installed."
|
||||
echo "You can get it here: https://jqlang.github.io/jq/download/"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check that python3 is installed.
|
||||
if ! command -v python3 > /dev/null 2>&1; then
|
||||
echo "'python3' does not seem to be installed."
|
||||
@ -95,10 +102,20 @@ hyperfine \
|
||||
cat "$RESULT_DIR/startup-time.md" >> "$REPORT"
|
||||
|
||||
|
||||
heading "Startup time without syntax highlighting"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config startup-time-src/small-CpuInfo-file.cpuinfo" \
|
||||
--command-name "bat … small-CpuInfo-file.cpuinfo" \
|
||||
--warmup "$WARMUP_COUNT" \
|
||||
--runs "$RUN_COUNT" \
|
||||
--export-markdown "$RESULT_DIR/startup-time-without-syntax-highlighting.md" \
|
||||
--export-json "$RESULT_DIR/startup-time-without-syntax-highlighting.json"
|
||||
cat "$RESULT_DIR/startup-time-without-syntax-highlighting.md" >> "$REPORT"
|
||||
|
||||
heading "Startup time with syntax highlighting"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/small-CpuInfo-file.cpuinfo" \
|
||||
--command-name "bat … small-CpuInfo-file.cpuinfo" \
|
||||
--command-name "bat … --color=always small-CpuInfo-file.cpuinfo" \
|
||||
--warmup "$WARMUP_COUNT" \
|
||||
--runs "$RUN_COUNT" \
|
||||
--export-markdown "$RESULT_DIR/startup-time-with-syntax-highlighting.md" \
|
||||
@ -117,6 +134,40 @@ hyperfine \
|
||||
cat "$RESULT_DIR/startup-time-with-syntax-with-dependencies.md" >> "$REPORT"
|
||||
|
||||
|
||||
heading "Startup time with indeterminant syntax"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/mystery-file" \
|
||||
--shell none \
|
||||
--command-name 'bat … mystery-file' \
|
||||
--warmup "$WARMUP_COUNT" \
|
||||
--runs "$RUN_COUNT" \
|
||||
--export-markdown "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" \
|
||||
--export-json "$RESULT_DIR/startup-time-with-indeterminant-syntax.json"
|
||||
cat "$RESULT_DIR/startup-time-with-indeterminant-syntax.md" >> "$REPORT"
|
||||
|
||||
heading "Startup time with manually set syntax"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config --color=always --language=Dockerfile startup-time-src/mystery-file" \
|
||||
--shell none \
|
||||
--command-name 'bat … --language=Dockerfile mystery-file' \
|
||||
--warmup "$WARMUP_COUNT" \
|
||||
--runs "$RUN_COUNT" \
|
||||
--export-markdown "$RESULT_DIR/startup-time-with-manually-set-syntax.md" \
|
||||
--export-json "$RESULT_DIR/startup-time-with-manually-set-syntax.json"
|
||||
cat "$RESULT_DIR/startup-time-with-manually-set-syntax.md" >> "$REPORT"
|
||||
|
||||
heading "Startup time with mapped syntax"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config --color=always startup-time-src/Containerfile" \
|
||||
--shell none \
|
||||
--command-name 'bat … Containerfile' \
|
||||
--warmup "$WARMUP_COUNT" \
|
||||
--runs "$RUN_COUNT" \
|
||||
--export-markdown "$RESULT_DIR/startup-time-with-mapped-syntax.md" \
|
||||
--export-json "$RESULT_DIR/startup-time-with-mapped-syntax.json"
|
||||
cat "$RESULT_DIR/startup-time-with-mapped-syntax.md" >> "$REPORT"
|
||||
|
||||
|
||||
heading "Plain-text speed"
|
||||
hyperfine \
|
||||
"$(printf "%q" "$BAT") --no-config --language=txt --style=plain highlighting-speed-src/numpy_test_multiarray.py" \
|
||||
|
3
tests/benchmarks/startup-time-src/Containerfile
vendored
Normal file
3
tests/benchmarks/startup-time-src/Containerfile
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
FROM docker.io/alpine:latest
|
||||
COPY foo /root/bar
|
||||
RUN sleep 60
|
3
tests/benchmarks/startup-time-src/mystery-file
vendored
Normal file
3
tests/benchmarks/startup-time-src/mystery-file
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
FROM docker.io/alpine:latest
|
||||
COPY foo /root/bar
|
||||
RUN sleep 60
|
Loading…
Reference in New Issue
Block a user