2024-02-21 07:56:37 +01:00
|
|
|
use std::{
|
|
|
|
path::Path,
|
|
|
|
sync::{
|
|
|
|
atomic::{AtomicBool, Ordering},
|
|
|
|
Arc,
|
|
|
|
},
|
|
|
|
thread,
|
|
|
|
};
|
2023-11-04 18:29:21 +01:00
|
|
|
|
|
|
|
use globset::{Candidate, GlobBuilder, GlobMatcher};
|
2024-02-21 07:56:37 +01:00
|
|
|
use once_cell::sync::Lazy;
|
2020-03-22 09:55:13 +01:00
|
|
|
|
2020-04-22 21:45:47 +02:00
|
|
|
use crate::error::Result;
|
2023-11-04 18:41:39 +01:00
|
|
|
use builtin::BUILTIN_MAPPINGS;
|
2021-11-19 17:05:23 +01:00
|
|
|
use ignored_suffixes::IgnoredSuffixes;
|
2020-03-22 09:55:13 +01:00
|
|
|
|
2023-11-04 18:29:21 +01:00
|
|
|
mod builtin;
|
2021-11-19 17:05:23 +01:00
|
|
|
pub mod ignored_suffixes;
|
|
|
|
|
2023-11-04 18:29:21 +01:00
|
|
|
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
|
|
|
|
let matcher = GlobBuilder::new(from)
|
|
|
|
.case_insensitive(true)
|
|
|
|
.literal_separator(true)
|
|
|
|
.build()?
|
|
|
|
.compile_matcher();
|
|
|
|
Ok(matcher)
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
|
|
|
|
2022-09-04 00:02:08 +02:00
|
|
|
/// What a matching glob rule resolves to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum MappingTarget<'a> {
    /// The path is mapped to the syntax with the given name.
    MapTo(&'a str),

    /// The path (typically a file name without an extension) is mapped to an
    /// unknown syntax. Usually the syntax is then determined later from the
    /// contents of the file's first line.
    MapToUnknown,

    /// A file extension (e.g. `*.conf`) is mapped to an unknown syntax.
    /// Usually the syntax is then determined later from the contents of the
    /// file's first line. If, however, some syntax handles a file name that
    /// merely happens to carry this extension (e.g. `resolv.conf`), that
    /// association has higher precedence and this mapping is ignored.
    MapExtensionToUnknown,
}
|
2018-10-17 22:30:09 +02:00
|
|
|
|
2019-03-08 11:46:49 +01:00
|
|
|
#[derive(Debug, Clone, Default)]
|
2020-03-22 09:55:13 +01:00
|
|
|
pub struct SyntaxMapping<'a> {
|
2023-11-04 18:41:39 +01:00
|
|
|
/// User-defined mappings at run time.
|
2024-01-18 15:41:57 +01:00
|
|
|
///
|
|
|
|
/// Rules in front have precedence.
|
2023-11-04 18:41:39 +01:00
|
|
|
custom_mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
|
2024-02-21 07:56:37 +01:00
|
|
|
|
2021-11-19 17:05:23 +01:00
|
|
|
pub(crate) ignored_suffixes: IgnoredSuffixes<'a>,
|
2024-02-21 07:56:37 +01:00
|
|
|
|
|
|
|
/// A flag to halt glob matcher building, which is offloaded to another thread.
|
|
|
|
///
|
|
|
|
/// We have this so that we can signal the thread to halt early when appropriate.
|
|
|
|
halt_glob_build: Arc<AtomicBool>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Drop for SyntaxMapping<'a> {
|
|
|
|
fn drop(&mut self) {
|
|
|
|
// signal the offload thread to halt early
|
|
|
|
self.halt_glob_build.store(true, Ordering::Relaxed);
|
|
|
|
}
|
2020-03-22 09:55:13 +01:00
|
|
|
}
|
2018-10-17 22:30:09 +02:00
|
|
|
|
2020-03-22 09:55:13 +01:00
|
|
|
impl<'a> SyntaxMapping<'a> {
|
2023-11-04 18:41:39 +01:00
|
|
|
pub fn new() -> SyntaxMapping<'a> {
|
2019-03-08 11:46:49 +01:00
|
|
|
Default::default()
|
2018-10-17 22:30:09 +02:00
|
|
|
}
|
|
|
|
|
2024-02-21 07:56:37 +01:00
|
|
|
/// Start a thread to build the glob matchers for all builtin mappings.
|
|
|
|
///
|
|
|
|
/// The use of this function while not necessary, is useful to speed up startup
|
|
|
|
/// times by starting this work early in parallel.
|
|
|
|
///
|
|
|
|
/// The thread halts if/when `halt_glob_build` is set to true.
|
|
|
|
pub fn start_offload_build_all(&self) {
|
|
|
|
let halt = Arc::clone(&self.halt_glob_build);
|
|
|
|
thread::spawn(move || {
|
|
|
|
for (matcher, _) in BUILTIN_MAPPINGS.iter() {
|
|
|
|
if halt.load(Ordering::Relaxed) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
Lazy::force(matcher);
|
|
|
|
}
|
|
|
|
});
|
2024-02-26 04:31:30 +01:00
|
|
|
// Note that this thread is not joined upon completion because there's
|
|
|
|
// no shared resources that need synchronization to be safely dropped.
|
|
|
|
// If we later add code into this thread that requires interesting
|
|
|
|
// resources (e.g. IO), it would be a good idea to store the handle
|
|
|
|
// and join it on drop.
|
2024-02-21 07:56:37 +01:00
|
|
|
}
|
|
|
|
|
2020-03-22 09:55:13 +01:00
|
|
|
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
2023-11-02 12:53:04 +01:00
|
|
|
let matcher = make_glob_matcher(from)?;
|
2023-11-04 18:41:39 +01:00
|
|
|
self.custom_mappings.push((matcher, to));
|
2020-03-22 09:55:13 +01:00
|
|
|
Ok(())
|
2018-10-17 22:30:09 +02:00
|
|
|
}
|
|
|
|
|
2023-11-04 19:46:32 +01:00
|
|
|
/// Returns an iterator over all mappings. User-defined mappings are listed
|
|
|
|
/// before builtin mappings; mappings in front have higher precedence.
|
|
|
|
///
|
|
|
|
/// Builtin mappings' `GlobMatcher`s are lazily compiled.
|
2023-11-04 18:41:39 +01:00
|
|
|
///
|
2023-11-05 15:46:53 +01:00
|
|
|
/// Note that this function only returns mappings that are valid under the
|
|
|
|
/// current environment. For details see [`Self::builtin_mappings`].
|
2023-11-04 19:46:32 +01:00
|
|
|
pub fn all_mappings(&self) -> impl Iterator<Item = (&GlobMatcher, &MappingTarget<'a>)> {
|
2023-11-04 18:41:39 +01:00
|
|
|
self.custom_mappings()
|
|
|
|
.iter()
|
|
|
|
.map(|(matcher, target)| (matcher, target)) // as_ref
|
2023-11-05 03:12:49 +01:00
|
|
|
.chain(
|
|
|
|
// we need a map with a closure to "do" the lifetime variance
|
|
|
|
// see: https://discord.com/channels/273534239310479360/1120124565591425034/1170543402870382653
|
|
|
|
// also, clippy false positive:
|
|
|
|
// see: https://github.com/rust-lang/rust-clippy/issues/9280
|
|
|
|
#[allow(clippy::map_identity)]
|
|
|
|
self.builtin_mappings().map(|rule| rule),
|
|
|
|
)
|
2023-11-04 18:41:39 +01:00
|
|
|
}
|
|
|
|
|
2023-11-04 19:46:32 +01:00
|
|
|
/// Returns an iterator over all valid builtin mappings. Mappings in front
|
|
|
|
/// have higher precedence.
|
|
|
|
///
|
|
|
|
/// The `GlabMatcher`s are lazily compiled.
|
2023-11-04 18:41:39 +01:00
|
|
|
///
|
2023-11-05 15:46:53 +01:00
|
|
|
/// Mappings that are invalid under the current environment (i.e. rule
|
|
|
|
/// requires environment variable(s) that is unset, or the joined string
|
|
|
|
/// after variable(s) replacement is not a valid glob expression) are
|
|
|
|
/// ignored.
|
2023-11-05 03:12:49 +01:00
|
|
|
pub fn builtin_mappings(
|
|
|
|
&self,
|
|
|
|
) -> impl Iterator<Item = (&'static GlobMatcher, &'static MappingTarget<'static>)> {
|
2023-11-04 18:41:39 +01:00
|
|
|
BUILTIN_MAPPINGS
|
|
|
|
.iter()
|
|
|
|
.filter_map(|(matcher, target)| matcher.as_ref().map(|glob| (glob, target)))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns all user-defined mappings.
|
|
|
|
pub fn custom_mappings(&self) -> &[(GlobMatcher, MappingTarget<'a>)] {
|
|
|
|
&self.custom_mappings
|
2020-05-30 03:53:31 +02:00
|
|
|
}
|
|
|
|
|
2023-11-04 14:42:17 +01:00
|
|
|
pub fn get_syntax_for(&self, path: impl AsRef<Path>) -> Option<MappingTarget<'a>> {
|
2022-08-16 22:42:15 +02:00
|
|
|
// Try matching on the file name as-is.
|
2021-09-10 21:58:46 +02:00
|
|
|
let candidate = Candidate::new(&path);
|
2020-08-06 09:20:33 +02:00
|
|
|
let candidate_filename = path.as_ref().file_name().map(Candidate::new);
|
2023-11-04 19:46:32 +01:00
|
|
|
for (glob, syntax) in self.all_mappings() {
|
2020-03-22 09:55:13 +01:00
|
|
|
if glob.is_match_candidate(&candidate)
|
2020-08-06 09:20:33 +02:00
|
|
|
|| candidate_filename
|
2020-03-22 09:55:13 +01:00
|
|
|
.as_ref()
|
|
|
|
.map_or(false, |filename| glob.is_match_candidate(filename))
|
|
|
|
{
|
|
|
|
return Some(*syntax);
|
|
|
|
}
|
2018-10-17 22:30:09 +02:00
|
|
|
}
|
2022-08-16 22:42:15 +02:00
|
|
|
// Try matching on the file name after removing an ignored suffix.
|
|
|
|
let file_name = path.as_ref().file_name()?;
|
|
|
|
self.ignored_suffixes
|
|
|
|
.try_with_stripped_suffix(file_name, |stripped_file_name| {
|
|
|
|
Ok(self.get_syntax_for(stripped_file_name))
|
|
|
|
})
|
|
|
|
.ok()?
|
2018-10-17 22:30:09 +02:00
|
|
|
}
|
2021-11-19 17:05:23 +01:00
|
|
|
|
|
|
|
pub fn insert_ignored_suffix(&mut self, suffix: &'a str) {
|
|
|
|
self.ignored_suffixes.add_suffix(suffix);
|
|
|
|
}
|
2018-10-17 22:30:09 +02:00
|
|
|
}
|
|
|
|
|
2023-09-01 21:11:41 +02:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A mapping without custom rules still resolves paths through the
    /// builtin rules.
    #[test]
    fn builtin_mappings_work() {
        let map = SyntaxMapping::new();

        assert_eq!(
            map.get_syntax_for("/path/to/build"),
            Some(MappingTarget::MapToUnknown)
        );
    }

    #[test]
    fn all_fixed_builtin_mappings_can_compile() {
        let map = SyntaxMapping::new();

        // collect call evaluates all lazy closures
        // fixed builtin mappings will panic if they fail to compile
        let _mappings = map.builtin_mappings().collect::<Vec<_>>();
    }

    #[test]
    fn builtin_mappings_matcher_only_compile_once() {
        let map = SyntaxMapping::new();

        let two_iterations: Vec<_> = (0..2)
            .map(|_| {
                // addresses of every matcher
                map.builtin_mappings()
                    .map(|(matcher, _)| matcher as *const _ as usize)
                    .collect::<Vec<_>>()
            })
            .collect();

        // if the matchers are only compiled once, their address should remain the same
        assert_eq!(two_iterations[0], two_iterations[1]);
    }

    /// Custom rules match the paths they were inserted for and nothing else.
    #[test]
    fn custom_mappings_work() {
        let mut map = SyntaxMapping::new();
        // Fail right here if a rule cannot be inserted, instead of letting a
        // later assertion fail for a non-obvious reason.
        map.insert("/path/to/Cargo.lock", MappingTarget::MapTo("TOML"))
            .expect("glob should compile");
        map.insert("/path/to/.ignore", MappingTarget::MapTo("Git Ignore"))
            .expect("glob should compile");

        assert_eq!(
            map.get_syntax_for("/path/to/Cargo.lock"),
            Some(MappingTarget::MapTo("TOML"))
        );
        assert_eq!(map.get_syntax_for("/path/to/other.lock"), None);

        assert_eq!(
            map.get_syntax_for("/path/to/.ignore"),
            Some(MappingTarget::MapTo("Git Ignore"))
        );
    }

    /// A custom rule takes precedence over a builtin rule for the same path.
    #[test]
    fn custom_mappings_override_builtin() {
        let mut map = SyntaxMapping::new();

        assert_eq!(
            map.get_syntax_for("/path/to/httpd.conf"),
            Some(MappingTarget::MapTo("Apache Conf"))
        );
        map.insert("httpd.conf", MappingTarget::MapTo("My Syntax"))
            .expect("glob should compile");
        assert_eq!(
            map.get_syntax_for("/path/to/httpd.conf"),
            Some(MappingTarget::MapTo("My Syntax"))
        );
    }

    /// Among custom rules, the one inserted first wins.
    #[test]
    fn custom_mappings_precedence() {
        let mut map = SyntaxMapping::new();

        map.insert("/path/to/foo", MappingTarget::MapTo("alpha"))
            .expect("glob should compile");
        map.insert("/path/to/foo", MappingTarget::MapTo("bravo"))
            .expect("glob should compile");
        assert_eq!(
            map.get_syntax_for("/path/to/foo"),
            Some(MappingTarget::MapTo("alpha"))
        );
    }
}
|