2023-11-02 09:27:26 +01:00
|
|
|
use std::{convert::Infallible, env, fs, path::Path, str::FromStr};
|
|
|
|
|
2023-11-02 12:53:04 +01:00
|
|
|
use anyhow::{anyhow, bail};
|
2023-11-02 09:27:26 +01:00
|
|
|
use indexmap::IndexMap;
|
2023-11-02 12:53:04 +01:00
|
|
|
use itertools::Itertools;
|
2023-11-02 16:16:04 +01:00
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
use regex::Regex;
|
2023-11-02 09:27:26 +01:00
|
|
|
use serde::Deserialize;
|
|
|
|
use serde_with::DeserializeFromStr;
|
|
|
|
use walkdir::WalkDir;
|
|
|
|
|
|
|
|
/// Known mapping targets.
|
|
|
|
///
|
|
|
|
/// Corresponds to `syntax_mapping::MappingTarget`.
|
|
|
|
#[derive(Clone, Debug, Eq, PartialEq, Hash, DeserializeFromStr)]
|
|
|
|
pub enum MappingTarget {
|
|
|
|
MapTo(String),
|
|
|
|
MapToUnknown,
|
|
|
|
MapExtensionToUnknown,
|
|
|
|
}
|
|
|
|
impl FromStr for MappingTarget {
|
|
|
|
type Err = Infallible;
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
|
|
match s {
|
|
|
|
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
|
|
|
|
"MappingTarget::MapExtensionToUnknown" => Ok(Self::MapExtensionToUnknown),
|
|
|
|
syntax => Ok(Self::MapTo(syntax.into())),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
impl MappingTarget {
|
|
|
|
fn codegen(&self) -> String {
|
|
|
|
match self {
|
|
|
|
Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###),
|
|
|
|
Self::MapToUnknown => "MappingTarget::MapToUnknown".into(),
|
|
|
|
Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-04 18:18:08 +01:00
|
|
|
#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)]
|
2023-11-02 12:53:04 +01:00
|
|
|
/// A single matcher.
|
|
|
|
///
|
2023-11-04 17:08:05 +01:00
|
|
|
/// Codegen converts this into a `Lazy<GlobMatcher>`.
|
2023-11-02 12:53:04 +01:00
|
|
|
struct Matcher(Vec<MatcherSegment>);
|
|
|
|
/// Parse a matcher.
|
|
|
|
///
|
2023-11-02 16:16:04 +01:00
|
|
|
/// Note that this implementation is rather strict: it will greedily interpret
|
|
|
|
/// every valid environment variable replacement as such, then immediately
|
|
|
|
/// hard-error if it finds a '$', '{', or '}' anywhere in the remaining text
|
|
|
|
/// segments.
|
2023-11-02 12:53:04 +01:00
|
|
|
///
|
|
|
|
/// The reason for this strictness is I currently cannot think of a valid reason
|
|
|
|
/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern.
|
|
|
|
/// Therefore any such occurrences are likely human errors.
|
|
|
|
///
|
|
|
|
/// If we later discover some edge cases, it's okay to make it more permissive.
|
|
|
|
impl FromStr for Matcher {
|
|
|
|
type Err = anyhow::Error;
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
|
|
use MatcherSegment as Seg;
|
2023-11-02 16:16:04 +01:00
|
|
|
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_\-]+)\}").unwrap());
|
2023-11-02 12:53:04 +01:00
|
|
|
|
2023-11-02 16:16:04 +01:00
|
|
|
let mut segments = vec![];
|
|
|
|
let mut text_start = 0;
|
|
|
|
for capture in VAR_REGEX.captures_iter(s) {
|
|
|
|
let match_0 = capture.get(0).unwrap();
|
|
|
|
|
|
|
|
// text before this var
|
|
|
|
let text_end = match_0.start();
|
|
|
|
segments.push(Seg::Text(s[text_start..text_end].into()));
|
|
|
|
text_start = match_0.end();
|
|
|
|
|
|
|
|
// this var
|
|
|
|
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
2023-11-02 16:16:04 +01:00
|
|
|
// possible trailing text
|
|
|
|
segments.push(Seg::Text(s[text_start..].into()));
|
2023-11-02 12:53:04 +01:00
|
|
|
|
2023-11-02 16:16:04 +01:00
|
|
|
// cleanup empty text segments
|
|
|
|
let non_empty_segments = segments
|
|
|
|
.into_iter()
|
|
|
|
.filter(|seg| match seg {
|
|
|
|
Seg::Text(t) => !t.is_empty(),
|
|
|
|
Seg::Env(_) => true,
|
|
|
|
})
|
|
|
|
.collect_vec();
|
2023-11-02 12:53:04 +01:00
|
|
|
|
2023-11-04 13:23:33 +01:00
|
|
|
if non_empty_segments.is_empty() {
|
|
|
|
bail!(r#"Parsed an empty matcher: "{s}""#);
|
|
|
|
}
|
|
|
|
|
2023-11-02 16:16:04 +01:00
|
|
|
if non_empty_segments.iter().any(|seg| match seg {
|
|
|
|
Seg::Text(t) => t.contains(['$', '{', '}']),
|
|
|
|
Seg::Env(_) => false,
|
|
|
|
}) {
|
|
|
|
bail!(r#"Invalid matcher: "{s}""#);
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
|
|
|
|
2023-11-02 16:16:04 +01:00
|
|
|
Ok(Self(non_empty_segments))
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
impl Matcher {
|
|
|
|
fn codegen(&self) -> String {
|
|
|
|
match self.0.len() {
|
|
|
|
0 => unreachable!("0-length matcher should never be created"),
|
|
|
|
// if-let guard would be ideal here
|
|
|
|
// see: https://github.com/rust-lang/rust/issues/51114
|
|
|
|
1 if matches!(self.0[0], MatcherSegment::Text(_)) => {
|
|
|
|
let MatcherSegment::Text(ref s) = self.0[0] else {
|
|
|
|
unreachable!()
|
|
|
|
};
|
2023-11-04 17:08:05 +01:00
|
|
|
format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###)
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
|
|
|
// parser logic ensures that this case can only happen when there are dynamic segments
|
|
|
|
_ => {
|
2023-11-04 17:08:05 +01:00
|
|
|
let segs = self.0.iter().map(MatcherSegment::codegen).join(", ");
|
|
|
|
format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###)
|
2023-11-02 12:53:04 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// A segment in a matcher.
///
/// Corresponds to `syntax_mapping::MatcherSegment`.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
enum MatcherSegment {
    /// Literal text.
    Text(String),
    /// Name of an environment variable.
    Env(String),
}

impl MatcherSegment {
    /// Render this segment as a Rust expression for the generated source file.
    ///
    /// The payload is embedded in an `r#"…"#` raw string without escaping.
    fn codegen(&self) -> String {
        let (variant, value) = match self {
            Self::Text(value) => ("Text", value),
            Self::Env(value) => ("Env", value),
        };
        format!(r###"MatcherSegment::{variant}(r#"{value}"#)"###)
    }
}
|
|
|
|
|
2023-11-02 09:27:26 +01:00
|
|
|
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
|
|
|
|
#[derive(Clone, Debug, Deserialize)]
|
|
|
|
struct MappingDefModel {
|
2023-11-02 12:53:04 +01:00
|
|
|
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
|
2023-11-02 09:27:26 +01:00
|
|
|
}
|
|
|
|
impl MappingDefModel {
|
2023-11-02 09:33:56 +01:00
|
|
|
fn into_mapping_list(self) -> MappingList {
|
|
|
|
let list = self
|
|
|
|
.mappings
|
2023-11-02 09:27:26 +01:00
|
|
|
.into_iter()
|
2023-11-02 13:35:58 +01:00
|
|
|
.flat_map(|(target, matchers)| {
|
|
|
|
matchers
|
2023-11-02 09:27:26 +01:00
|
|
|
.into_iter()
|
2023-11-02 13:35:58 +01:00
|
|
|
.map(|matcher| (matcher, target.clone()))
|
2023-11-02 09:27:26 +01:00
|
|
|
.collect::<Vec<_>>()
|
|
|
|
})
|
2023-11-02 09:33:56 +01:00
|
|
|
.collect();
|
|
|
|
MappingList(list)
|
2023-11-02 09:27:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug)]
|
2023-11-02 12:53:04 +01:00
|
|
|
struct MappingList(Vec<(Matcher, MappingTarget)>);
|
2023-11-02 09:27:26 +01:00
|
|
|
impl MappingList {
|
|
|
|
fn codegen(&self) -> String {
|
|
|
|
let array_items: Vec<_> = self
|
|
|
|
.0
|
|
|
|
.iter()
|
2023-11-02 12:53:04 +01:00
|
|
|
.map(|(matcher, target)| {
|
|
|
|
format!("({m}, {t})", m = matcher.codegen(), t = target.codegen())
|
|
|
|
})
|
2023-11-02 09:27:26 +01:00
|
|
|
.collect();
|
|
|
|
let len = array_items.len();
|
|
|
|
|
|
|
|
format!(
|
2023-11-04 18:29:21 +01:00
|
|
|
"pub(crate) static BUILTIN_MAPPINGS: [(Lazy<Option<GlobMatcher>>, MappingTarget); {len}] = [\n{items}\n];",
|
2023-11-02 09:27:26 +01:00
|
|
|
items = array_items.join(",\n")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_all_mappings() -> anyhow::Result<MappingList> {
|
|
|
|
let mut all_mappings = vec![];
|
|
|
|
|
|
|
|
for entry in WalkDir::new("src/syntax_mapping/builtins")
|
2023-11-02 09:36:25 +01:00
|
|
|
.sort_by_file_name()
|
2023-11-02 09:27:26 +01:00
|
|
|
.into_iter()
|
|
|
|
.map(|entry| entry.unwrap_or_else(|err| panic!("failed to visit a file: {err}")))
|
|
|
|
.filter(|entry| {
|
|
|
|
let path = entry.path();
|
|
|
|
path.is_file() && path.extension().map(|ext| ext == "toml").unwrap_or(false)
|
|
|
|
})
|
|
|
|
{
|
|
|
|
let toml_string = fs::read_to_string(entry.path())?;
|
|
|
|
let mappings = toml::from_str::<MappingDefModel>(&toml_string)?.into_mapping_list();
|
2023-11-02 09:33:56 +01:00
|
|
|
all_mappings.extend(mappings.0);
|
2023-11-02 09:27:26 +01:00
|
|
|
}
|
|
|
|
|
2023-11-04 18:18:08 +01:00
|
|
|
let duplicates = all_mappings
|
|
|
|
.iter()
|
|
|
|
.duplicates_by(|(matcher, _)| matcher)
|
|
|
|
.collect_vec();
|
|
|
|
if !duplicates.is_empty() {
|
|
|
|
bail!("Rules with duplicate matchers found: {duplicates:?}");
|
|
|
|
}
|
|
|
|
|
2023-11-02 09:27:26 +01:00
|
|
|
Ok(MappingList(all_mappings))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Build the static syntax mappings defined in /src/syntax_mapping/builtins/
|
|
|
|
/// into a .rs source file, which is to be inserted with `include!`.
|
|
|
|
pub fn build_static_mappings() -> anyhow::Result<()> {
|
2023-11-02 16:59:20 +01:00
|
|
|
println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/");
|
|
|
|
|
2023-11-02 09:27:26 +01:00
|
|
|
let mappings = read_all_mappings()?;
|
|
|
|
|
|
|
|
let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?)
|
|
|
|
.join("codegen_static_syntax_mappings.rs");
|
|
|
|
|
|
|
|
fs::write(codegen_path, mappings.codegen())?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|