Isolate variables at compile time

This commit is contained in:
cyqsimon 2023-11-02 19:53:04 +08:00
parent d24501ab5e
commit 9478d2dfe8
No known key found for this signature in database
GPG Key ID: 1D8CE2F297390D65
4 changed files with 203 additions and 12 deletions

1
Cargo.lock generated
View File

@ -145,6 +145,7 @@ dependencies = [
"grep-cli", "grep-cli",
"home", "home",
"indexmap 2.1.0", "indexmap 2.1.0",
"itertools",
"nix", "nix",
"nu-ansi-term", "nu-ansi-term",
"once_cell", "once_cell",

View File

@ -101,6 +101,7 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] }
[build-dependencies] [build-dependencies]
anyhow = "1.0.75" anyhow = "1.0.75"
indexmap = { version = "2.1.0", features = ["serde"] } indexmap = { version = "2.1.0", features = ["serde"] }
itertools = "0.11.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_with = "3.4.0" serde_with = "3.4.0"
toml = { version = "0.8.6", features = ["preserve_order"] } toml = { version = "0.8.6", features = ["preserve_order"] }

View File

@ -1,7 +1,8 @@
use std::{convert::Infallible, env, fs, path::Path, str::FromStr}; use std::{convert::Infallible, env, fs, path::Path, str::FromStr};
use anyhow::anyhow; use anyhow::{anyhow, bail};
use indexmap::IndexMap; use indexmap::IndexMap;
use itertools::Itertools;
use serde::Deserialize; use serde::Deserialize;
use serde_with::DeserializeFromStr; use serde_with::DeserializeFromStr;
use walkdir::WalkDir; use walkdir::WalkDir;
@ -17,7 +18,6 @@ pub enum MappingTarget {
} }
impl FromStr for MappingTarget { impl FromStr for MappingTarget {
type Err = Infallible; type Err = Infallible;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
match s { match s {
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown), "MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
@ -36,10 +36,136 @@ impl MappingTarget {
} }
} }
/// One glob matcher from a builtin mapping definition, stored as the ordered
/// list of segments it was parsed into.
///
/// Corresponds to `syntax_mapping::BuiltinMatcher`.
#[derive(Clone, Debug, DeserializeFromStr)]
struct Matcher(Vec<MatcherSegment>);
/// Parse a matcher.
///
/// The input is split into plain-text segments and `${NAME}` environment
/// variable segments, kept in the order they appear.
///
/// Note that this implementation is rather strict: when it sees a '$', '{', or
/// '}' where it does not make sense, it will immediately hard-error.
///
/// The reason for this strictness is that there is currently no known valid
/// reason to need '$', '{', or '}' as plaintext in a glob pattern. Therefore
/// any such occurrences are likely human errors.
///
/// If we later discover some edge cases, it's okay to make it more permissive.
///
/// # Errors
///
/// Fails when the input is empty, when '$' is not immediately followed by
/// '{', when '{' or '}' appears outside of a `${...}` form, when a variable
/// name is empty or contains a space, or when a variable is left unclosed.
impl FromStr for Matcher {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        use MatcherSegment as Seg;

        if s.is_empty() {
            bail!("Empty string is not a valid glob matcher");
        }

        let mut segments = Vec::new();
        // Accumulates either plaintext or a variable name, depending on `is_in_var`.
        let mut buf = String::new();
        let mut is_in_var = false;

        let mut char_it = s.chars();
        loop {
            match char_it.next() {
                Some('$') => {
                    if is_in_var {
                        bail!(r#"Saw a '$' when already in a variable: "{s}""#);
                    }
                    match char_it.next() {
                        Some('{') => {
                            // push text unless empty
                            if !buf.is_empty() {
                                segments.push(Seg::Text(std::mem::take(&mut buf)));
                            }
                            // open var
                            is_in_var = true;
                        }
                        Some(_) | None => {
                            bail!(r#"Expected a '{{' after '$': "{s}""#);
                        }
                    }
                }
                Some('{') => {
                    bail!(r#"Saw a hanging '{{': "{s}""#);
                }
                Some('}') => {
                    if !is_in_var {
                        bail!(r#"Saw a '}}' when not in a variable: "{s}""#);
                    }
                    if buf.is_empty() {
                        // `${}`
                        bail!(r#"Variable name cannot be empty: "{s}""#);
                    }
                    // push variable
                    segments.push(Seg::Env(std::mem::take(&mut buf)));
                    // close var
                    is_in_var = false;
                }
                Some(' ') if is_in_var => {
                    bail!(r#"' ' Cannot be part of a variable's name: "{s}""#);
                }
                Some(c) => {
                    // either plaintext or variable name
                    buf.push(c);
                }
                None => {
                    if is_in_var {
                        bail!(r#"Variable unclosed: "{s}""#);
                    }
                    // Push trailing text unless empty: a pattern that ends
                    // with `}` has nothing buffered here, and an empty text
                    // segment would only add noise to the generated code.
                    if !buf.is_empty() {
                        segments.push(Seg::Text(std::mem::take(&mut buf)));
                    }
                    break;
                }
            }
        }

        // A non-empty input that reaches this point always produced at least
        // one segment: it either ended with a closed variable or left text in
        // the buffer.
        Ok(Self(segments))
    }
}
impl Matcher {
    /// Render this matcher as the Rust expression that constructs the
    /// corresponding `BuiltinMatcher` in the generated source.
    ///
    /// A matcher made of exactly one text segment becomes
    /// `BuiltinMatcher::Fixed`; every other shape becomes
    /// `BuiltinMatcher::Dynamic` wrapping a lazily evaluated
    /// `join_segments` call over all segments.
    ///
    /// # Panics
    ///
    /// Panics if the matcher holds no segments.
    fn codegen(&self) -> String {
        match self.0.as_slice() {
            [] => unreachable!("0-length matcher should never be created"),
            // a lone text segment needs no run-time work
            [MatcherSegment::Text(text)] => {
                format!(r###"BuiltinMatcher::Fixed(r#"{text}"#)"###)
            }
            // parser logic ensures that this case can only happen when there are dynamic segments
            all_segments => {
                let items = all_segments
                    .iter()
                    .map(MatcherSegment::codegen)
                    .join(", ");
                format!("BuiltinMatcher::Dynamic(Lazy::new(|| join_segments(&[{items}])))")
            }
        }
    }
}
/// One piece of a parsed matcher.
///
/// Corresponds to `syntax_mapping::MatcherSegment`.
#[derive(Debug, Clone)]
enum MatcherSegment {
    /// Literal text.
    Text(String),
    /// The name of an environment variable.
    Env(String),
}

impl MatcherSegment {
    /// Render this segment as the Rust expression that constructs the
    /// matching `MatcherSegment` variant, with the payload emitted as a
    /// `r#"..."#` raw string literal.
    fn codegen(&self) -> String {
        let (variant, payload) = match self {
            Self::Text(text) => ("Text", text),
            Self::Env(name) => ("Env", name),
        };
        format!(r###"MatcherSegment::{variant}(r#"{payload}"#)"###)
    }
}
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/. /// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
#[derive(Clone, Debug, Deserialize)] #[derive(Clone, Debug, Deserialize)]
struct MappingDefModel { struct MappingDefModel {
mappings: IndexMap<MappingTarget, Vec<String>>, mappings: IndexMap<MappingTarget, Vec<Matcher>>,
} }
impl MappingDefModel { impl MappingDefModel {
fn into_mapping_list(self) -> MappingList { fn into_mapping_list(self) -> MappingList {
@ -58,18 +184,20 @@ impl MappingDefModel {
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct MappingList(Vec<(String, MappingTarget)>); struct MappingList(Vec<(Matcher, MappingTarget)>);
impl MappingList { impl MappingList {
fn codegen(&self) -> String { fn codegen(&self) -> String {
let array_items: Vec<_> = self let array_items: Vec<_> = self
.0 .0
.iter() .iter()
.map(|(matcher, target)| format!(r###"(r#"{matcher}"#, {t})"###, t = target.codegen())) .map(|(matcher, target)| {
format!("({m}, {t})", m = matcher.codegen(), t = target.codegen())
})
.collect(); .collect();
let len = array_items.len(); let len = array_items.len();
format!( format!(
"static STATIC_RULES: [(&str, MappingTarget); {len}] = [\n{items}\n];", "static STATIC_RULES: [(BuiltinMatcher, MappingTarget); {len}] = [\n{items}\n];",
items = array_items.join(",\n") items = array_items.join(",\n")
) )
} }

View File

@ -1,9 +1,10 @@
use std::path::Path; use std::{env, path::Path};
use crate::error::Result; use crate::error::Result;
use ignored_suffixes::IgnoredSuffixes; use ignored_suffixes::IgnoredSuffixes;
use globset::{Candidate, GlobBuilder, GlobMatcher}; use globset::{Candidate, GlobBuilder, GlobMatcher};
use once_cell::sync::Lazy;
pub mod ignored_suffixes; pub mod ignored_suffixes;
@ -14,6 +15,60 @@ include!(concat!(
"/codegen_static_syntax_mappings.rs" "/codegen_static_syntax_mappings.rs"
)); ));
/// A builtin glob matcher whose pattern string was analysed at compile time.
///
/// Doing the string searches at compile time rather than at run time is meant
/// to improve startup performance.
#[derive(Debug)]
enum BuiltinMatcher {
    /// A plaintext matcher.
    Fixed(&'static str),
    /// A matcher that needs dynamic environment variable replacement.
    ///
    /// Evaluates to `None` when any environment variable replacement fails.
    Dynamic(Lazy<Option<String>>),
}

impl BuiltinMatcher {
    /// Finalise into a glob matcher.
    ///
    /// Returns `None` if any environment variable replacement fails (only
    /// possible for dynamic matchers).
    ///
    /// # Panics
    ///
    /// Panics if the resulting pattern fails to compile as a glob.
    fn to_glob_matcher(&self) -> Option<GlobMatcher> {
        let pattern: &str = match self {
            Self::Fixed(fixed) => *fixed,
            // Bail out with `None` when the lazily built pattern is unavailable.
            Self::Dynamic(lazy) => lazy.as_deref()?,
        };
        let compiled =
            make_glob_matcher(pattern).expect("A builtin glob matcher failed to compile");
        Some(compiled)
    }
}
/// Concatenate matcher segments into one string, substituting each
/// environment variable segment with that variable's current value.
///
/// Returns `None` as soon as a variable cannot be read.
///
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
fn join_segments(segs: &[MatcherSegment]) -> Option<String> {
    segs.iter().try_fold(String::new(), |mut joined, seg| {
        match seg {
            MatcherSegment::Text(text) => joined.push_str(text),
            MatcherSegment::Env(name) => joined.push_str(&env::var(name).ok()?),
        }
        Some(joined)
    })
}

/// One piece of a dynamic builtin matcher.
///
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
#[derive(Clone, Debug)]
enum MatcherSegment {
    /// Literal text, copied through unchanged.
    Text(&'static str),
    /// Name of an environment variable whose value is substituted in.
    Env(&'static str),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive] #[non_exhaustive]
pub enum MappingTarget<'a> { pub enum MappingTarget<'a> {
@ -34,6 +89,15 @@ pub enum MappingTarget<'a> {
MapExtensionToUnknown, MapExtensionToUnknown,
} }
/// Compile the glob pattern `from` into a matcher that is case-insensitive
/// and built with the `literal_separator` option enabled.
///
/// # Errors
///
/// Returns an error if the glob fails to build.
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
    let mut builder = GlobBuilder::new(from);
    builder.case_insensitive(true).literal_separator(true);
    let glob = builder.build()?;
    Ok(glob.compile_matcher())
}
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct SyntaxMapping<'a> { pub struct SyntaxMapping<'a> {
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>, mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
@ -217,11 +281,8 @@ impl<'a> SyntaxMapping<'a> {
} }
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let glob = GlobBuilder::new(from) let matcher = make_glob_matcher(from)?;
.case_insensitive(true) self.mappings.push((matcher, to));
.literal_separator(true)
.build()?;
self.mappings.push((glob.compile_matcher(), to));
Ok(()) Ok(())
} }