mirror of
https://github.com/sharkdp/bat.git
synced 2024-11-21 23:33:26 +01:00
Isolate variables at compile time
This commit is contained in:
parent
d24501ab5e
commit
9478d2dfe8
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -145,6 +145,7 @@ dependencies = [
|
|||||||
"grep-cli",
|
"grep-cli",
|
||||||
"home",
|
"home",
|
||||||
"indexmap 2.1.0",
|
"indexmap 2.1.0",
|
||||||
|
"itertools",
|
||||||
"nix",
|
"nix",
|
||||||
"nu-ansi-term",
|
"nu-ansi-term",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -101,6 +101,7 @@ nix = { version = "0.26.4", default-features = false, features = ["term"] }
|
|||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
anyhow = "1.0.75"
|
anyhow = "1.0.75"
|
||||||
indexmap = { version = "2.1.0", features = ["serde"] }
|
indexmap = { version = "2.1.0", features = ["serde"] }
|
||||||
|
itertools = "0.11.0"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_with = "3.4.0"
|
serde_with = "3.4.0"
|
||||||
toml = { version = "0.8.6", features = ["preserve_order"] }
|
toml = { version = "0.8.6", features = ["preserve_order"] }
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use std::{convert::Infallible, env, fs, path::Path, str::FromStr};
|
use std::{convert::Infallible, env, fs, path::Path, str::FromStr};
|
||||||
|
|
||||||
use anyhow::anyhow;
|
use anyhow::{anyhow, bail};
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
|
use itertools::Itertools;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde_with::DeserializeFromStr;
|
use serde_with::DeserializeFromStr;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
@ -17,7 +18,6 @@ pub enum MappingTarget {
|
|||||||
}
|
}
|
||||||
impl FromStr for MappingTarget {
|
impl FromStr for MappingTarget {
|
||||||
type Err = Infallible;
|
type Err = Infallible;
|
||||||
|
|
||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
match s {
|
match s {
|
||||||
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
|
"MappingTarget::MapToUnknown" => Ok(Self::MapToUnknown),
|
||||||
@ -36,10 +36,136 @@ impl MappingTarget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, DeserializeFromStr)]
|
||||||
|
/// A single matcher.
|
||||||
|
///
|
||||||
|
/// Corresponds to `syntax_mapping::BuiltinMatcher`.
|
||||||
|
struct Matcher(Vec<MatcherSegment>);
|
||||||
|
/// Parse a matcher.
|
||||||
|
///
|
||||||
|
/// Note that this implementation is rather strict: when it sees a '$', '{', or
|
||||||
|
/// '}' where it does not make sense, it will immediately hard-error.
|
||||||
|
///
|
||||||
|
/// The reason for this strictness is I currently cannot think of a valid reason
|
||||||
|
/// why you would ever need '$', '{', or '}' as plaintext in a glob pattern.
|
||||||
|
/// Therefore any such occurrences are likely human errors.
|
||||||
|
///
|
||||||
|
/// If we later discover some edge cases, it's okay to make it more permissive.
|
||||||
|
impl FromStr for Matcher {
|
||||||
|
type Err = anyhow::Error;
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
use MatcherSegment as Seg;
|
||||||
|
|
||||||
|
if s.is_empty() {
|
||||||
|
bail!("Empty string is not a valid glob matcher");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut segments = Vec::new();
|
||||||
|
let mut buf = String::new();
|
||||||
|
let mut is_in_var = false;
|
||||||
|
|
||||||
|
let mut char_it = s.chars();
|
||||||
|
loop {
|
||||||
|
match char_it.next() {
|
||||||
|
Some('$') => {
|
||||||
|
if is_in_var {
|
||||||
|
bail!(r#"Saw a '$' when already in a variable: "{s}""#);
|
||||||
|
}
|
||||||
|
match char_it.next() {
|
||||||
|
Some('{') => {
|
||||||
|
// push text unless empty
|
||||||
|
if !buf.is_empty() {
|
||||||
|
segments.push(Seg::Text(buf.clone()));
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
// open var
|
||||||
|
is_in_var = true;
|
||||||
|
}
|
||||||
|
Some(_) | None => {
|
||||||
|
bail!(r#"Expected a '{{' after '$': "{s}""#);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some('{') => {
|
||||||
|
bail!(r#"Saw a hanging '{{': "{s}""#);
|
||||||
|
}
|
||||||
|
Some('}') => {
|
||||||
|
if !is_in_var {
|
||||||
|
bail!(r#"Saw a '}}' when not in a variable: "{s}""#);
|
||||||
|
}
|
||||||
|
if buf.is_empty() {
|
||||||
|
// `${}`
|
||||||
|
bail!(r#"Variable name cannot be empty: "{s}""#);
|
||||||
|
}
|
||||||
|
// push variable
|
||||||
|
segments.push(Seg::Env(buf.clone()));
|
||||||
|
buf.clear();
|
||||||
|
// close var
|
||||||
|
is_in_var = false;
|
||||||
|
}
|
||||||
|
Some(' ') if is_in_var => {
|
||||||
|
bail!(r#"' ' Cannot be part of a variable's name: "{s}""#);
|
||||||
|
}
|
||||||
|
Some(c) => {
|
||||||
|
// either plaintext or variable name
|
||||||
|
buf.push(c);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if is_in_var {
|
||||||
|
bail!(r#"Variable unclosed: "{s}""#);
|
||||||
|
}
|
||||||
|
segments.push(Seg::Text(buf.clone()));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self(segments))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Matcher {
|
||||||
|
fn codegen(&self) -> String {
|
||||||
|
match self.0.len() {
|
||||||
|
0 => unreachable!("0-length matcher should never be created"),
|
||||||
|
// if-let guard would be ideal here
|
||||||
|
// see: https://github.com/rust-lang/rust/issues/51114
|
||||||
|
1 if matches!(self.0[0], MatcherSegment::Text(_)) => {
|
||||||
|
let MatcherSegment::Text(ref s) = self.0[0] else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
format!(r###"BuiltinMatcher::Fixed(r#"{s}"#)"###)
|
||||||
|
}
|
||||||
|
// parser logic ensures that this case can only happen when there are dynamic segments
|
||||||
|
_ => {
|
||||||
|
let segments_codegen = self.0.iter().map(MatcherSegment::codegen).join(", ");
|
||||||
|
let closure = format!("|| join_segments(&[{segments_codegen}])");
|
||||||
|
format!("BuiltinMatcher::Dynamic(Lazy::new({closure}))")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One piece of a parsed matcher string.
///
/// Build-script counterpart of `syntax_mapping::MatcherSegment`.
#[derive(Clone, Debug)]
enum MatcherSegment {
    /// Plain text.
    Text(String),
    /// The name of an environment variable, i.e. the `NAME` in `${NAME}`.
    Env(String),
}
|
||||||
|
impl MatcherSegment {
|
||||||
|
fn codegen(&self) -> String {
|
||||||
|
match self {
|
||||||
|
Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###),
|
||||||
|
Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
|
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
|
||||||
#[derive(Clone, Debug, Deserialize)]
|
#[derive(Clone, Debug, Deserialize)]
|
||||||
struct MappingDefModel {
|
struct MappingDefModel {
|
||||||
mappings: IndexMap<MappingTarget, Vec<String>>,
|
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
|
||||||
}
|
}
|
||||||
impl MappingDefModel {
|
impl MappingDefModel {
|
||||||
fn into_mapping_list(self) -> MappingList {
|
fn into_mapping_list(self) -> MappingList {
|
||||||
@ -58,18 +184,20 @@ impl MappingDefModel {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct MappingList(Vec<(String, MappingTarget)>);
|
struct MappingList(Vec<(Matcher, MappingTarget)>);
|
||||||
impl MappingList {
|
impl MappingList {
|
||||||
fn codegen(&self) -> String {
|
fn codegen(&self) -> String {
|
||||||
let array_items: Vec<_> = self
|
let array_items: Vec<_> = self
|
||||||
.0
|
.0
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(matcher, target)| format!(r###"(r#"{matcher}"#, {t})"###, t = target.codegen()))
|
.map(|(matcher, target)| {
|
||||||
|
format!("({m}, {t})", m = matcher.codegen(), t = target.codegen())
|
||||||
|
})
|
||||||
.collect();
|
.collect();
|
||||||
let len = array_items.len();
|
let len = array_items.len();
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
"static STATIC_RULES: [(&str, MappingTarget); {len}] = [\n{items}\n];",
|
"static STATIC_RULES: [(BuiltinMatcher, MappingTarget); {len}] = [\n{items}\n];",
|
||||||
items = array_items.join(",\n")
|
items = array_items.join(",\n")
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
use std::path::Path;
|
use std::{env, path::Path};
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use ignored_suffixes::IgnoredSuffixes;
|
use ignored_suffixes::IgnoredSuffixes;
|
||||||
|
|
||||||
use globset::{Candidate, GlobBuilder, GlobMatcher};
|
use globset::{Candidate, GlobBuilder, GlobMatcher};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
pub mod ignored_suffixes;
|
pub mod ignored_suffixes;
|
||||||
|
|
||||||
@ -14,6 +15,60 @@ include!(concat!(
|
|||||||
"/codegen_static_syntax_mappings.rs"
|
"/codegen_static_syntax_mappings.rs"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
/// A glob matcher generated from analysing the matcher string at compile time.
|
||||||
|
///
|
||||||
|
/// This is so that the string searches are moved from run time to compile time,
|
||||||
|
/// thus improving startup performance.
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum BuiltinMatcher {
|
||||||
|
/// A plaintext matcher.
|
||||||
|
Fixed(&'static str),
|
||||||
|
/// A matcher that needs dynamic environment variable replacement.
|
||||||
|
///
|
||||||
|
/// Evaluates to `None` when any environment variable replacement fails.
|
||||||
|
Dynamic(Lazy<Option<String>>),
|
||||||
|
}
|
||||||
|
impl BuiltinMatcher {
|
||||||
|
/// Finalise into a glob matcher.
|
||||||
|
///
|
||||||
|
/// Returns `None` if any environment variable replacement fails (only
|
||||||
|
/// possible for dynamic matchers).
|
||||||
|
fn to_glob_matcher(&self) -> Option<GlobMatcher> {
|
||||||
|
let glob_str = match self {
|
||||||
|
Self::Fixed(s) => *s,
|
||||||
|
Self::Dynamic(s) => s.as_ref()?.as_str(),
|
||||||
|
};
|
||||||
|
Some(make_glob_matcher(glob_str).expect("A builtin glob matcher failed to compile"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Join a list of matcher segments, replacing all environment variables.
|
||||||
|
/// Returns `None` if any replacement fails.
|
||||||
|
///
|
||||||
|
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
|
||||||
|
fn join_segments(segs: &[MatcherSegment]) -> Option<String> {
|
||||||
|
let mut buf = String::new();
|
||||||
|
for seg in segs {
|
||||||
|
match seg {
|
||||||
|
MatcherSegment::Text(s) => buf.push_str(s),
|
||||||
|
MatcherSegment::Env(var) => {
|
||||||
|
let replaced = env::var(var).ok()?;
|
||||||
|
buf.push_str(&replaced);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One piece of a dynamic builtin matcher.
///
/// Used internally by `BuiltinMatcher::Dynamic`'s lazy evaluation closure.
#[derive(Debug, Clone)]
enum MatcherSegment {
    /// Plain text, copied into the glob string as-is.
    Text(&'static str),
    /// The name of an environment variable whose value is inserted into the
    /// glob string.
    Env(&'static str),
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
pub enum MappingTarget<'a> {
|
pub enum MappingTarget<'a> {
|
||||||
@ -34,6 +89,15 @@ pub enum MappingTarget<'a> {
|
|||||||
MapExtensionToUnknown,
|
MapExtensionToUnknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
|
||||||
|
let matcher = GlobBuilder::new(from)
|
||||||
|
.case_insensitive(true)
|
||||||
|
.literal_separator(true)
|
||||||
|
.build()?
|
||||||
|
.compile_matcher();
|
||||||
|
Ok(matcher)
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct SyntaxMapping<'a> {
|
pub struct SyntaxMapping<'a> {
|
||||||
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
|
mappings: Vec<(GlobMatcher, MappingTarget<'a>)>,
|
||||||
@ -217,11 +281,8 @@ impl<'a> SyntaxMapping<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
||||||
let glob = GlobBuilder::new(from)
|
let matcher = make_glob_matcher(from)?;
|
||||||
.case_insensitive(true)
|
self.mappings.push((matcher, to));
|
||||||
.literal_separator(true)
|
|
||||||
.build()?;
|
|
||||||
self.mappings.push((glob.compile_matcher(), to));
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user