From fd12328293b6141b2117cf32e0e227eb4cc06f95 Mon Sep 17 00:00:00 2001 From: cyqsimon <28627918+cyqsimon@users.noreply.github.com> Date: Tue, 8 Jul 2025 18:32:39 +0800 Subject: [PATCH] Build script: replace string-based codegen with `quote`-based codegen --- Cargo.lock | 28 ++++++++++---- Cargo.toml | 4 ++ build/syntax_mapping.rs | 82 ++++++++++++++++++++--------------------- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f836e3eb..f59bdc0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,9 @@ dependencies = [ "path_abs", "plist", "predicates", + "prettyplease", + "proc-macro2", + "quote", "regex", "semver", "serde", @@ -149,6 +152,7 @@ dependencies = [ "serde_yaml", "serial_test", "shell-words", + "syn", "syntect", "tempfile", "terminal-colorsaurus", @@ -1163,10 +1167,20 @@ dependencies = [ ] [[package]] -name = "proc-macro2" -version = "1.0.92" +name = "prettyplease" +version = "0.2.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1182,9 +1196,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -1440,9 +1454,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.95" +version = "2.0.104" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index bd7bd912..50804767 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,10 +108,14 @@ anyhow = "1.0.97" indexmap = { version = "2.8.0", features = ["serde"] } itertools = "0.14.0" once_cell = "1.20" +prettyplease = "0.2.35" +proc-macro2 = "1.0.95" +quote = "1.0.40" regex = "1.10.6" serde = "1.0" serde_derive = "1.0" serde_with = { version = "3.12.0", default-features = false, features = ["macros"] } +syn = { version = "2.0.104", features = ["full"] } toml = { version = "0.8.19", features = ["preserve_order"] } walkdir = "2.5" diff --git a/build/syntax_mapping.rs b/build/syntax_mapping.rs index 48468b9a..81fb6318 100644 --- a/build/syntax_mapping.rs +++ b/build/syntax_mapping.rs @@ -9,6 +9,8 @@ use anyhow::{anyhow, bail}; use indexmap::IndexMap; use itertools::Itertools; use once_cell::sync::Lazy; +use proc_macro2::TokenStream; +use quote::{quote, ToTokens, TokenStreamExt}; use regex::Regex; use serde_derive::Deserialize; use serde_with::DeserializeFromStr; @@ -34,13 +36,14 @@ impl FromStr for MappingTarget { } } } -impl MappingTarget { - fn codegen(&self) -> String { - match self { - Self::MapTo(syntax) => format!(r###"MappingTarget::MapTo(r#"{syntax}"#)"###), - Self::MapToUnknown => "MappingTarget::MapToUnknown".into(), - Self::MapExtensionToUnknown => "MappingTarget::MapExtensionToUnknown".into(), - } +impl ToTokens for MappingTarget { + fn to_tokens(&self, tokens: &mut TokenStream) { + let t = match self { + Self::MapTo(syntax) => quote! { MappingTarget::MapTo(#syntax) }, + Self::MapToUnknown => quote! { MappingTarget::MapToUnknown }, + Self::MapExtensionToUnknown => quote! 
{ MappingTarget::MapExtensionToUnknown }, + }; + tokens.append_all(t); } } @@ -116,22 +119,17 @@ impl FromStr for Matcher { Ok(Self(non_empty_segments)) } } -impl Matcher { - fn codegen(&self) -> String { - match self.0.len() { - 0 => unreachable!("0-length matcher should never be created"), - // if-let guard would be ideal here - // see: https://github.com/rust-lang/rust/issues/51114 - 1 if self.0[0].is_text() => { - let s = self.0[0].text().unwrap(); - format!(r###"Lazy::new(|| Some(build_matcher_fixed(r#"{s}"#)))"###) +impl ToTokens for Matcher { + fn to_tokens(&self, tokens: &mut TokenStream) { + let t = match self.0.as_slice() { + [] => unreachable!("0-length matcher should never be created"), + [MatcherSegment::Text(text)] => { + quote! { Lazy::new(|| Some(build_matcher_fixed(#text)))} } // parser logic ensures that this case can only happen when there are dynamic segments - _ => { - let segs = self.0.iter().map(MatcherSegment::codegen).join(", "); - format!(r###"Lazy::new(|| build_matcher_dynamic(&[{segs}]))"###) - } - } + segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ]))}, + }; + tokens.append_all(t); } } @@ -143,6 +141,15 @@ enum MatcherSegment { Text(String), Env(String), } +impl ToTokens for MatcherSegment { + fn to_tokens(&self, tokens: &mut TokenStream) { + let t = match self { + Self::Text(text) => quote! { MatcherSegment::Text(#text)}, + Self::Env(env) => quote! {MatcherSegment::Env(#env)}, + }; + tokens.append_all(t); + } +} #[allow(dead_code)] impl MatcherSegment { fn is_text(&self) -> bool { @@ -163,12 +170,6 @@ impl MatcherSegment { Self::Env(t) => Some(t), } } - fn codegen(&self) -> String { - match self { - Self::Text(s) => format!(r###"MatcherSegment::Text(r#"{s}"#)"###), - Self::Env(s) => format!(r###"MatcherSegment::Env(r#"{s}"#)"###), - } - } } /// A struct that models a single .toml file in /src/syntax_mapping/builtins/. 
@@ -194,22 +195,19 @@ impl MappingDefModel { #[derive(Clone, Debug)] struct MappingList(Vec<(Matcher, MappingTarget)>); -impl MappingList { - fn codegen(&self) -> String { - let array_items: Vec<_> = self +impl ToTokens for MappingList { + fn to_tokens(&self, tokens: &mut TokenStream) { + let len = self.0.len(); + let array_items = self .0 .iter() - .map(|(matcher, target)| { - format!("({m}, {t})", m = matcher.codegen(), t = target.codegen()) - }) - .collect(); - let len = array_items.len(); + .map(|(matcher, target)| quote! { (#matcher, #target) }); - format!( - "/// Generated by build script from /src/syntax_mapping/builtins/.\n\ - pub(crate) static BUILTIN_MAPPINGS: [(Lazy<Option<GlobMatcher>>, MappingTarget); {len}] = [\n{items}\n];", - items = array_items.join(",\n") - ) + let t = quote! { + /// Generated by build script from /src/syntax_mapping/builtins/. + pub(crate) static BUILTIN_MAPPINGS: [(Lazy<Option<GlobMatcher>>, MappingTarget); #len] = [#(#array_items),*]; + }; + tokens.append_all(t); } } @@ -290,11 +288,13 @@ pub fn build_static_mappings() -> anyhow::Result<()> { println!("cargo:rerun-if-changed=src/syntax_mapping/builtins/"); let mappings = read_all_mappings()?; + let rs_src = syn::parse_file(&mappings.to_token_stream().to_string())?; + let rs_src_pretty = prettyplease::unparse(&rs_src); let codegen_path = Path::new(&env::var_os("OUT_DIR").ok_or(anyhow!("OUT_DIR is unset"))?) .join("codegen_static_syntax_mappings.rs"); - fs::write(codegen_path, mappings.codegen())?; + fs::write(codegen_path, rs_src_pretty)?; Ok(()) }