From 0ac58525ecded98f0b02e10662ba05506ae342a6 Mon Sep 17 00:00:00 2001
From: Marcel Hellkamp <marc@gsites.de>
Date: Sat, 10 Feb 2024 15:19:53 +0100
Subject: [PATCH] fix: Unnecessary restrictions on bad word filters

Bad words can now contain almost any character and match domains, accounts or hashtags more precisely. For example: `example.com` is no longer split into `example` and `com`, which would block more than intended. `#example` is no longer reduced to `example`, which would also block the word, not only the hashtag.
---
 src/components/ConfigModal.vue | 5 +++--
 src/config.ts                  | 2 +-
 src/sources.ts                 | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/src/components/ConfigModal.vue b/src/components/ConfigModal.vue
index 112ebce..6eef30d 100644
--- a/src/components/ConfigModal.vue
+++ b/src/components/ConfigModal.vue
@@ -32,9 +32,10 @@ const formTags = computed({
   set: (value) => config.value.tags = arrayUnique([...(value || "").matchAll(tagPattern)].map(m => m[1])),
 });
 
+const badwordPattern = /([^, ]+)/igu
 const formBadWords = computed({
   get: () => config.value.badWords.join(" "),
-  set: (value) => config.value.badWords = arrayUnique([...(value || "").matchAll(tagPattern)].map(m => m[1])),
+  set: (value) => config.value.badWords = arrayUnique([...(value || "").matchAll(badwordPattern)].map(m => m[1])),
 });
 
 const accountPattern = /\b([a-z0-9_]+)(@([a-z0-9.-]+\.[a-z]{2,}))?\b/ig;
@@ -239,7 +240,7 @@ const onSubmit = () => {
                   <label for="edit-server" class="form-label">Filter bad words:</label>
                   <div class="ms-5">
                     <input type="text" class="form-control" id="edit-server" v-model.lazy="formBadWords">
-                    <div class="form-text">Hide posts containing certain words or hashtags. Only exact matches are
+                    <div class="form-text">Hide posts containing certain words, domains or hashtags. Only full word matches are
                       filtered.</div>
                   </div>
                 </div>
diff --git a/src/config.ts b/src/config.ts
index e5b4c83..a07fe46 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -255,7 +255,7 @@ export function sanatizeConfig(config: any): Config {
     result.loadTrends = boolOr(config.loadTrends, fallback.loadTrends)
 
     result.languages = arrayUnique((Array.isArray(config.languages) ? [...config.languages] : [...fallback.languages]).filter(isLanguage));
-    result.badWords = arrayUnique((Array.isArray(config.badWords) ? [...config.badWords] : [...fallback.badWords]).filter(isTag));
+    result.badWords = arrayUnique((Array.isArray(config.badWords) ? [...config.badWords] : [...fallback.badWords]));
     result.hideSensitive = boolOr(config.hideSensitive, fallback.hideSensitive)
     result.hideBoosts = boolOr(config.hideBoosts, fallback.hideBoosts)
     result.hideBots = boolOr(config.hideBots, fallback.hideBots)
diff --git a/src/sources.ts b/src/sources.ts
index c2c9061..abca2ff 100644
--- a/src/sources.ts
+++ b/src/sources.ts
@@ -199,12 +199,12 @@ const filterStatus = (cfg: Config, status: MastodonStatus) => {
     if (cfg.hideReplies && status.in_reply_to_id) return false;
     if (cfg.hideBots && status.account?.bot) return false;
     if (cfg.badWords.length) {
-        const pattern = new RegExp(`\\b(${cfg.badWords.map(regexEscape).join("|")})\\b`, 'i');
+        const pattern = new RegExp(`(?:\\b|^)(${cfg.badWords.map(regexEscape).join("|")})(?:\\b|$)`, 'i');
         if (status.account?.display_name?.match(pattern)
             || status.account?.acct?.match(pattern)
             || status.content.match(pattern)
             || status.spoiler_text?.match(pattern)
-            || status.tags?.some(tag => tag.name?.match(pattern))
+            || status.tags?.some(tag => `#${tag.name}`.match(pattern))
             || status.media_attachments?.some(media => media.description?.match(pattern)))
             return false;
     }