From 52f1ac71daf4516e02a497101b2c33f4a4c7a3a0 Mon Sep 17 00:00:00 2001
From: Jonatan Heyman <jonatan@heyman.info>
Date: Sat, 14 Jan 2023 13:08:24 +0100
Subject: [PATCH] Optimize the Lezer external tokenizer for block content

---
 src/editor/lang-heynote/external-tokens.js | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/editor/lang-heynote/external-tokens.js b/src/editor/lang-heynote/external-tokens.js
index e23c630..d58d4dc 100644
--- a/src/editor/lang-heynote/external-tokens.js
+++ b/src/editor/lang-heynote/external-tokens.js
@@ -3,6 +3,9 @@ import { NoteContent } from "./parser.terms.js"
 
 const EOF = -1;
 
+const FIRST_TOKEN_CHAR = "\n".charCodeAt(0) // code unit of the newline that opens a block delimiter
+const SECOND_TOKEN_CHAR = "∞".charCodeAt(0) // code unit of the "∞" that must directly follow that newline
+
 export const noteContent = new ExternalTokenizer((input) => {
     let current = input.peek(0);
     let next = input.peek(1);
@@ -12,13 +15,17 @@ export const noteContent = new ExternalTokenizer((input) => {
     }
 
     while (true) {
-        let potentialLang = "";
-        for (let i=0; i<18; i++) {
-            potentialLang += String.fromCharCode(input.peek(i));
-        }
-        if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
-            input.acceptToken(NoteContent);
-            return;
+        // A block delimiter always starts with a newline followed by "∞", so only build and
+        // regex-test the 18-character lookahead string when the next two characters match.
+        if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) {
+            let potentialLang = "";
+            for (let i=0; i<18; i++) {
+                potentialLang += String.fromCharCode(input.peek(i));
+            }
+            if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
+                input.acceptToken(NoteContent);
+                return;
+            }
         }
         if (next === EOF) {
             input.acceptToken(NoteContent, 1);