From 52f1ac71daf4516e02a497101b2c33f4a4c7a3a0 Mon Sep 17 00:00:00 2001 From: Jonatan Heyman Date: Sat, 14 Jan 2023 13:08:24 +0100 Subject: [PATCH] Optimize the Lezer external tokenizer for block content --- src/editor/lang-heynote/external-tokens.js | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/editor/lang-heynote/external-tokens.js b/src/editor/lang-heynote/external-tokens.js index e23c630..d58d4dc 100644 --- a/src/editor/lang-heynote/external-tokens.js +++ b/src/editor/lang-heynote/external-tokens.js @@ -3,6 +3,9 @@ import { NoteContent } from "./parser.terms.js" const EOF = -1; +const FIRST_TOKEN_CHAR = "\n".charCodeAt(0) +const SECOND_TOKEN_CHAR = "∞".charCodeAt(0) + export const noteContent = new ExternalTokenizer((input) => { let current = input.peek(0); let next = input.peek(1); @@ -12,13 +15,17 @@ export const noteContent = new ExternalTokenizer((input) => { } while (true) { - let potentialLang = ""; - for (let i=0; i<18; i++) { - potentialLang += String.fromCharCode(input.peek(i)); - } - if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) { - input.acceptToken(NoteContent); - return; + // unless the first two characters are a newline and a "∞" character, we don't have a note content token + // so we don't need to check for the rest of the token + if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) { + let potentialLang = ""; + for (let i=0; i<18; i++) { + potentialLang += String.fromCharCode(input.peek(i)); + } + if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) { + input.acceptToken(NoteContent); + return; + } } if (next === EOF) { input.acceptToken(NoteContent, 1);