Optimize the Lezer external tokenizer for block content

This commit is contained in:
Jonatan Heyman 2023-01-14 13:08:24 +01:00
parent ab1a80199e
commit 52f1ac71da

View File

@@ -3,6 +3,9 @@ import { NoteContent } from "./parser.terms.js"

 const EOF = -1;
+const FIRST_TOKEN_CHAR = "\n".charCodeAt(0)
+const SECOND_TOKEN_CHAR = "∞".charCodeAt(0)
+
 export const noteContent = new ExternalTokenizer((input) => {
     let current = input.peek(0);
     let next = input.peek(1);
@@ -12,13 +15,17 @@ export const noteContent = new ExternalTokenizer((input) => {
     }
     while (true) {
-        let potentialLang = "";
-        for (let i=0; i<18; i++) {
-            potentialLang += String.fromCharCode(input.peek(i));
-        }
-        if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
-            input.acceptToken(NoteContent);
-            return;
-        }
+        // unless the first two characters are a newline and a "∞" character, we don't have a note content token
+        // so we don't need to check for the rest of the token
+        if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) {
+            let potentialLang = "";
+            for (let i=0; i<18; i++) {
+                potentialLang += String.fromCharCode(input.peek(i));
+            }
+            if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
+                input.acceptToken(NoteContent);
+                return;
+            }
+        }
         if (next === EOF) {
             input.acceptToken(NoteContent, 1);