mirror of
https://github.com/heyman/heynote.git
synced 2025-06-20 09:37:50 +02:00
Optimize the Lezer external tokenizer for block content
This commit is contained in:
parent
ab1a80199e
commit
52f1ac71da
@ -3,6 +3,9 @@ import { NoteContent } from "./parser.terms.js"
|
||||
|
||||
const EOF = -1;
|
||||
|
||||
const FIRST_TOKEN_CHAR = "\n".charCodeAt(0)
|
||||
const SECOND_TOKEN_CHAR = "∞".charCodeAt(0)
|
||||
|
||||
export const noteContent = new ExternalTokenizer((input) => {
|
||||
let current = input.peek(0);
|
||||
let next = input.peek(1);
|
||||
@ -12,13 +15,17 @@ export const noteContent = new ExternalTokenizer((input) => {
|
||||
}
|
||||
|
||||
while (true) {
|
||||
let potentialLang = "";
|
||||
for (let i=0; i<18; i++) {
|
||||
potentialLang += String.fromCharCode(input.peek(i));
|
||||
}
|
||||
if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
|
||||
input.acceptToken(NoteContent);
|
||||
return;
|
||||
// Unless the next two characters are a newline followed by "∞", this position cannot be the start of a block delimiter,
|
||||
// so we can skip building the 18-character lookahead string and matching it against the delimiter regex.
|
||||
if (current === FIRST_TOKEN_CHAR && next === SECOND_TOKEN_CHAR) {
|
||||
let potentialLang = "";
|
||||
for (let i=0; i<18; i++) {
|
||||
potentialLang += String.fromCharCode(input.peek(i));
|
||||
}
|
||||
if (potentialLang.match(/^\n∞∞∞(text|javascript|json|python|html|sql|markdown|java|lezer|php)(-a)?\n/g)) {
|
||||
input.acceptToken(NoteContent);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (next === EOF) {
|
||||
input.acceptToken(NoteContent, 1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user