mirror of
https://github.com/heyman/heynote.git
synced 2024-11-24 17:03:19 +01:00
6f53b61bb0
Should (hopefully) reduce false positives.
88 lines
2.5 KiB
JavaScript
88 lines
2.5 KiB
JavaScript
importScripts("guesslang.min.js")
|
|
|
|
// Language identifiers this worker is willing to report. Guesses whose
// languageId is not in this list are ignored by the message handler.
// Declared with `const` so the name is a real binding rather than an implicit
// global (an undeclared assignment throws in strict mode / module code).
const GUESSLANG_LANGUAGES = [
    "json",
    "py",
    "js",
    "html",
    "sql",
    "java",
    "cpp",
    "php",
    "css",
    "xml",
    "rs",
    "md",
]
|
|
|
|
// Single model instance created once at worker start-up and reused by the
// message handler for every detection request.
// NOTE(review): GuessLang is presumably attached to the worker global scope by
// the importScripts("guesslang.min.js") call above — confirm.
const guessLang = new self.GuessLang()
|
|
|
|
/**
 * Worker message handler: guesses the language of `event.data.content` and
 * posts the guess back to the sender.
 *
 * Reads `event.data.content` (the text to classify) and `event.data.idx`;
 * both are echoed back unchanged in the reply so the receiver can match the
 * answer to its request.
 *
 * Reply shape: `{ guesslang: { language, confidence }, content, idx }`.
 * No reply is posted when no supported language clears its confidence
 * threshold, or when the model fails.
 */
onmessage = (event) => {
    // The top-ranked guess is accepted at a lower confidence than any
    // lower-ranked guess.
    const TOP_GUESS_MIN_CONFIDENCE = 0.15
    const OTHER_GUESS_MIN_CONFIDENCE = 0.5

    const { content, idx } = event.data

    // Every reply has the same shape; build it in one place.
    const postGuess = (language, confidence) => {
        postMessage({
            guesslang: { language, confidence },
            content,
            idx,
        })
    }

    // Custom heuristic first: text that is wrapped in {} or [] and parses as
    // JSON is reported as JSON with full confidence, without running the model.
    const trimmedContent = content.trim()
    const looksLikeObject = trimmedContent.startsWith("{") && trimmedContent.endsWith("}")
    const looksLikeArray = trimmedContent.startsWith("[") && trimmedContent.endsWith("]")
    if (looksLikeObject || looksLikeArray) {
        let isJson = false
        try {
            isJson = typeof JSON.parse(trimmedContent) === "object"
        } catch (e) {
            // JSON could not be parsed, fall through to the model
        }
        if (isJson) {
            postGuess("json", 1.0)
            return
        }
    }

    guessLang.runModel(content).then((result) => {
        // Pick the first supported guess, in the model's ranking order, that
        // exceeds the threshold for its rank.
        const match = result.find((lang, rank) => {
            const threshold = rank === 0 ? TOP_GUESS_MIN_CONFIDENCE : OTHER_GUESS_MIN_CONFIDENCE
            return GUESSLANG_LANGUAGES.includes(lang.languageId) && lang.confidence > threshold
        })
        if (match) {
            postGuess(match.languageId, match.confidence)
        }
    }).catch((error) => {
        // Without this handler a model failure would surface only as an
        // unhandled promise rejection inside the worker.
        console.error("Language detection failed:", error)
    })
}
|