2023-12-06 02:52:51 +01:00
|
|
|
importScripts("guesslang.min.js")
|
2023-01-12 18:55:55 +01:00
|
|
|
|
2023-12-06 02:52:51 +01:00
|
|
|
// Language identifiers that a GuessLang prediction is allowed to report. The message
// handler in this file checks every prediction's `languageId` against this list and
// ignores predictions that are not listed.
//
// Declared with `const` instead of assigning to an undeclared name: the implicit-global
// form throws a ReferenceError as soon as the script runs in strict mode or as a module.
const GUESSLANG_LANGUAGES = [
    "json",
    "py",
    "js",
    "ts",
    "html",
    "sql",
    "java",
    "cpp",
    "php",
    "css",
    "xml",
    "rs",
    "md",
    "cs",
    "rb",
    "sh",
    "yaml",
    "go",
    "clj",
    "erl",
    "toml",
    "swift",
    "kt",
    "groovy",
]
|
2023-01-03 16:56:07 +01:00
|
|
|
|
2023-12-06 02:52:51 +01:00
|
|
|
// Single GuessLang instance, created once when this worker script is evaluated and reused
// for every message handled by `onmessage` (which calls `guessLang.runModel`).
// NOTE(review): `self.GuessLang` is presumably put on the worker global by the
// guesslang.min.js script loaded via importScripts at the top of this file — confirm.
const guessLang = new self.GuessLang()
|
|
|
|
|
2023-01-03 16:56:07 +01:00
|
|
|
// Minimum confidence (exclusive) for accepting the first entry of the GuessLang result.
// The first entry is treated as the most likely language and therefore gets the lower bar.
const TOP_GUESS_MIN_CONFIDENCE = 0.15

// Minimum confidence (exclusive) for accepting any other entry of the GuessLang result.
const ANY_GUESS_MIN_CONFIDENCE = 0.5

/**
 * Heuristic JSON check that runs before the model is consulted.
 *
 * The text counts as JSON when, after trimming, it is wrapped in `{...}` or `[...]`
 * and `JSON.parse` accepts it.
 *
 * @param {string} text - raw content received from the main thread
 * @returns {boolean} true if the content parses as a JSON object or array
 */
function isJsonDocument(text) {
    const trimmed = text.trim()
    const isObjectLike = trimmed.startsWith("{") && trimmed.endsWith("}")
    const isArrayLike = trimmed.startsWith("[") && trimmed.endsWith("]")
    if (!isObjectLike && !isArrayLike) {
        return false
    }
    try {
        return typeof JSON.parse(trimmed) === "object"
    } catch (e) {
        // Not valid JSON: this is an expected outcome, the caller falls back to the model
        return false
    }
}

/**
 * Selects the prediction to report, if any.
 *
 * Entries are examined in order and the first acceptable one wins. An entry is
 * acceptable when its `languageId` is listed in GUESSLANG_LANGUAGES and its confidence
 * is above the threshold for its position (lower threshold for the first entry).
 *
 * @param {Array<{languageId: string, confidence: number}>} predictions - value resolved by `guessLang.runModel`
 * @returns {{languageId: string, confidence: number}|undefined} the chosen prediction, or undefined when none qualifies
 */
function pickSupportedGuess(predictions) {
    return predictions.find((prediction, rank) => {
        const threshold = rank === 0 ? TOP_GUESS_MIN_CONFIDENCE : ANY_GUESS_MIN_CONFIDENCE
        return GUESSLANG_LANGUAGES.includes(prediction.languageId) && prediction.confidence > threshold
    })
}

/**
 * Posts a detection result back to the thread that owns this worker.
 *
 * The request's `content` and `idx` are echoed back unchanged next to the guess.
 * NOTE(review): presumably the receiver uses them to match the answer to its request — confirm.
 *
 * @param {{content: string, idx: *}} request - the `event.data` of the message being answered
 * @param {string} language - detected language identifier
 * @param {number} confidence - confidence reported for that language
 */
function postGuess(request, language, confidence) {
    postMessage({
        guesslang: {
            language: language,
            confidence: confidence,
        },
        content: request.content,
        idx: request.idx,
    })
}

/**
 * Worker entry point: detects the language of `event.data.content`.
 *
 * Content that passes the JSON heuristic is answered immediately as "json" with
 * confidence 1.0. Otherwise the GuessLang model is run and the first acceptable
 * prediction is posted back. When no prediction qualifies, nothing is posted.
 *
 * The handler itself stays synchronous, so an error raised before the model is
 * started (for example a non-string `content`) is thrown synchronously, as before.
 */
onmessage = (event) => {
    const request = event.data

    // we first check some custom heuristic rules to determine if the language is JSON
    if (isJsonDocument(request.content)) {
        postGuess(request, "json", 1.0)
        return
    }

    guessLang.runModel(request.content).then((result) => {
        console.log("Guessing language done:", result, result[0]?.languageId, result[0]?.confidence)

        const guess = pickSupportedGuess(result)
        if (guess !== undefined) {
            postGuess(request, guess.languageId, guess.confidence)
        }
    }).catch((error) => {
        // Without this handler a failing model run ends up as an unhandled promise rejection
        console.error("Guessing language failed:", error)
    })
}
|