2023-12-06 02:52:51 +01:00
|
|
|
importScripts("guesslang.min.js")
|
2023-01-12 18:55:55 +01:00
|
|
|
|
2024-06-12 07:47:45 +02:00
|
|
|
// Language ids this worker is willing to report. Model guesses whose
// languageId is not in this list are ignored by the message handler.
// Declared with `const` so the script does not rely on an implicit global
// (which throws a ReferenceError in strict mode / module workers).
const GUESSLANG_LANGUAGES = ["json","py","html","sql","md","java","php","css","xml","cpp","rs","cs","rb","sh","yaml","toml","go","clj","erl","js","ts","swift","kt","groovy","ps1","dart"]
|
2023-01-03 16:56:07 +01:00
|
|
|
|
2023-12-06 02:52:51 +01:00
|
|
|
// Single detector instance created once when the worker starts and reused for
// every incoming message. `GuessLang` is read from the worker global scope;
// presumably it is defined by the guesslang.min.js script imported above.
const guessLang = new self.GuessLang()
|
|
|
|
|
2023-01-03 16:56:07 +01:00
|
|
|
// The model's top candidate is accepted at a lower confidence than any of
// the remaining candidates.
const TOP_RESULT_MIN_CONFIDENCE = 0.15
const OTHER_RESULT_MIN_CONFIDENCE = 0.5

/**
 * Cheap JSON heuristic that runs before the model is consulted: the trimmed
 * text must be wrapped in {} or [] and must parse to an object/array.
 *
 * @param {string} content - raw text to inspect
 * @returns {boolean} true when the content is a JSON object or array
 */
function isJsonDocument(content) {
    const trimmed = content.trim()
    const isWrapped = (
        (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
        (trimmed.startsWith("[") && trimmed.endsWith("]"))
    )
    if (!isWrapped) {
        return false
    }
    try {
        return typeof JSON.parse(trimmed) === "object"
    } catch (e) {
        // Not parseable as JSON; fall through to the model.
        return false
    }
}

/**
 * Posts a detection result back to the owner of the worker, echoing the
 * request's content, idx and path so the receiver can match it to its request.
 *
 * @param {object} request - the `event.data` of the message being answered
 * @param {string} language - detected language id
 * @param {number} confidence - confidence reported for that language
 */
function postGuess(request, language, confidence) {
    postMessage({
        guesslang: {
            language: language,
            confidence: confidence,
        },
        content: request.content,
        idx: request.idx,
        path: request.path,
    })
}

/**
 * Worker entry point. Reads `content`, `idx` and `path` from `event.data`,
 * detects the language of `content` and posts the result. Nothing is posted
 * when no supported language clears its confidence threshold.
 *
 * @param {MessageEvent} event - message carrying the detection request
 */
onmessage = (event) => {
    const request = event.data
    const content = request.content

    // we first check some custom heuristic rules to determine if the language is JSON
    if (isJsonDocument(content)) {
        postGuess(request, "json", 1.0)
        return
    }

    guessLang.runModel(content).then((result) => {
        console.log("Guessing language done:", result, result[0]?.languageId, result[0]?.confidence)

        // First supported candidate that clears its threshold: the most
        // likely language (index 0) only needs 0.15, all others need 0.5.
        const match = result.find((lang, position) => {
            const minConfidence = position === 0
                ? TOP_RESULT_MIN_CONFIDENCE
                : OTHER_RESULT_MIN_CONFIDENCE
            return GUESSLANG_LANGUAGES.includes(lang.languageId) && lang.confidence > minConfidence
        })
        if (match) {
            postGuess(request, match.languageId, match.confidence)
        }
    }).catch((error) => {
        // Without this handler a model failure surfaces only as an unhandled
        // promise rejection inside the worker.
        console.error("Guessing language failed:", error)
    })
}
|