From 0becabc8d68d9ffa6ddfba5240e38cd7a2642046 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 2 Aug 2025 07:03:04 +0200 Subject: [PATCH] stream.wasm : add language selection support (#3354) * stream.wasm : add language selection support This commit adds support for selecting the language in the stream.wasm example. This includes adding the model `base` which supports multilingual transcription, and allowing the user to select a language from a dropdown menu in the HTML interface. The motivation for this is that it allows users to transcribe audio in various languages. Refs: https://github.com/ggml-org/whisper.cpp/issues/3347 * squash! stream.wasm : add language selection support Remove strdup() for language in stream.wasm and update button text for base (should not be "base.en" but just "base"). --- examples/stream.wasm/emscripten.cpp | 12 ++--- examples/stream.wasm/index-tmpl.html | 78 +++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/examples/stream.wasm/emscripten.cpp b/examples/stream.wasm/emscripten.cpp index 43e71bf2..5dff24ad 100644 --- a/examples/stream.wasm/emscripten.cpp +++ b/examples/stream.wasm/emscripten.cpp @@ -31,10 +31,11 @@ void stream_set_status(const std::string & status) { g_status = status; } -void stream_main(size_t index) { +void stream_main(size_t index, const std::string & lang) { stream_set_status("loading data ..."); struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY); + bool is_multilingual = whisper_is_multilingual(g_contexts[index]); wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency()); wparams.offset_ms = 0; @@ -52,7 +53,7 @@ void stream_main(size_t index) { // disable temperature fallback wparams.temperature_inc = -1.0f; - wparams.language = "en"; + wparams.language = is_multilingual ? 
lang.c_str() : "en"; printf("stream: using %d threads\n", wparams.n_threads); @@ -127,9 +128,8 @@ void stream_main(size_t index) { g_contexts[index] = nullptr; } } - EMSCRIPTEN_BINDINGS(stream) { - emscripten::function("init", emscripten::optional_override([](const std::string & path_model) { + emscripten::function("init", emscripten::optional_override([](const std::string & path_model, const std::string & lang) { for (size_t i = 0; i < g_contexts.size(); ++i) { if (g_contexts[i] == nullptr) { g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params()); @@ -138,8 +138,8 @@ EMSCRIPTEN_BINDINGS(stream) { if (g_worker.joinable()) { g_worker.join(); } - g_worker = std::thread([i]() { - stream_main(i); + g_worker = std::thread([i, lang]() { + stream_main(i, lang); }); return i + 1; diff --git a/examples/stream.wasm/index-tmpl.html b/examples/stream.wasm/index-tmpl.html index c831b2f5..309dfe73 100644 --- a/examples/stream.wasm/index-tmpl.html +++ b/examples/stream.wasm/index-tmpl.html @@ -55,6 +55,7 @@ Whisper model: +

Quantized models:

@@ -66,6 +67,77 @@ --> + + + + +
+ Language: + +
+
@@ -176,6 +248,7 @@ let urls = { 'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin', 'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin', + 'base' : 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin', 'tiny-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin', 'base-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin', @@ -184,6 +257,7 @@ let sizes = { 'tiny.en': 75, 'base.en': 142, + 'base': 142, 'tiny-en-q5_1': 31, 'base-en-q5_1': 57, @@ -197,6 +271,7 @@ document.getElementById('fetch-whisper-tiny-en').style.display = 'none'; document.getElementById('fetch-whisper-base-en').style.display = 'none'; + document.getElementById('fetch-whisper-base').style.display = 'none'; document.getElementById('fetch-whisper-tiny-en-q5_1').style.display = 'none'; document.getElementById('fetch-whisper-base-en-q5_1').style.display = 'none'; @@ -212,6 +287,7 @@ var el; el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block'; el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block'; + el = document.getElementById('fetch-whisper-base'); if (el) el.style.display = 'inline-block'; el = document.getElementById('fetch-whisper-tiny-en-q5_1'); if (el) el.style.display = 'inline-block'; el = document.getElementById('fetch-whisper-base-en-q5_1'); if (el) el.style.display = 'inline-block'; @@ -368,7 +444,7 @@ function onStart() { if (!instance) { - instance = Module.init('whisper.bin'); + instance = Module.init('whisper.bin', document.getElementById('language').value); if (instance) { printTextarea("js: whisper initialized, instance: " + instance);