git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
stream.wasm : add language selection support (#3354)
authorDaniel Bevenius <redacted>
Sat, 2 Aug 2025 05:03:04 +0000 (07:03 +0200)
committerGitHub <redacted>
Sat, 2 Aug 2025 05:03:04 +0000 (07:03 +0200)
* stream.wasm : add language selection support

This commit adds support for selecting the language in the stream.wasm
example. This includes adding the model `base`, which supports
multilingual transcription, and allowing the user to select a language
from a dropdown menu in the HTML interface.

The motivation for this is that it allows users to transcribe audio in
various languages.

Refs: https://github.com/ggml-org/whisper.cpp/issues/3347

* squash! stream.wasm : add language selection support

Remove strdup() for language in stream.wasm and update button text for
base (should not be "base.en" but just "base").

examples/stream.wasm/emscripten.cpp
examples/stream.wasm/index-tmpl.html

index 43e71bf23f0ad1292640ef182535051dcd723acd..5dff24ad3bd60c711b6790aa3f85f21215b10dd6 100644 (file)
@@ -31,10 +31,11 @@ void stream_set_status(const std::string & status) {
     g_status = status;
 }
 
-void stream_main(size_t index) {
+void stream_main(size_t index, const std::string & lang) {
     stream_set_status("loading data ...");
 
     struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
+    bool is_multilingual = whisper_is_multilingual(g_contexts[index]);
 
     wparams.n_threads        = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
     wparams.offset_ms        = 0;
@@ -52,7 +53,7 @@ void stream_main(size_t index) {
     // disable temperature fallback
     wparams.temperature_inc  = -1.0f;
 
-    wparams.language         = "en";
+    wparams.language         = is_multilingual ? lang.c_str() : "en";
 
     printf("stream: using %d threads\n", wparams.n_threads);
 
@@ -127,9 +128,8 @@ void stream_main(size_t index) {
         g_contexts[index] = nullptr;
     }
 }
-
 EMSCRIPTEN_BINDINGS(stream) {
-    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
+    emscripten::function("init", emscripten::optional_override([](const std::string & path_model, const std::string & lang) {
         for (size_t i = 0; i < g_contexts.size(); ++i) {
             if (g_contexts[i] == nullptr) {
                 g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
@@ -138,8 +138,8 @@ EMSCRIPTEN_BINDINGS(stream) {
                     if (g_worker.joinable()) {
                         g_worker.join();
                     }
-                    g_worker = std::thread([i]() {
-                        stream_main(i);
+                    g_worker = std::thread([i, lang]() {
+                        stream_main(i, lang);
                     });
 
                     return i + 1;
index c831b2f52b707272f0d5d8e74fea06fa722e0117..309dfe73a3693da4aaa769f75e09469d9ffbdbaa 100644 (file)
@@ -55,6 +55,7 @@
                 Whisper model: <span id="model-whisper-status"></span>
                 <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
                 <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
+                <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
                 <br><br>
                 Quantized models:<br><br>
                 <button id="fetch-whisper-tiny-en-q5_1"   onclick="loadWhisper('tiny-en-q5_1')">tiny.en (Q5_1, 31 MB)</button>
                 -->
             </div>
 
+            <table>
+                <tr>
+                    <td>
+                        Language:
+                        <select id="language" name="language">
+                            <option value="en">English</option>
+                            <option value="ar">Arabic</option>
+                            <option value="hy">Armenian</option>
+                            <option value="az">Azerbaijani</option>
+                            <option value="eu">Basque</option>
+                            <option value="be">Belarusian</option>
+                            <option value="bn">Bengali</option>
+                            <option value="bg">Bulgarian</option>
+                            <option value="ca">Catalan</option>
+                            <option value="zh">Chinese</option>
+                            <option value="hr">Croatian</option>
+                            <option value="cs">Czech</option>
+                            <option value="da">Danish</option>
+                            <option value="nl">Dutch</option>
+                            <option value="en">English</option>
+                            <option value="et">Estonian</option>
+                            <option value="tl">Filipino</option>
+                            <option value="fi">Finnish</option>
+                            <option value="fr">French</option>
+                            <option value="gl">Galician</option>
+                            <option value="ka">Georgian</option>
+                            <option value="de">German</option>
+                            <option value="el">Greek</option>
+                            <option value="gu">Gujarati</option>
+                            <option value="iw">Hebrew</option>
+                            <option value="hi">Hindi</option>
+                            <option value="hu">Hungarian</option>
+                            <option value="is">Icelandic</option>
+                            <option value="id">Indonesian</option>
+                            <option value="ga">Irish</option>
+                            <option value="it">Italian</option>
+                            <option value="ja">Japanese</option>
+                            <option value="kn">Kannada</option>
+                            <option value="ko">Korean</option>
+                            <option value="la">Latin</option>
+                            <option value="lv">Latvian</option>
+                            <option value="lt">Lithuanian</option>
+                            <option value="mk">Macedonian</option>
+                            <option value="ms">Malay</option>
+                            <option value="mt">Maltese</option>
+                            <option value="no">Norwegian</option>
+                            <option value="fa">Persian</option>
+                            <option value="pl">Polish</option>
+                            <option value="pt">Portuguese</option>
+                            <option value="ro">Romanian</option>
+                            <option value="ru">Russian</option>
+                            <option value="sr">Serbian</option>
+                            <option value="sk">Slovak</option>
+                            <option value="sl">Slovenian</option>
+                            <option value="es">Spanish</option>
+                            <option value="sw">Swahili</option>
+                            <option value="sv">Swedish</option>
+                            <option value="ta">Tamil</option>
+                            <option value="te">Telugu</option>
+                            <option value="th">Thai</option>
+                            <option value="tr">Turkish</option>
+                            <option value="uk">Ukrainian</option>
+                            <option value="ur">Urdu</option>
+                            <option value="vi">Vietnamese</option>
+                            <option value="cy">Welsh</option>
+                            <option value="yi">Yiddish</option>
+                        </select>
+                    </td>
+                </tr>
+            </table>
+
             <br>
 
             <div id="input">
                 let urls = {
                     'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
                     'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
+                    'base'   : 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
 
                     'tiny-en-q5_1':  'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin',
                     'base-en-q5_1':  'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin',
                 let sizes = {
                     'tiny.en': 75,
                     'base.en': 142,
+                    'base':     142,
 
                     'tiny-en-q5_1':   31,
                     'base-en-q5_1':   57,
 
                 document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
                 document.getElementById('fetch-whisper-base-en').style.display = 'none';
+                document.getElementById('fetch-whisper-base').style.display = 'none';
 
                 document.getElementById('fetch-whisper-tiny-en-q5_1').style.display = 'none';
                 document.getElementById('fetch-whisper-base-en-q5_1').style.display = 'none';
                     var el;
                     el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
                     el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
+                    el = document.getElementById('fetch-whisper-base'); if (el) el.style.display = 'inline-block';
 
                     el = document.getElementById('fetch-whisper-tiny-en-q5_1'); if (el) el.style.display = 'inline-block';
                     el = document.getElementById('fetch-whisper-base-en-q5_1'); if (el) el.style.display = 'inline-block';
 
             function onStart() {
                 if (!instance) {
-                    instance = Module.init('whisper.bin');
+                    instance = Module.init('whisper.bin', document.getElementById('language').value);
 
                     if (instance) {
                         printTextarea("js: whisper initialized, instance: " + instance);