git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
tests : update VAD tests to use Silero V6.2.0 (#3534)
author: Daniel Bevenius <redacted>
Sat, 6 Dec 2025 09:58:58 +0000 (10:58 +0100)
committer: GitHub <redacted>
Sat, 6 Dec 2025 09:58:58 +0000 (10:58 +0100)
* tests : update VAD tests to use Silero V6.2.0

This commit updates the VAD tests to use Silero V6.2.0 instead of
V5.1.2. I was not sure if we needed to keep testing for both versions,
but opted to just update to the latest version for simplicity.

* wasm : use C++17 for emscripten builds

This commit updates the CMakeLists.txt file to explicitly set the C++
standard to C++17 when building with Emscripten.

The motivation for this change is that building with Emscripten
will currently fail locally and on CI with the following error:
```console
[ 75%] Building CXX object examples/CMakeFiles/common.dir/common-ggml.cpp.o
In file included from /home/danbev/work/ai/whisper.cpp/examples/stream.wasm/emscripten.cpp:5:
/home/danbev/work/utils/emsdk/upstream/emscripten/cache/sysroot/include/emscripten/bind.h:11:2: error:
      "embind requires -std=c++17 or newer"
   11 | #error "embind requires -std=c++17 or newer"
      |  ^
In file included from /home/danbev/work/ai/whisper.cpp/examples/whisper.wasm/emscripten.cpp:4:
/home/danbev/work/utils/emsdk/upstream/emscripten/cache/sysroot/include/emscripten/bind.h:11:2: error:
      "embind requires -std=c++17 or newer"
   11 | #error "embind requires -std=c++17 or newer"
      |  ^
```

CMakeLists.txt
models/for-tests-silero-v5.1.2-ggml.bin [deleted file]
models/for-tests-silero-v6.2.0-ggml.bin [new file with mode: 0644]
tests/CMakeLists.txt
tests/test-vad-full.cpp
tests/test-vad.cpp

index 517f30bb6daa0d77589f1d0247aa4598b4870530..b60bb0452426ae46d9bb5dc065fd5d21658f1fce 100644 (file)
@@ -34,6 +34,9 @@ endif()
 if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)
 
+    set(CMAKE_CXX_STANDARD 17)
+    set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
     option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
 
     # TODO: without these, we get the following error:
diff --git a/models/for-tests-silero-v5.1.2-ggml.bin b/models/for-tests-silero-v5.1.2-ggml.bin
deleted file mode 100644 (file)
index c5ddfb5..0000000
Binary files a/models/for-tests-silero-v5.1.2-ggml.bin and /dev/null differ
diff --git a/models/for-tests-silero-v6.2.0-ggml.bin b/models/for-tests-silero-v6.2.0-ggml.bin
new file mode 100644 (file)
index 0000000..e08fad6
Binary files /dev/null and b/models/for-tests-silero-v6.2.0-ggml.bin differ
index 0363193a745c2bb7b51597c9be399b439fbf33fc..09e77ea89c2e5510745f2e5c041922a99fe73287 100644 (file)
@@ -94,7 +94,7 @@ add_executable(${VAD_TEST} ${VAD_TEST}.cpp)
 target_include_directories(${VAD_TEST} PRIVATE ../include ../ggml/include ../examples)
 target_link_libraries(${VAD_TEST} PRIVATE common)
 target_compile_definitions(${VAD_TEST} PRIVATE
-    VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v5.1.2-ggml.bin"
+    VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v6.2.0-ggml.bin"
     SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
 add_test(NAME ${VAD_TEST} COMMAND ${VAD_TEST})
 set_tests_properties(${VAD_TEST} PROPERTIES LABELS "unit")
@@ -106,7 +106,7 @@ target_include_directories(${VAD_TEST} PRIVATE ../include ../ggml/include ../exa
 target_link_libraries(${VAD_TEST} PRIVATE common)
 target_compile_definitions(${VAD_TEST} PRIVATE
     WHISPER_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/ggml-base.en.bin"
-    VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v5.1.2-ggml.bin"
+    VAD_MODEL_PATH="${PROJECT_SOURCE_DIR}/models/for-tests-silero-v6.2.0-ggml.bin"
     SAMPLE_PATH="${PROJECT_SOURCE_DIR}/samples/jfk.wav")
 add_test(NAME ${VAD_TEST} COMMAND ${VAD_TEST})
 set_tests_properties(${VAD_TEST} PROPERTIES LABELS "base;en")
index 3bba36b1668bb1d37219049004af0977e4bd2674..8295b06d957b5630e293ef8eb39cfab6db8f8fe3 100644 (file)
@@ -42,11 +42,13 @@ int main() {
     const int n_segments = whisper_full_n_segments(wctx);
     assert(n_segments == 1);
 
+
+    printf("Segment text:\n%s", whisper_full_get_segment_text(wctx, 0));
     assert(strcmp(" And so my fellow Americans, ask not what your country can do for you,"
                   " ask what you can do for your country.",
            whisper_full_get_segment_text(wctx, 0)) == 0);
-    assert(whisper_full_get_segment_t0(wctx, 0) == 29);
-    assert(whisper_full_get_segment_t1(wctx, 0) == 1049);
+    assert(whisper_full_get_segment_t0(wctx, 0) == 32);
+    assert(whisper_full_get_segment_t1(wctx, 0) == 1051);
 
     whisper_free(wctx);
 
index 535721c86866db1b8f7dd69fe72b600c76449312..0003ae3acaa1501beeac6479b70930f4a98a17a0 100644 (file)
@@ -36,7 +36,7 @@ struct whisper_vad_segments * test_detect_timestamps(
         struct whisper_vad_context * vctx,
         struct whisper_vad_params params) {
     struct whisper_vad_segments * timestamps = whisper_vad_segments_from_probs(vctx, params);
-    assert(whisper_vad_segments_n_segments(timestamps) == 5);
+    assert(whisper_vad_segments_n_segments(timestamps) == 4);
 
     for (int i = 0; i < whisper_vad_segments_n_segments(timestamps); ++i) {
         printf("VAD segment %d: start = %.2f, end = %.2f\n", i,