vad : store VAD context in whisper_state (#3156)

author Daniel Bevenius <redacted>

Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)

committer GitHub <redacted>

Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)
author Daniel Bevenius <redacted>
Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)
committer GitHub <redacted>
Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)
diff --git a/src/whisper.cpp b/src/whisper.cpp

index ad4e7a12d71b13a3697391c37c48ed3dceacb976..a7e6ef2f81b5ce7b960c26d99e1069f4b068768f 100644 (file)
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -954,6 +954,8 @@ struct whisper_state {
      // [EXPERIMENTAL] speed-up techniques
      int32_t exp_n_audio_ctx = 0; // 0 - use default
  
+    whisper_vad_context * vad_context = nullptr;
+
      struct vad_segment_info {
          float orig_start;
          float orig_end;
@@ -3853,6 +3855,11 @@ void whisper_free_state(struct whisper_state * state) {
          // [EXPERIMENTAL] Token-level timestamps with DTW
          aheads_masks_free(state->aheads_masks);
  
+        if (state->vad_context != nullptr) {
+            whisper_vad_free(state->vad_context);
+            state->vad_context = nullptr;
+        }
+
          delete state;
      }
  }
@@ -6613,12 +6620,16 @@ static bool whisper_vad(
      WHISPER_LOG_INFO("%s: VAD is enabled, processing speach segments only\n", __func__);
      filtered_n_samples = 0;
  
-    struct whisper_vad_context_params vad_ctx_params = whisper_vad_default_context_params();
-    struct whisper_vad_context * vctx = whisper_vad_init_from_file_with_params(params.vad_model_path, vad_ctx_params);
-    if (vctx == nullptr) {
-        WHISPER_LOG_ERROR("%s: failed to initialize VAD context\n", __func__);
-        return false;
+    if (state->vad_context == nullptr) {
+        struct whisper_vad_context_params vad_ctx_params = whisper_vad_default_context_params();
+        struct whisper_vad_context * vctx = whisper_vad_init_from_file_with_params(params.vad_model_path, vad_ctx_params);
+        if (vctx == nullptr) {
+            WHISPER_LOG_ERROR("%s: failed to initialize VAD context\n", __func__);
+            return false;
+        }
+        state->vad_context = vctx;
      }
+    auto vctx = state->vad_context;
  
      const whisper_vad_params & vad_params = params.vad_params;
author	Daniel Bevenius <redacted>
	Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)
committer	GitHub <redacted>
	Fri, 16 May 2025 05:53:26 +0000 (07:53 +0200)