whisper : add single-timestamp logic (#2629)

author Karthick <redacted>
Tue, 17 Dec 2024 17:07:08 +0000 (22:37 +0530)
committer GitHub <redacted>
Tue, 17 Dec 2024 17:07:08 +0000 (19:07 +0200)
diff --git a/src/whisper.cpp b/src/whisper.cpp

index ddeecc5e098a05ea55f1d3ebfe33135e5ac65f5b..810a8d267aba2520673d18f0e21e06842ba047f7 100644 (file)
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -6060,7 +6060,7 @@ int whisper_full_with_state(
          {
              const auto & best_decoder = state->decoders[best_decoder_id];
  
-            const auto seek_delta = best_decoder.seek_delta;
+            auto seek_delta = best_decoder.seek_delta;
              const auto result_len = best_decoder.sequence.result_len;
  
              const auto & tokens_cur = best_decoder.sequence.tokens;
@@ -6201,6 +6201,15 @@ int whisper_full_with_state(
                  }
              }
  
+            // ref: https://github.com/ggerganov/whisper.cpp/pull/2629
+            const bool single_timestamp_ending = tokens_cur.size() > 1 &&
+                tokens_cur[tokens_cur.size() - 2].id < whisper_token_beg(ctx) &&
+                tokens_cur[tokens_cur.size() - 1].id > whisper_token_beg(ctx);
+            if (single_timestamp_ending) {
+                WHISPER_LOG_DEBUG("single timestamp ending - skip entire chunk\n");
+                seek_delta = std::min(seek_end - seek, WHISPER_CHUNK_SIZE * 100);
+            }
+
              // update audio window
              seek += seek_delta;
author	Karthick <redacted>
	Tue, 17 Dec 2024 17:07:08 +0000 (22:37 +0530)
committer	GitHub <redacted>
	Tue, 17 Dec 2024 17:07:08 +0000 (19:07 +0200)