git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
fix: VAD time mapping timestamp drift caused by overlap samples (#3711)
author lohopupa <redacted>
Tue, 17 Mar 2026 06:19:08 +0000 (12:19 +0600)
committer GitHub <redacted>
Tue, 17 Mar 2026 06:19:08 +0000 (07:19 +0100)
* whisper : fix VAD segment overlap boundary handling

 - Use original segment length (pre-overlap) for vad_end in the time
   mapping table, so segment boundaries are preserved accurately

Claude Sonnet 4.6 (Low)

* whisper : remove intermediate VAD time mapping points

Now that segment boundaries are mapped accurately, the intermediate
point interpolation is no longer necessary.

---------

Co-authored-by: Lohopupa <redacted>
src/whisper.cpp

index 796bccfb45d560624b272f4bf4369a6a04e4dc66..86bfafeaad8d96bea78ad1e3cb17b4fcf026bae1 100644 (file)
@@ -6701,12 +6701,13 @@ static bool whisper_vad(
             int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
             int segment_end_samples   = cs_to_samples(vad_segments->data[i].end);
 
-            if (i < (int)vad_segments->data.size() - 1) {
-                segment_end_samples += overlap_samples;
-            }
-
             segment_start_samples = std::min(segment_start_samples, n_samples - 1);
             segment_end_samples = std::min(segment_end_samples, n_samples - 1);
+            int original_segment_length = segment_end_samples - segment_start_samples;
+
+            if (i < (int)vad_segments->data.size() - 1) {
+                segment_end_samples = std::min(segment_end_samples + overlap_samples, n_samples - 1);
+            }
             int segment_length = segment_end_samples - segment_start_samples;
             if (segment_length > 0) {
                 whisper_state::vad_segment_info segment;
@@ -6715,7 +6716,7 @@ static bool whisper_vad(
                 segment.orig_end   = vad_segments->data[i].end;
 
                 segment.vad_start = samples_to_cs(offset);
-                segment.vad_end   = samples_to_cs(offset + segment_length);
+                segment.vad_end   = samples_to_cs(offset + original_segment_length);
 
                 // Add segment boundaries to mapping table
                 vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
@@ -6724,29 +6725,6 @@ static bool whisper_vad(
                 state->vad_mapping_table.push_back(start_mapping);
                 state->vad_mapping_table.push_back(end_mapping);
 
-                // Add intermediate points for longer segments to improve interpolation accuracy
-                const int64_t min_segment_length = 100; // 1 second
-                const int64_t point_interval = 20;     // Add a point every 200ms
-
-                if (segment.vad_end - segment.vad_start > min_segment_length) {
-                    int64_t segment_duration = segment.vad_end - segment.vad_start;
-                    int num_points = (int)(segment_duration / point_interval) - 1;
-
-                    for (int j = 1; j <= num_points; j++) {
-                        int64_t vad_time = segment.vad_start + j * point_interval;
-
-                        if (vad_time >= segment.vad_end) continue;
-
-                        int64_t vad_elapsed = vad_time - segment.vad_start;
-                        int64_t vad_total = segment.vad_end - segment.vad_start;
-                        int64_t orig_total = segment.orig_end - segment.orig_start;
-                        int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
-
-                        vad_time_mapping intermediate_mapping = {vad_time, orig_time};
-                        state->vad_mapping_table.push_back(intermediate_mapping);
-                    }
-                }
-
                 WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
                     __func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
                 ctx->state->vad_segments.push_back(segment);