int segment_start_samples = cs_to_samples(vad_segments->data[i].start);
int segment_end_samples = cs_to_samples(vad_segments->data[i].end);
- if (i < (int)vad_segments->data.size() - 1) {
- segment_end_samples += overlap_samples;
- }
-
segment_start_samples = std::min(segment_start_samples, n_samples - 1);
segment_end_samples = std::min(segment_end_samples, n_samples - 1);
+ int original_segment_length = segment_end_samples - segment_start_samples;
+
+ if (i < (int)vad_segments->data.size() - 1) {
+ segment_end_samples = std::min(segment_end_samples + overlap_samples, n_samples - 1);
+ }
int segment_length = segment_end_samples - segment_start_samples;
if (segment_length > 0) {
whisper_state::vad_segment_info segment;
segment.orig_end = vad_segments->data[i].end;
segment.vad_start = samples_to_cs(offset);
- segment.vad_end = samples_to_cs(offset + segment_length);
+ segment.vad_end = samples_to_cs(offset + original_segment_length);
// Add segment boundaries to mapping table
vad_time_mapping start_mapping = {segment.vad_start, segment.orig_start};
state->vad_mapping_table.push_back(start_mapping);
state->vad_mapping_table.push_back(end_mapping);
- // Add intermediate points for longer segments to improve interpolation accuracy
- const int64_t min_segment_length = 100; // 1 second
- const int64_t point_interval = 20; // Add a point every 200ms
-
- if (segment.vad_end - segment.vad_start > min_segment_length) {
- int64_t segment_duration = segment.vad_end - segment.vad_start;
- int num_points = (int)(segment_duration / point_interval) - 1;
-
- for (int j = 1; j <= num_points; j++) {
- int64_t vad_time = segment.vad_start + j * point_interval;
-
- if (vad_time >= segment.vad_end) continue;
-
- int64_t vad_elapsed = vad_time - segment.vad_start;
- int64_t vad_total = segment.vad_end - segment.vad_start;
- int64_t orig_total = segment.orig_end - segment.orig_start;
- int64_t orig_time = segment.orig_start + (vad_elapsed * orig_total) / vad_total;
-
- vad_time_mapping intermediate_mapping = {vad_time, orig_time};
- state->vad_mapping_table.push_back(intermediate_mapping);
- }
- }
-
WHISPER_LOG_INFO("%s: vad_segment_info: orig_start: %.2f, orig_end: %.2f, vad_start: %.2f, vad_end: %.2f\n",
__func__, segment.orig_start/100.0, segment.orig_end/100.0, segment.vad_start/100.0, segment.vad_end/100.0);
ctx->state->vad_segments.push_back(segment);