const int n_segments = whisper_full_n_segments(ctx);
+ std::string speaker = "";
+
+ int64_t t0;
+ int64_t t1;
+
// print the last n_new segments
const int s0 = n_segments - n_new;
+
if (s0 == 0) {
printf("\n");
}
for (int i = s0; i < n_segments; i++) {
- if (params.no_timestamps) {
- if (params.print_colors) {
- for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
- if (params.print_special == false) {
- const whisper_token id = whisper_full_get_token_id(ctx, i, j);
- if (id >= whisper_token_eot(ctx)) {
- continue;
- }
- }
+ if (!params.no_timestamps || params.diarize) {
+ t0 = whisper_full_get_segment_t0(ctx, i);
+ t1 = whisper_full_get_segment_t1(ctx, i);
+ }
- const char * text = whisper_full_get_token_text(ctx, i, j);
- const float p = whisper_full_get_token_p (ctx, i, j);
+ if (!params.no_timestamps) {
+ printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
+ }
- const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
+ if (params.diarize && pcmf32s.size() == 2) {
- printf("%s%s%s", k_colors[col].c_str(), text, "\033[0m");
- }
- } else {
- const char * text = whisper_full_get_segment_text(ctx, i);
- printf("%s", text);
- }
- fflush(stdout);
- } else {
- const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
- const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+ const int64_t n_samples = pcmf32s[0].size();
- std::string speaker;
+ const int64_t is0 = timestamp_to_sample(t0, n_samples);
+ const int64_t is1 = timestamp_to_sample(t1, n_samples);
- if (params.diarize && pcmf32s.size() == 2) {
- const int64_t n_samples = pcmf32s[0].size();
+ double energy0 = 0.0f;
+ double energy1 = 0.0f;
- const int64_t is0 = timestamp_to_sample(t0, n_samples);
- const int64_t is1 = timestamp_to_sample(t1, n_samples);
+ for (int64_t j = is0; j < is1; j++) {
+ energy0 += fabs(pcmf32s[0][j]);
+ energy1 += fabs(pcmf32s[1][j]);
+ }
- double energy0 = 0.0f;
- double energy1 = 0.0f;
+ if (energy0 > 1.1*energy1) {
+ speaker = "(speaker 0)";
+ } else if (energy1 > 1.1*energy0) {
+ speaker = "(speaker 1)";
+ } else {
+ speaker = "(speaker ?)";
+ }
- for (int64_t j = is0; j < is1; j++) {
- energy0 += fabs(pcmf32s[0][j]);
- energy1 += fabs(pcmf32s[1][j]);
- }
+ //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
+ }
- if (energy0 > 1.1*energy1) {
- speaker = "(speaker 0)";
- } else if (energy1 > 1.1*energy0) {
- speaker = "(speaker 1)";
- } else {
- speaker = "(speaker ?)";
+ if (params.print_colors) {
+ for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
+ if (params.print_special == false) {
+ const whisper_token id = whisper_full_get_token_id(ctx, i, j);
+ if (id >= whisper_token_eot(ctx)) {
+ continue;
+ }
}
- //printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
- }
-
- if (params.print_colors) {
- printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
- for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
- if (params.print_special == false) {
- const whisper_token id = whisper_full_get_token_id(ctx, i, j);
- if (id >= whisper_token_eot(ctx)) {
- continue;
- }
- }
+ const char * text = whisper_full_get_token_text(ctx, i, j);
+ const float p = whisper_full_get_token_p (ctx, i, j);
- const char * text = whisper_full_get_token_text(ctx, i, j);
- const float p = whisper_full_get_token_p (ctx, i, j);
+ const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
- const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
+ printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
+ }
+ } else {
+ const char * text = whisper_full_get_segment_text(ctx, i);
- printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
- }
- printf("\n");
- } else {
- const char * text = whisper_full_get_segment_text(ctx, i);
+ printf("%s%s", speaker.c_str(), text);
+ }
- printf("[%s --> %s] %s%s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), speaker.c_str(), text);
- }
+ // with timestamps or speakers: each segment on new line
+ if (!params.no_timestamps || params.diarize) {
+ printf("\n");
}
+
+ fflush(stdout);
}
}