git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
Improve result printing
author Georgi Gerganov <redacted>
Tue, 4 Oct 2022 20:16:33 +0000 (23:16 +0300)
committer Georgi Gerganov <redacted>
Tue, 4 Oct 2022 20:18:15 +0000 (23:18 +0300)
main.cpp
whisper.cpp
whisper.h

index ca29b8277938bf2549020983e8657a073ebf3b30..562559a19186632f814df1daadbc76dd323c2860 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -181,6 +181,9 @@ int main(int argc, char ** argv) {
     {
         whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
 
+        wparams.print_realtime       = true;
+        wparams.print_progress       = false;
+        wparams.print_timestamps     = !params.no_timestamps;
         wparams.print_special_tokens = params.print_special_tokens;
 
         if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
@@ -189,7 +192,7 @@ int main(int argc, char ** argv) {
         }
 
         // print result;
-        {
+        if (!wparams.print_realtime) {
             printf("\n");
 
             const int n_segments = whisper_full_n_segments(ctx);
index 583eb7283ccfb624130df0bc970bd711b550661d..4f105eefe40755ca7793dd6d7a88e27d9cf622e8 100644 (file)
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1854,6 +1854,18 @@ whisper_vocab::id whisper_sample_timestamp(
     return probs_id[0].second;
 }
 
+static std::string to_timestamp(int64_t t) { // t counts 1/100 s ticks; returns "MM:SS.mmm" (minutes are not wrapped into hours)
+    int64_t sec = t/100;                     // whole seconds contained in t
+    int64_t msec = (t - sec*100)*10;         // leftover ticks are centiseconds; scale to milliseconds so %03d prints e.g. 150 -> ".500"
+    int64_t min = sec/60;
+    sec = sec - min*60;                      // seconds remaining within the current minute
+
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+
+    return std::string(buf);
+}
+
 // naive Discrete Fourier Transform
 // input is real-valued
 // output is complex-valued
@@ -2245,6 +2257,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
                     .translate            = false,
                     .print_special_tokens = false,
                     .print_progress       = true,
+                    .print_realtime       = false,
+                    .print_timestamps     = true,
 
                     .language = "en",
 
@@ -2262,6 +2276,8 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
                     .translate            = false,
                     .print_special_tokens = false,
                     .print_progress       = true,
+                    .print_realtime       = false,
+                    .print_timestamps     = true,
 
                     .language = "en",
 
@@ -2436,6 +2452,15 @@ int whisper_full(
                 if (result_cur[i].id > whisper_token_beg(ctx)) {
                     const auto t1 = result_cur[i].t;
                     if (!text.empty()) {
+                        if (params.print_realtime) {
+                            if (params.print_timestamps) {
+                                printf("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
+                            } else {
+                                printf("%s", text.c_str());
+                                fflush(stdout);
+                            }
+                        }
+
                         result_all.push_back({ t0, t1, text });
                     }
                     text = "";
@@ -2448,7 +2473,18 @@ int whisper_full(
             }
 
             if (!text.empty()) {
-                result_all.push_back({ t0, seek + seek_delta, text });
+                const auto t1 = seek + seek_delta;
+
+                if (params.print_realtime) {
+                    if (params.print_timestamps) {
+                        printf("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text.c_str());
+                    } else {
+                        printf("%s", text.c_str());
+                        fflush(stdout);
+                    }
+                }
+
+                result_all.push_back({ t0, t1, text });
             }
         }
 
index cc24d71ab8438109d81f6c551065affdaf5ee3c6..2df5bdfb763378b608ff6086ed8187c5d1eb3b9b 100644 (file)
--- a/whisper.h
+++ b/whisper.h
@@ -106,6 +106,8 @@ extern "C" {
         bool translate;
         bool print_special_tokens;
         bool print_progress;
+        bool print_realtime;
+        bool print_timestamps;
 
         const char * language;