ref #17 : print whisper logs to stderr

author Georgi Gerganov <redacted>
Sat, 8 Oct 2022 14:28:06 +0000 (17:28 +0300)
committer Georgi Gerganov <redacted>
Sat, 8 Oct 2022 14:28:06 +0000 (17:28 +0300)
diff --git a/main.cpp b/main.cpp

index 728ab6faa2f83dc5c1e82ed35c03b5bba42e2376..acaf3028d1a56f1f17612c611f42d3a1104237cc 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -192,21 +192,21 @@ int main(int argc, char ** argv) {
  
          // print some info about the processing
          {
-            printf("\n");
+            fprintf(stderr, "\n");
              if (!whisper_is_multilingual(ctx)) {
                  if (params.language != "en" || params.translate) {
                      params.language = "en";
                      params.translate = false;
-                    printf("%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
+                    fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
                  }
              }
-            printf("%s: processing '%s' (%d samples, %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
+            fprintf(stderr, "%s: processing '%s' (%d samples, %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
                      __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE, params.n_threads,
                      params.language.c_str(),
                      params.translate ? "translate" : "transcribe",
                      params.no_timestamps ? 0 : 1);
  
-            printf("\n");
+            fprintf(stderr, "\n");
          }
  
  
@@ -230,25 +230,25 @@ int main(int argc, char ** argv) {
  
              // print result
              if (!wparams.print_realtime) {
-                printf("\n");
+                fprintf(stderr, "\n");
  
                  const int n_segments = whisper_full_n_segments(ctx);
                  for (int i = 0; i < n_segments; ++i) {
                      const char * text = whisper_full_get_segment_text(ctx, i);
  
                      if (params.no_timestamps) {
-                        printf ("%s", text);
+                        fprintf(stderr, "%s", text);
                          fflush(stdout);
                      } else {
                          const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
                          const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
  
-                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
+                        fprintf(stderr, "[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
                      }
                  }
              }
  
-            printf("\n");
+            fprintf(stderr, "\n");
  
              // output to text file
              if (params.output_txt) {
@@ -260,7 +260,7 @@ int main(int argc, char ** argv) {
                      return 8;
                  }
  
-                printf("%s: saving output to '%s.txt'\n", __func__, fname_inp.c_str());
+                fprintf(stderr, "%s: saving output to '%s.txt'\n", __func__, fname_inp.c_str());
  
                  const int n_segments = whisper_full_n_segments(ctx);
                  for (int i = 0; i < n_segments; ++i) {
@@ -279,7 +279,7 @@ int main(int argc, char ** argv) {
                      return 9;
                  }
  
-                printf("%s: saving output to '%s.vtt'\n", __func__, fname_inp.c_str());
+                fprintf(stderr, "%s: saving output to '%s.vtt'\n", __func__, fname_inp.c_str());
  
                  fout_vtt << "WEBVTT\n\n";
  
@@ -304,7 +304,7 @@ int main(int argc, char ** argv) {
                      return 10;
                  }
  
-                printf("%s: saving output to '%s.srt'\n", __func__, fname_inp.c_str());
+                fprintf(stderr, "%s: saving output to '%s.srt'\n", __func__, fname_inp.c_str());
  
                  const int n_segments = whisper_full_n_segments(ctx);
                  for (int i = 0; i < n_segments; ++i) {
diff --git a/whisper.cpp b/whisper.cpp

index b59cfd7f04b95a25bd7037977e110b474bf95339..81da46944f7d383560f23b69ddb837d29a4c9247 100644 (file)
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -421,7 +421,7 @@ struct whisper_context {
  // see the convert-pt-to-ggml.py script for details
  //
  bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
-    printf("%s: loading model from '%s'\n", __func__, fname.c_str());
+    fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
  
      auto & model = wctx.model;
      auto & vocab = wctx.vocab;
@@ -480,18 +480,18 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
              model.type = e_model::MODEL_LARGE;
          }
  
-        printf("%s: n_vocab       = %d\n", __func__, hparams.n_vocab);
-        printf("%s: n_audio_ctx   = %d\n", __func__, hparams.n_audio_ctx);
-        printf("%s: n_audio_state = %d\n", __func__, hparams.n_audio_state);
-        printf("%s: n_audio_head  = %d\n", __func__, hparams.n_audio_head);
-        printf("%s: n_audio_layer = %d\n", __func__, hparams.n_audio_layer);
-        printf("%s: n_text_ctx    = %d\n", __func__, hparams.n_text_ctx);
-        printf("%s: n_text_state  = %d\n", __func__, hparams.n_text_state);
-        printf("%s: n_text_head   = %d\n", __func__, hparams.n_text_head);
-        printf("%s: n_text_layer  = %d\n", __func__, hparams.n_text_layer);
-        printf("%s: n_mels        = %d\n", __func__, hparams.n_mels);
-        printf("%s: f16           = %d\n", __func__, hparams.f16);
-        printf("%s: type          = %d\n", __func__, model.type);
+        fprintf(stderr, "%s: n_vocab       = %d\n", __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_audio_ctx   = %d\n", __func__, hparams.n_audio_ctx);
+        fprintf(stderr, "%s: n_audio_state = %d\n", __func__, hparams.n_audio_state);
+        fprintf(stderr, "%s: n_audio_head  = %d\n", __func__, hparams.n_audio_head);
+        fprintf(stderr, "%s: n_audio_layer = %d\n", __func__, hparams.n_audio_layer);
+        fprintf(stderr, "%s: n_text_ctx    = %d\n", __func__, hparams.n_text_ctx);
+        fprintf(stderr, "%s: n_text_state  = %d\n", __func__, hparams.n_text_state);
+        fprintf(stderr, "%s: n_text_head   = %d\n", __func__, hparams.n_text_head);
+        fprintf(stderr, "%s: n_text_layer  = %d\n", __func__, hparams.n_text_layer);
+        fprintf(stderr, "%s: n_mels        = %d\n", __func__, hparams.n_mels);
+        fprintf(stderr, "%s: f16           = %d\n", __func__, hparams.f16);
+        fprintf(stderr, "%s: type          = %d\n", __func__, model.type);
  
          wctx.buf_model.resize(MEM_REQ_MODEL.at(model.type));
          wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
@@ -503,7 +503,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
                     wctx.buf_compute.size() +
                     wctx.buf_compute_layer.size();
  
-        printf("%s: mem_required  = %.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
+        fprintf(stderr, "%s: mem_required  = %.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
      }
  
      // load mel filters
@@ -553,7 +553,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
          }
  
          if (n_vocab < model.hparams.n_vocab) {
-            printf("%s: adding %d extra tokens\n", __func__, model.hparams.n_vocab - n_vocab);
+            fprintf(stderr, "%s: adding %d extra tokens\n", __func__, model.hparams.n_vocab - n_vocab);
              for (int i = n_vocab; i < model.hparams.n_vocab; i++) {
                  if (i > vocab.token_beg) {
                      word = "[_TT_" + std::to_string(i - vocab.token_beg) + "]";
@@ -698,7 +698,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
  
          ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
  
-        printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
+        fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
      }
  
      // create the ggml context
@@ -945,7 +945,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
              ggml_nbytes(model.memory_k)       + ggml_nbytes(model.memory_v) +
              ggml_nbytes(model.memory_cross_k) + ggml_nbytes(model.memory_cross_v);
  
-        printf("%s: memory size = %8.2f MB \n", __func__, memory_size/1024.0/1024.0);
+        fprintf(stderr, "%s: memory size = %8.2f MB \n", __func__, memory_size/1024.0/1024.0);
      }
  
      // load weights
@@ -1008,10 +1008,10 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
              n_loaded++;
          }
  
-        printf("%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
+        fprintf(stderr, "%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
  
          if (n_loaded == 0) {
-            printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
+            fprintf(stderr, "%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
          } else if (n_loaded != (int) model.tensors.size()) {
              fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
              return false;
@@ -2242,13 +2242,13 @@ whisper_token whisper_token_transcribe() {
  void whisper_print_timings(struct whisper_context * ctx) {
      const int64_t t_end_us = ggml_time_us();
  
-    printf("\n");
-    printf("%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
-    printf("%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
-    printf("%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
-    printf("%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
-    printf("%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
-    printf("%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
+    fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
+    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
+    fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
  }
  
  ////////////////////////////////////////////////////////////////////////////
@@ -2349,7 +2349,7 @@ int whisper_full(
          while (progress_cur >= progress_prev + progress_step) {
              progress_prev += progress_step;
              if (params.print_progress) {
-                printf("%s: progress = %3d%%\n", __func__, progress_prev);
+                fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress_prev);
              }
          }
author	Georgi Gerganov <redacted>
	Sat, 8 Oct 2022 14:28:06 +0000 (17:28 +0300)
committer	Georgi Gerganov <redacted>
	Sat, 8 Oct 2022 14:28:06 +0000 (17:28 +0300)
main.cpp		patch | blob | history
whisper.cpp		patch | blob | history