main : add -ocsv, aka --output-csv to output a CSV file

author Niels Mayer <redacted>

Thu, 29 Dec 2022 12:04:00 +0000 (04:04 -0800)

committer GitHub <redacted>

Thu, 29 Dec 2022 12:04:00 +0000 (14:04 +0200)
author Niels Mayer <redacted>
Thu, 29 Dec 2022 12:04:00 +0000 (04:04 -0800)
committer GitHub <redacted>
Thu, 29 Dec 2022 12:04:00 +0000 (14:04 +0200)
diff --git a/examples/main/main.cpp b/examples/main/main.cpp

index 6e991b79392f6f1d3b46b8ef0206ea4f88a37530..ce8b484df30f041f447571dcea5afea3777f5ea3 100644 (file)
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -69,6 +69,7 @@ struct whisper_params {
      bool output_vtt     = false;
      bool output_srt     = false;
      bool output_wts     = false;
+    bool output_csv     = false;
      bool print_special  = false;
      bool print_colors   = false;
      bool print_progress = false;
@@ -111,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
          else if (arg == "-ovtt" || arg == "--output-vtt")     { params.output_vtt     = true; }
          else if (arg == "-osrt" || arg == "--output-srt")     { params.output_srt     = true; }
          else if (arg == "-owts" || arg == "--output-words")   { params.output_wts     = true; }
+        else if (arg == "-ocsv" || arg == "--output-csv")     { params.output_csv     = true; }
          else if (arg == "-ps"   || arg == "--print-special")  { params.print_special  = true; }
          else if (arg == "-pc"   || arg == "--print-colors")   { params.print_colors   = true; }
          else if (arg == "-pp"   || arg == "--print-progress") { params.print_progress = true; }
@@ -150,6 +152,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
      fprintf(stderr, "  -ovtt,    --output-vtt     [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
      fprintf(stderr, "  -osrt,    --output-srt     [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
      fprintf(stderr, "  -owts,    --output-words   [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
+    fprintf(stderr, "  -ocsv,    --output-csv     [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
      fprintf(stderr, "  -ps,      --print-special  [%-7s] print special tokens\n",                           params.print_special ? "true" : "false");
      fprintf(stderr, "  -pc,      --print-colors   [%-7s] print colors\n",                                   params.print_colors ? "true" : "false");
      fprintf(stderr, "  -pp,      --print-progress [%-7s] print progress\n",                                 params.print_progress ? "true" : "false");
@@ -325,6 +328,32 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
      return true;
  }
  
+// Write the transcription segments held by `ctx` to the CSV file `fname`.
+//
+// One row is written per segment: start time (ms), end time (ms), quoted text.
+// Double quotes inside the text are doubled so they cannot end the field early.
+//
+// Returns false if the file cannot be opened or if writing fails, true otherwise.
+bool output_csv(struct whisper_context * ctx, const char * fname) {
+    std::ofstream fout(fname);
+    if (!fout.is_open()) {
+        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
+        return false;
+    }
+
+    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
+
+    const int n_segments = whisper_full_n_segments(ctx);
+    for (int i = 0; i < n_segments; ++i) {
+        const char * text = whisper_full_get_segment_text(ctx, i);
+
+        // whisper_full_get_segment_text() returns a string with a leading space - skip it
+        if (text[0] == ' ') {
+            ++text;
+        }
+
+        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+
+        // times returned by whisper_full_get_segment_t{0,1}() are multiplied by 10 to get milliseconds
+        fout << 10 * t0 << ", "
+             << 10 * t1 << ", \"";
+
+        // emit the text, doubling every embedded quote to keep the row well-formed
+        for (const char * p = text; *p != '\0'; ++p) {
+            if (*p == '"') {
+                fout << '"';
+            }
+            fout << *p;
+        }
+
+        fout << "\"\n";
+    }
+
+    // flush so that buffered write errors (e.g. disk full) are reported to the caller
+    fout.flush();
+    if (!fout) {
+        fprintf(stderr, "%s: failed to write to '%s'\n", __func__, fname);
+        return false;
+    }
+
+    return true;
+}
+
+
  // karaoke video generation
  // outputs a bash script that uses ffmpeg to generate a video with the subtitles
  // TODO: font parameter adjustments
@@ -674,6 +703,13 @@ int main(int argc, char ** argv) {
                  const auto fname_wts = fname_inp + ".wts";
                  output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
              }
+
+           // output to CSV file
+            if (params.output_csv) {
+                const auto fname_csv = fname_inp + ".csv";
+                output_csv(ctx, fname_csv.c_str());
+            }
+
          }
      }
author	Niels Mayer <redacted>
	Thu, 29 Dec 2022 12:04:00 +0000 (04:04 -0800)
committer	GitHub <redacted>
	Thu, 29 Dec 2022 12:04:00 +0000 (14:04 +0200)