sync : whisper.cpp

author Georgi Gerganov <redacted>

Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)

committer Georgi Gerganov <redacted>

Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)
author Georgi Gerganov <redacted>
Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)
committer Georgi Gerganov <redacted>
Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)
diff --git a/examples/whisper/main.cpp b/examples/whisper/main.cpp

index 5362d4a21b6b06cdf1a5b08d3376d8f7cbfc2917..43838cf5bba439c6561a79cce80358f31d07e959 100644 (file)
--- a/examples/whisper/main.cpp
+++ b/examples/whisper/main.cpp
@@ -14,13 +14,16 @@
-//  500 -> 00:05.000
-// 6000 -> 01:00.000
+//  500 -> 00:00:05.000
+// 6000 -> 00:01:00.000
  std::string to_timestamp(int64_t t) {
-    int64_t sec = t/100;
-    int64_t msec = t - sec*100;
-    int64_t min = sec/60;
-    sec = sec - min*60;
-
+    int64_t msec = t * 10;
+    int64_t hr = msec / (1000 * 60 * 60);
+    msec = msec - hr * (1000 * 60 * 60);
+    int64_t min = msec / (1000 * 60);
+    msec = msec - min * (1000 * 60);
+    int64_t sec = msec / 1000;
+    msec = msec - sec * 1000;
+    
      char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+    snprintf(buf, sizeof(buf), "%02d:%02d:%02d.%03d", (int) hr, (int) min, (int) sec, (int) msec);
  
      return std::string(buf);
  }
diff --git a/examples/whisper/whisper.cpp b/examples/whisper/whisper.cpp

index 81da46944f7d383560f23b69ddb837d29a4c9247..a5f79d255cdf4c94d6bedd4e6013ddf41af52a6a 100644 (file)
--- a/examples/whisper/whisper.cpp
+++ b/examples/whisper/whisper.cpp
@@ -4,6 +4,7 @@
  
  #include <algorithm>
  #include <cassert>
+#define _USE_MATH_DEFINES
  #include <cmath>
  #include <cstdio>
  #include <cstring>
@@ -2072,6 +2073,8 @@ bool log_mel_spectrogram(
  //
  
  struct whisper_context * whisper_init(const char * path_model) {
+    ggml_time_init();
+
      whisper_context * ctx = new whisper_context;
  
      const int64_t t_start_us = ggml_time_us();
@@ -2259,7 +2262,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
      switch (strategy) {
          case WHISPER_DECODE_GREEDY:
              {
-                result = (struct whisper_full_params) {
+                result = {
                      .strategy  = WHISPER_DECODE_GREEDY,
                      .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                      .offset_ms = 0,
@@ -2280,7 +2283,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_decode_strat
              } break;
          case WHISPER_DECODE_BEAM_SEARCH:
              {
-                result = (struct whisper_full_params) {
+                result = {
                      .strategy  = WHISPER_DECODE_GREEDY,
                      .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
                      .offset_ms = 0,
@@ -2317,6 +2320,13 @@ int whisper_full(
          return -1;
      }
  
+    // if the spectrogram is shorter than 1s (100 spectrogram frames), return
+    // early: inputs this short are deliberately not processed at all
+    // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
+    if (whisper_n_len(ctx) < 100) {
+        return 0;
+    }
+
      // the accumulated text context so far
      auto & prompt_past = ctx->prompt_past;
      if (params.no_context) {
@@ -2386,7 +2396,7 @@ int whisper_full(
          // print the prompt
          //printf("\n\n");
          //for (int i = 0; i < prompt.size(); i++) {
-        //    printf("%s: prompt[%d] = %s\n", __func__, i, vocab.id_to_token[prompt[i]].c_str());
+        //    printf("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token[prompt[i]].c_str());
          //}
          //printf("\n\n");
  
diff --git a/examples/whisper/whisper.h b/examples/whisper/whisper.h

index f462370a33015e4e883f1a4517b6386b751358e9..381afd71d6e7495d26cf49dc70169cf2bea730e5 100644 (file)
--- a/examples/whisper/whisper.h
+++ b/examples/whisper/whisper.h
@@ -2,6 +2,7 @@
  #define WHISPER_H
  
  #include <stdint.h>
+#include <stdbool.h>
  
  #ifdef WHISPER_SHARED
  #    ifdef _WIN32
diff --git a/include/ggml/ggml.h b/include/ggml/ggml.h

index 5b7b2582ef5b1eddda22984f432a697334a8d71e..34f104b70c26daffd9b7f6988d6d1f5929d27712 100644 (file)
--- a/include/ggml/ggml.h
+++ b/include/ggml/ggml.h
@@ -136,6 +136,7 @@ struct ggml_init_params {
      void * mem_buffer; // if NULL, memory will be allocated internally
  };
  
+void ggml_time_init(void);
  int64_t ggml_time_ms(void);
  int64_t ggml_time_us(void);
  int64_t ggml_cycles(void);
diff --git a/src/ggml.c b/src/ggml.c

index a87e8dbc9f3b726d14b1a44393819a9e1f88bc3f..6608300f7864b194d0bb7c41cc18444f7c2f5b69 100644 (file)
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -1,6 +1,11 @@
  #include "ggml.h"
  
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <malloc.h> // using malloc.h with MSC/MINGW
+#else
  #include <alloca.h>
+#endif
+
  #include <assert.h>
  #include <time.h>
  #include <math.h>
@@ -8,9 +13,15 @@
  #include <string.h>
  #include <stdint.h>
  #include <stdio.h>
-#include <stdatomic.h>
  
+
+#if defined _MSC_VER
+#include "msvc_thread_atomic.h"
+#else
  #include <pthread.h>
+#include <stdatomic.h>
+typedef void* thread_ret_t;
+#endif
  
  #define GGML_DEBUG 0
  
@@ -144,6 +155,25 @@ static ggml_fp16_t table_exp_f16[1 << 16];
  // timing
  //
  
+#if defined(_MSC_VER)
+static int64_t timer_freq;
+void ggml_time_init(void) {
+    LARGE_INTEGER frequency;
+    QueryPerformanceFrequency(&frequency);
+    timer_freq = frequency.QuadPart;
+}
+int64_t ggml_time_ms(void) {
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart * 1000) / timer_freq;
+}
+int64_t ggml_time_us(void) {
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart * 1000000) / timer_freq;
+}
+#else
+void ggml_time_init(void) {}
  int64_t ggml_time_ms(void) {
      struct timespec ts;
      clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -155,6 +185,7 @@ int64_t ggml_time_us(void) {
      clock_gettime(CLOCK_MONOTONIC, &ts);
      return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
  }
+#endif
  
  int64_t ggml_cycles(void) {
      return clock();
@@ -6407,7 +6438,7 @@ void * ggml_graph_compute_one(void * data) {
      return NULL;
  }
  
-void * ggml_graph_compute_thread(void * data) {
+thread_ret_t ggml_graph_compute_thread(void * data) {
      struct ggml_compute_state * state = (struct ggml_compute_state *) data;
  
      const int n_threads = state->shared->n_threads;
@@ -6418,7 +6449,7 @@ void * ggml_graph_compute_thread(void * data) {
          } else {
              while (atomic_load(&state->shared->has_work)) {
                  if (atomic_load(&state->shared->stop)) {
-                    return NULL;
+                    return 0;
                  }
                  ggml_lock_lock  (&state->shared->spin);
                  ggml_lock_unlock(&state->shared->spin);
@@ -6430,7 +6461,7 @@ void * ggml_graph_compute_thread(void * data) {
          // wait for work
          while (!atomic_load(&state->shared->has_work)) {
              if (atomic_load(&state->shared->stop)) {
-                return NULL;
+                return 0;
              }
              ggml_lock_lock  (&state->shared->spin);
              ggml_lock_unlock(&state->shared->spin);
@@ -6449,7 +6480,7 @@ void * ggml_graph_compute_thread(void * data) {
          }
      }
  
-    return NULL;
+    return 0;
  }
  
  void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
author	Georgi Gerganov <redacted>
	Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)
committer	Georgi Gerganov <redacted>
	Thu, 13 Oct 2022 19:18:46 +0000 (22:18 +0300)
examples/whisper/main.cpp		patch | blob | history
examples/whisper/whisper.cpp		patch | blob | history
examples/whisper/whisper.h		patch | blob | history
include/ggml/ggml.h		patch | blob | history
src/ggml.c		patch | blob | history