whisper : fixed Beam Search Strategy and exposed whisper_pcm_to_mel_phase_vocoder...

author sandrohanea <redacted>

Wed, 8 Feb 2023 07:01:47 +0000 (08:01 +0100)

committer GitHub <redacted>

Wed, 8 Feb 2023 07:01:47 +0000 (09:01 +0200)
author sandrohanea <redacted>
Wed, 8 Feb 2023 07:01:47 +0000 (08:01 +0100)
committer GitHub <redacted>
Wed, 8 Feb 2023 07:01:47 +0000 (09:01 +0200)
diff --git a/whisper.cpp b/whisper.cpp

index 35d42f894f33d266ff54c8e605e2f6b8cc57b7bc..aebb4813f080c8f0d1584597adecd5e26986d75c 100644 (file)
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2905,7 +2905,7 @@ const char * whisper_print_system_info(void) {
  
  struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
      struct whisper_full_params result = {
-        /*.strategy         =*/ WHISPER_SAMPLING_GREEDY,
+        /*.strategy         =*/ strategy,
  
          /*.n_threads        =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
          /*.n_max_text_ctx   =*/ 16384,
@@ -3829,7 +3829,7 @@ int whisper_full(
  
                          auto & cur = beam_candidates[cur_c++];
  
-                        while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
+                        while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
                              ++cur_c;
                          }
  
diff --git a/whisper.h b/whisper.h

index 72331e6abd4d1a725e660f91ce4d647a4e1cd02e..786d67d9cb437d7425e1b8aa5135770910245d5e 100644 (file)
--- a/whisper.h
+++ b/whisper.h
@@ -113,6 +113,16 @@ extern "C" {
                                 int   n_samples,
                                 int   n_threads);
  
+    // Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
+    // The resulting spectrogram is stored inside the provided whisper context.
+    // Returns 0 on success
+    WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
+        struct whisper_context* ctx,
+        const float* samples,
+        int   n_samples,
+        int   n_threads);
+
+
      // This can be used to set a custom log mel spectrogram inside the provided whisper context.
      // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
      // n_mel must be 80
author	sandrohanea <redacted>
	Wed, 8 Feb 2023 07:01:47 +0000 (08:01 +0100)
committer	GitHub <redacted>
	Wed, 8 Feb 2023 07:01:47 +0000 (09:01 +0200)
whisper.cpp		patch \| blob \| history
whisper.h		patch \| blob \| history