git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
whisper : fixed Beam Search Strategy and exposed whisper_pcm_to_mel_phase_vocoder...
author sandrohanea <redacted>
Wed, 8 Feb 2023 07:01:47 +0000 (08:01 +0100)
committer GitHub <redacted>
Wed, 8 Feb 2023 07:01:47 +0000 (09:01 +0200)
Co-authored-by: Sandro Hanea <redacted>
whisper.cpp
whisper.h

index 35d42f894f33d266ff54c8e605e2f6b8cc57b7bc..aebb4813f080c8f0d1584597adecd5e26986d75c 100644 (file)
@@ -2905,7 +2905,7 @@ const char * whisper_print_system_info(void) {
 
 struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
     struct whisper_full_params result = {
-        /*.strategy         =*/ WHISPER_SAMPLING_GREEDY,
+        /*.strategy         =*/ strategy,
 
         /*.n_threads        =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
         /*.n_max_text_ctx   =*/ 16384,
@@ -3829,7 +3829,7 @@ int whisper_full(
 
                         auto & cur = beam_candidates[cur_c++];
 
-                        while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
+                        while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
                             ++cur_c;
                         }
 
index 72331e6abd4d1a725e660f91ce4d647a4e1cd02e..786d67d9cb437d7425e1b8aa5135770910245d5e 100644 (file)
--- a/whisper.h
+++ b/whisper.h
@@ -113,6 +113,16 @@ extern "C" {
                                int   n_samples,
                                int   n_threads);
 
+    // Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
+    // The resulting spectrogram is stored inside the provided whisper context.
+    // Returns 0 on success
+    WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
+        struct whisper_context* ctx,
+        const float* samples,
+        int   n_samples,
+        int   n_threads);
+
+
     // This can be used to set a custom log mel spectrogram inside the provided whisper context.
     // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
     // n_mel must be 80