]> git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
Try to improve the sampling strategy a bit
author Georgi Gerganov <redacted>
Mon, 17 Oct 2022 20:52:24 +0000 (23:52 +0300)
committer Georgi Gerganov <redacted>
Mon, 17 Oct 2022 21:12:51 +0000 (00:12 +0300)
It still fails sometimes when it does not sample a timestamp token for
the entire segment. We now print a message in such cases.

whisper.cpp

index fab7071ff0b22abcbeb0c39c5609442b02806c21..988527811c3cda0c253fc28908f95db7159f8f0e 100644 (file)
@@ -2425,7 +2425,7 @@ int whisper_full(
                 whisper_token id  = 0;
                 whisper_token tid = whisper_token_beg(ctx);
 
-                id = whisper_sample_best(ctx, result_len == 0);
+                id = whisper_sample_best(ctx, result_len == 0 || i > 32);
                 if (i > 0) {
                     tid = whisper_sample_timestamp(ctx);
                 }
@@ -2445,7 +2445,9 @@ int whisper_full(
                 // end of text token
                 if (id == whisper_token_eot(ctx)) {
                     if (result_len == 0) {
-                        result_len = i + 1;
+                        // TODO: figure out how to resolve this
+                        fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
+                        //result_len = i + 1;
                     }
                     break;
                 }