stream : add "max_tokens" parameter

author Georgi Gerganov <redacted>

Sun, 20 Nov 2022 18:52:24 +0000 (20:52 +0200)

committer Georgi Gerganov <redacted>

Sun, 20 Nov 2022 19:22:41 +0000 (21:22 +0200)
author Georgi Gerganov <redacted>
Sun, 20 Nov 2022 18:52:24 +0000 (20:52 +0200)
committer Georgi Gerganov <redacted>
Sun, 20 Nov 2022 19:22:41 +0000 (21:22 +0200)
diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp

index d2db0b899342ba4dcc5c2ea306e0b249df759dbb..040ba9ebf4b67a3c0c87cd71efe2e505bbac6d35 100644 (file)
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@@ -322,6 +322,7 @@ int main(int argc, char ** argv) {
          {
              whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
  
+            wparams.max_tokens           = 32;
              wparams.print_progress       = false;
              wparams.print_special_tokens = params.print_special_tokens;
              wparams.print_realtime       = false;
diff --git a/whisper.cpp b/whisper.cpp

index 95579ec38fe81aa91ea9a9a0b6bfe0f2d3ce01e5..48f93ebd89e8e0f7efe130c0632181207568944d 100644 (file)
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2402,6 +2402,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
                      /*.thold_pt             =*/ 0.01f,
                      /*.thold_ptsum          =*/ 0.01f,
                      /*.max_len              =*/ 0,
+                    /*.max_tokens           =*/ 0,
  
                      /*.speed_up             =*/ false,
  
@@ -2443,6 +2444,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
                      /*.thold_pt             =*/ 0.01f,
                      /*.thold_ptsum          =*/ 0.01f,
                      /*.max_len              =*/ 0,
+                    /*.max_tokens           =*/ 0,
  
                      /*.speed_up             =*/ false,
  
@@ -2685,7 +2687,7 @@ int whisper_full(
                  //}
  
                  // end of text token
-                if (token.id == whisper_token_eot(ctx) || (i > WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT)) {
+                if (token.id == whisper_token_eot(ctx) || (params.max_tokens > 0 && i > params.max_tokens)) {
                      if (result_len == 0) {
                          if (seek + seek_delta + 100 >= seek_end) {
                              result_len = i + 1;
diff --git a/whisper.h b/whisper.h

index ec4b1fb6c53ec60ee6097e5ef21ac7d3c27a6117..0211995dcb8d5922110eaea35f9cda1cbdc9072c 100644 (file)
--- a/whisper.h
+++ b/whisper.h
@@ -25,7 +25,6 @@
  #define WHISPER_CHUNK_SIZE  30
  
  #define WHISPER_EXPERIMENT_AUDIO_CTX 512
-#define WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT 32
  
  #ifdef __cplusplus
  extern "C" {
@@ -205,6 +204,7 @@ extern "C" {
          float thold_pt;         // timestamp token probability threshold (~0.01)
          float thold_ptsum;      // timestamp token sum probability threshold (~0.01)
          int   max_len;          // max segment length in characters
+        int   max_tokens;       // max tokens per segment (0 = no limit)
  
          // [EXPERIMENTAL] speed-up techniques
          bool speed_up; // speed-up the audio by 2x using Phase Vocoder
author	Georgi Gerganov <redacted>
	Sun, 20 Nov 2022 18:52:24 +0000 (20:52 +0200)
committer	Georgi Gerganov <redacted>
	Sun, 20 Nov 2022 19:22:41 +0000 (21:22 +0200)
examples/stream/stream.cpp		patch | blob | history
whisper.cpp		patch | blob | history
whisper.h		patch | blob | history