Limits the number of tokens in a segment.
Useful for combating word repetition when using a partial encoder context.
{
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+ wparams.max_tokens = 32;
wparams.print_progress = false;
wparams.print_special_tokens = params.print_special_tokens;
wparams.print_realtime = false;
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
+ /*.max_tokens =*/ 0,
/*.speed_up =*/ false,
/*.thold_pt =*/ 0.01f,
/*.thold_ptsum =*/ 0.01f,
/*.max_len =*/ 0,
+ /*.max_tokens =*/ 0,
/*.speed_up =*/ false,
//}
// end of text token
- if (token.id == whisper_token_eot(ctx) || (i > WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT)) {
+ if (token.id == whisper_token_eot(ctx) || (params.max_tokens > 0 && i > params.max_tokens)) {
if (result_len == 0) {
if (seek + seek_delta + 100 >= seek_end) {
result_len = i + 1;
#define WHISPER_CHUNK_SIZE 30
#define WHISPER_EXPERIMENT_AUDIO_CTX 512
-#define WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT 32
#ifdef __cplusplus
extern "C" {
float thold_pt; // timestamp token probability threshold (~0.01)
float thold_ptsum; // timestamp token sum probability threshold (~0.01)
int max_len; // max segment length in characters
+ int max_tokens; // max tokens per segment (0 = no limit)
// [EXPERIMENTAL] speed-up techniques
bool speed_up; // speed-up the audio by 2x using Phase Vocoder