bool verbose = false;
bool translate = false;
+ bool no_context = true;
bool print_special_tokens = false;
bool no_timestamps = true;
params.verbose = true;
} else if (arg == "--translate") {
params.translate = true;
+ } else if (arg == "-kc" || arg == "--keep-context") {
+ params.no_context = false;
} else if (arg == "-l" || arg == "--language") {
params.language = argv[++i];
if (whisper_lang_id(params.language.c_str()) == -1) {
fprintf(stderr, " --step N audio step size in milliseconds (default: %d)\n", params.step_ms);
fprintf(stderr, " -v, --verbose verbose output\n");
fprintf(stderr, " --translate translate from source language to english\n");
+ fprintf(stderr, " -kc, --keep-context keep text context from earlier audio (default: false)\n");
fprintf(stderr, " -ps, --print_special print special tokens\n");
fprintf(stderr, " -nt, --no_timestamps do not print timestamps\n");
fprintf(stderr, " -l LANG, --language LANG spoken language (default: %s)\n", params.language.c_str());
wparams.print_realtime = false;
wparams.print_timestamps = !params.no_timestamps;
wparams.translate = params.translate;
+ wparams.no_context = params.no_context;
wparams.language = params.language.c_str();
wparams.n_threads = params.n_threads;
std::vector<whisper_result> result_cur;
std::vector<whisper_segment> result_all;
+
+ std::vector<whisper_token> prompt_past;
};
// load the model from a ggml file
// - model: the model
// - n_threads: number of threads to use
// - mel_offset: offset in the mel spectrogram (i.e. audio offset)
-// - mel_inp: input mel spectrogram
-// - features: output encoded features
//
bool whisper_encode(
whisper_context & wctx,
//
// - model: the model
// - n_threads: number of threads to use
-// - n_past: prompt length
-// - prompt: text prompt
-// - logits_out: output logits
-// - probs_out: output probabilities
+// - tokens: text prompt
+// - n_tokens: number of tokens in the prompt
+// - n_past: number of past tokens to prefix the prompt with
//
bool whisper_decode(
whisper_context & wctx,
.offset_ms = 0,
.translate = false,
+ .no_context = false,
.print_special_tokens = false,
.print_progress = true,
.print_realtime = false,
.offset_ms = 0,
.translate = false,
+ .no_context = false,
.print_special_tokens = false,
.print_progress = true,
.print_realtime = false,
return result;
}
+
int whisper_full(
struct whisper_context * ctx,
struct whisper_full_params params,
}
// the accumulated text context so far
- std::vector<whisper_token> prompt_past = { };
+ auto & prompt_past = ctx->prompt_past;
+ if (params.no_context) {
+ prompt_past.clear();
+ }
// these tokens determine the task that will be performed
std::vector<whisper_token> prompt_init = { whisper_token_sot(ctx) };