// command-list mode
// guide the transcription to match the most likely command from a provided list
-static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) {
+static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, std::ofstream &fout) {
fprintf(stderr, "\n");
fprintf(stderr, "%s: guided mode\n", __func__);
const float prob = probs_id[0].first;
const int index = probs_id[0].second;
+ const char * best_command = allowed_commands[index].c_str();
fprintf(stdout, "\n");
fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__,
- "\033[1m", allowed_commands[index].c_str(), "\033[0m", prob,
+ "\033[1m", best_command, "\033[0m", prob,
(int) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count());
fprintf(stdout, "\n");
+ if (fout.is_open()) {
+ fout << best_command << std::endl;
+ }
}
}
// always-prompt mode
// transcribe the voice into text after valid prompt
-static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
+static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
bool is_running = true;
bool ask_prompt = true;
if ((sim > 0.7f) && (command.size() > 0)) {
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+ if (fout.is_open()) {
+ fout << command << std::endl;
+ }
}
fprintf(stdout, "\n");
// general-purpose mode
// freely transcribe the voice into text
-static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
+static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) {
bool is_running = true;
bool have_prompt = false;
bool ask_prompt = true;
} else {
// cut the prompt from the decoded text
const std::string command = ::trim(txt.substr(best_len));
-
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+ if (fout.is_open()) {
+ fout << command << std::endl;
+ }
}
fprintf(stdout, "\n");
}
}
+ std::ofstream fout;
+ if (params.fname_out.length() > 0) {
+ fout.open(params.fname_out);
+ if (!fout.is_open()) {
+ fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
+ return 1;
+ }
+ }
+
if (ret_val == 0) {
if (!params.commands.empty()) {
- ret_val = process_command_list(ctx, audio, params);
+ ret_val = process_command_list(ctx, audio, params, fout);
} else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) {
- ret_val = always_prompt_transcription(ctx, audio, params);
+ ret_val = always_prompt_transcription(ctx, audio, params, fout);
} else {
- ret_val = process_general_transcription(ctx, audio, params);
+ ret_val = process_general_transcription(ctx, audio, params, fout);
}
}