int32_t port = 8080;
int32_t read_timeout = 600;
int32_t write_timeout = 600;
+
+ bool ffmpeg_converter = false;
};
struct whisper_params {
fprintf(stderr, " --host HOST, [%-7s] Hostname/ip-adress for the server\n", sparams.hostname.c_str());
fprintf(stderr, " --port PORT, [%-7d] Port number for the server\n", sparams.port);
fprintf(stderr, " --public PATH, [%-7s] Path to the public folder\n", sparams.public_path.c_str());
+ fprintf(stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server", sparams.ffmpeg_converter ? "true" : "false");
fprintf(stderr, "\n");
}
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
else if ( arg == "--host") { sparams.hostname = argv[++i]; }
else if ( arg == "--public") { sparams.public_path = argv[++i]; }
+ else if ( arg == "--convert") { sparams.ffmpeg_converter = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params, sparams);
int progress_prev;
};
+// Verifies that an `ffmpeg` executable can be launched through the system shell.
+//
+// Runs `ffmpeg -version`. If the command reports status 0 a confirmation is printed
+// and the function returns. Otherwise installation guidance is written to stderr and
+// the process is terminated with a non-zero exit status, so that scripts and process
+// supervisors can tell the start-up failed.
+void check_ffmpeg_availibility() {
+    const int result = system("ffmpeg -version");
+
+    if (result == 0) {
+        std::cout << "ffmpeg is available." << std::endl;
+        return;
+    }
+
+    // ffmpeg is not available: this is an error, so report it on stderr
+    // (newline-terminated) and exit with a failure status instead of 0
+    fprintf(stderr, "ffmpeg is not found. Please ensure that ffmpeg is installed ");
+    fprintf(stderr, "and that its executable is included in your system's PATH.\n");
+    exit(1);
+}
+
+// Re-encodes the file at `temp_filename` in place as 16 kHz, mono, 16-bit PCM WAV
+// by running ffmpeg through the system shell.
+//
+//   temp_filename  path of the audio file to convert; on success the same path
+//                  holds the converted WAV data
+//   error_resp     set to a JSON error body whenever the function returns false
+//
+// Returns true on success; false if ffmpeg exits with a non-zero status, or if the
+// original file cannot be removed, or if the converted file cannot be renamed.
+// The intermediate "<temp_filename>_temp.wav" is deleted on every failure path.
+//
+// NOTE(review): temp_filename is interpolated into a shell command protected only by
+// double quotes. This presumes the name is generated by the server and never derived
+// from request data - confirm against the caller.
+bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
+    const std::string converted_filename_temp = temp_filename + "_temp.wav";
+
+    // -nostdin: ffmpeg must never read from (or wait on) the server's stdin
+    // -y:       overwrite a stale intermediate file left by an earlier failed run
+    //           instead of asking for confirmation, which nobody can answer here
+    std::ostringstream cmd_stream;
+    cmd_stream << "ffmpeg -nostdin -y -i \"" << temp_filename << "\" -ar 16000 -ac 1 -c:a pcm_s16le \"" << converted_filename_temp << "\" 2>&1";
+    const std::string cmd = cmd_stream.str();
+
+    const int status = std::system(cmd.c_str());
+    if (status != 0) {
+        // best-effort cleanup of a partially written output; the result is ignored
+        // on purpose because the file may not exist at all
+        remove(converted_filename_temp.c_str());
+        error_resp = "{\"error\":\"FFmpeg conversion failed.\"}";
+        return false;
+    }
+
+    // Remove the original file first: renaming onto an existing file is
+    // implementation-defined, so the destination has to be free
+    if (remove(temp_filename.c_str()) != 0) {
+        remove(converted_filename_temp.c_str());
+        error_resp = "{\"error\":\"Failed to remove the original file.\"}";
+        return false;
+    }
+
+    // Rename the temporary file to match the original filename
+    if (rename(converted_filename_temp.c_str(), temp_filename.c_str()) != 0) {
+        remove(converted_filename_temp.c_str());
+        error_resp = "{\"error\":\"Failed to rename the temporary file.\"}";
+        return false;
+    }
+
+    return true;
+}
+
std::string estimate_diarization_speaker(std::vector<std::vector<float>> pcmf32s, int64_t t0, int64_t t1, bool id_only = false) {
std::string speaker = "";
const int64_t n_samples = pcmf32s[0].size();
exit(0);
}
+ if (sparams.ffmpeg_converter) {
+ check_ffmpeg_availibility();
+ }
// whisper init
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
temp_file << audio_file.content;
temp_file.close();
+ // when --convert is enabled, always re-encode the upload to WAV with ffmpeg (the input format is not inspected)
+
+ if (sparams.ffmpeg_converter) {
+ std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
+ const bool is_converted = convert_to_wav(temp_filename, error_resp);
+ if (!is_converted) {
+ res.set_content(error_resp, "application/json");
+ whisper_mutex.unlock();
+ return;
+ }
+ }
+
// read wav content into pcmf32
if (!::read_wav(temp_filename, pcmf32, pcmf32s, params.diarize)) {
fprintf(stderr, "error: failed to read WAV file '%s'\n", temp_filename.c_str());
// run the inference
{
-
printf("Running whisper.cpp inference on %s\n", filename.c_str());
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);