From: Ryan Hitchman Date: Thu, 18 Jan 2024 20:58:42 +0000 (-0700) Subject: server : implement "verbose_json" format with token details (whisper/1781) X-Git-Tag: upstream/0.0.1642~1034 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4298fbc66ea205aa7502f8c1d282a2f2b036d86a;p=pkg%2Fggml%2Fsources%2Fggml server : implement "verbose_json" format with token details (whisper/1781) * examples/server: implement "verbose_json" format with token details. This is intended to mirror the format of openai's Python whisper.transcribe() return values. * server: don't write WAV to a temporary file if not converting * server: use std::lock_guard instead of manual lock/unlock --- diff --git a/examples/common.cpp b/examples/common.cpp index 603c655a..8404e00e 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -639,6 +639,12 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); } + else if (fname.size() > 256 || fname.size() > 40 && fname.substr(0, 4) == "RIFF" && fname.substr(8, 4) == "WAVE") { + if (drwav_init_memory(&wav, fname.c_str(), fname.size(), nullptr) == false) { + fprintf(stderr, "error: failed to open WAV file from fname buffer\n"); + return false; + } + } else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) { fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str()); return false; diff --git a/examples/common.h b/examples/common.h index 54f0b00d..aebeb0cd 100644 --- a/examples/common.h +++ b/examples/common.h @@ -136,6 +136,7 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat( // // Read WAV audio file and store the PCM data into pcmf32 +// fname can be a buffer of WAV data instead of a filename // The sample rate of the audio must be equal to COMMON_SAMPLE_RATE // If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM bool read_wav(