struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
struct whisper_full_params result = {
- /*.strategy =*/ WHISPER_SAMPLING_GREEDY,
+ /*.strategy =*/ strategy,
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
/*.n_max_text_ctx =*/ 16384,
auto & cur = beam_candidates[cur_c++];
- while (beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
+ while (beam_candidates.size() > cur_c && beam_candidates[cur_c].sequence.sum_logprobs_all == cur.sequence.sum_logprobs_all && i > 0) {
++cur_c;
}
int n_samples,
int n_threads);
+ // Convert RAW PCM audio to a log mel spectrogram, applying a Phase Vocoder to speed up the audio x2.
+ // The resulting spectrogram is stored inside the provided whisper context.
+ // Returns 0 on success.
+ WHISPER_API int whisper_pcm_to_mel_phase_vocoder(
+ struct whisper_context* ctx,
+ const float* samples,
+ int n_samples,
+ int n_threads);
+
+
// This can be used to set a custom log mel spectrogram inside the provided whisper context.
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
// n_mel must be 80