////////////////////////////////////////////////////////////////////////////
-struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy) {
+struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
struct whisper_full_params result;
switch (strategy) {
- case WHISPER_DECODE_GREEDY:
+ case WHISPER_SAMPLING_GREEDY:
{
-#if defined(_MSC_VER)
result = {
-#else
- result = (struct whisper_full_params) {
-#endif
- .strategy = WHISPER_DECODE_GREEDY,
- .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
- .offset_ms = 0,
+ /*.strategy =*/ WHISPER_SAMPLING_GREEDY,
- .translate = false,
- .no_context = false,
- .print_special_tokens = false,
- .print_progress = true,
- .print_realtime = false,
- .print_timestamps = true,
+ /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+ /*.offset_ms =*/ 0,
- .language = "en",
+ /*.translate =*/ false,
+ /*.no_context =*/ false,
+ /*.print_special_tokens =*/ false,
+ /*.print_progress =*/ true,
+ /*.print_realtime =*/ false,
+ /*.print_timestamps =*/ true,
- .greedy = {
- .n_past = 0,
+ /*.language =*/ "en",
+
+ /*.greedy =*/ {
+ /*.n_past =*/ 0,
+ },
+
+ /*.beam_search =*/ {
+ /*.n_past =*/ -1,
+ /*.beam_width =*/ -1,
+ /*.n_best =*/ -1,
},
};
} break;
- case WHISPER_DECODE_BEAM_SEARCH:
+ case WHISPER_SAMPLING_BEAM_SEARCH:
{
-#if defined(_MSC_VER)
result = {
-#else
- result = (struct whisper_full_params) {
-#endif
- .strategy = WHISPER_DECODE_BEAM_SEARCH,
- .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
- .offset_ms = 0,
-
- .translate = false,
- .no_context = false,
- .print_special_tokens = false,
- .print_progress = true,
- .print_realtime = false,
- .print_timestamps = true,
-
- .language = "en",
-
- .beam_search = {
- .n_past = 0,
- .beam_width = 10,
- .n_best = 5,
+ /*.strategy =*/ WHISPER_SAMPLING_BEAM_SEARCH,
+
+ /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+ /*.offset_ms =*/ 0,
+
+ /*.translate =*/ false,
+ /*.no_context =*/ false,
+ /*.print_special_tokens =*/ false,
+ /*.print_progress =*/ true,
+ /*.print_realtime =*/ false,
+ /*.print_timestamps =*/ true,
+
+ /*.language =*/ "en",
+
+ /*.greedy =*/ {
+ /*.n_past =*/ -1,
+ },
+
+ /*.beam_search =*/ {
+ /*.n_past =*/ 0,
+ /*.beam_width =*/ 10,
+ /*.n_best =*/ 5,
},
};
} break;
////////////////////////////////////////////////////////////////////////////
- // Available decoding strategies
- enum whisper_decode_strategy {
- WHISPER_DECODE_GREEDY, // Always select the most probable token
- WHISPER_DECODE_BEAM_SEARCH, // TODO: not implemented yet!
+ // Available sampling strategies
+ enum whisper_sampling_strategy {
+ WHISPER_SAMPLING_GREEDY, // Always select the most probable token
+ WHISPER_SAMPLING_BEAM_SEARCH, // TODO: not implemented yet!
};
struct whisper_full_params {
- enum whisper_decode_strategy strategy;
+ enum whisper_sampling_strategy strategy;
int n_threads;
int offset_ms;
const char * language;
- union {
- struct {
- int n_past;
- } greedy;
-
- struct {
- int n_past;
- int beam_width;
- int n_best;
- } beam_search;
- };
+ struct {
+ int n_past;
+ } greedy;
+
+ struct {
+ int n_past;
+ int beam_width;
+ int n_best;
+ } beam_search;
};
- WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy);
+ WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
// Uses the specified sampling strategy to obtain the text.