# options
-option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
+option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
-option(GGML_PERF "ggml: enable perf timings" ${GGML_PERF})
+option(GGML_PERF "ggml: enable perf timings" OFF)
option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
# sanitizers
// run the inference
{
- whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
+ whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
wparams.print_realtime = true;
wparams.print_progress = false;
////////////////////////////////////////////////////////////////////////////
-struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy) {
+struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
struct whisper_full_params result;
switch (strategy) {
- case WHISPER_DECODE_GREEDY:
+ case WHISPER_SAMPLING_GREEDY:
{
result = {
- .strategy = WHISPER_DECODE_GREEDY,
- .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
- .offset_ms = 0,
+ /*.strategy =*/ WHISPER_SAMPLING_GREEDY,
- .translate = false,
- .no_context = false,
- .print_special_tokens = false,
- .print_progress = true,
- .print_realtime = false,
- .print_timestamps = true,
+ /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+ /*.offset_ms =*/ 0,
- .language = "en",
+ /*.translate =*/ false,
+ /*.no_context =*/ false,
+ /*.print_special_tokens =*/ false,
+ /*.print_progress =*/ true,
+ /*.print_realtime =*/ false,
+ /*.print_timestamps =*/ true,
- .greedy = {
- .n_past = 0,
+ /*.language =*/ "en",
+
+ /*.greedy =*/ {
+ /*.n_past =*/ 0,
+ },
+
+ /*.beam_search =*/ {
+ /*.n_past =*/ -1,
+ /*.beam_width =*/ -1,
+ /*.n_best =*/ -1,
},
};
} break;
- case WHISPER_DECODE_BEAM_SEARCH:
+ case WHISPER_SAMPLING_BEAM_SEARCH:
{
result = {
- .strategy = WHISPER_DECODE_GREEDY,
- .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
- .offset_ms = 0,
-
- .translate = false,
- .no_context = false,
- .print_special_tokens = false,
- .print_progress = true,
- .print_realtime = false,
- .print_timestamps = true,
-
- .language = "en",
-
- .beam_search = {
- .n_past = 0,
- .beam_width = 10,
- .n_best = 5,
+ /*.strategy =*/ WHISPER_SAMPLING_BEAM_SEARCH,
+
+ /*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+ /*.offset_ms =*/ 0,
+
+ /*.translate =*/ false,
+ /*.no_context =*/ false,
+ /*.print_special_tokens =*/ false,
+ /*.print_progress =*/ true,
+ /*.print_realtime =*/ false,
+ /*.print_timestamps =*/ true,
+
+ /*.language =*/ "en",
+
+ /*.greedy =*/ {
+ /*.n_past =*/ -1,
+ },
+
+ /*.beam_search =*/ {
+ /*.n_past =*/ 0,
+ /*.beam_width =*/ 10,
+ /*.n_best =*/ 5,
},
};
} break;
whisper_token id = 0;
whisper_token tid = whisper_token_beg(ctx);
- id = whisper_sample_best(ctx, result_len == 0 || i > 32);
+ id = whisper_sample_best(ctx, result_len == 0);
if (i > 0) {
tid = whisper_sample_timestamp(ctx);
}
// end of text token
if (id == whisper_token_eot(ctx)) {
if (result_len == 0) {
- // TODO: figure out how to resolve this
- fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
- //result_len = i + 1;
+ if (seek + seek_delta + 100 >= whisper_n_len(ctx)) {
+ result_len = i + 1;
+ } else {
+ // TODO: figure out how to resolve this
+ fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
+ }
}
break;
}
//
// C interface
//
-
+ // The following interface is thread-safe as long as the sample whisper_context is not used by multiple threads
+ // concurrently.
//
// Basic usage:
//
////////////////////////////////////////////////////////////////////////////
- // Available decoding strategies
- enum whisper_decode_strategy {
- WHISPER_DECODE_GREEDY, // Always select the most probable token
- WHISPER_DECODE_BEAM_SEARCH, // TODO: not implemented yet!
+ // Available sampling strategies
+ enum whisper_sampling_strategy {
+ WHISPER_SAMPLING_GREEDY, // Always select the most probable token
+ WHISPER_SAMPLING_BEAM_SEARCH, // TODO: not implemented yet!
};
struct whisper_full_params {
- enum whisper_decode_strategy strategy;
+ enum whisper_sampling_strategy strategy;
int n_threads;
int offset_ms;
const char * language;
- union {
- struct {
- int n_past;
- } greedy;
-
- struct {
- int n_past;
- int beam_width;
- int n_best;
- } beam_search;
- };
+ struct {
+ int n_past;
+ } greedy;
+
+ struct {
+ int n_past;
+ int beam_width;
+ int n_best;
+ } beam_search;
};
- WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy);
+ WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
// Uses the specified decoding strategy to obtain the text.
#include <stdint.h>
#include <stdio.h>
-
#if defined _MSC_VER
#include "msvc_thread_atomic.h"
#else
#endif
#define GGML_DEBUG 0
+#define GGML_GELU_FP16
#if UINTPTR_MAX == 0xFFFFFFFF
#define GGML_MEM_ALIGN 4
}
}
+#ifdef GGML_GELU_FP16
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
uint16_t t;
for (int i = 0; i < n; ++i) {
ggml_fp16_t fp16 = ggml_fp32_to_fp16(x[i]);
memcpy(&t, &fp16, sizeof(uint16_t));
- y[i] = table_gelu_f16[t];
+ y[i] = ggml_fp16_to_fp32(table_gelu_f16[t]);
}
}
-
-//inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
-// for (int i = 0; i < n; ++i) {
-// y[i] = ggml_gelu_f32(x[i]);
-// }
-//}
+#else
+inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
+ for (int i = 0; i < n; ++i) {
+ y[i] = ggml_gelu_f32(x[i]);
+ }
+}
+#endif
inline static void ggml_vec_sum_f32 (const int n, float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) sum += x[i]; *s += sum; }
inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { ggml_vec_norm_f32(n, s, x); *s = 1./(*s); }
--- /dev/null
+#pragma once
+#include <Windows.h>
+
+typedef volatile LONG atomic_int;
+typedef atomic_int atomic_bool;
+
+static void atomic_store(atomic_int* ptr, LONG val) {
+ InterlockedExchange(ptr, val);
+}
+static LONG atomic_load(atomic_int* ptr) {
+ return InterlockedCompareExchange(ptr, 0, 0);
+}
+static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
+ return InterlockedExchangeAdd(ptr, inc);
+}
+static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
+ return atomic_fetch_add(ptr, -(dec));
+}
+
+typedef HANDLE pthread_t;
+
+typedef DWORD thread_ret_t;
+static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
+ out = CreateThread(NULL, 0, func, arg, 0, NULL);
+ return out != NULL;
+}
+
+static int pthread_join(pthread_t thread, void* unused) {
+ return (int) WaitForSingleObject(thread, INFINITE);
+}
+