# options
-option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
+option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
-option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
-option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
+option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
+option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
-option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
-option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
-option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
+option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
+option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
+option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
-option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
-option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
-option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
+option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
if (APPLE)
- option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
- option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
- option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
- option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
+ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
+ option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
+ option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
+ option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
- option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
+ option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
+ option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
else()
- option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
+ option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
endif()
-option(WHISPER_PERF "whisper: enable perf timings" OFF)
+option(WHISPER_PERF "whisper: enable perf timings" OFF)
# sanitizers
else()
message(WARNING "CoreML framework not found")
endif()
+
+ # Forward the fallback option to the compiler. The define must be spelled
+ # WHISPER_COREML_ALLOW_FALLBACK: that is the macro whisper.cpp tests with
+ # #ifndef before returning nullptr on a failed Core ML model load, and it is
+ # the same name the Makefile build passes through CXXFLAGS.
+ if (WHISPER_COREML_ALLOW_FALLBACK)
+ set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_COREML_ALLOW_FALLBACK)
+ endif()
endif()
endif()
ifeq ($(UNAME_M),amd64)
CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
+
ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
endif
endif
+
ifndef WHISPER_NO_ACCELERATE
# Mac M1 - include Accelerate framework
ifeq ($(UNAME_S),Darwin)
LDFLAGS += -framework Accelerate
endif
endif
+
ifdef WHISPER_COREML
CXXFLAGS += -DWHISPER_USE_COREML
LDFLAGS += -framework Foundation -framework CoreML
+
+ifdef WHISPER_COREML_ALLOW_FALLBACK # opt-in: when this macro is defined, whisper.cpp skips its `return nullptr` after a failed Core ML model load
+ CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
+endif # WHISPER_COREML_ALLOW_FALLBACK
endif
+
ifdef WHISPER_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
endif
+
ifdef WHISPER_GPROF
CFLAGS += -pg
CXXFLAGS += -pg
endif
+
ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
+
ifneq ($(filter armv6%,$(UNAME_M)),)
# 32-bit Raspberry Pi 1, 2, 3
CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
endif
+
ifneq ($(filter armv7%,$(UNAME_M)),)
# 32-bit ARM, for example on Armbian or possibly raspbian
CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
# 64-bit ARM, use these (TODO: auto-detect 64-bit)
# CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
+
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 4
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
const bool use_coreml = wstate.ctx_coreml != nullptr;
#endif
- if (!use_coreml)
- {
+ if (!use_coreml) {
// convolution + gelu
{
wstate.use_buf(ctx0, 1);
cur = ggml_conv_1d_1s(ctx0, model.e_conv_1_w, mel);
cur = ggml_add(ctx0,
- ggml_repeat(ctx0,
- model.e_conv_1_b,
- cur),
- cur);
+ ggml_repeat(ctx0,
+ model.e_conv_1_b,
+ cur),
+ cur);
cur = ggml_gelu(ctx0, cur);
cur = ggml_conv_1d_2s(ctx0, model.e_conv_2_w, cur);
cur = ggml_add(ctx0,
- ggml_repeat(ctx0,
- model.e_conv_2_b,
- cur),
- cur);
+ ggml_repeat(ctx0,
+ model.e_conv_2_b,
+ cur),
+ cur);
cur = ggml_gelu(ctx0, cur);
}
// cur = ln_0_w*cur + ln_0_b
cur = ggml_add(ctx0,
- ggml_mul(ctx0,
- ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
- cur),
- ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
+ ggml_mul(ctx0,
+ ggml_repeat(ctx0, layer.attn_ln_0_w, cur),
+ cur),
+ ggml_repeat(ctx0, layer.attn_ln_0_b, cur));
}
// self-attention
wstate.use_buf(ctx0, 1);
struct ggml_tensor * Qcur = ggml_mul_mat(ctx0,
- layer.attn_q_w,
- cur);
+ layer.attn_q_w,
+ cur);
Qcur = ggml_add(ctx0,
- ggml_repeat(ctx0,
- layer.attn_q_b,
- Qcur),
- Qcur);
+ ggml_repeat(ctx0,
+ layer.attn_q_b,
+ Qcur),
+ Qcur);
//Qcur = ggml_scale(ctx0, Qcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
// note: no bias for Key
struct ggml_tensor * Kcur = ggml_mul_mat(ctx0,
- layer.attn_k_w,
- cur);
+ layer.attn_k_w,
+ cur);
//Kcur = ggml_scale(ctx0, Kcur, ggml_new_f32(ctx0, pow(float(n_state)/n_head, -0.25)));
struct ggml_tensor * Vcur = ggml_mul_mat(ctx0,
- layer.attn_v_w,
- cur);
+ layer.attn_v_w,
+ cur);
Vcur = ggml_add(ctx0,
- ggml_repeat(ctx0,
- layer.attn_v_b,
- Vcur),
- Vcur);
+ ggml_repeat(ctx0,
+ layer.attn_v_b,
+ Vcur),
+ Vcur);
// ------
wstate.use_buf(ctx0, 0);
- #ifdef WHISPER_USE_FLASH_ATTN
+#ifdef WHISPER_USE_FLASH_ATTN
struct ggml_tensor * Q =
ggml_permute(ctx0,
ggml_cpy(ctx0,
ggml_new_tensor_3d(ctx0, wctx.wtype, n_ctx, n_state/n_head, n_head));
struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, false);
- #else
+#else
struct ggml_tensor * Q =
ggml_permute(ctx0,
ggml_cpy(ctx0,
);
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, ggml_transpose(ctx0, V), KQ_soft_max);
- #endif
+#endif
struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
wstate.use_buf(ctx0, 1);
cur = ggml_cpy(ctx0,
- KQV_merged,
- ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
+ KQV_merged,
+ ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx));
}
// projection
wstate.use_buf(ctx0, 0);
cur = ggml_mul_mat(ctx0,
- layer.attn_ln_1_w,
- cur);
+ layer.attn_ln_1_w,
+ cur);
wstate.use_buf(ctx0, 1);
cur = ggml_add(ctx0,
- ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
- cur);
+ ggml_repeat(ctx0, layer.attn_ln_1_b, cur),
+ cur);
}
wstate.use_buf(ctx0, 2);
// cur = mlp_ln_w*cur + mlp_ln_b
cur = ggml_add(ctx0,
- ggml_mul(ctx0,
- ggml_repeat(ctx0, layer.mlp_ln_w, cur),
- cur),
- ggml_repeat(ctx0, layer.mlp_ln_b, cur));
+ ggml_mul(ctx0,
+ ggml_repeat(ctx0, layer.mlp_ln_w, cur),
+ cur),
+ ggml_repeat(ctx0, layer.mlp_ln_b, cur));
}
- #ifdef WHISPER_USE_FLASH_FF
+#ifdef WHISPER_USE_FLASH_FF
wstate.use_buf(ctx0, 0);
cur = ggml_flash_ff(ctx0,
- ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
- layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
- #else
+ ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wstate.wtype, n_state, n_ctx)),
+ layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
+#else
wstate.use_buf(ctx0, 0);
// fully connected
cur = ggml_mul_mat(ctx0,
- layer.mlp_0_w,
- cur);
+ layer.mlp_0_w,
+ cur);
wstate.use_buf(ctx0, 1);
cur = ggml_add(ctx0,
- ggml_repeat(ctx0, layer.mlp_0_b, cur),
- cur);
+ ggml_repeat(ctx0, layer.mlp_0_b, cur),
+ cur);
wstate.use_buf(ctx0, 0);
// projection
cur = ggml_mul_mat(ctx0,
- layer.mlp_1_w,
- cur);
+ layer.mlp_1_w,
+ cur);
wstate.use_buf(ctx0, 0);
cur = ggml_add(ctx0,
- ggml_repeat(ctx0, layer.mlp_1_b, cur),
- cur);
- #endif
+ ggml_repeat(ctx0, layer.mlp_1_b, cur),
+ cur);
+#endif
}
wstate.use_buf(ctx0, 3);
// cur = ln_f_g*cur + ln_f_b
cur = ggml_add(ctx0,
- ggml_mul(ctx0,
- ggml_repeat(ctx0, model.e_ln_w, cur),
- cur),
- ggml_repeat(ctx0, model.e_ln_b, cur));
+ ggml_mul(ctx0,
+ ggml_repeat(ctx0, model.e_ln_w, cur),
+ cur),
+ ggml_repeat(ctx0, model.e_ln_b, cur));
}
wstate.use_buf(ctx0, -1);
state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
if (!state->ctx_coreml) {
fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
-#ifndef WHISPER_COREML_ALLOW_FALLBACK
+#ifndef WHISPER_COREML_ALLOW_FALLBACK
return nullptr;
#endif
} else {
- fprintf(stderr, "%s: Core ML model loaded\n", __func__);
+ fprintf(stderr, "%s: Core ML model loaded\n", __func__);
}
#endif