option(GGML_HOME "whisper: Path to external GGML source" OFF)
set(
- SOURCE_FILES
- ${WHISPER_LIB_DIR}/whisper.cpp
- ${CMAKE_SOURCE_DIR}/jni.c
-)
+ SOURCE_FILES
+ ${WHISPER_LIB_DIR}/whisper.cpp
+ ${CMAKE_SOURCE_DIR}/jni.c
+ )
if (NOT GGML_HOME)
set(
${WHISPER_LIB_DIR}/ggml-alloc.c
${WHISPER_LIB_DIR}/ggml-backend.c
${WHISPER_LIB_DIR}/ggml-quants.c
-
- )
+ )
endif()
find_library(LOG_LIB log)
endif ()
if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
-
target_compile_options(${target_name} PRIVATE -O3)
target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
target_link_options(${target_name} PRIVATE -flto)
-
endif ()
if (GGML_HOME)
return res;
}
+// NOTE: not tested
+inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
+ int8x16_t res;
+
+ res[ 0] = a[b[ 0]];
+ res[ 1] = a[b[ 1]];
+ res[ 2] = a[b[ 2]];
+ res[ 3] = a[b[ 3]];
+ res[ 4] = a[b[ 4]];
+ res[ 5] = a[b[ 5]];
+ res[ 6] = a[b[ 6]];
+ res[ 7] = a[b[ 7]];
+ res[ 8] = a[b[ 8]];
+ res[ 9] = a[b[ 9]];
+ res[10] = a[b[10]];
+ res[11] = a[b[11]];
+ res[12] = a[b[12]];
+ res[13] = a[b[13]];
+ res[14] = a[b[14]];
+ res[15] = a[b[15]];
+
+ return res;
+}
+
#else
#define ggml_int16x8x2_t int16x8x2_t
#define ggml_vld1q_u8_x4 vld1q_u8_x4
#define ggml_vld1q_s8_x2 vld1q_s8_x2
#define ggml_vld1q_s8_x4 vld1q_s8_x4
+#define ggml_vqtbl1q_s8 vqtbl1q_s8
#endif
uint16_t gindex[8];
uint16x8x2_t vindex;
int8x16x4_t q1b;
- int8x16x4_t q8b;
+ ggml_int8x16x4_t q8b;
uint16x8x4_t scales;
int32x4x2_t sumi;
int32x4x2_t dotq;
q8b.val[2] = vld1q_s8(y[ib+1].qs);
q8b.val[3] = vld1q_s8(y[ib+1].qs + 16);
- q4b.val[0] = vqtbl1q_s8(values, vandq_u8(q4bits.val[0], m4b));
- q4b.val[1] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
- q4b.val[2] = vqtbl1q_s8(values, vandq_u8(q4bits.val[1], m4b));
- q4b.val[3] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
+ q4b.val[0] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[0], m4b));
+ q4b.val[1] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
+ q4b.val[2] = ggml_vqtbl1q_s8(values, vandq_u8 (q4bits.val[1], m4b));
+ q4b.val[3] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);