ggml : 32-bit arm compat (#1891)

author Georgi Gerganov <redacted>

Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)

committer GitHub <redacted>

Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)
author Georgi Gerganov <redacted>
Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)
committer GitHub <redacted>
Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)
diff --git a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt

index 49f34479a40fdeeafebcb17e1edb9a5a4c63416c..faaa7b662cf0bb79f664f7683280421794369f0b 100644 (file)
--- a/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
+++ b/examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt
@@ -9,10 +9,10 @@ set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../..)
  option(GGML_HOME       "whisper: Path to external GGML source" OFF)
  
  set(
-        SOURCE_FILES
-        ${WHISPER_LIB_DIR}/whisper.cpp
-        ${CMAKE_SOURCE_DIR}/jni.c
-)
+    SOURCE_FILES
+    ${WHISPER_LIB_DIR}/whisper.cpp
+    ${CMAKE_SOURCE_DIR}/jni.c
+    )
  
  if (NOT GGML_HOME)
      set(
@@ -22,8 +22,7 @@ if (NOT GGML_HOME)
          ${WHISPER_LIB_DIR}/ggml-alloc.c
          ${WHISPER_LIB_DIR}/ggml-backend.c
          ${WHISPER_LIB_DIR}/ggml-quants.c
-
-    )
+        )
  endif()
  
  find_library(LOG_LIB log)
@@ -44,7 +43,6 @@ function(build_library target_name)
      endif ()
  
      if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
-
          target_compile_options(${target_name} PRIVATE -O3)
          target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
          target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)
@@ -52,7 +50,6 @@ function(build_library target_name)
          target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
          target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
          target_link_options(${target_name} PRIVATE -flto)
-
      endif ()
  
      if (GGML_HOME)
diff --git a/ggml-quants.c b/ggml-quants.c

index 6336538f0e99ec79010c590ec65bcf9a1f69fe7f..8917c8af142558f9cdb9fd6cf142ed9c3e17a726 100644 (file)
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -438,6 +438,30 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
      return res;
  }
  
+// NOTE: not tested
+inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
+    int8x16_t res;
+
+    res[ 0] = a[b[ 0]];
+    res[ 1] = a[b[ 1]];
+    res[ 2] = a[b[ 2]];
+    res[ 3] = a[b[ 3]];
+    res[ 4] = a[b[ 4]];
+    res[ 5] = a[b[ 5]];
+    res[ 6] = a[b[ 6]];
+    res[ 7] = a[b[ 7]];
+    res[ 8] = a[b[ 8]];
+    res[ 9] = a[b[ 9]];
+    res[10] = a[b[10]];
+    res[11] = a[b[11]];
+    res[12] = a[b[12]];
+    res[13] = a[b[13]];
+    res[14] = a[b[14]];
+    res[15] = a[b[15]];
+
+    return res;
+}
+
  #else
  
  #define ggml_int16x8x2_t  int16x8x2_t
@@ -451,6 +475,7 @@ inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
  #define ggml_vld1q_u8_x4  vld1q_u8_x4
  #define ggml_vld1q_s8_x2  vld1q_s8_x2
  #define ggml_vld1q_s8_x4  vld1q_s8_x4
+#define ggml_vqtbl1q_s8   vqtbl1q_s8
  
  #endif
  
@@ -9333,7 +9358,7 @@ void ggml_vec_dot_iq1_s_q8_K  (int n, float * GGML_RESTRICT s, size_t bs, const
      uint16_t gindex[8];
      uint16x8x2_t vindex;
      int8x16x4_t q1b;
-    int8x16x4_t q8b;
+    ggml_int8x16x4_t q8b;
      uint16x8x4_t scales;
      int32x4x2_t sumi;
      int32x4x2_t dotq;
@@ -9506,10 +9531,10 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
          q8b.val[2]    = vld1q_s8(y[ib+1].qs);
          q8b.val[3]    = vld1q_s8(y[ib+1].qs + 16);
  
-        q4b.val[0] = vqtbl1q_s8(values, vandq_u8(q4bits.val[0], m4b));
-        q4b.val[1] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
-        q4b.val[2] = vqtbl1q_s8(values, vandq_u8(q4bits.val[1], m4b));
-        q4b.val[3] = vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
+        q4b.val[0] = ggml_vqtbl1q_s8(values, vandq_u8  (q4bits.val[0], m4b));
+        q4b.val[1] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[0], 4));
+        q4b.val[2] = ggml_vqtbl1q_s8(values, vandq_u8  (q4bits.val[1], m4b));
+        q4b.val[3] = ggml_vqtbl1q_s8(values, vshrq_n_u8(q4bits.val[1], 4));
  
          prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
          prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
author	Georgi Gerganov <redacted>
	Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)
committer	GitHub <redacted>
	Thu, 22 Feb 2024 16:31:40 +0000 (18:31 +0200)
examples/whisper.android/lib/src/main/jni/whisper/CMakeLists.txt		patch \| blob \| history
ggml-quants.c		patch \| blob \| history