ifeq ($(UNAME_M),amd64)
CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
-ifeq ($(UNAME_M),ppc64le)
+# Match every ppc64 flavour: ppc64le (little-endian) and ppc64 (big-endian).
+ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CFLAGS += -mpower9-vector
endif
+ # Require c++23's std::byteswap for big-endian support.
+ ifeq ($(UNAME_M),ppc64)
+ CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+ endif
endif
ifndef WHISPER_NO_ACCELERATE
# Mac M1 - include Accelerate framework
#if defined(__cpp_lib_hardware_interference_size)
#define CACHE_LINE_SIZE hardware_destructive_interference_size
#else
+// POWER9 uses 128-byte cache lines; all other fallback targets assume 64.
+#if defined(__POWER9_VECTOR__)
+#define CACHE_LINE_SIZE 128
+#else
#define CACHE_LINE_SIZE 64
#endif
+#endif
static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
#define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
vec_extract_fp32_from_shortl(vec_xl(0, p))
-#define GGML_F16_VEC_STORE(p, r, i) \
- if (i & 0x1) \
- vec_xst(vec_pack_to_short_fp32(r[i], r[i - 1]), 0, p - GGML_F16_EPR)
+// Runtime endianness probe: byte i of the uint16_t value 1.
+// Little-endian: GGML_ENDIAN_BYTE(1) == 0, GGML_ENDIAN_BYTE(0) == 1,
+//   so the store packs (r[i], r[i - 1]) — identical to the old macro.
+// Big-endian:    GGML_ENDIAN_BYTE(1) == 1, GGML_ENDIAN_BYTE(0) == 0,
+//   so the two halves are packed in swapped order.
+#define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i]
+// NOTE(review): a bare `if` in a statement macro is a dangling-else
+// hazard at call sites; wrap in do { ... } while (0) if usage changes.
+#define GGML_F16_VEC_STORE(p, r, i) \
+ if (i & 0x1) \
+ vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \
+ r[i - GGML_ENDIAN_BYTE(0)]), \
+ 0, p - GGML_F16_EPR)
#elif defined(__wasm_simd128__)
#include <regex>
#include <random>
+#if defined(GGML_BIG_ENDIAN)
+#include <bit>      // std::byteswap (C++23), std::bit_cast (C++20)
+#include <cstdint>  // std::uint32_t — not guaranteed to arrive via <bit>
+
+// Reverse the byte order of an integral value.
+template<typename T>
+static T byteswap(T value) {
+    return std::byteswap(value);
+}
+
+// std::byteswap only accepts integral types, so floats are swapped
+// through their 32-bit object representation using std::bit_cast.
+template<>
+float byteswap(float value) {
+    return std::bit_cast<float>(byteswap(std::bit_cast<std::uint32_t>(value)));
+}
+
+// In-place byteswap of every element of `tensor`, viewed as an array of T.
+// NOTE(review): assumes tensor->data is contiguous — confirm for ggml.
+template<typename T>
+static void byteswap_tensor_data(ggml_tensor * tensor) {
+    T * datum = reinterpret_cast<T *>(tensor->data);
+    // Index with ggml_nelements()'s own (wider than int) type so tensors
+    // with more than INT_MAX elements cannot overflow the loop counter.
+    auto n = ggml_nelements(tensor);
+    for (decltype(n) i = 0; i < n; i++) {
+        datum[i] = byteswap(datum[i]);
+    }
+}
+
+// Dispatch the in-place element byteswap on the tensor's element type.
+// NOTE(review): any multi-byte type not listed here would be silently
+// left unswapped — verify no such type is ever loaded on big-endian.
+static void byteswap_tensor(ggml_tensor * tensor) {
+ switch (tensor->type) {
+ case GGML_TYPE_I16: {
+ byteswap_tensor_data<int16_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_F16: {
+ byteswap_tensor_data<ggml_fp16_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_I32: {
+ byteswap_tensor_data<int32_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_F32: {
+ byteswap_tensor_data<float>(tensor);
+ break;
+ }
+ default: { // GGML_TYPE_I8: single-byte elements need no swap
+ break;
+ }
+ }
+}
+
+// Byteswap helpers; all three compile to no-ops unless GGML_BIG_ENDIAN
+// is defined (GGML model files are little-endian on disk).
+#define BYTESWAP_VALUE(d) (d) = byteswap(d)
+#define BYTESWAP_FILTERS(f) \
+ do { \
+ for (auto & datum : f.data) { \
+ datum = byteswap(datum); \
+ } \
+ } while (0)
+#define BYTESWAP_TENSOR(t) \
+ do { \
+ byteswap_tensor(t); \
+ } while (0)
+#else
+#define BYTESWAP_VALUE(d) do {} while (0)
+#define BYTESWAP_FILTERS(f) do {} while (0)
+#define BYTESWAP_TENSOR(t) do {} while (0)
+#endif
+
#define WHISPER_ASSERT(x) \
do { \
if (!(x)) { \
template<typename T>
static void read_safe(whisper_model_loader * loader, T & dest) {
loader->read(loader->context, &dest, sizeof(T));
+ // No-op unless GGML_BIG_ENDIAN is defined; see BYTESWAP_VALUE.
+ BYTESWAP_VALUE(dest);
}
static bool kv_cache_init(
filters.data.resize(filters.n_mel * filters.n_fft);
loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
+ BYTESWAP_FILTERS(filters);
}
// load vocab
}
loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
+ BYTESWAP_TENSOR(tensor);
//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
total_size += ggml_nbytes(tensor);