ifeq ($(UNAME_M),amd64)
CFLAGS += -mavx -mavx2 -mfma -mf16c
endif
-ifeq ($(UNAME_M),ppc64le)
+# Match every ppc64 flavour: ppc64le (little-endian) and ppc64 (big-endian).
+ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CFLAGS += -mpower9-vector
endif
+ # Require c++23's std::byteswap for big-endian support.
+ ifeq ($(UNAME_M),ppc64)
+ CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
+ endif
endif
ifndef WHISPER_NO_ACCELERATE
# Mac M1 - include Accelerate framework
#if defined(__cpp_lib_hardware_interference_size)
#define CACHE_LINE_SIZE hardware_destructive_interference_size
#else
+// POWER9 uses 128-byte cache lines; all other fallback targets assume 64.
+#if defined(__POWER9_VECTOR__)
+#define CACHE_LINE_SIZE 128
+#else
#define CACHE_LINE_SIZE 64
#endif
+#endif
static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
#define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \
vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \
vec_extract_fp32_from_shortl(vec_xl(0, p))
-#define GGML_F16_VEC_STORE(p, r, i) \
- if (i & 0x1) \
- vec_xst(vec_pack_to_short_fp32(r[i], r[i - 1]), 0, p - GGML_F16_EPR)
+// Runtime endianness probe: byte i of the uint16_t value 1.
+// Little-endian: GGML_ENDIAN_BYTE(1) == 0, GGML_ENDIAN_BYTE(0) == 1,
+//   so the store packs (r[i], r[i - 1]) — identical to the old macro.
+// Big-endian:    GGML_ENDIAN_BYTE(1) == 1, GGML_ENDIAN_BYTE(0) == 0,
+//   so the two halves are packed in swapped order.
+#define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i]
+// NOTE(review): a bare `if` in a statement macro is a dangling-else
+// hazard at call sites; wrap in do { ... } while (0) if usage changes.
+#define GGML_F16_VEC_STORE(p, r, i) \
+ if (i & 0x1) \
+ vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \
+ r[i - GGML_ENDIAN_BYTE(0)]), \
+ 0, p - GGML_F16_EPR)
#elif defined(__wasm_simd128__)
#include <regex>
#include <random>
+#if defined(GGML_BIG_ENDIAN)
+#include <bit>      // std::byteswap (C++23), std::bit_cast (C++20)
+#include <cstdint>  // std::uint32_t — not guaranteed to arrive via <bit>
+
+// Reverse the byte order of an integral value.
+template<typename T>
+static T byteswap(T value) {
+    return std::byteswap(value);
+}
+
+// std::byteswap only accepts integral types, so floats are swapped
+// through their 32-bit object representation using std::bit_cast.
+template<>
+float byteswap(float value) {
+    return std::bit_cast<float>(byteswap(std::bit_cast<std::uint32_t>(value)));
+}
+
+// In-place byteswap of every element of `tensor`, viewed as an array of T.
+// NOTE(review): assumes tensor->data is contiguous — confirm for ggml.
+template<typename T>
+static void byteswap_tensor_data(ggml_tensor * tensor) {
+    T * datum = reinterpret_cast<T *>(tensor->data);
+    // Index with ggml_nelements()'s own (wider than int) type so tensors
+    // with more than INT_MAX elements cannot overflow the loop counter.
+    auto n = ggml_nelements(tensor);
+    for (decltype(n) i = 0; i < n; i++) {
+        datum[i] = byteswap(datum[i]);
+    }
+}
+
+// Dispatch the in-place element byteswap on the tensor's element type.
+// NOTE(review): any multi-byte type not listed here would be silently
+// left unswapped — verify no such type is ever loaded on big-endian.
+static void byteswap_tensor(ggml_tensor * tensor) {
+ switch (tensor->type) {
+ case GGML_TYPE_I16: {
+ byteswap_tensor_data<int16_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_F16: {
+ byteswap_tensor_data<ggml_fp16_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_I32: {
+ byteswap_tensor_data<int32_t>(tensor);
+ break;
+ }
+ case GGML_TYPE_F32: {
+ byteswap_tensor_data<float>(tensor);
+ break;
+ }
+ default: { // GGML_TYPE_I8: single-byte elements need no swap
+ break;
+ }
+ }
+}
+
+// Byteswap helpers; all three compile to no-ops unless GGML_BIG_ENDIAN
+// is defined (GGML model files are little-endian on disk).
+#define BYTESWAP_VALUE(d) (d) = byteswap(d)
+#define BYTESWAP_FILTERS(f) \
+ do { \
+ for (auto & datum : f.data) { \
+ datum = byteswap(datum); \
+ } \
+ } while (0)
+#define BYTESWAP_TENSOR(t) \
+ do { \
+ byteswap_tensor(t); \
+ } while (0)
+#else
+#define BYTESWAP_VALUE(d) do {} while (0)
+#define BYTESWAP_FILTERS(f) do {} while (0)
+#define BYTESWAP_TENSOR(t) do {} while (0)
+#endif
+
#define WHISPER_ASSERT(x) \
do { \
if (!(x)) { \
template<typename T>
static void read_safe(whisper_model_loader * loader, T & dest) {
loader->read(loader->context, &dest, sizeof(T));
+ // No-op unless GGML_BIG_ENDIAN is defined; see BYTESWAP_VALUE.
+ BYTESWAP_VALUE(dest);
}
static bool kv_cache_init(
filters.data.resize(filters.n_mel * filters.n_fft);
loader->read(loader->context, filters.data.data(), filters.data.size() * sizeof(float));
+ BYTESWAP_FILTERS(filters);
}
// load vocab
}
loader->read(loader->context, tensor->data, ggml_nbytes(tensor));
+ BYTESWAP_TENSOR(tensor);
//printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
total_size += ggml_nbytes(tensor);