//
#include <arm_neon.h>
-#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
-#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
+
+#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+
+static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
+ __fp16 tmp;
+ memcpy(&tmp, &h, sizeof(ggml_fp16_t));
+ return (float)tmp;
+}
-#define GGML_FP16_TO_FP32(x) ((float) (x))
-#define GGML_FP32_TO_FP16(x) (x)
+static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
+ ggml_fp16_t res;
+ __fp16 tmp = f;
+ memcpy(&res, &tmp, sizeof(ggml_fp16_t));
+ return res;
+}
#else
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
// This is also true for POWER9.
-#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
-
+#if !defined(GGML_FP16_TO_FP32)
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
uint16_t s;
memcpy(&s, &f, sizeof(uint16_t));
}
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
+#endif
+#if !defined(GGML_FP32_TO_FP16)
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#endif
#define GGML_HASHTABLE_FULL ((size_t)-1)
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * (float)x[i].d;
- const float dmin = -y[i].d * (float)x[i].dmin;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * (float)x[i].d;
- const float dmin = -y[i].d * (float)x[i].dmin;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
int32_t isum = -4*(scales[0] * y[i].bsums[0] + scales[2] * y[i].bsums[1] + scales[1] * y[i].bsums[2] + scales[3] * y[i].bsums[3]);
- const float d = y[i].d * (float)x[i].d;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8x16_t htmp = vcombine_u8(hbits, vshr_n_u8(hbits, 1));
q3h.val[0] = vandq_u8(mh, vshlq_n_u8(htmp, 2));
int32_t isum = -4*(scales[0] * y[i].bsums[0] + scales[2] * y[i].bsums[1] + scales[1] * y[i].bsums[2] + scales[3] * y[i].bsums[3]);
- const float d = y[i].d * (float)x[i].d;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
aux16[1] = (a[0] >> 4) & 0x0f0f;
const int32_t summi = scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]);
- sum_mins += y[i].d * (float)x[i].d[1] * summi;
+ sum_mins += y[i].d * GGML_FP16_TO_FP32(x[i].d[1]) * summi;
- const float d = y[i].d * (float)x[i].d[0];
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d[0]);
const ggml_uint8x16x2_t q4bits = ggml_vld1q_u8_x2(q4);
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * (float)x[i].d;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const int8_t * sc = x[i].scales;
const uint8_t * restrict q5 = x[i].qs;
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * (float)x[i].d;
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const int8_t * sc = x[i].scales;
const uint8_t * restrict q5 = x[i].qs;
for (int i = 0; i < nb; ++i) {
- const float d_all = (float)x[i].d;
+ const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q6 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
for (int i = 0; i < nb; ++i) {
- const float d_all = (float)x[i].d;
+ const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q6 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
float sumf = 0;
for (int ib = 0; ib < nb; ib += 2) {
-
q4bits.val[0] = vld1q_u8(x[ib+0].qs);
q4bits.val[1] = vld1q_u8(x[ib+1].qs);
q8b.val[0] = vld1q_s8(y[ib+0].qs);
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
- sumf += (float)x[ib+0].d * (float)y[ib+0].d * vaddvq_s32(prod_1) + (float)x[ib+1].d * (float)y[ib+1].d * vaddvq_s32(prod_2);
-
+ sumf +=
+ GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) +
+ GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2);
}
*s = sumf;
// note: do not use these inside ggml.c
// these are meant to be used via the ggml.h API
float ggml_fp16_to_fp32(ggml_fp16_t x) {
- return (float) GGML_FP16_TO_FP32(x);
+ return GGML_FP16_TO_FP32(x);
}
ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define GGML_F16x8 float16x8_t
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
- #define GGML_F16x8_LOAD vld1q_f16
+ #define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
#define GGML_F16x8_STORE vst1q_f16
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
#define GGML_F16x8_ADD vaddq_f16
#define GGML_F32Cx4 float32x4_t
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
- #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16(x))
+ #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
#define GGML_F32Cx4_ADD vaddq_f32