const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
+
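+ // each Q2_K scale byte packs a 4-bit scale in the low nibble and a 4-bit min in the high nibble; mask and shift to split them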
const __m128i mins_and_scales = __lsx_vld((const __m128i*)x[i].scales, 0);
const __m128i scales8 = __lsx_vand_v(mins_and_scales, m4);
const __m128i mins8 = __lsx_vand_v(__lsx_vsrli_h(mins_and_scales, 4), m4);
for (int i = 0; i < nb; ++i) {
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const uint8_t * restrict q3 = x[i].qs;
+ const int8_t * restrict q8 = y[i].qs;
// Set up scales
memcpy(aux, x[i].scales, 12);
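+ // the 12 scale bytes pack 16 six-bit scales; rebuild them below, one scale per byte of scales128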
__m128i scales128 = lsx_set_w(
int is = 0;
__m256i xvbit;
- const uint8_t * restrict q3 = x[i].qs;
- const int8_t * restrict q8 = y[i].qs;
for (int j = 0; j < QK_K/128; ++j) {
// load low 2 bits
*s = vec_extract(vsumf0, 0);
#elif defined __loongarch_asx
+ GGML_UNUSED(kmask1);
+ GGML_UNUSED(kmask2);
+ GGML_UNUSED(kmask3);
const __m256i m4 = __lasx_xvreplgr2vr_b(0xF);
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
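+ // dmin is negated up front so the mins correction can be accumulated with plain multiply-adds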
memcpy(utmp, x[i].scales, 12);
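+ // unpack the 12 packed bytes into 8 six-bit scales (utmp[0..1]) and 8 six-bit mins (utmp[2..3])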
+ utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
+ const uint32_t uaux = utmp[1] & kmask1;
+ utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
+ utmp[2] = uaux;
+ utmp[0] &= kmask1;
const uint8_t * restrict q4 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
__m256 vd = __lasx_xvreplfr2vr_s(d);
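+ // convert the 32-bit integer sums to float and accumulate d * sumi into acc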
acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
+
}
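+ // horizontal sum: fold the four float lanes of acc_m (the dmin/mins correction) into lane 0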
acc_m = __lsx_vfadd_s(acc_m, (__m128)__lsx_vpermi_w((__m128i)acc_m, (__m128i)acc_m, 0xee));
__m128i tmp1 = __lsx_vinsgr2vr_w(__lsx_vldi(0), __lsx_vpickve2gr_w((__m128i)acc_m, 1), 0);
acc_m = __lsx_vfadd_s(acc_m, (__m128)tmp1);
+
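+ // ft_union bit-casts the 32-bit lane extracted from acc_m back to float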
ft_union fi;
fi.i = __lsx_vpickve2gr_w((__m128i)acc_m, 0);
*s = hsum_float_8(acc) + fi.f;
-
#else
const uint8_t * scales = (const uint8_t*)&utmp[0];
*s = vec_extract(vsumf0, 0);
#elif defined __loongarch_asx
+ GGML_UNUSED(kmask1);
+ GGML_UNUSED(kmask2);
+ GGML_UNUSED(kmask3);
const __m256i m4 = __lasx_xvreplgr2vr_b(0xF);
const __m128i mzero = __lsx_vldi(0);
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
memcpy(utmp, x[i].scales, 12);
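+ // same 12-byte scale/min unpack as above: scales land in utmp[0..1], mins in utmp[2..3]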
+ utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
+ const uint32_t uaux = utmp[1] & kmask1;
+ utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
+ utmp[2] = uaux;
+ utmp[0] &= kmask1;
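+ // widen the 16 unpacked bytes to 16-bit lanes: the low eight hold the scales, the high eight the mins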
const __m256i mins_and_scales = lasx_extu8_16(lsx_set_w(utmp[3], utmp[2], utmp[1], utmp[0]));
p16_1 = lasx_madd_h(scale_1, p16_1);
sumi = __lasx_xvadd_w(sumi, __lasx_xvadd_w(p16_0, p16_1));
+
}
__m256 vd = __lasx_xvreplfr2vr_s(d);
acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
+
}
*s = hsum_float_8(acc) + summs;
// F16 arithmetic is not supported by LASX, so we use F32 instead
-#define GGML_F32Cx8 __m256
+#define GGML_F32Cx8 __m256
#define GGML_F32Cx8_ZERO (__m256)__lasx_xvldi(0)
#define GGML_F32Cx8_SET1(x) (__m256)__lasx_xvreplgr2vr_w((x))
-static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t *x) {
+static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t * x) {
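+ // widen each of the 8 fp16 inputs to fp32 through a scratch array, then load them as a single __m256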
float tmp[8];
for (int i = 0; i < 8; i++) {
tmp[i] = GGML_FP16_TO_FP32(x[i]);
}
return (__m256)__lasx_xvld(tmp, 0);
}
-static inline void __lasx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
+static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
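+ // store the 8 fp32 lanes to a scratch array, then narrow each value back to fp16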
float arr[8];
__lasx_xvst(y, arr, 0);
- for (int i = 0; i < 8; i++)
+ for (int i = 0; i < 8; i++) {
x[i] = GGML_FP32_TO_FP16(arr[i]);
+ }
}
#define GGML_F32Cx8_LOAD(x) __lasx_f32cx8_load(x)
#define GGML_F32Cx8_STORE(x, y) __lasx_f32cx8_store(x, y)
#define GGML_F16_STEP 32
#define GGML_F16_EPR 4
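+// the fp16 helpers below work on 4 values per 128-bit LSX register (GGML_F16_EPR), 32 values per step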
-static inline __m128 __lsx_f16x4_load(ggml_fp16_t *x) {
+static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) {
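+ // widen 4 fp16 values to fp32 and load them as one __m128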
float tmp[4];
tmp[0] = GGML_FP16_TO_FP32(x[0]);
tmp[1] = GGML_FP16_TO_FP32(x[1]);
tmp[2] = GGML_FP16_TO_FP32(x[2]);
tmp[3] = GGML_FP16_TO_FP32(x[3]);
return __lsx_vld(tmp, 0);
}
-static inline void __lsx_f16x4_store(ggml_fp16_t *x, __m128 y) {
+static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
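+ // spill the 4 fp32 lanes to a scratch array, then convert each back to fp16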
float arr[4];
__lsx_vst(y, arr, 0);