ggml: bugfix: fix inactive (masked-off) elements being left agnostic in RISC-V vector code (#8748)

author CarterLi999 <redacted>

Mon, 29 Jul 2024 16:38:34 +0000 (00:38 +0800)

committer GitHub <redacted>

Mon, 29 Jul 2024 16:38:34 +0000 (18:38 +0200)
author CarterLi999 <redacted>
Mon, 29 Jul 2024 16:38:34 +0000 (00:38 +0800)
committer GitHub <redacted>
Mon, 29 Jul 2024 16:38:34 +0000 (18:38 +0200)
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c

index 9016314f507f4e24c6fad773c103608406e00cfa..16aaf523fcff9b7f941bac87ab455d5fb6ddeb69 100644 (file)
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -6449,22 +6449,22 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
              // compute mask for subtraction
              vuint8m1_t qh_m0 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_0 = __riscv_vmseq_vx_u8m1_b8(qh_m0, 0, vl);
-            vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_m(vmask_0, q3_0, 0x4, vl);
+            vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_mu(vmask_0, q3_0, q3_0, 0x4, vl);
              m <<= 1;
  
              vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_1 = __riscv_vmseq_vx_u8m1_b8(qh_m1, 0, vl);
-            vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_m(vmask_1, q3_1, 0x4, vl);
+            vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_mu(vmask_1, q3_1, q3_1, 0x4, vl);
              m <<= 1;
  
              vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_2 = __riscv_vmseq_vx_u8m1_b8(qh_m2, 0, vl);
-            vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_m(vmask_2, q3_2, 0x4, vl);
+            vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_mu(vmask_2, q3_2, q3_2, 0x4, vl);
              m <<= 1;
  
              vuint8m1_t qh_m3 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_3 = __riscv_vmseq_vx_u8m1_b8(qh_m3, 0, vl);
-            vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_m(vmask_3, q3_3, 0x4, vl);
+            vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_mu(vmask_3, q3_3, q3_3, 0x4, vl);
              m <<= 1;
  
              // load Q8 and take product with Q3
@@ -7720,13 +7720,13 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
              vint8m1_t q5_a = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q5_x, 0x0F, vl));
              vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_1 = __riscv_vmsne_vx_u8m1_b8(qh_m1, 0, vl);
-            vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_m(vmask_1, q5_a, 16, vl);
+            vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_mu(vmask_1, q5_a, q5_a, 16, vl);
              m <<= 1;
  
              vint8m1_t q5_l = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q5_x, 0x04, vl));
              vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
              vbool8_t vmask_2 = __riscv_vmsne_vx_u8m1_b8(qh_m2, 0, vl);
-            vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_m(vmask_2, q5_l, 16, vl);
+            vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_mu(vmask_2, q5_l, q5_l, 16, vl);
              m <<= 1;
  
              vint16m2_t v0 = __riscv_vwmul_vv_i16m2(q5_m1, q8_y1, vl);
author	CarterLi999 <redacted>
	Mon, 29 Jul 2024 16:38:34 +0000 (00:38 +0800)
committer	GitHub <redacted>
	Mon, 29 Jul 2024 16:38:34 +0000 (18:38 +0200)