From: Georgi Gerganov <redacted>
Date: Sat, 22 Apr 2023 13:34:39 +0000 (+0300)
Subject: ggml : fix Q4_3 cuBLAS + fix quantize_row_q4_2()
X-Git-Tag: upstream/0.0.1642~1526
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b94aca174fd70a27e5bf2153302baf83a8594727;p=pkg%2Fggml%2Fsources%2Fggml

ggml : fix Q4_3 cuBLAS + fix quantize_row_q4_2()
---

diff --git a/src/ggml.c b/src/ggml.c
index 4ecc6cf8..281b2028 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -1242,9 +1242,9 @@ static void quantize_row_q4_2(const float * restrict x, void * restrict vy, int
 
     block_q4_2 * restrict y = vy;
 
-    quantize_row_q4_2_reference(x, y, k);
+    //quantize_row_q4_2_reference(x, y, k);
     // This produces the exact same format, just better match to the input floats ("better" as measured by RMSE)
-    //quantize_row_q4_2_rmse(x, y, k);
+    quantize_row_q4_2_rmse(x, y, k);
 }
 
 static void quantize_row_q4_3_reference(const float * restrict x, block_q4_3 * restrict y, int k) {
@@ -7992,6 +7992,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
         else if (type == GGML_TYPE_Q4_2) {
             dequantize_row_q_cuda = dequantize_row_q4_2_cuda;
         }
+        else if (type == GGML_TYPE_Q4_3) {
+            dequantize_row_q_cuda = dequantize_row_q4_3_cuda;
+        }
         else {
             GGML_ASSERT(false);
         }