git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
ggml : fix Q4_3 cuBLAS + fix quantize_row_q4_2()
author Georgi Gerganov <redacted>
Sat, 22 Apr 2023 13:34:39 +0000 (16:34 +0300)
committer Georgi Gerganov <redacted>
Sat, 22 Apr 2023 13:34:39 +0000 (16:34 +0300)
src/ggml.c

index 4ecc6cf816409a529893610692fc9ad6a0634594..281b20283c16f858f53bf506fccae122ea0a2f20 100644 (file)
@@ -1242,9 +1242,9 @@ static void quantize_row_q4_2(const float * restrict x, void * restrict vy, int
 
     block_q4_2 * restrict y = vy;
 
-    quantize_row_q4_2_reference(x, y, k);
+    //quantize_row_q4_2_reference(x, y, k);
     // This produces the exact same format, just better match to the input floats ("better" as measured by RMSE)
-    //quantize_row_q4_2_rmse(x, y, k);
+    quantize_row_q4_2_rmse(x, y, k);
 }
 
 static void quantize_row_q4_3_reference(const float * restrict x, block_q4_3 * restrict y, int k) {
@@ -7992,6 +7992,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
         else if (type == GGML_TYPE_Q4_2) {
             dequantize_row_q_cuda = dequantize_row_q4_2_cuda;
         }
+        else if (type == GGML_TYPE_Q4_3) {
+            dequantize_row_q_cuda = dequantize_row_q4_3_cuda;
+        }
         else {
             GGML_ASSERT(false);
         }