From: Georgi Gerganov Date: Sat, 22 Apr 2023 13:34:39 +0000 (+0300) Subject: ggml : fix Q4_3 cuBLAS + fix quantize_row_q4_2() X-Git-Tag: upstream/0.0.1642~1526 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b94aca174fd70a27e5bf2153302baf83a8594727;p=pkg%2Fggml%2Fsources%2Fggml ggml : fix Q4_3 cuBLAS + fix quantize_row_q4_2() --- diff --git a/src/ggml.c b/src/ggml.c index 4ecc6cf8..281b2028 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -1242,9 +1242,9 @@ static void quantize_row_q4_2(const float * restrict x, void * restrict vy, int block_q4_2 * restrict y = vy; - quantize_row_q4_2_reference(x, y, k); + //quantize_row_q4_2_reference(x, y, k); // This produces the exact same format, just better match to the input floats ("better" as measured by RMSE) - //quantize_row_q4_2_rmse(x, y, k); + quantize_row_q4_2_rmse(x, y, k); } static void quantize_row_q4_3_reference(const float * restrict x, block_q4_3 * restrict y, int k) { @@ -7992,6 +7992,9 @@ static void ggml_compute_forward_mul_mat_q_f32( else if (type == GGML_TYPE_Q4_2) { dequantize_row_q_cuda = dequantize_row_q4_2_cuda; } + else if (type == GGML_TYPE_Q4_3) { + dequantize_row_q_cuda = dequantize_row_q4_3_cuda; + } else { GGML_ASSERT(false); }