From: Georgi Gerganov Date: Mon, 15 Jan 2024 11:27:00 +0000 (+0200) Subject: cuda : fix dequantize kernel names (llama/4938) X-Git-Tag: upstream/0.0.1642~1069 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4624d4aeff7dd5552a9ea077dd46c6e4dfe134e0;p=pkg%2Fggml%2Fsources%2Fggml cuda : fix dequantize kernel names (llama/4938) --- diff --git a/src/ggml-cuda.cu b/src/ggml-cuda.cu index a870718a..c3e14bc9 100644 --- a/src/ggml-cuda.cu +++ b/src/ggml-cuda.cu @@ -6309,14 +6309,14 @@ static void dequantize_row_q3_K_cuda(const void * vx, dst_t * y, const int k, cu } template -static void dequantize_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { +static void dequantize_row_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { const int nb32 = k / 32; const int nb = (k + 255) / 256; dequantize_block_q4_0<<>>(vx, y, nb32); } template -static void dequantize_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { +static void dequantize_row_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { const int nb32 = k / 32; const int nb = (k + 255) / 256; dequantize_block_q4_1<<>>(vx, y, nb32); @@ -6370,9 +6370,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) { int id; switch (type) { case GGML_TYPE_Q4_0: - return dequantize_q4_0_cuda; + return dequantize_row_q4_0_cuda; case GGML_TYPE_Q4_1: - return dequantize_q4_1_cuda; + return dequantize_row_q4_1_cuda; case GGML_TYPE_Q5_0: return dequantize_block_cuda; case GGML_TYPE_Q5_1: @@ -6407,9 +6407,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) { static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) { switch (type) { case GGML_TYPE_Q4_0: - return dequantize_q4_0_cuda; + return dequantize_row_q4_0_cuda; case GGML_TYPE_Q4_1: - return dequantize_q4_1_cuda; + return dequantize_row_q4_1_cuda; case GGML_TYPE_Q5_0: return dequantize_block_cuda; case GGML_TYPE_Q5_1: