From: lixing-star
Date: Wed, 23 Jul 2025 06:39:51 +0000 (+0800)
Subject: ggml: fix loongarch quantize_row_q8_1 error (#14827)
X-Git-Tag: upstream/0.0.6073~106
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6c88b3bb2509d980e6a64c50fdf8dd304929f770;p=pkg%2Fggml%2Fsources%2Fllama.cpp

ggml: fix loongarch quantize_row_q8_1 error (#14827)
---

diff --git a/ggml/src/ggml-cpu/arch/loongarch/quants.c b/ggml/src/ggml-cpu/arch/loongarch/quants.c
index 9e33fb32..7908da4d 100644
--- a/ggml/src/ggml-cpu/arch/loongarch/quants.c
+++ b/ggml/src/ggml-cpu/arch/loongarch/quants.c
@@ -544,7 +544,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
         __m128 max4 = __lsx_vfmax_s( lasx_extractf128( max_abs, 1 ), lasx_extractf128( max_abs, 0) );
         max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vpickod_d((__m128i) max4, (__m128i)max4 ) );
         __m128 tmp = max4;
-        max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vextrins_w((__m128i)tmp, (__m128i)max4, 0x10 ));
+        max4 = __lsx_vfmax_s( max4, (__m128)__lsx_vextrins_w((__m128i)tmp, (__m128i)max4, 0x1 ));
         const float max_scalar = ((v4f32)max4)[0];
 
         // Quantize these floats