From: Aaron Teo
Date: Wed, 18 Jun 2025 17:10:08 +0000 (+0800)
Subject: ggml-cpu: reduce asm calls for hsum (llama/14037)
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=203451bcba96c225761c79113c2414de44efbeb2;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

ggml-cpu: reduce asm calls for hsum (llama/14037)

Signed-off-by: Aaron Teo
---

diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h
index 2e3669c0..e42364c5 100644
--- a/ggml/src/ggml-cpu/simd-mappings.h
+++ b/ggml/src/ggml-cpu/simd-mappings.h
@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
     for (int i = 0; i < offset; ++i) { \
         x[i] = vec_add(x[i], x[offset + i]); \
     } \
-    res = vec_extract(x[0], 0) + \
-          vec_extract(x[0], 1) + \
-          vec_extract(x[0], 2) + \
-          vec_extract(x[0], 3); \
+    float32x4_t tmp = x[0] + vec_reve(x[0]); \
+    res = tmp[0] + tmp[1]; \
 }
 
 #define GGML_F32_VEC GGML_F32x4