From: Aaron Teo
Date: Wed, 18 Jun 2025 17:10:08 +0000 (+0800)
Subject: ggml-cpu: reduce asm calls for hsum (llama/14037)
X-Git-Tag: upstream/0.0.2309~107
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f8b07e25153a3a424c998d8e4257e847b35e2fd5;p=pkg%2Fggml%2Fsources%2Fggml

ggml-cpu: reduce asm calls for hsum (llama/14037)

Signed-off-by: Aaron Teo
---

diff --git a/src/ggml-cpu/simd-mappings.h b/src/ggml-cpu/simd-mappings.h
index 2e3669c0..e42364c5 100644
--- a/src/ggml-cpu/simd-mappings.h
+++ b/src/ggml-cpu/simd-mappings.h
@@ -944,10 +944,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
     for (int i = 0; i < offset; ++i) {              \
         x[i] = vec_add(x[i], x[offset + i]);        \
     }                                               \
-    res = vec_extract(x[0], 0) +                    \
-          vec_extract(x[0], 1) +                    \
-          vec_extract(x[0], 2) +                    \
-          vec_extract(x[0], 3);                     \
+    float32x4_t tmp = x[0] + vec_reve(x[0]);        \
+    res = tmp[0] + tmp[1];                          \
 }
 
 #define GGML_F32_VEC GGML_F32x4