From: shalinib-ibm Date: Fri, 20 Mar 2026 23:11:45 +0000 (+0530) Subject: ggml-cpu: add always_inline to tinyBLAS_PPC accumulator saves (#20791) X-Git-Tag: upstream/0.0.8611~152 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=e6ec21e62f45a247d3a3d2cb8f164141853b1a9b;p=pkg%2Fggml%2Fsources%2Fllama.cpp ggml-cpu: add always_inline to tinyBLAS_PPC accumulator saves (#20791) Explicitly mark save_acc and add_save_acc with always_inline in tinyBLAS_PPC. This ensures the compiler keeps MMA accumulator disassembly within the kernel's register context, preventing unnecessary stack spills. Signed-off-by: Shalini Salomi Bodapati --- diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index c89e5076f..63ceb635d 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -3194,6 +3194,7 @@ class tinyBLAS_PPC { private: + __attribute__((always_inline)) inline void save_acc(acc_t * ACC, int64_t ii, int64_t jj) { vec_t vec_C[4]; __builtin_mma_disassemble_acc(vec_C, ACC); @@ -3204,6 +3205,7 @@ class tinyBLAS_PPC { } } + __attribute__((always_inline)) inline void add_save_acc(acc_t * ACC, int64_t ii, int64_t jj) { vec_t vec_C[4]; __builtin_mma_disassemble_acc(vec_C, ACC);