From: shalinib-ibm Date: Fri, 20 Mar 2026 23:11:45 +0000 (+0530) Subject: ggml-cpu: add always_inline to tinyBLAS_PPC accumulator saves (llama/20791) X-Git-Tag: v0.9.9~30 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=9b6c70190e6b9df0423604dccda24c4162db9aef;p=pkg%2Fggml%2Fsources%2Fggml ggml-cpu: add always_inline to tinyBLAS_PPC accumulator saves (llama/20791) Explicitly mark save_acc and add_save_acc with always_inline in tinyBLAS_PPC. This ensures the compiler keeps MMA accumulator disassembly within the kernel's register context, preventing unnecessary stack spills. Signed-off-by: Shalini Salomi Bodapati --- diff --git a/src/ggml-cpu/llamafile/sgemm.cpp b/src/ggml-cpu/llamafile/sgemm.cpp index c89e5076..63ceb635 100644 --- a/src/ggml-cpu/llamafile/sgemm.cpp +++ b/src/ggml-cpu/llamafile/sgemm.cpp @@ -3194,6 +3194,7 @@ class tinyBLAS_PPC { private: + __attribute__((always_inline)) inline void save_acc(acc_t * ACC, int64_t ii, int64_t jj) { vec_t vec_C[4]; __builtin_mma_disassemble_acc(vec_C, ACC); @@ -3204,6 +3205,7 @@ class tinyBLAS_PPC { } } + __attribute__((always_inline)) inline void add_save_acc(acc_t * ACC, int64_t ii, int64_t jj) { vec_t vec_C[4]; __builtin_mma_disassemble_acc(vec_C, ACC);