]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
ggml-cpu: add always_inline to tinyBLAS_PPC accumulator saves (#20791)
author shalinib-ibm <redacted>
Fri, 20 Mar 2026 23:11:45 +0000 (04:41 +0530)
committer GitHub <redacted>
Fri, 20 Mar 2026 23:11:45 +0000 (07:11 +0800)
Explicitly mark save_acc and add_save_acc with always_inline
in tinyBLAS_PPC. This ensures the compiler keeps MMA accumulator
disassembly within the kernel's register context, preventing unnecessary
stack spills.

Signed-off-by: Shalini Salomi Bodapati <redacted>
ggml/src/ggml-cpu/llamafile/sgemm.cpp

index c89e5076f26eca9279f9dd226e29ebb7dea91407..63ceb635deafe37ce2d080e7ad82a1a735a64407 100644 (file)
@@ -3194,6 +3194,7 @@ class tinyBLAS_PPC {
 
   private:
 
+    __attribute__((always_inline))
     inline void save_acc(acc_t * ACC, int64_t ii, int64_t jj) {
         vec_t vec_C[4];
         __builtin_mma_disassemble_acc(vec_C, ACC);
@@ -3204,6 +3205,7 @@ class tinyBLAS_PPC {
         }
     }
 
+    __attribute__((always_inline))
     inline void add_save_acc(acc_t * ACC, int64_t ii, int64_t jj) {
         vec_t vec_C[4];
         __builtin_mma_disassemble_acc(vec_C, ACC);