]> git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
ggml-cpu: Fix gcc 15 ICE on ppc64le (#20083) (llama/20130)
authorshalinib-ibm <redacted>
Fri, 6 Mar 2026 15:22:39 +0000 (20:52 +0530)
committerGeorgi Gerganov <redacted>
Sun, 15 Mar 2026 19:50:13 +0000 (21:50 +0200)
This patch addresses an Internal Compiler Error (Segmentation fault)
observed with gcc 15 by replacing the intrinsic + cast by doing
a cat on the data first and then calling the intrinsic. This bypasses the
buggy compiler path while maintaining identical instruction selection.

Performance Verification:
Assembly analysis on RHEL 9 (GCC 15.1.1) confirms that both the original
code and this fix generate the identical Power10 prefixed load instruction:
    `plxv 40, 2(14)`

This ensures zero performance regression while unblocking builds on
newer toolchains.

Reproduced on:
- Alpine Linux + GCC 15.2.0-r2
- RHEL 9  + GCC 15.1.1 (gcc-toolset-15)

Signed-off-by: Shalini Salomi Bodapati <redacted>
src/ggml-cpu/llamafile/sgemm.cpp

index 5fd452a03d2cd093cfd6ee8c58d7c2ed90cba2d7..c89e5076f26eca9279f9dd226e29ebb7dea91407 100644 (file)
@@ -2497,7 +2497,7 @@ class tinyBLAS_Q0_PPC {
                 for (int r = 0; r < 8; r++) {
                     const block_q4_0 * current_blk = rows_base[r] + blk;
                     vector float v_scale = vec_extract_fp32_from_shorth(vec_splats(current_blk->d));
-                    vector signed char v_qs = reinterpret_cast<vector signed char>(vec_xl(0, current_blk->qs));
+                    vector signed char v_qs = vec_xl(0, (const vector signed char *)current_blk->qs);
                     vector signed char c1, c2;
                     unpack_q4_to_q8(v_qs, c1, c2);
                     convert_and_scale_q8(c1, v_scale, hp_res[r][0], hp_res[r][1]);
@@ -2611,14 +2611,14 @@ class tinyBLAS_Q0_PPC {
                 i = (cols >> 2);
                 if (i > 0) {
                     do {
-                        c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
-                        c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
-                        c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
-                        c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
-                        c5[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset5->qs));
-                        c6[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset6->qs));
-                        c7[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset7->qs));
-                        c8[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset8->qs));
+                        c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
+                        c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
+                        c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
+                        c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
+                        c5[1] = vec_xl(0, (const vector signed char *)aoffset5->qs);
+                        c6[1] = vec_xl(0, (const vector signed char *)aoffset6->qs);
+                        c7[1] = vec_xl(0, (const vector signed char *)aoffset7->qs);
+                        c8[1] = vec_xl(0, (const vector signed char *)aoffset8->qs);
 
                         process_q4_elements(c1, & comparray[0]);
                         process_q4_elements(c2, & comparray[1]);
@@ -2657,10 +2657,10 @@ class tinyBLAS_Q0_PPC {
             i = (cols >> 2);
             if (i > 0) {
                 do {
-                    c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
-                    c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
-                    c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
-                    c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
+                    c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
+                    c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
+                    c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
+                    c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
 
                     process_q4_elements(c1, & comparray[0]);
                     process_q4_elements(c2, & comparray[1]);
@@ -2686,9 +2686,9 @@ class tinyBLAS_Q0_PPC {
             if (i > 0) {
                 do {
                     switch(rows) {
-                        case 3: c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
-                        case 2: c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
-                        case 1: c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
+                        case 3: c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
+                        case 2: c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
+                        case 1: c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
                             break;
                     }
                     process_q4_elements(c1, & comparray[0]);