]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
CUDA: fix half2 -> half conversion for HIP (#15529)
author: Johannes Gäßler <redacted>
Sat, 23 Aug 2025 19:37:06 +0000 (21:37 +0200)
committer: GitHub <redacted>
Sat, 23 Aug 2025 19:37:06 +0000 (21:37 +0200)
ggml/src/ggml-cuda/fattn-tile-f16.cu

index 6239d184d0a67baeae0d43870b0d478c85003070..a900799a991c05812b7550d4b39350536bf28098 100644 (file)
@@ -258,7 +258,7 @@ static __global__ void flash_attn_tile_ext_f16(
             const half val = hexp(sink - kqmax[j0/nwarps]);
             kqsum[j0/nwarps] = kqsum[j0/nwarps] * KQ_max_scale;
             if (threadIdx.x == 0) {
-                kqsum[j0/nwarps].x = __hadd(kqsum[j0/nwarps].x, val);
+                kqsum[j0/nwarps].x = __hadd(__low2half(kqsum[j0/nwarps]), val);
             }
 
 #pragma unroll