From: Johannes Gäßler
Date: Sat, 23 Aug 2025 19:37:06 +0000 (+0200)
Subject: CUDA: fix half2 -> half conversion for HIP (#15529)
X-Git-Tag: upstream/0.0.6527~268
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=710dfc465a68f7443b87d9f792cffba00ed739fe;p=pkg%2Fggml%2Fsources%2Fllama.cpp

CUDA: fix half2 -> half conversion for HIP (#15529)
---

diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu
index 6239d184..a900799a 100644
--- a/ggml/src/ggml-cuda/fattn-tile-f16.cu
+++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu
@@ -258,7 +258,7 @@ static __global__ void flash_attn_tile_ext_f16(
             const half val = hexp(sink - kqmax[j0/nwarps]);
             kqsum[j0/nwarps] = kqsum[j0/nwarps] * KQ_max_scale;
             if (threadIdx.x == 0) {
-                kqsum[j0/nwarps].x = __hadd(kqsum[j0/nwarps].x, val);
+                kqsum[j0/nwarps].x = __hadd(__low2half(kqsum[j0/nwarps]), val);
             }
 
 #pragma unroll