From: Mario Limonciello Date: Mon, 16 Feb 2026 13:46:08 +0000 (-0600) Subject: Adjust workaround for ROCWMMA_FATTN/GFX9 to only newer ROCm versions (#19591) X-Git-Tag: gguf-v0.18.0~103 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2ba9adc093127016a48cd0c5d6bf1420dafe17a6;p=pkg%2Fggml%2Fsources%2Fllama.cpp Adjust workaround for ROCWMMA_FATTN/GFX9 to only newer ROCm versions (#19591) Avoids issues with ROCm 6.4.4. Closes: https://github.com/ggml-org/llama.cpp/issues/19580 Fixes: 6845f7f87 ("Add a workaround for compilation with ROCWMMA_FATTN and gfx9 (#19461)") Signed-off-by: Mario Limonciello (AMD) --- diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index 35735d48b..f19defbff 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -63,7 +63,7 @@ static __global__ void flash_attn_ext_f16( constexpr int frag_m = ncols == 8 ? 32 : 16; constexpr int frag_n = ncols == 8 ? 8 : 16; static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); -#if defined(GGML_USE_HIP) +#if defined(GGML_USE_HIP) && HIP_VERSION >= 60500000 typedef wmma::fragment frag_a_K; typedef wmma::fragment frag_a_V; typedef wmma::fragment frag_b; @@ -135,7 +135,7 @@ static __global__ void flash_attn_ext_f16( __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. half2 * VKQ2 = (half2 *) VKQ; -#if defined(GGML_USE_HIP) +#if defined(GGML_USE_HIP) && HIP_VERSION >= 60500000 const _Float16 * K_h_f16 = reinterpret_cast<const _Float16 *>(K_h); const _Float16 * V_h_f16 = reinterpret_cast<const _Float16 *>(V_h); _Float16 * KQ_f16 = reinterpret_cast<_Float16 *>(KQ);