From: Johannes Gäßler Date: Sat, 6 Dec 2025 12:45:36 +0000 (+0100) Subject: HIP: fix RDNA3 FP16/BF16 matrix multiplication (llama/17817) X-Git-Tag: upstream/0.9.4.395~49 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6f370af5bc69abe270f37d275c9590281c85f651;p=pkg%2Fggml%2Fsources%2Fggml HIP: fix RDNA3 FP16/BF16 matrix multiplication (llama/17817) --- diff --git a/src/ggml-cuda/mmf.cu b/src/ggml-cuda/mmf.cu index be2ad1c6..7cf33f0d 100644 --- a/src/ggml-cuda/mmf.cu +++ b/src/ggml-cuda/mmf.cu @@ -160,9 +160,9 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const case GGML_TYPE_F32: return ampere_mma_available(cc); case GGML_TYPE_F16: - return volta_mma_available(cc) || turing_mma_available(cc) || amd_wmma_available(cc); + return volta_mma_available(cc) || turing_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc)); case GGML_TYPE_BF16: - return ampere_mma_available(cc) || amd_wmma_available(cc); + return ampere_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc)); default: return false; }