From: Johannes Gäßler
Date: Wed, 1 Apr 2026 19:28:19 +0000 (+0200)
Subject: CUDA: fix FA kernel selection logic (llama/21271)
X-Git-Tag: v0.9.11~4
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=878642b85d1d6f2ba2d9f98e3298fab4d504500f;p=pkg%2Fggml%2Fsources%2Fggml

CUDA: fix FA kernel selection logic (llama/21271)
---

diff --git a/src/ggml-cuda/fattn.cu b/src/ggml-cuda/fattn.cu
index a21c5361..addf9320 100644
--- a/src/ggml-cuda/fattn.cu
+++ b/src/ggml-cuda/fattn.cu
@@ -340,7 +340,14 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
         case 128:
         case 112:
         case 256:
+            if (V->ne[0] != K->ne[0]) {
+                return BEST_FATTN_KERNEL_NONE;
+            }
+            break;
         case 512:
+            if (V->ne[0] != K->ne[0]) {
+                return BEST_FATTN_KERNEL_NONE;
+            }
             if (!gqa_opt_applies) {
                 return BEST_FATTN_KERNEL_NONE;
             }