git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
CUDA: fix FA kernel selection logic (llama/21271)
author: Johannes Gäßler <redacted>
Wed, 1 Apr 2026 19:28:19 +0000 (21:28 +0200)
committer: Georgi Gerganov <redacted>
Thu, 2 Apr 2026 07:25:32 +0000 (10:25 +0300)
src/ggml-cuda/fattn.cu

index a21c53610483cad03ee00a0678eb594d477becff..addf93205ef92d8bf18840142867a9ffbf59619b 100644 (file)
@@ -340,7 +340,14 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
         case 128:
         case 112:
         case 256:
+            if (V->ne[0] != K->ne[0]) {
+                return BEST_FATTN_KERNEL_NONE;
+            }
+            break;
         case 512:
+            if (V->ne[0] != K->ne[0]) {
+                return BEST_FATTN_KERNEL_NONE;
+            }
             if (!gqa_opt_applies) {
                 return BEST_FATTN_KERNEL_NONE;
             }