git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
CUDA: fix typo in FlashAttention code (llama/13926)
author: Johannes Gäßler <redacted>
Fri, 30 May 2025 19:22:03 +0000 (21:22 +0200)
committer: Georgi Gerganov <redacted>
Sun, 1 Jun 2025 11:01:05 +0000 (14:01 +0300)
src/ggml-cuda/fattn-mma-f16.cuh

index 7120053b6ee01efe73b729ee83f1e352c0ce12d8..925f39e890db927eee9c27cef3f50a575838e7af 100644 (file)
@@ -1246,7 +1246,7 @@ static __global__ void flash_attn_ext_f16(
         NO_DEVICE_CODE;
         return;
     }
-#endif __CUDA_ARCH__ == GGML_CUDA_CC_TURING
+#endif // __CUDA_ARCH__ == GGML_CUDA_CC_TURING
 
     static_assert(!mla || DKQ >= DV, "MLA needs DKQ >= DV");