git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
CUDA: fix typo in FlashAttention code (llama/13926)
author: Johannes Gäßler <redacted>
Fri, 30 May 2025 19:22:03 +0000 (21:22 +0200)
committer: Georgi Gerganov <redacted>
Sun, 1 Jun 2025 11:01:05 +0000 (14:01 +0300)
src/ggml-cuda/fattn-mma-f16.cuh

index 7120053b6ee01efe73b729ee83f1e352c0ce12d8..925f39e890db927eee9c27cef3f50a575838e7af 100644 (file)
@@ -1246,7 +1246,7 @@ static __global__ void flash_attn_ext_f16(
         NO_DEVICE_CODE;
         return;
     }
-#endif __CUDA_ARCH__ == GGML_CUDA_CC_TURING
+#endif // __CUDA_ARCH__ == GGML_CUDA_CC_TURING
 
     static_assert(!mla || DKQ >= DV, "MLA needs DKQ >= DV");