From: Johannes Gäßler <redacted>
Date: Tue, 21 Oct 2025 13:27:53 +0000 (+0200)
Subject: CUDA: better error for FA kernel with 0 occupancy (llama/16643)
X-Git-Tag: upstream/0.9.4.185~96
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=7bd1456cb6d30ccf7aa9834c59fb578fa9dffc6c;p=pkg%2Fggml%2Fsources%2Fggml

CUDA: better error for FA kernel with 0 occupancy (llama/16643)
---

diff --git a/src/ggml-cuda/fattn-common.cuh b/src/ggml-cuda/fattn-common.cuh
index bc0c2523..218ccff1 100644
--- a/src/ggml-cuda/fattn-common.cuh
+++ b/src/ggml-cuda/fattn-common.cuh
@@ -895,6 +895,7 @@ void launch_fattn(
     const dim3 block_dim(warp_size, nwarps, 1);
     int max_blocks_per_sm = 1; // Max. number of active blocks limited by occupancy.
     CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, fattn_kernel, block_dim.x * block_dim.y * block_dim.z, nbytes_shared));
+    GGML_ASSERT(max_blocks_per_sm > 0);
     int parallel_blocks = max_blocks_per_sm;
 
     dim3 blocks_num;