From: Johannes Gäßler
Date: Tue, 21 Oct 2025 13:27:53 +0000 (+0200)
Subject: CUDA: better error for FA kernel with 0 occupancy (#16643)
X-Git-Tag: upstream/0.0.7011~196
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=51d1a8c997bd2629ef211a30208058ea87a30982;p=pkg%2Fggml%2Fsources%2Fllama.cpp

CUDA: better error for FA kernel with 0 occupancy (#16643)
---

diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh
index bc0c2523..218ccff1 100644
--- a/ggml/src/ggml-cuda/fattn-common.cuh
+++ b/ggml/src/ggml-cuda/fattn-common.cuh
@@ -895,6 +895,7 @@ void launch_fattn(
     const dim3 block_dim(warp_size, nwarps, 1);
     int max_blocks_per_sm = 1; // Max. number of active blocks limited by occupancy.
     CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, fattn_kernel, block_dim.x * block_dim.y * block_dim.z, nbytes_shared));
+    GGML_ASSERT(max_blocks_per_sm > 0);
     int parallel_blocks = max_blocks_per_sm;
 
     dim3 blocks_num;