From: Johannes Gäßler Date: Thu, 14 Aug 2025 21:21:24 +0000 (+0200) Subject: CUDA: fix negative KV_max values in FA (#15321) X-Git-Tag: upstream/0.0.6199~29 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4227c9be4268ac844921b90f31595f81236bd317;p=pkg%2Fggml%2Fsources%2Fllama.cpp CUDA: fix negative KV_max values in FA (#15321) --- diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index e46f0e20..d4ed9383 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -539,11 +539,15 @@ static __global__ void flash_attn_mask_to_KV_max( all_inf = warp_reduce_all(all_inf); if (!all_inf) { - KV_max_sj += FATTN_KQ_STRIDE; break; } } + // If the break in the loop was not triggered, KV_max_sj is now -FATTN_KQ_STRIDE. + // If the break was triggered it's the lower edge of the tile with the first non-masked values. + // In either case, walk back the decrementation by FATTN_KQ_STRIDE. + KV_max_sj += FATTN_KQ_STRIDE; + if (threadIdx.x != 0) { return; }