From: Aadeshveer Singh Date: Sat, 20 Dec 2025 11:28:57 +0000 (+0530) Subject: Added comments explaining thread block size selection logic based on row count and... X-Git-Tag: upstream/0.0.7599~110 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=10b4f82d441b611231a20633036439622e22f199;p=pkg%2Fggml%2Fsources%2Fllama.cpp Added comments explaining thread block size selection logic based on row count and column size, derived from historical commit context (#18212) --- diff --git a/ggml/src/ggml-cuda/mean.cu b/ggml/src/ggml-cuda/mean.cu index 347abc18..691d8dcb 100644 --- a/ggml/src/ggml-cuda/mean.cu +++ b/ggml/src/ggml-cuda/mean.cu @@ -63,6 +63,9 @@ void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const int id = ggml_cuda_get_device(); const int nsm = ggml_cuda_info().devices[id].nsm; + + // Heuristic for block size selection to optimize occupancy. + // See discussion in: https://github.com/ggml-org/llama.cpp/pull/15132 if ((nrows / nsm) < 2) { const dim3 block_dims(512, 1, 1); reduce_rows_f32</*norm=*/true><<<block_nums, block_dims, 0, stream>>>(src0_d, dst_d, ncols);