From: leejet
Date: Fri, 24 Oct 2025 19:39:37 +0000 (+0800)
Subject: ggml: fix CUDA grid launch condition for large block_nums.y in binbcast (llama/16742)
X-Git-Tag: upstream/0.9.4.185~87
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=503bbce4311350a292d04fa65319b720d50a668e;p=pkg%2Fggml%2Fsources%2Fggml

ggml: fix CUDA grid launch condition for large block_nums.y in binbcast (llama/16742)

* Fix CUDA grid launch condition for large block_nums.y

* add backend ops test

* reduce test repetitions
---

diff --git a/src/ggml-cuda/binbcast.cu b/src/ggml-cuda/binbcast.cu
index 60240102..0e6d777b 100644
--- a/src/ggml-cuda/binbcast.cu
+++ b/src/ggml-cuda/binbcast.cu
@@ -272,7 +272,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
         const uint3 ne12 = init_fastdiv_values((uint32_t) cne1[2]);
         const uint3 ne13 = init_fastdiv_values((uint32_t) cne1[3]);
 
-        if (block_nums.z > 65535) {
+        if (block_nums.z > 65535 || block_nums.y > 65535) {
             int block_num = (ne0 * ne1 * ne2 * ne3 + block_size - 1) / block_size;
             const uint3 prod_012 = init_fastdiv_values((uint32_t) (ne0 * ne1 * ne2));
             const uint3 prod_01 = init_fastdiv_values((uint32_t) (ne0 * ne1));
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 991c6259..9eb2b668 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -6407,6 +6407,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         add_test_bin_bcast(type, {1, 1, 640, 1}, {32, 32, 1, 1});
         add_test_bin_bcast(type, {5120, 1, 1, 1}, {1, 256, 1, 1});
         add_test_bin_bcast(type, {640, 1, 1, 1}, {1, 1, 1, 1});
+        add_test_bin_bcast(type, {64, 262144, 1, 1}, {1, 1, 1, 1});
         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {1, 1, 1, 1});
         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1});
     }