From: bssrdf
Date: Fri, 7 Nov 2025 22:41:58 +0000 (-0500)
Subject: CUDA: properly handle nb00=nb02 case for cpy (llama/17081)
X-Git-Tag: upstream/1.8.3~355
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=547724b0a5d43540255647788b59ca2e78bba27d;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

CUDA: properly handle nb00=nb02 case for cpy (llama/17081)
---

diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml/src/ggml-cuda/cpy.cu
index 1dba60eb..50612237 100644
--- a/ggml/src/ggml-cuda/cpy.cu
+++ b/ggml/src/ggml-cuda/cpy.cu
@@ -198,7 +198,7 @@ static void ggml_cpy_flt_cuda(
     if (transposed) {
         GGML_ASSERT(ne == ne00*ne01*ne02); // ne[3] is 1 assumed
         int ne00n, ne01n, ne02n;
-        if (nb00 < nb02) {
+        if (nb00 <= nb02) { // most likely safe to handle nb00 = nb02 case here
             ne00n = ne00;
             ne01n = ne01;
             ne02n = ne02;
@@ -206,8 +206,6 @@ static void ggml_cpy_flt_cuda(
             ne00n = ne00;
             ne01n = ne01*ne02;
             ne02n = 1;
-        } else {
-            GGML_ASSERT(false);
         }
         dim3 dimGrid( (ne01n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D,