git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
CUDA: properly handle nb00=nb02 case for cpy (llama/17081)
author bssrdf <redacted>
Fri, 7 Nov 2025 22:41:58 +0000 (17:41 -0500)
committer Georgi Gerganov <redacted>
Sun, 9 Nov 2025 21:38:03 +0000 (23:38 +0200)
ggml/src/ggml-cuda/cpy.cu

index 1dba60eb143ef13ef4b9db8a9c8a109781cec1e7..50612237c8a23b9dee2d71203832da384a54e065 100644 (file)
@@ -198,7 +198,7 @@ static void ggml_cpy_flt_cuda(
     if (transposed) {
         GGML_ASSERT(ne == ne00*ne01*ne02);  // ne[3] is 1 assumed
         int ne00n, ne01n, ne02n;
-        if (nb00 < nb02) {
+        if (nb00 <= nb02) { // most likely safe to handle nb00 = nb02 case here
             ne00n = ne00;
             ne01n = ne01;
             ne02n = ne02;
@@ -206,8 +206,6 @@ static void ggml_cpy_flt_cuda(
             ne00n = ne00;
             ne01n = ne01*ne02;
             ne02n = 1;
-        } else {
-            GGML_ASSERT(false);
         }
 
         dim3 dimGrid( (ne01n + CUDA_CPY_TILE_DIM_2D - 1) / CUDA_CPY_TILE_DIM_2D,