From: Diego Devesa Date: Fri, 30 May 2025 14:37:18 +0000 (-0700) Subject: cuda : prevent using split buffers with 3d/4d matrices (llama/13919) X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b14cee184a78501eb11857ac216d7dbe80e82895;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp cuda : prevent using split buffers with 3d/4d matrices (llama/13919) --- diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index c442a649..009ed904 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2994,9 +2994,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g { struct ggml_tensor * a = op->src[0]; struct ggml_tensor * b = op->src[1]; - // for small weight matrices the active device can end up without any rows, don't use row split in those cases - // this avoids some edge cases (and the performance would not be good anyways) if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) { + if (a->ne[2] > 1 || a->ne[3] > 1) { + return false; + } + // for small weight matrices the active device can end up without any rows, don't use row split in those cases + // this avoids some edge cases (and the performance would not be good anyways) ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context; int64_t row_low; int64_t row_high;