From: Johannes Gäßler Date: Thu, 14 Nov 2024 12:00:15 +0000 (+0100) Subject: CUDA: no -sm row for very small matrices (#10185) X-Git-Tag: upstream/0.0.4488~408 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4a8ccb37ad9c9027cbcfd5548c19cdffe48d5197;p=pkg%2Fggml%2Fsources%2Fllama.cpp CUDA: no -sm row for very small matrices (#10185) --- diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index 357cee66..b5096b3e 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -2978,6 +2978,17 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g { struct ggml_tensor * a = op->src[0]; struct ggml_tensor * b = op->src[1]; + // for small weight matrices the active device can end up without any rows, don't use row split in those cases + // this avoids some edge cases (and the performance would not be good anyways) + if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) { + ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context; + int64_t row_low; + int64_t row_high; + get_row_split(&row_low, &row_high, a, buft_ctx->tensor_split, dev_ctx->device); + if (row_low == row_high) { + return false; + } + } if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) { return false; }