From: Georgi Gerganov
Date: Mon, 8 Sep 2025 10:56:51 +0000 (+0300)
Subject: cuda : fix supports_op condition for get_rows when number of blocks is too large...
X-Git-Tag: v0.9.1~62
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=79fca529ce1a5aee4bd42b470e502253f75b56eb;p=pkg%2Fggml%2Fsources%2Fggml

cuda : fix supports_op condition for get_rows when number of blocks is too large (llama/15868)

* cuda : fix supports_op condition for get_rows when src1->ne2 > 1

ggml-ci

* ggml : add comment about ggml_get_rows

ggml-ci

* cuda : add FIXME [no ci]

* cuda : update support condition

ggml-ci
---

diff --git a/include/ggml.h b/include/ggml.h
index 058f4267..b7b472c5 100644
--- a/include/ggml.h
+++ b/include/ggml.h
@@ -1529,7 +1529,11 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
-    // supports 3D: a->ne[2] == b->ne[1]
+    // supports 4D a:
+    // a [n_embd, ne1, ne2, ne3]
+    // b I32 [n_rows, ne2, ne3, 1]
+    //
+    // return [n_embd, n_rows, ne2, ne3]
     GGML_API struct ggml_tensor * ggml_get_rows(
             struct ggml_context * ctx,
             struct ggml_tensor * a, // data
diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu
index a88b9f75..0c6bd363 100644
--- a/src/ggml-cuda/ggml-cuda.cu
+++ b/src/ggml-cuda/ggml-cuda.cu
@@ -3392,6 +3392,10 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                 return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32;
         case GGML_OP_GET_ROWS:
             {
+                // FIXME: https://github.com/ggml-org/llama.cpp/pull/15868
+                if (op->src[1]->ne[1]*op->src[1]->ne[2] > 65535) {
+                    return false;
+                }
                 switch (op->src[0]->type) {
                     case GGML_TYPE_F16:
                     case GGML_TYPE_F32:
diff --git a/src/ggml.c b/src/ggml.c
index f35c3379..50dc1aa2 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -3623,6 +3623,7 @@ struct ggml_tensor * ggml_get_rows(
         struct ggml_tensor * a,
         struct ggml_tensor * b) {
     GGML_ASSERT(a->ne[2] == b->ne[1]);
+    GGML_ASSERT(a->ne[3] == b->ne[2]);
     GGML_ASSERT(b->ne[3] == 1);
     GGML_ASSERT(b->type == GGML_TYPE_I32);