From: Johannes Gäßler
Date: Thu, 27 Jun 2024 14:26:05 +0000 (+0200)
Subject: CUDA: fix MMQ stream-k for --split-mode row (llama/8167)
X-Git-Tag: upstream/0.0.1642~558
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=9bda223d5fa134172c26acce3abfe0f61e5904a3;p=pkg%2Fggml%2Fsources%2Fggml

CUDA: fix MMQ stream-k for --split-mode row (llama/8167)
---

diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh
index 31fcbf13..1396e7a7 100644
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {