git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
CUDA: fix MMQ stream-k for --split-mode row (llama/8167)
author: Johannes Gäßler <redacted>
Thu, 27 Jun 2024 14:26:05 +0000 (16:26 +0200)
committer: Georgi Gerganov <redacted>
Mon, 8 Jul 2024 10:03:28 +0000 (13:03 +0300)
src/ggml-cuda/mmq.cuh

index 31fcbf1397b6bb15475af811299efc8064006dd3..1396e7a753ac34175c93faac19459fc38868c44b 100644 (file)
@@ -2475,7 +2475,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc<float> tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {