git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
CUDA: fix MMQ stream-k fixup ne1 indices (llama/17089)
author Johannes Gäßler <redacted>
Sat, 8 Nov 2025 07:26:18 +0000 (08:26 +0100)
committer Georgi Gerganov <redacted>
Sun, 9 Nov 2025 16:30:22 +0000 (18:30 +0200)
src/ggml-cuda/mmq.cuh

index c9a07e82fedf2e8fc85562179b426414c6e937ca..2e133b6bda8841477098b73804c7a19b7ad44975 100644 (file)
@@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
     const int col_diff = col_high - col_low;
 
     for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) {
-        ids_dst_shared[j] = ids_dst[col_low + j];
+        ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j];
     }
     __syncthreads();