From: Johannes Gäßler
Date: Sat, 8 Nov 2025 07:26:18 +0000 (+0100)
Subject: CUDA: fix MMQ stream-k fixup ne1 indices (llama/17089)
X-Git-Tag: upstream/0.9.4.185~13
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=30a0c4c981d18cab19410c32aef1b50cefc33e74;p=pkg%2Fggml%2Fsources%2Fggml

CUDA: fix MMQ stream-k fixup ne1 indices (llama/17089)
---

diff --git a/src/ggml-cuda/mmq.cuh b/src/ggml-cuda/mmq.cuh
index c9a07e82..2e133b6b 100644
--- a/src/ggml-cuda/mmq.cuh
+++ b/src/ggml-cuda/mmq.cuh
@@ -3494,7 +3494,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
     const int col_diff = col_high - col_low;
 
     for (int j = threadIdx.y*warp_size + threadIdx.x; j < mmq_x; j += nwarps*warp_size) {
-        ids_dst_shared[j] = ids_dst[col_low + j];
+        ids_dst_shared[j] = ids_dst[col_low + jt*mmq_x + j];
     }
     __syncthreads();
 