git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
CUDA: fix race condition in MMQ stream-k fixup (#13299)
author: Johannes Gäßler <redacted>
Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
committer: GitHub <redacted>
Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
ggml/src/ggml-cuda/mmq.cuh

index fc6ce0083007a67c6d40de242e7f227c0ec8bcac..e1096dce6d90eb44261034190d1d09a2c840b9ac 100644 (file)
@@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
     for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
         ids_dst_shared[j] = ids_dst[col_low + j];
     }
+    __syncthreads();
 
     const int offset_dst = it*mmq_y;
     dst += offset_dst;