CUDA: fix race condition in MMQ stream-k fixup (#13299)

author Johannes Gäßler <redacted>

Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)

committer GitHub <redacted>

Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
author Johannes Gäßler <redacted>
Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
committer GitHub <redacted>
Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh

index fc6ce0083007a67c6d40de242e7f227c0ec8bcac..e1096dce6d90eb44261034190d1d09a2c840b9ac 100644 (file)
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
      for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
          ids_dst_shared[j] = ids_dst[col_low + j];
      }
+    __syncthreads();
  
      const int offset_dst = it*mmq_y;
      dst += offset_dst;
author	Johannes Gäßler <redacted>
	Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)
committer	GitHub <redacted>
	Sun, 4 May 2025 12:16:39 +0000 (14:16 +0200)