git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
metal : fix synchronization in new matrix multiplication kernel (#2686)
author: Shouzheng Liu <redacted>
Mon, 21 Aug 2023 10:59:29 +0000 (06:59 -0400)
committer: GitHub <redacted>
Mon, 21 Aug 2023 10:59:29 +0000 (13:59 +0300)
ggml-metal.metal

index 3f3125236f197f36f873b47daaf73f7be9834195..88d48f6c6a2ebddc085d10063714c8f7ca20cad3 100644 (file)
@@ -1898,10 +1898,11 @@ kernel void kernel_mul_mm(device const  uchar * src0,
         threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
                                       + 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
         for (int i = 0; i < 8; i++) {
+            threadgroup_barrier(mem_flags::mem_device);
             simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
         }
 
-        threadgroup_barrier(mem_flags::mem_threadgroup);
+        threadgroup_barrier(mem_flags::mem_device);
         device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
         if (sgitg==0) {
             for (int i = 0; i < n_rows; i++) {