git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
metal : round up to 16 to fix MTLDebugComputeCommandEncoder assertion (#3938)
author: Peter Sugihara <redacted>
Fri, 3 Nov 2023 19:18:18 +0000 (12:18 -0700)
committer: GitHub <redacted>
Fri, 3 Nov 2023 19:18:18 +0000 (21:18 +0200)
ggml-metal.m

index acdb8384316862ba2c0943b2179d0140b96adc83..78ae4485da8e279ff5c536b8b2065ea1a5a59b17 100644 (file)
@@ -1017,7 +1017,7 @@ void ggml_metal_graph_compute(
                             [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
                             [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
                             [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];
 
                             [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                         } break;
@@ -1348,7 +1348,7 @@ void ggml_metal_graph_compute(
                             [encoder setBytes:&ne00    length:sizeof( int64_t) atIndex:2];
                             [encoder setBytes:&nb01    length:sizeof(uint64_t) atIndex:3];
                             [encoder setBytes:&eps     length:sizeof(   float) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];
 
                             const int64_t nrows = ggml_nrows(src0);