From: Georgi Gerganov
Date: Fri, 14 Jun 2024 14:14:09 +0000 (+0300)
Subject: metal : utilize max shared memory for mul_mat_id (llama/7935)
X-Git-Tag: upstream/0.0.1642~594
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=ee4f37c17d7d2310a7e2e1c06554f9d3ab6ef91a;p=pkg%2Fggml%2Fsources%2Fggml

metal : utilize max shared memory for mul_mat_id (llama/7935)
---

diff --git a/src/ggml-metal.m b/src/ggml-metal.m
index ec9e9530..f894274c 100644
--- a/src/ggml-metal.m
+++ b/src/ggml-metal.m
@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
  // ne21 = n_rows
  const int dst_rows = ne20*ne21;
  const int dst_rows_min = n_as;
+ const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
 
  // max size of the rowids array in the kernel shared buffer
- GGML_ASSERT(dst_rows <= 2048);
+ GGML_ASSERT(dst_rows <= dst_rows_max);
 
  // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
  // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel