From: Georgi Gerganov
Date: Thu, 27 Jul 2023 08:00:54 +0000 (+0300)
Subject: metal : disable graph concurrency optimization due to bug (#2413)
X-Git-Tag: gguf-v0.4.0~389
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=1a941869cbef8e9cc351a6c6987e4ae3b0f021f7;p=pkg%2Fggml%2Fsources%2Fllama.cpp

metal : disable graph concurrency optimization due to bug (#2413)
---

diff --git a/llama.cpp b/llama.cpp
index 024af99a..9a8ecdcf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1722,9 +1722,10 @@ static bool llama_eval_internal(
 
 #ifdef GGML_USE_METAL
     if (lctx.ctx_metal && N == 1) {
-        if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
-            ggml_metal_graph_find_concurrency(lctx.ctx_metal, gf);
-        }
+        // TODO: disabled until #2413 is resolved
+        //if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
+        //    ggml_metal_graph_find_concurrency(lctx.ctx_metal, gf);
+        //}
         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
         ggml_metal_graph_compute(lctx.ctx_metal, gf);
         ggml_metal_get_tensor   (lctx.ctx_metal, cur);