From: Georgi Gerganov <redacted>
Date: Thu, 5 Feb 2026 17:07:22 +0000 (+0200)
Subject: metal : adaptive CPU/GPU interleave based on number of nodes (#19369)
X-Git-Tag: upstream/0.0.8067~116
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=22cae832188a1f08d18bd0a707a4ba5cd03c7349;p=pkg%2Fggml%2Fsources%2Fllama.cpp

metal : adaptive CPU/GPU interleave based on number of nodes (#19369)
---

diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m
index a412d70ae..c7e8ebd3f 100644
--- a/ggml/src/ggml-metal/ggml-metal-context.m
+++ b/ggml/src/ggml-metal/ggml-metal-context.m
@@ -415,7 +415,7 @@ bool ggml_metal_cpy_tensor_async(ggml_metal_t ctx_src, ggml_metal_t ctx_dst, con
 
 enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
     // number of nodes encoded by the main thread (empirically determined)
-    const int n_main = 64;
+    const int n_main = MAX(64, 0.1*gf->n_nodes);
 
     // number of threads in addition to the main thread
     const int n_cb = ctx->n_cb;