From: Diego Devesa <redacted>
Date: Fri, 28 Nov 2025 15:33:23 +0000 (-0800)
Subject: ggml : add GGML_SCHED_NO_REALLOC option to disable reallocations in ggml_backend_sched (llama/17276)
X-Git-Tag: upstream/0.9.4.395~97
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5c33741b429e4558dde28d60112e9d85c3f6a582;p=pkg%2Fggml%2Fsources%2Fggml

ggml : add GGML_SCHED_NO_REALLOC option to disable reallocations in ggml_backend_sched (llama/17276)

* ggml : add GGML_SCHED_NO_REALLOC option to disable reallocations in ggml_backend_sched
Enabled in ggml-ci for testing.

* llama : update worst-case graph for unified cache

* ci : disable op offload in some tests

* fix spelling

---------

Co-authored-by: Georgi Gerganov <redacted>
---

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0211255a..9b10df00 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,6 +183,7 @@ endif()
 # ggml core
 set(GGML_SCHED_MAX_COPIES  "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
 option(GGML_CPU                             "ggml: enable CPU backend"                        ON)
+option(GGML_SCHED_NO_REALLOC                "ggml: disallow reallocations in ggml-alloc (for debugging)" OFF)
 
 # 3rd party libs / backends
 option(GGML_ACCELERATE                      "ggml: enable Accelerate framework"               ON)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a4499509..a36f5b66 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -221,6 +221,10 @@ if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
 endif()
 
+if (GGML_SCHED_NO_REALLOC)
+    target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC)
+endif()
+
 add_library(ggml
             ggml-backend-reg.cpp)
 add_library(ggml::ggml ALIAS ggml)
diff --git a/src/ggml-alloc.c b/src/ggml-alloc.c
index 91aff205..218222ec 100644
--- a/src/ggml-alloc.c
+++ b/src/ggml-alloc.c
@@ -921,10 +921,15 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
         }
         if (realloc) {
 #ifndef NDEBUG
-            size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
-            GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+            {
+                size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
+                if (cur_size > 0) {
+                    GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n",
+                        __func__, ggml_backend_buft_name(galloc->bufts[i]),
+                        cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+                }
+            }
 #endif
-
             ggml_vbuffer_free(galloc->buffers[i]);
             galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
             if (galloc->buffers[i] == NULL) {
diff --git a/src/ggml-backend.cpp b/src/ggml-backend.cpp
index eeaf35c1..4cf377e7 100644
--- a/src/ggml-backend.cpp
+++ b/src/ggml-backend.cpp
@@ -1395,14 +1395,20 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
 
     // allocate graph
     if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
+#ifdef GGML_SCHED_NO_REALLOC
+        GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
+#endif
+
+#ifndef NDEBUG
+        GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
+#endif
+
         // the re-allocation may cause the split inputs to be moved to a different address
         // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
         for (int i = 0; i < sched->n_backends; i++) {
             ggml_backend_synchronize(sched->backends[i]);
         }
-#ifndef NDEBUG
-        GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
-#endif
+
         ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
         if (!ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
             GGML_LOG_ERROR("%s: failed to allocate graph\n", __func__);