bool op_offload;
int debug;
+
+ // used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
+ // ref: https://github.com/ggml-org/llama.cpp/pull/17617
+ int debug_realloc;
+ int debug_graph_size;
+ int debug_prev_graph_size;
};
#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)
}
int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;
+
+ // remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
+ sched->debug_prev_graph_size = sched->debug_graph_size;
+ sched->debug_graph_size = graph_size;
+
if (sched->graph.size < graph_size) {
sched->graph.size = graph_size;
sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));
// allocate graph
if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
-#ifdef GGML_SCHED_NO_REALLOC
- GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
-#endif
-
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
#endif
+ if (sched->debug_realloc > 0) {
+ // we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
+ // example: https://github.com/ggml-org/llama.cpp/pull/17143
+ const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;
+
+ if (unexpected || sched->debug_realloc > 1) {
+ GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
+ sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
+ }
+ }
+
// the re-allocation may cause the split inputs to be moved to a different address
// synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
for (int i = 0; i < sched->n_backends; i++) {
const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;
+
+ sched->debug_realloc = 0;
+#ifdef GGML_SCHED_NO_REALLOC
+ sched->debug_realloc = 1;
+#endif
+ const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
+ sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
+
sched->n_backends = n_backends;
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
+ sched->debug_graph_size = 0;
+ sched->debug_prev_graph_size = 0;
+
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
sched->context_buffer = (char *) malloc(sched->context_buffer_size);