void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
// (optional) sort/optimize the nodes in the graph
- void (*optimize_graph) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+ void (*graph_optimize) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
};
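A backend that wants to opt in only has to reorder the nodes of the split it is handed; leaving the slot NULL (as most backends below do) skips the pass entirely. A minimal sketch of such a callback for a hypothetical "mybackend", assuming the internal headers ggml-impl.h and ggml-backend-impl.h are available:

    #include "ggml-impl.h"          // struct ggml_cgraph, GGML_LOG_DEBUG
    #include "ggml-backend-impl.h"  // ggml_backend_t, struct ggml_backend_i

    // Hypothetical backend hook: it may permute cgraph->nodes in place, but must
    // preserve the graph's dependencies. Doing nothing at all is also valid.
    static void ggml_backend_mybackend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        GGML_UNUSED(backend);
        GGML_LOG_DEBUG("%s: %d nodes\n", __func__, cgraph->n_nodes);
    }

It would then be wired into the backend's interface table as /* .graph_optimize = */ ggml_backend_mybackend_graph_optimize, matching the entries further down.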
struct ggml_backend {
backend->iface.event_wait(backend, event);
}
-static void ggml_backend_optimize_graph(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static void ggml_backend_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
GGML_ASSERT(backend);
- if (backend->iface.optimize_graph != NULL) {
- backend->iface.optimize_graph(backend, cgraph);
+ if (backend->iface.graph_optimize != NULL) {
+ backend->iface.graph_optimize(backend, cgraph);
}
}
// Optimize this split of the graph. This has to happen before graph_copy is built
// so that the copy reflects the optimized node order.
- ggml_backend_optimize_graph(sched->backends[split->backend_id], &split->graph);
+ ggml_backend_graph_optimize(sched->backends[split->backend_id], &split->graph);
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
for (int j = 0; j < split->n_inputs; j++) {
/* .graph_compute = */ ggml_backend_blas_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
static ggml_guid_t ggml_backend_blas_guid(void) {
/* .graph_compute = */ ggml_backend_cann_graph_compute,
/* .event_record = */ ggml_backend_cann_event_record,
/* .event_wait = */ ggml_backend_cann_event_wait,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
/**
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
static ggml_guid_t ggml_backend_cpu_guid(void) {
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
/* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
static ggml_guid_t ggml_backend_cuda_guid() {
// https://developer.apple.com/documentation/metal/mtlcommandbuffer#Synchronizing-Passes-with-Events
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ ggml_backend_metal_graph_optimize,
+ /* .graph_optimize = */ ggml_backend_metal_graph_optimize,
};
static ggml_guid_t ggml_backend_metal_guid(void) {
/* .graph_compute = */ ggml_backend_opencl_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
ggml_backend_t ggml_backend_opencl_init(void) {
/* .graph_compute = */ ggml_backend_rpc_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .event_record = */ ggml_backend_sycl_event_record,
/* .event_wait = */ ggml_backend_sycl_event_wait,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
static ggml_guid_t ggml_backend_sycl_guid() {
bool disable_fusion;
bool disable_host_visible_vidmem;
bool allow_sysmem_fallback;
- bool disable_optimize_graph;
+ bool disable_graph_optimize;
#ifdef GGML_VULKAN_MEMORY_DEBUG
std::unique_ptr<vk_memory_logger> memory_logger;
const char* GGML_VK_ALLOW_SYSMEM_FALLBACK = getenv("GGML_VK_ALLOW_SYSMEM_FALLBACK");
device->allow_sysmem_fallback = GGML_VK_ALLOW_SYSMEM_FALLBACK != nullptr;
- const char* GGML_VK_DISABLE_OPTIMIZE_GRAPH = getenv("GGML_VK_DISABLE_OPTIMIZE_GRAPH");
- device->disable_optimize_graph = GGML_VK_DISABLE_OPTIMIZE_GRAPH != nullptr;
+ const char* GGML_VK_DISABLE_GRAPH_OPTIMIZE = getenv("GGML_VK_DISABLE_GRAPH_OPTIMIZE");
+ device->disable_graph_optimize = GGML_VK_DISABLE_GRAPH_OPTIMIZE != nullptr;
bool fp16_storage = false;
bool fp16_compute = false;
}
// Sort the graph for improved parallelism.
-static void ggml_vk_optimize_graph(ggml_backend_t backend, struct ggml_cgraph * graph)
+static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * graph)
{
- VK_LOG_DEBUG("ggml_vk_optimize_graph(" << graph->n_nodes << " nodes)");
+ VK_LOG_DEBUG("ggml_vk_graph_optimize(" << graph->n_nodes << " nodes)");
ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
- if (ctx->device->disable_optimize_graph) {
+ if (ctx->device->disable_graph_optimize) {
return;
}
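The Vulkan pass reorders nodes so that independent work can overlap; the core primitive any such pass needs is a dependency test between two nodes. A hedged sketch of a direct-dependency check, based only on the public ggml tensor layout (the src[] pointers) and not on the Vulkan backend's actual implementation:

    #include <stdbool.h>
    #include "ggml.h"   // struct ggml_tensor, GGML_MAX_SRC

    // Illustrative helper: true if `node` reads `other` as one of its sources.
    // Two nodes with no dependency in either direction are candidates for
    // reordering/interleaving by a pass like ggml_vk_graph_optimize.
    static bool node_depends_on(const struct ggml_tensor * node, const struct ggml_tensor * other) {
        for (int i = 0; i < GGML_MAX_SRC; i++) {
            if (node->src[i] == other) {
                return true;
            }
        }
        return false;
    }

In practice views complicate this (a node can also depend on another through view_src), so a real pass has to be more conservative than this sketch.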
/* .graph_compute = */ ggml_backend_vk_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ ggml_vk_optimize_graph,
+ /* .graph_optimize = */ ggml_vk_graph_optimize,
};
static ggml_guid_t ggml_backend_vk_guid() {
/* .graph_compute = */ ggml_backend_webgpu_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
/* End GGML Backend Interface */
/* .graph_compute = */ ggml_backend_zdnn_graph_compute,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
- /* .optimize_graph = */ NULL,
+ /* .graph_optimize = */ NULL,
};
static ggml_guid_t ggml_backend_zdnn_guid(void) {