#ifdef USE_ACL_GRAPH
/// Cached CANN ACL graph used for executing the current ggml computation graph.
std::unique_ptr<ggml_cann_graph> cann_graph;
+ bool acl_graph_mode = true; ///< Starts as true; reassigned to the negated parse_bool() of env var GGML_CANN_DISABLE_ACL_GRAPH. When false, graph compute forces use_cann_graph off (logged as "EAGER").
#endif
cann_task_queue task_queue; ///< NOTE(review): presumably the queue used for asynchronous operator submission - confirm against cann_task_queue.
bool async_mode; ///< Assigned from parse_bool() of env var GGML_CANN_ASYNC_MODE; reported in the log as async operator submission ON/OFF.
ggml_cann_tensor_cache rms_norm_one_tensor_cache; ///< NOTE(review): by its name, a cached tensor of ones for RMS norm - confirm at the use site.
ggml_cann_tensor_cache rms_norm_zero_tensor_cache; ///< NOTE(review): by its name, a cached tensor of zeros for RMS norm - confirm at the use site.
-
aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
/**
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
device, async_mode ? "ON" : "OFF");
+#ifdef USE_ACL_GRAPH
+ acl_graph_mode = !(parse_bool(get_env("GGML_CANN_DISABLE_ACL_GRAPH").value_or("")));
+ GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
+ __func__, device,
+ acl_graph_mode ? "GRAPH" : "EAGER",
+ acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
+#endif
}
/**
bool use_cann_graph = true;
bool cann_graph_update_required = false;
+ if (!cann_ctx->acl_graph_mode) {
+ use_cann_graph = false;
+ }
+
if (use_cann_graph) {
if (cann_ctx->cann_graph == nullptr) {
cann_ctx->cann_graph.reset(new ggml_cann_graph());