CANN: Support eager execution mode under ACL graph compilation (llama/15712)

author Chenguang Li <redacted>

Tue, 2 Sep 2025 06:07:48 +0000 (14:07 +0800)

committer Georgi Gerganov <redacted>

Fri, 5 Sep 2025 09:54:11 +0000 (12:54 +0300)
author Chenguang Li <redacted>
Tue, 2 Sep 2025 06:07:48 +0000 (14:07 +0800)
committer Georgi Gerganov <redacted>
Fri, 5 Sep 2025 09:54:11 +0000 (12:54 +0300)
diff --git a/src/ggml-cann/common.h b/src/ggml-cann/common.h

index f71aa9d1de65c1e5332dbeec2357373decc6c14a..a041a157c333a8b20d3c49bbe55caab31d51f1e6 100755 (executable)
--- a/src/ggml-cann/common.h
+++ b/src/ggml-cann/common.h
@@ -395,6 +395,7 @@ struct ggml_backend_cann_context {
  #ifdef USE_ACL_GRAPH
      /// Cached CANN ACL graph used for executing the current ggml computation graph.
      std::unique_ptr<ggml_cann_graph> cann_graph;
+    bool acl_graph_mode = true;
  #endif
      cann_task_queue task_queue;
      bool async_mode;
@@ -404,7 +405,6 @@ struct ggml_backend_cann_context {
      ggml_cann_tensor_cache rms_norm_one_tensor_cache;
      ggml_cann_tensor_cache rms_norm_zero_tensor_cache;
  
-
      aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
  
      /**
@@ -419,6 +419,13 @@ struct ggml_backend_cann_context {
          async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
          GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
              device, async_mode ? "ON" : "OFF");
+#ifdef USE_ACL_GRAPH
+        acl_graph_mode = !(parse_bool(get_env("GGML_CANN_DISABLE_ACL_GRAPH").value_or("")));
+        GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
+              __func__, device,
+              acl_graph_mode ? "GRAPH" : "EAGER",
+              acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
+#endif
      }
  
      /**
diff --git a/src/ggml-cann/ggml-cann.cpp b/src/ggml-cann/ggml-cann.cpp

index da6d74d48af0012d069fd09442ea7fc3a59dd23e..0d9eb8fa1b9ca187ad81126b691304c8f9a3916c 100755 (executable)
--- a/src/ggml-cann/ggml-cann.cpp
+++ b/src/ggml-cann/ggml-cann.cpp
@@ -2252,6 +2252,10 @@ static enum ggml_status ggml_backend_cann_graph_compute(
      bool use_cann_graph = true;
      bool cann_graph_update_required = false;
  
+    if (!cann_ctx->acl_graph_mode) {
+        use_cann_graph = false;
+    }
+
      if (use_cann_graph) {
          if (cann_ctx->cann_graph == nullptr) {
              cann_ctx->cann_graph.reset(new ggml_cann_graph());
author	Chenguang Li <redacted>
	Tue, 2 Sep 2025 06:07:48 +0000 (14:07 +0800)
committer	Georgi Gerganov <redacted>
	Fri, 5 Sep 2025 09:54:11 +0000 (12:54 +0300)
src/ggml-cann/common.h		patch | blob | history
src/ggml-cann/ggml-cann.cpp		patch | blob | history