ggml-cuda: check for srcs outside the cgraph (#18583)

author Aman Gupta <redacted>

Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)

committer GitHub <redacted>

Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)
author Aman Gupta <redacted>
Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)
committer GitHub <redacted>
Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh

index 55f2f46086d286bad81b56cc28d5e38bc30a9424..995b774c2078ddf6cf466ffabd00b68b395df1d0 100644 (file)
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -1065,6 +1065,7 @@ struct ggml_cuda_graph {
      int number_consecutive_updates = 0;
      bool cuda_graphs_enabled = false;
      std::vector<ggml_graph_node_properties> ggml_graph_properties;
+    std::vector<ggml_graph_node_properties> extraneous_srcs_properties;
  #endif
  };
  
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu

index 1bbca225d2b87e13e9f61fdab616ae8910b41651..75269170c34617548e49a985182b701b4474794b 100644 (file)
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2973,15 +2973,16 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
      }
  
      // Check if the graph size has changed
-    if (cuda_ctx->cuda_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes) {
+    if (cuda_ctx->cuda_graph->ggml_graph_properties.size() != (size_t)cgraph->n_nodes + cgraph->n_leafs) {
          cuda_graph_update_required = true;
-        cuda_ctx->cuda_graph->ggml_graph_properties.resize(cgraph->n_nodes);
+        cuda_ctx->cuda_graph->ggml_graph_properties.resize(cgraph->n_nodes + cgraph->n_leafs);
      }
  
      // Loop over nodes in GGML graph to determine if CUDA graph update is required
      // and store properties to allow this comparison for the next token
      for (int i = 0; i < cgraph->n_nodes; i++) {
          bool has_matching_properties = true;
+
          if (!cuda_graph_update_required) {
              has_matching_properties = ggml_graph_node_has_matching_properties(cgraph->nodes[i], &cuda_ctx->cuda_graph->ggml_graph_properties[i]);
          }
@@ -2991,6 +2992,17 @@ static bool is_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx,
          set_ggml_graph_node_properties(cgraph->nodes[i], &cuda_ctx->cuda_graph->ggml_graph_properties[i]);
      }
  
+    for (int i = 0; i < cgraph->n_leafs; i++) {
+        bool has_matching_properties = true;
+        if (!cuda_graph_update_required) {
+            has_matching_properties = ggml_graph_node_has_matching_properties(cgraph->leafs[i], &cuda_ctx->cuda_graph->ggml_graph_properties[cgraph->n_nodes + i]);
+        }
+        if (!has_matching_properties) {
+            cuda_graph_update_required = true;
+        }
+        set_ggml_graph_node_properties(cgraph->leafs[i], &cuda_ctx->cuda_graph->ggml_graph_properties[cgraph->n_nodes + i]);
+    }
+
      return cuda_graph_update_required;
  }
author	Aman Gupta <redacted>
	Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)
committer	GitHub <redacted>
	Mon, 5 Jan 2026 14:46:36 +0000 (22:46 +0800)
ggml/src/ggml-cuda/common.cuh		patch | blob | history
ggml/src/ggml-cuda/ggml-cuda.cu		patch | blob | history