CUDA: Do not mutate cgraph for fused ADDs (#19566)

author Oliver Simons <redacted>

Fri, 13 Feb 2026 09:37:55 +0000 (10:37 +0100)

committer GitHub <redacted>

Fri, 13 Feb 2026 09:37:55 +0000 (15:07 +0530)
author Oliver Simons <redacted>
Fri, 13 Feb 2026 09:37:55 +0000 (10:37 +0100)
committer GitHub <redacted>
Fri, 13 Feb 2026 09:37:55 +0000 (15:07 +0530)
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu

index b163468789fc7adb7a74b70dbc5104ec0cbe35e4..7dc688483ad19fd941f66ecbd48e01a82237e63a 100644 (file)
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3640,11 +3640,13 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
                          n_fuse++;
  
                          if (n_fuse > 1) {
+                            ggml_tensor fused_add_node;
+                            memcpy(&fused_add_node, node, sizeof(ggml_tensor));
                              for (int j = 0; j < n_fuse - 1; ++j) {
-                                node->src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
+                                fused_add_node.src[j + 2] = cgraph->nodes[i + j + 1]->src[1];
                              }
-                            cgraph->nodes[i + n_fuse - 1]->data = node->data;
-                            ggml_cuda_op_fused_add(*cuda_ctx, node, n_fuse);
+                            fused_add_node.data = cgraph->nodes[i + n_fuse - 1]->data;
+                            ggml_cuda_op_fused_add(*cuda_ctx, &fused_add_node, n_fuse);
                              i += n_fuse - 1;
  
                              continue;
author	Oliver Simons <redacted>
	Fri, 13 Feb 2026 09:37:55 +0000 (10:37 +0100)
committer	GitHub <redacted>
	Fri, 13 Feb 2026 09:37:55 +0000 (15:07 +0530)