From: Aman Gupta <redacted>
Date: Mon, 1 Dec 2025 09:12:48 +0000 (+0800)
Subject: llama-graph: avoid expand_forward for fusion (llama/17633)
X-Git-Tag: upstream/1.8.3~210
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=6cc2d0534f9446b7bc66902f249d64cd1ea7af26;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

llama-graph: avoid expand_forward for fusion (llama/17633)
---

diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index fa7e1e13..eb2e2731 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3274,7 +3274,6 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
                         GGML_LOG_DEBUG("Setting stream no to %d for node %s\n", cuda_ctx->curr_stream_no, node->name);
                     }
                 }
-                prev_i = i;
 
 #ifdef GGML_CUDA_DEBUG
                 const int nodes_fused = i - prev_i - 1;
@@ -3282,6 +3281,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
                     GGML_LOG_INFO("nodes_fused: %d\n", nodes_fused);
                 }
 #endif
+                prev_i = i;
 
                 if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
                     continue;