git.djapps.eu Git - pkg/ggml/sources/ggml/commitdiff
ggml : allow CUDA graphs when using pipeline parallelism (llama/13814)
author Diego Devesa <redacted>
Tue, 27 May 2025 11:05:18 +0000 (04:05 -0700)
committer Georgi Gerganov <redacted>
Tue, 27 May 2025 13:58:49 +0000 (16:58 +0300)
src/ggml-backend.cpp

index b30b4cb386f9fee7e1c4956ec65808ed99cd0f86..1f40f10e876228d7b9d675fb5d861e6dd85b3019 100644 (file)
@@ -1598,6 +1598,9 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
     for (int i = 0; i < sched->n_backends; i++) {
         ggml_backend_synchronize(sched->backends[i]);
     }
+    // reset the current copy to 0 so that the graphs will be similar during generation
+    // necessary for CUDA graphs
+    sched->cur_copy = 0;
 }
 
 void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {