From: Diego Devesa
Date: Tue, 27 May 2025 11:05:18 +0000 (-0700)
Subject: ggml : allow CUDA graphs when using pipeline parallelism (llama/13814)
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b75babebb23a047063d13236dc714f69dc1d546a;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

ggml : allow CUDA graphs when using pipeline parallelism (llama/13814)
---

diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index b30b4cb3..1f40f10e 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -1598,6 +1598,9 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
     for (int i = 0; i < sched->n_backends; i++) {
         ggml_backend_synchronize(sched->backends[i]);
     }
+    // reset the current copy to 0 so that the graphs will be similar during generation
+    // necessary for CUDA graphs
+    sched->cur_copy = 0;
 }
 
 void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {