From: Jeff Bolz
Date: Sat, 13 Dec 2025 15:19:51 +0000 (-0600)
Subject: llama_context: synchronize before reallocating output buffer (#17974)
X-Git-Tag: upstream/0.0.7446~59
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5266379bcae74214af397f36aa81b2a08b15d545;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama_context: synchronize before reallocating output buffer (#17974)
---

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 2692297d..9914b327 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1318,6 +1318,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
             // This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
             LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
+            synchronize();
             buf_output = nullptr;
             logits = nullptr;
             embd = nullptr;