context: zero output buffer on allocation (#20781)

author    Ruikai Peng <redacted>
          Fri, 20 Mar 2026 09:31:34 +0000 (17:31 +0800)
committer GitHub <redacted>
          Fri, 20 Mar 2026 09:31:34 +0000 (11:31 +0200)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index dc61afb0bdb301b3b8b3e4ab18c6f8b40b6373b8..8f25d477865dbe348820650db5cf2ffda1b90065 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1946,6 +1946,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
              LLAMA_LOG_ERROR("%s: failed to allocate output buffer of size %.2f MiB\n", __func__, new_size / (1024.0 * 1024.0));
              return 0;
          }
+        ggml_backend_buffer_clear(buf_output.get(), 0);
      }
  
      float * output_base = (float *) ggml_backend_buffer_get_base(buf_output.get());
author	Ruikai Peng <redacted>
	Fri, 20 Mar 2026 09:31:34 +0000 (17:31 +0800)
committer	GitHub <redacted>
	Fri, 20 Mar 2026 09:31:34 +0000 (11:31 +0200)