From: Georgi Gerganov
Date: Wed, 2 Jul 2025 11:12:07 +0000 (+0300)
Subject: simple-chat : fix context-exceeded condition (#14494)
X-Git-Tag: upstream/0.0.5882~79
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=d7f5f4e578d1f60b0835d1734a50438c309b3e5c;p=pkg%2Fggml%2Fsources%2Fllama.cpp

simple-chat : fix context-exceeded condition (#14494)

* simple-chat : fix context-exceeded condition

ggml-ci

* cont : fix n_ctx_used computation

ggml-ci
---

diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp
index cf117804..57195df3 100644
--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@@ -113,15 +113,16 @@ int main(int argc, char ** argv) {
         while (true) {
             // check if we have enough space in the context to evaluate this batch
             int n_ctx = llama_n_ctx(ctx);
-            int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0);
+            int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1;
             if (n_ctx_used + batch.n_tokens > n_ctx) {
                 printf("\033[0m\n");
                 fprintf(stderr, "context size exceeded\n");
                 exit(0);
             }
 
-            if (llama_decode(ctx, batch)) {
-                GGML_ABORT("failed to decode\n");
+            int ret = llama_decode(ctx, batch);
+            if (ret != 0) {
+                GGML_ABORT("failed to decode, ret = %d\n", ret);
             }
 
             // sample the next token
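
Why the `+ 1`: llama_memory_seq_pos_max() reports the highest 0-based position
stored for the sequence (by the patch's logic, -1 when the sequence is empty),
so the number of occupied context cells is that value plus one. Without the
`+ 1`, the old condition undercounted usage by one token and could let a batch
through that llama_decode() no longer had room for. A minimal sketch of the
corrected check, assuming a loaded llama_context and a prepared llama_batch as
in the example; the helper name batch_fits is hypothetical, not from the commit:

    #include "llama.h"

    // Hypothetical helper illustrating the fixed bookkeeping.
    // Assumption: llama_memory_seq_pos_max() returns the highest position used
    // by sequence 0, or -1 when it is empty, so occupied cells = pos_max + 1
    // (-1 + 1 == 0 for an empty context).
    static bool batch_fits(llama_context * ctx, const llama_batch & batch) {
        const int n_ctx      = llama_n_ctx(ctx);
        const int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1;
        return n_ctx_used + batch.n_tokens <= n_ctx;
    }

The second hunk is independent of the off-by-one: capturing llama_decode()'s
return value and printing it in the GGML_ABORT message makes failures at the
context boundary easier to diagnose than the previous bare "failed to decode".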