simple-chat : fix context-exceeded condition (#14494)

author Georgi Gerganov <redacted>

Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)

committer GitHub <redacted>

Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)
author Georgi Gerganov <redacted>
Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)
committer GitHub <redacted>
Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)
diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp

index cf1178043d8d1b7b7b957c4029769c09930a472b..57195df331628ecedbca28e5fe242003feb8a427 100644 (file)
--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@@ -113,15 +113,16 @@ int main(int argc, char ** argv) {
          while (true) {
              // check if we have enough space in the context to evaluate this batch
              int n_ctx = llama_n_ctx(ctx);
-            int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0);
+            int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1;
              if (n_ctx_used + batch.n_tokens > n_ctx) {
                  printf("\033[0m\n");
                  fprintf(stderr, "context size exceeded\n");
                  exit(0);
              }
  
-            if (llama_decode(ctx, batch)) {
-                GGML_ABORT("failed to decode\n");
+            int ret = llama_decode(ctx, batch);
+            if (ret != 0) {
+                GGML_ABORT("failed to decode, ret = %d\n", ret);
              }
  
              // sample the next token
author	Georgi Gerganov <redacted>
	Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)
committer	GitHub <redacted>
	Wed, 2 Jul 2025 11:12:07 +0000 (14:12 +0300)