git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server : improve context checkpoint logic (#19408)
author: Georgi Gerganov <redacted>
Sun, 8 Feb 2026 07:40:04 +0000 (09:40 +0200)
committer: GitHub <redacted>
Sun, 8 Feb 2026 07:40:04 +0000 (09:40 +0200)
tools/server/server-context.cpp

index b71d496eeb80357f65c2be69e951199d46623707..8ec84513399155fafb11639b42bc0bf6281d560f 100644 (file)
@@ -2507,7 +2507,8 @@ private:
                         slot.n_prompt_tokens_processed++;
 
                         // process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
-                        if (do_checkpoint && slot.task->n_tokens() - slot.prompt.n_tokens() == 64) {
+                        const int n_last = std::min(n_batch, 512);
+                        if (do_checkpoint && slot.task->n_tokens() == slot.prompt.n_tokens() + n_last) {
                             break;
                         }
                     }