git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server : do context shift only while generating (#17000)
author: Georgi Gerganov <redacted>
Tue, 4 Nov 2025 17:21:36 +0000 (19:21 +0200)
committer: GitHub <redacted>
Tue, 4 Nov 2025 17:21:36 +0000 (19:21 +0200)
tools/server/server.cpp

index ec7dc1a130f69a2029cbc0df386f1cc531de0e4b..678aad93b86bcb4ee17524ba27bb3fe6cc23e356 100644 (file)
@@ -3587,7 +3587,7 @@ struct server_context {
         // apply context-shift if needed
         // TODO: simplify and improve
         for (server_slot & slot : slots) {
-            if (slot.is_processing() && slot.prompt.n_tokens() + 1 >= slot.n_ctx) {
+            if (slot.state == SLOT_STATE_GENERATING && slot.prompt.n_tokens() + 1 >= slot.n_ctx) {
                 if (!params_base.ctx_shift) {
                     // this check is redundant (for good)
                     // we should never get here, because generation should have already stopped in process_token()