server : do not return error out of context (with ctx shift disabled) (#13577)

author Xuan-Son Nguyen <redacted>

Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)

committer GitHub <redacted>

Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)
author Xuan-Son Nguyen <redacted>
Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)
committer GitHub <redacted>
Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)
diff --git a/tools/server/server.cpp b/tools/server/server.cpp

index f32f3c86aad2c992b69f69e07fbe08c41f96e85b..129d013ac75f7861f7babe50977f1c7ecea96041 100644 (file)
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -2251,6 +2251,14 @@ struct server_context {
              slot.has_next_token = true;
          }
  
+        // if context shifting is disabled, make sure that we don't run out of context
+        if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
+            slot.stop           = STOP_TYPE_LIMIT;
+            slot.has_next_token = false;
+
+            SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
+        }
+
          // check the limits
          if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
              slot.stop           = STOP_TYPE_LIMIT;
diff --git a/tools/server/tests/unit/test_ctx_shift.py b/tools/server/tests/unit/test_ctx_shift.py

index be93a6d31f4109c7f8948ca847354402cea690a0..2431ac70882d7a29dd2e1946075899b674c5998b 100644 (file)
--- a/tools/server/tests/unit/test_ctx_shift.py
+++ b/tools/server/tests/unit/test_ctx_shift.py
@@ -65,3 +65,21 @@ def test_ctx_shift_disabled_long_prompt():
      assert res.status_code != 200
      assert "error" in res.body
      assert "exceeds the available context size" in res.body["error"]["message"]
+
+def test_ctx_shift_disabled_stream():  # streaming completion with the disable_ctx_shift flag set
+    global server
+    server.disable_ctx_shift = True  # flag is set before the server is started
+    server.start()
+    res = server.make_stream_request("POST", "/v1/completions", data={
+        "n_predict": 256,
+        "prompt": "Once",
+        "stream": True,
+    })
+    content = ""  # text accumulated from chunks whose finish_reason is None
+    for data in res:  # NOTE(review): nothing asserts that a "length" chunk is ever received
+        choice = data["choices"][0]  # only the first choice of each chunk is inspected
+        if choice["finish_reason"] == "length":
+            assert len(content) > 0  # some text must have been streamed before the "length" stop
+        else:
+            assert choice["finish_reason"] is None  # any finish reason other than "length" fails the test
+            content += choice["text"]
author	Xuan-Son Nguyen <redacted>
	Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)
committer	GitHub <redacted>
	Fri, 16 May 2025 19:50:00 +0000 (21:50 +0200)
tools/server/server.cpp		patch \| blob \| history
tools/server/tests/unit/test_ctx_shift.py		patch \| blob \| history