server : adjust unified KV cache tests (#18716)

author Georgi Gerganov <redacted>

Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)

committer GitHub <redacted>

Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)
author Georgi Gerganov <redacted>
Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)
committer GitHub <redacted>
Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)
diff --git a/tools/server/tests/unit/test_completion.py b/tools/server/tests/unit/test_completion.py

index ef1757db21f7f472ff99c674536f40089443d043..2a980601ec60849a36ab0a8c454357b9acf8dbed 100644 (file)
--- a/tools/server/tests/unit/test_completion.py
+++ b/tools/server/tests/unit/test_completion.py
@@ -393,12 +393,12 @@ def test_completion_unified(n_ctx, n_slots, n_predict_vals, expected_success):
      for res, n_predict, expect_ok in zip(results, n_predict_vals, expected_success):
          if expect_ok:
              assert res.status_code == 200
+
+        # note: https://github.com/ggml-org/llama.cpp/pull/18700#issuecomment-3728695581
+        if res.status_code == 200:
              assert "content" in res.body
              if "timings" in res.body:
                  assert res.body["timings"]["predicted_n"] == n_predict
-        else:
-            assert res.status_code == 500
-            assert "content" not in res.body
  
  
  @pytest.mark.parametrize(
author	Georgi Gerganov <redacted>
	Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)
committer	GitHub <redacted>
	Sat, 10 Jan 2026 15:51:56 +0000 (17:51 +0200)