From: Georgi Gerganov
Date: Wed, 28 Jan 2026 07:11:40 +0000 (+0200)
Subject: server : adjust spec tests to generate up to 16 tokens (#19093)
X-Git-Tag: upstream/0.0.8067~213
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=b931f81b5a3bc3e16bd74cebc8fee8cbd69f8d4d;p=pkg%2Fggml%2Fsources%2Fllama.cpp

server : adjust spec tests to generate up to 16 tokens (#19093)
---

diff --git a/tools/server/tests/unit/test_speculative.py b/tools/server/tests/unit/test_speculative.py
index d2f3fba5f..eebd3cc8f 100644
--- a/tools/server/tests/unit/test_speculative.py
+++ b/tools/server/tests/unit/test_speculative.py
@@ -30,6 +30,7 @@ def test_with_and_without_draft():
         "prompt": "I believe the meaning of life is",
         "temperature": 0.0,
         "top_k": 1,
+        "n_predict": 16,
     })
     assert res.status_code == 200
     content_no_draft = res.body["content"]
@@ -42,6 +43,7 @@ def test_with_and_without_draft():
         "prompt": "I believe the meaning of life is",
         "temperature": 0.0,
         "top_k": 1,
+        "n_predict": 16,
     })
     assert res.status_code == 200
     content_draft = res.body["content"]
@@ -68,6 +70,7 @@ def test_different_draft_min_draft_max():
             "prompt": "I believe the meaning of life is",
             "temperature": 0.0,
             "top_k": 1,
+            "n_predict": 16,
         })
         assert res.status_code == 200
         if last_content is not None: