git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
tests : use `reasoning` instead of `reasoning_budget` in server tests (#20432)
author: Piotr Wilkin (ilintar) <redacted>
Thu, 12 Mar 2026 12:41:01 +0000 (13:41 +0100)
committer: GitHub <redacted>
Thu, 12 Mar 2026 12:41:01 +0000 (13:41 +0100)
tools/server/tests/unit/test_template.py
tools/server/tests/utils.py

index e5185fcbfab850f5905331fd923b15bee911158d..43a356020dd5ee1ab75a2b80e03d153324f4ba5a 100644 (file)
@@ -11,6 +11,7 @@ sys.path.insert(0, str(path))
 
 import datetime
 from utils import *
+from typing import Literal
 
 server: ServerProcess
 
@@ -23,24 +24,24 @@ def create_server():
 
 
 @pytest.mark.parametrize("tools", [None, [], [TEST_TOOL]])
-@pytest.mark.parametrize("template_name,reasoning_budget,expected_end", [
-    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", None, "<think>\n"),
-    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B",   -1, "<think>\n"),
-    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B",    0, "<think>\n</think>"),
+@pytest.mark.parametrize("template_name,reasoning,expected_end", [
+    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B",  "on", "<think>\n"),
+    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B","auto", "<think>\n"),
+    ("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", "off", "<think>\n</think>"),
 
-    ("Qwen-Qwen3-0.6B", -1, "<|im_start|>assistant\n"),
-    ("Qwen-Qwen3-0.6B",  0, "<|im_start|>assistant\n<think>\n\n</think>\n\n"),
+    ("Qwen-Qwen3-0.6B","auto", "<|im_start|>assistant\n"),
+    ("Qwen-Qwen3-0.6B", "off", "<|im_start|>assistant\n<think>\n\n</think>\n\n"),
 
-    ("Qwen-QwQ-32B", -1, "<|im_start|>assistant\n<think>\n"),
-    ("Qwen-QwQ-32B",  0, "<|im_start|>assistant\n<think>\n</think>"),
+    ("Qwen-QwQ-32B","auto", "<|im_start|>assistant\n<think>\n"),
+    ("Qwen-QwQ-32B", "off", "<|im_start|>assistant\n<think>\n</think>"),
 
-    ("CohereForAI-c4ai-command-r7b-12-2024-tool_use", -1, "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"),
-    ("CohereForAI-c4ai-command-r7b-12-2024-tool_use",  0, "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|>"),
+    ("CohereForAI-c4ai-command-r7b-12-2024-tool_use","auto", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"),
+    ("CohereForAI-c4ai-command-r7b-12-2024-tool_use", "off", "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|><|END_THINKING|>"),
 ])
-def test_reasoning_budget(template_name: str, reasoning_budget: int | None, expected_end: str, tools: list[dict]):
+def test_reasoning(template_name: str, reasoning: Literal['on', 'off', 'auto'] | None, expected_end: str, tools: list[dict]):
     global server
     server.jinja = True
-    server.reasoning_budget = reasoning_budget
+    server.reasoning = reasoning
     server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
     server.start()
 
index db357d876b1129e14670d9f8f453098bae1bf295..c6fe11261f96764121aeb72580f0528ffa8c5e42 100644 (file)
@@ -95,7 +95,7 @@ class ServerProcess:
     no_webui: bool | None = None
     jinja: bool | None = None
     reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
-    reasoning_budget: int | None = None
+    reasoning: Literal['on', 'off', 'auto'] | None = None
     chat_template: str | None = None
     chat_template_file: str | None = None
     server_path: str | None = None
@@ -225,8 +225,8 @@ class ServerProcess:
             server_args.append("--no-jinja")
         if self.reasoning_format is not None:
             server_args.extend(("--reasoning-format", self.reasoning_format))
-        if self.reasoning_budget is not None:
-            server_args.extend(("--reasoning-budget", self.reasoning_budget))
+        if self.reasoning is not None:
+            server_args.extend(("--reasoning", self.reasoning))
         if self.chat_template:
             server_args.extend(["--chat-template", self.chat_template])
         if self.chat_template_file: