git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
server : preserve anthropic thinking blocks in conversion (#20120)
author Tom Vaucourt <redacted>
Fri, 6 Mar 2026 16:41:12 +0000 (17:41 +0100)
committer GitHub <redacted>
Fri, 6 Mar 2026 16:41:12 +0000 (17:41 +0100)
* server : preserve anthropic thinking blocks in conversion (#20090)

* server : add tests for anthropic thinking block conversion

---------

Co-authored-by: root <redacted>
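
For context, this is the mapping the change preserves: an Anthropic-format assistant turn that interleaves a thinking block with a tool call should convert to an OpenAI-format message that keeps the thinking text as reasoning_content instead of silently dropping it. A minimal sketch follows; the concrete values are illustrative, the shapes follow the diff and tests below.

# Anthropic-format assistant turn as sent by a client in conversation history
anthropic_msg = {
    "role": "assistant",
    "content": [
        {"type": "thinking", "thinking": "I should list the files first."},
        {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}},
    ],
}

# Expected OpenAI-format message after convert_anthropic_to_oai: the thinking
# text survives as reasoning_content so the chat template can re-render it
oai_msg = {
    "role": "assistant",
    "content": "",
    "reasoning_content": "I should list the files first.",
    "tool_calls": [{
        "id": "call_1",
        "type": "function",
        "function": {"name": "list_files", "arguments": "{\"path\": \".\"}"},
    }],
}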
tools/server/server-common.cpp
tools/server/tests/unit/test_compat_anthropic.py

index ff3c6d3c2b054040efb9acb315b3791b84be5799..13ea8c690f3067b20b72f3220af6ab618262b4b2 100644 (file)
@@ -1463,6 +1463,7 @@ json convert_anthropic_to_oai(const json & body) {
             json tool_calls = json::array();
             json converted_content = json::array();
             json tool_results = json::array();
+            std::string reasoning_content;
             bool has_tool_calls = false;
 
             for (const auto & block : content) {
@@ -1470,6 +1471,8 @@ json convert_anthropic_to_oai(const json & body) {
 
                 if (type == "text") {
                     converted_content.push_back(block);
+                } else if (type == "thinking") {
+                    reasoning_content += json_value(block, "thinking", std::string());
                 } else if (type == "image") {
                     json source = json_value(block, "source", json::object());
                     std::string source_type = json_value(source, "type", std::string());
@@ -1528,16 +1531,19 @@ json convert_anthropic_to_oai(const json & body) {
                 }
             }
 
-            if (!converted_content.empty() || has_tool_calls) {
+            if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) {
                 json new_msg = {{"role", role}};
                 if (!converted_content.empty()) {
                     new_msg["content"] = converted_content;
-                } else if (has_tool_calls) {
+                } else if (has_tool_calls || !reasoning_content.empty()) {
                     new_msg["content"] = "";
                 }
                 if (!tool_calls.empty()) {
                     new_msg["tool_calls"] = tool_calls;
                 }
+                if (!reasoning_content.empty()) {
+                    new_msg["reasoning_content"] = reasoning_content;
+                }
                 oai_messages.push_back(new_msg);
             }
 
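The hunk above accumulates consecutive thinking blocks into a single reasoning_content string and attaches it to the converted message, emitting the message even when thinking is the only content. A minimal Python sketch of the equivalent per-message logic, for illustration only (the function name is hypothetical, not the server API; the tool_use mapping follows the shape used in the tests below):

import json

def convert_assistant_blocks(blocks):
    # Sketch: carry text and tool_use blocks over, fold all thinking
    # blocks into one reasoning_content string.
    converted_content = []
    tool_calls = []
    reasoning_content = ""
    for block in blocks:
        btype = block.get("type", "")
        if btype == "text":
            converted_content.append(block)
        elif btype == "thinking":
            reasoning_content += block.get("thinking", "")
        elif btype == "tool_use":
            tool_calls.append({
                "id": block.get("id", ""),
                "type": "function",
                "function": {
                    "name": block.get("name", ""),
                    "arguments": json.dumps(block.get("input", {})),
                },
            })
    if not converted_content and not tool_calls and not reasoning_content:
        return None  # nothing to emit for this message
    msg = {"role": "assistant"}
    # As in the C++ change: if only thinking and/or tool_use blocks were
    # present, content becomes "" so chat templates still have a content field.
    msg["content"] = converted_content if converted_content else ""
    if tool_calls:
        msg["tool_calls"] = tool_calls
    if reasoning_content:
        msg["reasoning_content"] = reasoning_content
    return msg

Setting content to "" rather than omitting it mirrors the existing has_tool_calls branch, so templates that unconditionally read message.content keep working.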
index e16e0235c64b8a61519583b9bb5947e9ccf05d21..93ff03be6b4cf61651df80f1f79bdb53ce07b954 100644 (file)
@@ -809,6 +809,139 @@ def test_anthropic_vs_openai_different_response_format():
 
 # Extended thinking tests with reasoning models
 
+# The next two tests cover the input path (conversation history):
+# Client sends thinking blocks -> convert_anthropic_to_oai -> reasoning_content -> template
+
+def test_anthropic_thinking_history_in_count_tokens():
+    """Test that interleaved thinking blocks in conversation history are not dropped during conversion."""
+    global server
+    server.jinja = True
+    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
+    server.start()
+
+    tool = {
+        "name": "list_files",
+        "description": "List files",
+        "input_schema": {
+            "type": "object",
+            "properties": {"path": {"type": "string"}},
+            "required": ["path"]
+        }
+    }
+
+    messages_without_thinking = [
+        {"role": "user", "content": "Fix the bug"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}}
+            ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"}
+            ]
+        },
+    ]
+
+    messages_with_thinking = [
+        {"role": "user", "content": "Fix the bug"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "thinking", "thinking": "I should check the project structure first to understand the codebase layout."},
+                {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}}
+            ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"}
+            ]
+        },
+    ]
+
+    res_without = server.make_request("POST", "/v1/messages/count_tokens", data={
+        "model": "test",
+        "messages": messages_without_thinking,
+        "tools": [tool],
+    })
+    assert res_without.status_code == 200, f"Expected 200: {res_without.body}"
+
+    res_with = server.make_request("POST", "/v1/messages/count_tokens", data={
+        "model": "test",
+        "messages": messages_with_thinking,
+        "tools": [tool],
+    })
+    assert res_with.status_code == 200, f"Expected 200: {res_with.body}"
+
+    # Thinking blocks should increase the token count
+    assert res_with.body["input_tokens"] > res_without.body["input_tokens"], \
+        f"Expected more tokens with thinking ({res_with.body['input_tokens']}) than without ({res_without.body['input_tokens']})"
+
+
+def test_anthropic_thinking_history_in_template():
+    """Test that reasoning_content from converted interleaved thinking blocks renders in the prompt."""
+    global server
+    server.jinja = True
+    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
+    server.start()
+
+    reasoning_1 = "I should check the project structure first."
+    reasoning_2 = "Now I need to read the main file."
+
+    res = server.make_request("POST", "/apply-template", data={
+        "messages": [
+            {"role": "user", "content": "Fix the bug in main.py"},
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_content": reasoning_1,
+                "tool_calls": [{
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {"name": "list_files", "arguments": "{\"path\": \".\"}"}
+                }]
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "main.py\nutils.py"},
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_content": reasoning_2,
+                "tool_calls": [{
+                    "id": "call_2",
+                    "type": "function",
+                    "function": {"name": "read_file", "arguments": "{\"path\": \"main.py\"}"}
+                }]
+            },
+            {"role": "tool", "tool_call_id": "call_2", "content": "print('hello')"},
+        ],
+        "tools": [{
+            "type": "function",
+            "function": {
+                "name": "list_files",
+                "description": "List files",
+                "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}
+            }
+        }, {
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "description": "Read a file",
+                "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}
+            }
+        }],
+    })
+    assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}"
+    prompt = res.body["prompt"]
+
+    # Both reasoning_content values should be rendered in <think> tags
+    assert reasoning_1 in prompt, f"Expected first reasoning text in prompt: {prompt}"
+    assert reasoning_2 in prompt, f"Expected second reasoning text in prompt: {prompt}"
+    assert prompt.count("<think>") >= 2, f"Expected at least 2 <think> blocks in prompt: {prompt}"
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize("stream", [False, True])
 def test_anthropic_thinking_with_reasoning_model(stream):