json tool_calls = json::array();
json converted_content = json::array();
json tool_results = json::array();
+ std::string reasoning_content;
bool has_tool_calls = false;
for (const auto & block : content) {
if (type == "text") {
converted_content.push_back(block);
+ } else if (type == "thinking") {
+ reasoning_content += json_value(block, "thinking", std::string());
} else if (type == "image") {
json source = json_value(block, "source", json::object());
std::string source_type = json_value(source, "type", std::string());
}
}
- if (!converted_content.empty() || has_tool_calls) {
+ if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) {
json new_msg = {{"role", role}};
if (!converted_content.empty()) {
new_msg["content"] = converted_content;
- } else if (has_tool_calls) {
+ } else if (has_tool_calls || !reasoning_content.empty()) {
new_msg["content"] = "";
}
if (!tool_calls.empty()) {
new_msg["tool_calls"] = tool_calls;
}
+ if (!reasoning_content.empty()) {
+ new_msg["reasoning_content"] = reasoning_content;
+ }
oai_messages.push_back(new_msg);
}
# Extended thinking tests with reasoning models
+# The next two tests cover the input path (conversation history):
+# Client sends thinking blocks -> convert_anthropic_to_oai -> reasoning_content -> template
+
def test_anthropic_thinking_history_in_count_tokens():
    """Interleaved thinking blocks in conversation history must survive conversion.

    Sends the same tool-use conversation twice — once with a thinking block in
    the assistant turn, once without — and checks that the thinking variant
    counts strictly more input tokens (i.e. the block was not dropped).
    """
    global server
    server.jinja = True
    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
    server.start()

    tool = {
        "name": "list_files",
        "description": "List files",
        "input_schema": {
            "type": "object",
            "properties": {"path": {"type": "string"}},
            "required": ["path"]
        }
    }

    # The two conversations differ only in the assistant turn's content blocks.
    thinking_block = {
        "type": "thinking",
        "thinking": "I should check the project structure first to understand the codebase layout.",
    }
    tool_use_block = {
        "type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."},
    }

    def build_messages(assistant_blocks):
        # Standard three-turn shape: user ask, assistant tool call, tool result.
        return [
            {"role": "user", "content": "Fix the bug"},
            {"role": "assistant", "content": assistant_blocks},
            {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"}
                ],
            },
        ]

    def count_tokens(messages):
        return server.make_request("POST", "/v1/messages/count_tokens", data={
            "model": "test",
            "messages": messages,
            "tools": [tool],
        })

    res_without = count_tokens(build_messages([tool_use_block]))
    assert res_without.status_code == 200, f"Expected 200: {res_without.body}"

    res_with = count_tokens(build_messages([thinking_block, tool_use_block]))
    assert res_with.status_code == 200, f"Expected 200: {res_with.body}"

    # Thinking blocks should increase the token count
    tokens_with = res_with.body["input_tokens"]
    tokens_without = res_without.body["input_tokens"]
    assert tokens_with > tokens_without, \
        f"Expected more tokens with thinking ({tokens_with}) than without ({tokens_without})"
+
+
def test_anthropic_thinking_history_in_template():
    """reasoning_content from converted thinking blocks must render in the prompt.

    Applies the Qwen3 chat template to a multi-turn tool-calling history whose
    assistant turns carry `reasoning_content`, then checks both reasoning texts
    appear in the rendered prompt inside <think> tags.
    """
    global server
    server.jinja = True
    server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja'
    server.start()

    reasoning_1 = "I should check the project structure first."
    reasoning_2 = "Now I need to read the main file."

    def tool_spec(name, description):
        # All tools here share the same single-string "path" parameter schema.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}
            }
        }

    def assistant_turn(reasoning, call_id, fn_name, arguments):
        # Assistant turn with empty visible content: reasoning + one tool call.
        return {
            "role": "assistant",
            "content": "",
            "reasoning_content": reasoning,
            "tool_calls": [{
                "id": call_id,
                "type": "function",
                "function": {"name": fn_name, "arguments": arguments}
            }]
        }

    res = server.make_request("POST", "/apply-template", data={
        "messages": [
            {"role": "user", "content": "Fix the bug in main.py"},
            assistant_turn(reasoning_1, "call_1", "list_files", "{\"path\": \".\"}"),
            {"role": "tool", "tool_call_id": "call_1", "content": "main.py\nutils.py"},
            assistant_turn(reasoning_2, "call_2", "read_file", "{\"path\": \"main.py\"}"),
            {"role": "tool", "tool_call_id": "call_2", "content": "print('hello')"},
        ],
        "tools": [
            tool_spec("list_files", "List files"),
            tool_spec("read_file", "Read a file"),
        ],
    })
    assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}"
    prompt = res.body["prompt"]

    # Both reasoning_content values should be rendered in <think> tags
    assert reasoning_1 in prompt, f"Expected first reasoning text in prompt: {prompt}"
    assert reasoning_2 in prompt, f"Expected second reasoning text in prompt: {prompt}"
    assert prompt.count("<think>") >= 2, f"Expected at least 2 <think> blocks in prompt: {prompt}"
+
+
@pytest.mark.slow
@pytest.mark.parametrize("stream", [False, True])
def test_anthropic_thinking_with_reasoning_model(stream):