From: Jesse Date: Mon, 8 Sep 2025 14:59:48 +0000 (-0400) Subject: chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style) (#15533) X-Git-Tag: upstream/0.0.6527~108 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=88021565f08e0b7c4e07ac089a15ec16fae9166c;p=pkg%2Fggml%2Fsources%2Fllama.cpp chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style) (#15533) * Add DeepSeek V3.1 thinking mode support - Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value - Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation) - Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format: - Extracts reasoning content before '</think>' tag into reasoning_content - Extracts regular content after '</think>' tag into content - No opening '<think>' tag in V3.1 format - Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking' - Added V3.1 case to parsing switch statement This addresses the issue where V3.1 outputs reasoning content followed by '</think>' and then regular content without the opening '<think>' tag. * Another attempt by V3.1 non-thinking * Fix test, but it's not asserting anything. * Ignore vim swap files in tests dir * Update the test * Try using try_find_literal instead of regex * passing test * Revert "Try using try_find_literal instead of regex" This reverts commit c50d887ec2780dd9e6b8b397e92347d3db8d5575. * Remove unnecessary change * Remove comment * Add code to handle non-thinking mode. * Try to set message['prefix'] when thinking is enabled. * This fixes reasoning, but breaks normal content. We need state in the chat parser. * DeepSeek V3.1 thinking is now the default. Disable with `--reasoning-budget 0`. * Simplify (DeepSeek V3.1 reasoning) * Fix sign inversion bug * Add some tool calling code (not working). * Tool calls working in non-reasoning mode. * Attempt a unit test for tool call parsing. 
* Passing test * Add tests for both happy path and broken fenced DeepSeek V3.1 tool call variants. * Passing DeepSeek V3.1 tool call tests, but model is not working. * Revert assistance response prefill change. Not my monkeys. * Add fenced_thinking unit test variant. Passes, but thinking tool calling still isn't working for some reason. * Tests pass in reasoning mode. Also e2e tool test passes. * Make a copy of the parse_json_tool_calls function for deepseek-v3.1 so as to not accidentally introduce regressions. * Fix thinking_forced_open logic. tool calling broken. Need to add another test case. * That's what I get for cargo culting a newline. * Add multi tool call test for deepseek v3.1 non-reasoning * Move test, remove .gitignore change * Place deepseek-v3.1 reasoning test directly into existing reasoning function per CISC's request. * Address whitespace CI failure. * Merge two assert_equals per CISC's request. * Add DeepSeek-V3.1 tests to tests/test-chat.cpp per CISC's request. * Merge deepseek V3.1 and regular parse_json_tool_calls() function behaviors by adding optional update_cursor argument. * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * DeepSeek V3.1 fix reasoning_format none * Strip grammar down to strictly what we expect based on model card. Throw out parts we cargo culted from R1 that don't make sense. 
* Update tests/test-chat-parser.cpp Co-authored-by: Sigbjørn Skjæret * DeepSeek V3.1 - Add edge case where thinking is forced open, there is tool calling in the reasoning content, but then the model just stops the output without closing the tag, so it's not a partial. In this case, use the tool call in the reasoning content. * DeepSeek V3.1 - simplify update_cursor * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Update common/chat.cpp Co-authored-by: Sigbjørn Skjæret * Fix indent --------- Co-authored-by: openhands Co-authored-by: Sigbjørn Skjæret --- diff --git a/common/chat.cpp b/common/chat.cpp index a8a4c3e3..4707c4fe 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -631,6 +631,7 @@ const char * common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1"; case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; @@ -698,11 +699,13 @@ static void parse_json_tool_calls( size_t from = std::string::npos; auto first = true; while (true) { + auto start_pos = builder.pos(); auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) : function_regex ? 
builder.try_find_regex(*function_regex, from) : std::nullopt; + if (res) { std::string name; if (get_function_name) { @@ -737,6 +740,8 @@ static void parse_json_tool_calls( return; } throw common_chat_msg_partial_exception("incomplete tool call"); + } else { + builder.move_to(start_pos); } break; } @@ -1388,6 +1393,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ } return data; } + +static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) { + common_chat_params data; + + // Pass thinking context for DeepSeek V3.1 template + json additional_context = { + {"thinking", inputs.enable_thinking}, + }; + + auto prompt = apply(tmpl, inputs, + /* messages_override= */ inputs.messages, + /* tools_override= */ std::nullopt, + additional_context); + data.prompt = prompt; + data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1; + if (string_ends_with(data.prompt, "")) { + if (!inputs.enable_thinking) { + data.prompt += ""; + } else { + data.thinking_forced_open = true; + } + } + if (inputs.tools.is_array() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null(); + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool.at("function"); + std::string name = function.at("name"); + auto parameters = function.at("parameters"); + builder.resolve_refs(parameters); + tool_rules.push_back(builder.add_rule(name + "-call", + "( \"<ï½toolâcallâbeginï½>\" )? 
\"" + name + "<ï½toolâsepï½>" + "\" " + builder.add_schema(name + "-args", parameters) + " " + "\"<ï½toolâcallâendï½>\"")); + }); + // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag, + // so we accept common variants (then it's all constrained) + builder.add_rule("root", + std::string(data.thinking_forced_open ? "( \"\" space )? " : "") + + "( \"<ï½toolâcallsâbeginï½>\" | \"<ï½tool_calls_beginï½>\" | \"<ï½tool calls beginï½>\" | \"<ï½tool\\\\_calls\\\\_beginï½>\" | \"<ï½toolâcallsï½>\" ) " + "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " " + "\"<ï½toolâcallsâendï½>\"" + " space"); + data.grammar_triggers.push_back({ + COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, + // If thinking_forced_open, then we capture the tag in the grammar, + // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar) + std::string(data.thinking_forced_open ? "[\\s\\S]*?(\\s*)" : "(?:[\\s\\S]*?\\s*)?") + + "(<ï½toolâcallsâbeginï½>|<ï½tool_calls_beginï½>|<ï½tool calls beginï½>|<ï½tool\\\\_calls\\\\_beginï½>|<ï½toolâcallsï½>)[\\s\\S]*" + }); + data.preserved_tokens = { + "", + "", + "<ï½toolâcallsâbeginï½>", + "<ï½toolâcallâbeginï½>", + "<ï½toolâsepï½>", + "<ï½toolâcallâendï½>", + "<ï½toolâcallsâendï½>", + }; + }); + } + return data; +} + static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { builder.try_parse_reasoning("", ""); if (!builder.syntax().parse_tool_calls) { @@ -1409,6 +1479,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { tool_calls_end); } +static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) { + static const common_regex function_regex("(?:<ï½toolâcallâbeginï½>)?([^\\n<]+)(?:<ï½toolâsepï½>)"); + + static const common_regex close_regex("(?:[\\s]*)?<ï½toolâcallâendï½>"); + static const common_regex 
tool_calls_begin("(?:<ï½toolâcallsâbeginï½>|<ï½tool_calls_beginï½>|<ï½tool calls beginï½>|<ï½tool\\\\_calls\\\\_beginï½>|<ï½toolâcallsï½>)"); + static const common_regex tool_calls_end("<ï½toolâcallsâendï½>"); + + if (!builder.syntax().parse_tool_calls) { + LOG_DBG("%s: not parse_tool_calls\n", __func__); + builder.add_content(builder.consume_rest()); + return; + } + + LOG_DBG("%s: parse_tool_calls\n", __func__); + + parse_json_tool_calls( + builder, + /* block_open= */ tool_calls_begin, + /* function_regex_start_only= */ std::nullopt, + function_regex, + close_regex, + tool_calls_end); +} + +static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { + // DeepSeek V3.1 outputs reasoning content between "" and "" tags, followed by regular content + // First try to parse using the standard reasoning parsing method + LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str()); + + auto start_pos = builder.pos(); + auto found_end_think = builder.try_find_literal(""); + builder.move_to(start_pos); + + if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) { + LOG_DBG("%s: no end_think, not partial, adding content\n", __func__); + common_chat_parse_deepseek_v3_1_content(builder); + } else if (builder.try_parse_reasoning("", "")) { + // If reasoning was parsed successfully, the remaining content is regular content + LOG_DBG("%s: parsed reasoning, adding content\n", __func__); + // <ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>function<ï½toolâsepï½>NAME\n```json\nJSON\n```<ï½toolâcallâendï½><ï½toolâcallsâendï½> + common_chat_parse_deepseek_v3_1_content(builder); + } else { + if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) { + LOG_DBG("%s: reasoning_format none, adding content\n", __func__); + common_chat_parse_deepseek_v3_1_content(builder); + return; + } + // If no reasoning tags found, check if we should treat everything as reasoning + if 
(builder.syntax().thinking_forced_open) { + // If thinking is forced open but no tags found, treat everything as reasoning + LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__); + builder.add_reasoning_content(builder.consume_rest()); + } else { + LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__); + // <ï½toolâcallâbeginï½>NAME<ï½toolâsepï½>JSON<ï½toolâcallâendï½> + common_chat_parse_deepseek_v3_1_content(builder); + } + } +} + static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; auto prompt = apply(tmpl, inputs); @@ -2365,6 +2495,12 @@ static common_chat_params common_chat_templates_apply_jinja( } } + // DeepSeek V3.1: detect based on specific patterns in the template + if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos && + params.json_schema.is_null()) { + return common_chat_params_init_deepseek_v3_1(tmpl, params); + } + // DeepSeek R1: use handler in all cases except json schema (thinking / tools). 
if (src.find("<ï½toolâcallsâbeginï½>") != std::string::npos && params.json_schema.is_null()) { return common_chat_params_init_deepseek_r1(tmpl, params); @@ -2537,6 +2673,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { case COMMON_CHAT_FORMAT_DEEPSEEK_R1: common_chat_parse_deepseek_r1(builder); break; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: + common_chat_parse_deepseek_v3_1(builder); + break; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: common_chat_parse_functionary_v3_2(builder); break; diff --git a/common/chat.h b/common/chat.h index 41851022..5170fc14 100644 --- a/common/chat.h +++ b/common/chat.h @@ -107,6 +107,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_FIREFUNCTION_V2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, COMMON_CHAT_FORMAT_HERMES_2_PRO, COMMON_CHAT_FORMAT_COMMAND_R7B, COMMON_CHAT_FORMAT_GRANITE, diff --git a/models/templates/README.md b/models/templates/README.md index 2e8eaa59..3a649b8f 100644 --- a/models/templates/README.md +++ b/models/templates/README.md @@ -22,4 +22,5 @@ These templates can be updated with the following commands: ./scripts/get_chat_template.py Qwen/QwQ-32B > models/templates/Qwen-QwQ-32B.jinja ./scripts/get_chat_template.py Qwen/Qwen3-0.6B > models/templates/Qwen-Qwen3-0.6B.jinja ./scripts/get_chat_template.py zai-org/GLM-4.5 > models/templates/zai-org-GLM-4.5.jinja +./scripts/get_chat_template.py deepseek-ai/DeepSeek-V3.1 > models/templates/deepseek-ai-DeepSeek-V3.1.jinja ``` diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja new file mode 100644 index 00000000..e5656196 --- /dev/null +++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja @@ -0,0 +1,3 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, 
is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<ï½Userï½>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<ï½Assistantï½>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>'+ tool['function']['name'] + '<ï½toolâsepï½>' + tool['function']['arguments'] + '<ï½toolâcallâendï½>'}}{%- else %}{{message['content'] + '<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>' + tool['function']['name'] + '<ï½toolâsepï½>' + tool['function']['arguments'] + '<ï½toolâcallâendï½>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<ï½toolâcallâbeginï½>'+ tool['function']['name'] + '<ï½toolâsepï½>' + tool['function']['arguments'] + '<ï½toolâcallâendï½>'}}{%- endif %}{%- endfor %}{{'<ï½toolâcallsâendï½><ï½endâofâsentenceï½>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<ï½Assistantï½>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{''}} {%- else %}{{''}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<ï½endâofâsentenceï½>'}}{%- set 
ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '' in content %}{%- set content = content.split('', 1)[1] -%}{%- endif %}{{content + '<ï½endâofâsentenceï½>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<ï½toolâoutputâbeginï½>' + message['content'] + '<ï½toolâoutputâendï½>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<ï½Assistantï½>'}}{%- if not thinking %}{{''}}{%- else %}{{''}}{%- endif %}{% endif %} \ No newline at end of file diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp index 59e44e07..547ebb48 100644 --- a/tests/test-chat-parser.cpp +++ b/tests/test-chat-parser.cpp @@ -15,14 +15,20 @@ #include "regex-partial.h" template -static void assert_equals(const T & expected, const T & actual) { +static void assert_equals(const std::string_view label, const T & expected, const T & actual) { if (expected != actual) { + std::cerr << label << std::endl; std::cerr << "Expected: " << expected << std::endl; std::cerr << "Actual: " << actual << std::endl; std::cerr << std::flush; throw std::runtime_error("Test failed"); } } + +template +static void assert_equals(const T & expected, const T & actual) { + assert_equals("", expected, actual); +} static void assert_equals(const char * expected, const std::string & actual) { return assert_equals(expected, actual); } @@ -46,6 +52,7 @@ static void assert_throws(const std::function & fn, const std::string & } static void test_reasoning() { + //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); { common_chat_msg_parser builder("CogitoErgo sum", /* is_partial= */ false, { /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY, @@ -99,6 +106,36 @@ static void test_reasoning() { assert_equals("Cogito", builder.result().content); assert_equals("Ergo sum", builder.consume_rest()); } + // Test DeepSeek V3.1 parsing - reasoning content followed by "" and then 
regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("deepseek_v3_1_reasoning_format_deepseek"); + common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, syntax); + assert_equals(variant, true, builder.try_parse_reasoning("", "")); + assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content); + assert_equals(variant, std::string("ok"), builder.consume_rest()); + } + // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "" and then regular content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("deepseek_v3_1_reasoning_format_none"); + const std::string input = "REASONINGok"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, std::string("REASONINGok"), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + } } static void test_regex() { @@ -186,6 +223,159 @@ static void test(const std::string & input, bool is_partial, const std::vectoris_partial); assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? 
js->value.get() : js->value.dump()); } + +static void test_deepseek_v3_1_tool_calls() { + //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG); + // variant: happy path for when it works as the model card says it should + const std::string variant("simple"); + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string input = "<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto msg = common_chat_parse(input, false, syntax); + assert_equals(variant, 1, msg.tool_calls.size()); + assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name); + // JSON arguments are dumped without spaces + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments); + assert_equals(variant, std::string(""), msg.content); + assert_equals(variant, std::string(""), msg.reasoning_content); + + // variant: simple + thinking open + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_thinking"); + const std::string in = "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, 
std::string("REASONING"), m.reasoning_content); + } + // variant: simple + multiple tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("simple_multiple_tool_calls"); + const std::string in = "CONTENT<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Paris\"}<ï½toolâcallâendï½><ï½toolâcallâbeginï½>get_weather<ï½toolâsepï½>{\"city\": \"Paris\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 2, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name); + assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } + + + // variant: thinking forced open + tool call in reasoning content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning"); + const std::string in = "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time2<ï½toolâsepï½>{\"city\": \"Tokyo2\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto m = common_chat_parse(in, false, syntax); + 
assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, std::string("REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time2<ï½toolâsepï½>{\"city\": \"Tokyo2\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>REASONING"), m.reasoning_content); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + not partial + // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting + // to make tool calls in reasoning content according to the model card, but it does sometimes, so + // add the reasoning content as regular content and parse the tool calls. + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial"); + const std::string in = "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, std::string("REASONING"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + assert_equals(variant, 1, m.tool_calls.size()); + assert_equals(variant, std::string("get_time"), m.tool_calls[0].name); + assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments); + } + + // variant: thinking forced open + tool call in reasoning content + no closing think + partial + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = 
*/ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial"); + const std::string in = "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"; + auto m = common_chat_parse(in, /* is_partial= */ true, syntax); + assert_equals(variant, std::string("REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>"), m.reasoning_content); + assert_equals(variant, std::string(""), m.content); + assert_equals(variant, 0, m.tool_calls.size()); + } + + // variant: thinking not forced open + reasoning + regular content + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls"); + const std::string in = "REASONINGCONTENT"; + auto m = common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string("REASONING"), m.reasoning_content); + } + // variant: thinking not forced open + missing reasoning + no tool calls + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls"); + const std::string in = "CONTENT"; + auto m = 
common_chat_parse(in, false, syntax); + assert_equals(variant, 0, m.tool_calls.size()); + assert_equals(variant, std::string("CONTENT"), m.content); + assert_equals(variant, std::string(""), m.reasoning_content); + } +} + static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) { common_chat_msg_parser builder(input, parse_as_partial, {}); auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {}); @@ -347,6 +537,7 @@ int main() { test_json_with_dumped_args(); test_reasoning(); test_regex(); + test_deepseek_v3_1_tool_calls(); std::cout << "All tests passed!\n"; return 0; } diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 17ff7ea9..ac8a0ade 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1757,7 +1757,6 @@ static void test_template_output_parsers() { /* is_partial= */ false, {COMMON_CHAT_FORMAT_SEED_OSS})); } - { auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja"); std::vector end_tokens{ "" }; @@ -1828,6 +1827,142 @@ static void test_template_output_parsers() { /* expect_grammar_triggered= */ true ); } + { + auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja"); + std::vector end_tokens{ "<ï½endâofâsentenceï½>" }; + + for (const auto & inputs : { inputs_no_tools, inputs_tools }) { + auto params = common_chat_templates_apply(tmpls.get(), inputs); + assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format); + assert_equals(true, params.thinking_forced_open); + } + + test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false); + assert_msg_equals( + simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"), + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* 
is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + })); + // variant: thinking forced open, reasoning_format none + assert_msg_equals( + simple_assist_msg("REASONINGok", ""), + common_chat_parse( + "REASONINGok", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: happy path for when it works as the model card says it should + assert_msg_equals( + simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"), + common_chat_parse( + "<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + })); + // variant: simple + thinking open + assert_msg_equals( + simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"), + common_chat_parse( + "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: simple + multiple tool calls + common_chat_msg message_assist_multiple_calls; + message_assist_multiple_calls.role = "assistant"; + message_assist_multiple_calls.content = "CONTENT"; + message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", 
""}); + message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""}); + assert_msg_equals( + message_assist_multiple_calls, + common_chat_parse( + "CONTENT<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Paris\"}<ï½toolâcallâendï½><ï½toolâcallâbeginï½>get_weather<ï½toolâsepï½>{\"city\": \"Paris\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + assert_msg_equals( + simple_assist_msg("", "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time2<ï½toolâsepï½>{\"city\": \"Tokyo2\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>REASONING", "get_time", "{\"city\":\"Tokyo\"}"), + common_chat_parse( + "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time2<ï½toolâsepï½>{\"city\": \"Tokyo2\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + no closing think + not partial + // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting + // to make tool calls in reasoning content according to the model card, but it does sometimes, so + // add the reasoning content as regular content and parse the tool calls. 
+ assert_msg_equals( + simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"), + common_chat_parse( + "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking forced open + tool call in reasoning content + no closing think + partial + assert_msg_equals( + simple_assist_msg("", "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", "", ""), + common_chat_parse( + "REASONING<ï½toolâcallsâbeginï½><ï½toolâcallâbeginï½>get_time<ï½toolâsepï½>{\"city\": \"Tokyo\"}<ï½toolâcallâendï½><ï½toolâcallsâendï½>", + /* is_partial= */ true, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ true, + /* .parse_tool_calls = */ true, + })); + // variant: thinking not forced open + missing reasoning + no tool calls + assert_msg_equals( + simple_assist_msg("CONTENT", ""), + common_chat_parse( + "CONTENT", + /* is_partial= */ false, + { + COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + })); + } } static void test_msg_diffs_compute() {