From: shun095 Date: Fri, 19 Sep 2025 15:57:30 +0000 (+0900) Subject: chat: Fix streaming parser for granite models (#15682) X-Git-Tag: upstream/0.0.6527~6 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f432d8d83e7407073634c5e4fd81a3d23a10827f;p=pkg%2Fggml%2Fsources%2Fllama.cpp chat: Fix streaming parser for granite models (#15682) * fix(chat): fix streaming parser for granite models * tests: add test cases for Granite models chat parser --- diff --git a/common/chat.cpp b/common/chat.cpp index 5ac57f1e..ce53f89f 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2232,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp static void common_chat_parse_granite(common_chat_msg_parser & builder) { // Parse thinking tags + static const common_regex start_think_regex(regex_escape("")); + static const common_regex end_think_regex(regex_escape("")); + // Granite models output partial tokens such as "<" and "groups[0].begin); + builder.try_find_regex(end_think_regex, std::string::npos, false); + // Restore position for try_parse_reasoning() + builder.move_to(res->groups[0].begin); + } builder.try_parse_reasoning("", ""); - // Parse response tags using regex - static const common_regex response_regex("([\\s\\S]*?)"); - if (auto res = builder.try_find_regex(response_regex)) { - // Extract the content between the tags (capture group 1) - auto content = builder.str(res->groups[1]); - builder.add_content(content); - builder.move_to(res->groups[0].end); + // Parse response tags + static const common_regex start_response_regex(regex_escape("")); + static const common_regex end_response_regex(regex_escape("")); + // Granite models output partial tokens such as "<" and "groups[0].end); // Expect JSON array of tool calls - auto tool_calls_data = builder.consume_json(); - if (tool_calls_data.json.is_array()) { - if (!builder.add_tool_calls(tool_calls_data.json)) { - builder.add_content("<|tool_call|>" + tool_calls_data.json.dump()); + if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) { + if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) { + throw common_chat_msg_partial_exception("incomplete tool call"); } - } else { - builder.add_content("<|tool_call|>" + tool_calls_data.json.dump()); } } else { builder.add_content(builder.consume_rest()); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index ac8a0ade..ce0f4b0a 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1402,6 +1402,12 @@ static void test_template_output_parsers() { "Hello, world!\nWhat's up?", /* is_partial= */ false, {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals( + message_assist, + common_chat_parse( + "Hello, world!\nWhat's up?", + /* is_partial= */ true, + {COMMON_CHAT_FORMAT_GRANITE})); // Test parsing content with thinking assert_msg_equals(message_assist_thoughts, @@ -1412,6 +1418,59 @@ static void test_template_output_parsers() { /* .format = */ COMMON_CHAT_FORMAT_GRANITE, /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, })); + assert_msg_equals(message_assist_thoughts_unparsed_deepseek, + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals(message_assist_thoughts, + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ true, + { + /* .format = */ COMMON_CHAT_FORMAT_GRANITE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + assert_msg_equals(message_assist_thoughts, + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ false, + { + /* .format = */ COMMON_CHAT_FORMAT_GRANITE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + assert_msg_equals(simple_assist_msg("I'm\nthinkingHello, world!\nWhat's up?"), + common_chat_parse( + "I'm\nthinkingHello, world!\nWhat's up?", + /* is_partial= */ false, + {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals(message_assist_empty, + common_chat_parse( + "I'm\nthinking", + /* is_partial= */ true, + { + /* .format = */ COMMON_CHAT_FORMAT_GRANITE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + assert_msg_equals( + message_assist_empty, + common_chat_parse( + "I'm\nthinking[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", /* is_partial= */ false, {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals( + message_assist_call_empty_args, + common_chat_parse( + "<|tool_call|>[{\"name\": \"special_function\"", + /* is_partial= */ true, + {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals( + message_assist_call_cutoff_args, + common_chat_parse( + "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", + /* is_partial= */ true, + {COMMON_CHAT_FORMAT_GRANITE})); + assert_msg_equals( + message_assist_call_cutoff_args, + common_chat_parse( + "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg", + /* is_partial= */ true, + { + /* .format = */ COMMON_CHAT_FORMAT_GRANITE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); + + // Test parsing tool calls with thinking + assert_msg_equals( + message_assist_call_thoughts, + common_chat_parse( + "I'm\nthinking<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {", + /* is_partial= */ true, + { + /* .format = */ COMMON_CHAT_FORMAT_GRANITE, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + })); // Test template generation for regular content test_templates(tmpls.get(), end_tokens, message_assist, tools,