chat: Fix streaming parser for granite models (#15682)

author shun095 <redacted>

Fri, 19 Sep 2025 15:57:30 +0000 (00:57 +0900)

committer GitHub <redacted>

Fri, 19 Sep 2025 15:57:30 +0000 (09:57 -0600)
author shun095 <redacted>
Fri, 19 Sep 2025 15:57:30 +0000 (00:57 +0900)
committer GitHub <redacted>
Fri, 19 Sep 2025 15:57:30 +0000 (09:57 -0600)
diff --git a/common/chat.cpp b/common/chat.cpp

index 5ac57f1e801d1f99c9f14adb528a5cca65118aa8..ce53f89f786b8cf047c8403c16a90b6ae40db519 100644 (file)
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -2232,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
  
  static void common_chat_parse_granite(common_chat_msg_parser & builder) {
      // Parse thinking tags
+    static const common_regex start_think_regex(regex_escape("<think>"));
+    static const common_regex end_think_regex(regex_escape("</think>"));
+    // While streaming, Granite output can end in the middle of a tag (e.g. "<" or "<think").
+    // try_consume_regex()/try_find_regex() throw common_chat_msg_partial_exception
+    // on such a partial match; we rely on that to interrupt parsing here so the
+    // incomplete tag is never passed to add_content().
+    if (auto res = builder.try_consume_regex(start_think_regex)) {
+        // Rewind to the start of "<think>" before probing for the end tag
+        builder.move_to(res->groups[0].begin);
+        builder.try_find_regex(end_think_regex, std::string::npos, false);
+        // Restore position for try_parse_reasoning()
+        builder.move_to(res->groups[0].begin);
+    }
      builder.try_parse_reasoning("<think>", "</think>");
  
-    // Parse response tags using regex
-    static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
-    if (auto res = builder.try_find_regex(response_regex)) {
-        // Extract the content between the tags (capture group 1)
-        auto content = builder.str(res->groups[1]);
-        builder.add_content(content);
-        builder.move_to(res->groups[0].end);
+    // Parse response tags
+    static const common_regex start_response_regex(regex_escape("<response>"));
+    static const common_regex end_response_regex(regex_escape("</response>"));
+    // Granite models output partial tokens such as "<" and "<response".
+    // Same hack as reasoning parsing.
+    if (builder.try_consume_regex(start_response_regex)) {
+        builder.try_find_regex(end_response_regex);
      }
  
      if (!builder.syntax().parse_tool_calls) {
@@ -2254,13 +2267,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
          builder.move_to(res->groups[0].end);
  
          // Expect JSON array of tool calls
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            if (!builder.add_tool_calls(tool_calls_data.json)) {
-                builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
+        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
+            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
+                throw common_chat_msg_partial_exception("incomplete tool call");
              }
-        } else {
-            builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
          }
      } else {
          builder.add_content(builder.consume_rest());
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp

index ac8a0ade1f6e2258e440b767c6a65f9f9297b44c..ce0f4b0a2a9f39048b79138467657ea6a8f982e3 100644 (file)
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1402,6 +1402,12 @@ static void test_template_output_parsers() {
                  "Hello, world!\nWhat's up?",
                  /* is_partial= */ false,
                  {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(
+            message_assist,
+            common_chat_parse(
+                "Hello, world!\nWhat's up?",
+                /* is_partial= */ true,
+                {COMMON_CHAT_FORMAT_GRANITE}));
  
          // Test parsing content with thinking
          assert_msg_equals(message_assist_thoughts,
@@ -1412,6 +1418,59 @@ static void test_template_output_parsers() {
                      /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
                      /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
                  }));
+        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
+            common_chat_parse(
+                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
+                /* is_partial= */ false,
+                {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(message_assist_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
+                /* is_partial= */ true,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+        assert_msg_equals(message_assist_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
+                /* is_partial= */ false,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+        assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
+            common_chat_parse(
+                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
+                /* is_partial= */ false,
+                {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(message_assist_empty,
+            common_chat_parse(
+                "<think",
+                /* is_partial= */ true,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+        assert_msg_equals(message_assist_empty,
+            common_chat_parse(
+                "<think",
+                /* is_partial= */ true,
+                {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(message_assist_thoughts_no_content,
+            common_chat_parse(
+                "<think>I'm\nthinking",
+                /* is_partial= */ true,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+        assert_msg_equals(
+            message_assist_empty,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><response",
+                /* is_partial= */ true,
+                {COMMON_CHAT_FORMAT_GRANITE}));
  
          // Test parsing tool calls
          assert_msg_equals(message_assist_call,
@@ -1419,6 +1478,38 @@ static void test_template_output_parsers() {
                  "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
                  /* is_partial= */ false,
                  {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(
+            message_assist_call_empty_args,
+            common_chat_parse(
+                "<|tool_call|>[{\"name\": \"special_function\"",
+                /* is_partial= */ true,
+                {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(
+            message_assist_call_cutoff_args,
+            common_chat_parse(
+                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
+                /* is_partial= */ true,
+                {COMMON_CHAT_FORMAT_GRANITE}));
+        assert_msg_equals(
+            message_assist_call_cutoff_args,
+            common_chat_parse(
+                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
+                /* is_partial= */ true,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
+
+        // Test parsing tool calls with thinking
+        assert_msg_equals(
+            message_assist_call_thoughts,
+            common_chat_parse(
+                "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
+                /* is_partial= */ true,
+                {
+                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
+                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+                }));
  
          // Test template generation for regular content
          test_templates(tmpls.get(), end_tokens, message_assist, tools,
author	shun095 <redacted>
	Fri, 19 Sep 2025 15:57:30 +0000 (00:57 +0900)
committer	GitHub <redacted>
	Fri, 19 Sep 2025 15:57:30 +0000 (09:57 -0600)
common/chat.cpp		patch | blob | history
tests/test-chat.cpp		patch | blob | history