common/parser: fix nasty bug causing subtle corruption of generation prompt (#20825)

author Piotr Wilkin (ilintar) <redacted>

Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)

committer GitHub <redacted>

Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)
author Piotr Wilkin (ilintar) <redacted>
Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)
committer GitHub <redacted>
Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp

index 9dcdde25011cf3723df0afa5973e97fb204d628f..3a7a5c13a7053225d1ab3df660dbc8920f4af6be 100644 (file)
--- a/common/chat-auto-parser-helpers.cpp
+++ b/common/chat-auto-parser-helpers.cpp
@@ -188,6 +188,21 @@ diff_split calculate_diff_split(const std::string & left, const std::string & ri
          result.suffix = "";
          // pick prefix = all as representation
      }
+
+    // When left has no unique content (result.left is empty), left is entirely
+    // shared with right. The simultaneous prefix/suffix segment matching can
+    // incorrectly consume trailing segments of left as suffix when those same
+    // segments also appear at the end of right (e.g. "\n" at the end of both
+    // the shared content and the generation prompt). This rotates the diff.
+    // Fix: if left is a prefix of right, enforce that directly.
+    if (result.left.empty() && !result.right.empty() &&
+            left.size() <= right.size() &&
+            right.substr(0, left.size()) == left) {
+        result.prefix = left;
+        result.suffix = "";
+        result.right  = right.substr(left.size());
+    }
+
      return result;
  }
  
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp

index 6abf71d6cf8fd09693c738972682b5303af7f056..0ba51ba23525c760658dbbd6ca18c0bef26e399d 100644 (file)
--- a/tests/test-chat-auto-parser.cpp
+++ b/tests/test-chat-auto-parser.cpp
@@ -22,6 +22,7 @@ static void test_calculate_diff_split_no_common(testing & t);
  static void test_calculate_diff_split_single_char(testing & t);
  static void test_calculate_diff_split_overlaps(testing & t);
  static void test_calculate_diff_split_tag_boundaries(testing & t);
+static void test_calculate_diff_split_generation_prompt(testing & t);
  static void test_calculate_diff_split(testing & t);
  
  static void test_until_common_prefix_basic(testing & t);
@@ -179,6 +180,7 @@ static void test_calculate_diff_split(testing & t) {
      t.test("calculate_diff_split single char", test_calculate_diff_split_single_char);
      t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps);
      t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries);
+    t.test("calculate_diff_split generation prompt", test_calculate_diff_split_generation_prompt);
  }
  
  static void test_calculate_diff_split_basic(testing & t) {
@@ -502,6 +504,39 @@ static void test_calculate_diff_split_tag_boundaries(testing & t) {
      }
  }
  
+static void test_calculate_diff_split_generation_prompt(testing & t) {
+    // ChatML thinking template: left is a prefix of right, generation_prompt is the appended part.
+    // The trailing \n in left matches the trailing \n in the generation_prompt, causing
+    // the suffix matcher to steal it and rotate the diff result.
+    {
+        // Simplified reproduction: left ends with \n, right = left + "<|im_start|>assistant\n<think>\n"
+        std::string left  = "<|im_start|>user\nHello<|im_end|>\n";
+        std::string right = left + "<|im_start|>assistant\n<think>\n";
+        diff_split result = calculate_diff_split(left, right);
+        t.assert_equal("chatml prefix", left, result.prefix);
+        t.assert_equal("chatml left", "", result.left);
+        t.assert_equal("chatml right should be generation prompt",
+                       "<|im_start|>assistant\n<think>\n", result.right);
+        t.assert_equal("chatml suffix", "", result.suffix);
+    }
+
+    {
+        // More realistic: longer conversation ending with tool_response
+        std::string common =
+            "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+            "<|im_start|>user\nSearch for files<|im_end|>\n"
+            "<|im_start|>assistant\n<think>\nLet me search.\n</think>\n\n"
+            "<tool_call>\n<function=search>\n</function>\n</tool_call><|im_end|>\n"
+            "<|im_start|>user\n<tool_response>\nNo files found\n</tool_response><|im_end|>\n";
+        std::string left  = common;
+        std::string right = common + "<|im_start|>assistant\n<think>\n";
+        diff_split result = calculate_diff_split(left, right);
+        t.assert_equal("tool_response left", "", result.left);
+        t.assert_equal("tool_response right should be generation prompt",
+                       "<|im_start|>assistant\n<think>\n", result.right);
+    }
+}
+
  static void test_until_common_prefix(testing & t) {
      t.test("until_common_prefix basic", test_until_common_prefix_basic);
  }
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp

index faac9e73062a92cd4e106ab660ae960f853f6432..575d240791747a6e8a676c271898140f7a9a3e06 100644 (file)
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1337,7 +1337,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
          tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
              .enable_thinking(true)
              .reasoning_format(COMMON_REASONING_FORMAT_NONE)
-            .expect_content("<think>I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .expect_content("<think>\nI'm\nthinking\n</think>\nHello, world!\nWhat's up?")
              .run();
  
          tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp

index 39d232c2e48094fb408421d4cb9e2b244050c71f..7d543b9292b8752811734016a5cfd3cde9d20992 100644 (file)
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -415,6 +415,7 @@ task_params server_task::params_from_json_cmpl(
          params.chat_parser_params.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
          params.chat_parser_params.generation_prompt = json_value(data, "generation_prompt", std::string());
          params.sampling.generation_prompt = params.chat_parser_params.generation_prompt;
+        SRV_DBG("Generation prompt: '%s'\n", params.chat_parser_params.generation_prompt.c_str());
          params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
          if (data.contains("chat_parser")) {
              params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
author	Piotr Wilkin (ilintar) <redacted>
	Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)
committer	GitHub <redacted>
	Fri, 20 Mar 2026 23:19:04 +0000 (00:19 +0100)
common/chat-auto-parser-helpers.cpp		patch \| blob \| history
tests/test-chat-auto-parser.cpp		patch \| blob \| history
tests/test-chat.cpp		patch \| blob \| history
tools/server/server-task.cpp		patch \| blob \| history