server : fix assistant prefilling when content is an array (#14360)

author Sigbjørn Skjæret <redacted>

Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)

committer GitHub <redacted>

Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)
author Sigbjørn Skjæret <redacted>
Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)
committer GitHub <redacted>
Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)
diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py

index 1b5205f79d610b7206b9319188be1bd6c1424123..7ee9a1651400daf5b5e6c25d3af09aca1c5f8c64 100644 (file)
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -132,6 +132,28 @@ def test_chat_template():
      assert res.body["__verbose"]["prompt"] == "<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
  
  
+@pytest.mark.parametrize("prefill,re_prefill", [
+    ("Whill", "Whill"),
+    ([{"type": "text", "text": "Wh"}, {"type": "text", "text": "ill"}], "Whill"),
+])
+def test_chat_template_assistant_prefill(prefill, re_prefill):
+    global server
+    server.chat_template = "llama3"
+    server.debug = True  # to get the "__verbose" object in the response
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+            {"role": "assistant", "content": prefill},
+        ]
+    })
+    assert res.status_code == 200
+    assert "__verbose" in res.body
+    assert res.body["__verbose"]["prompt"] == f"<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{re_prefill}"
+
+
  def test_apply_chat_template():
      global server
      server.chat_template = "command-r"
@@ -228,6 +250,7 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
      [{"role": "system", "content": 123}],
      # [{"content": "hello"}], # TODO: should not be a valid case
      [{"role": "system", "content": "test"}, {}],
+    [{"role": "user", "content": "test"}, {"role": "assistant", "content": "test"}, {"role": "assistant", "content": "test"}],
  ])
  def test_invalid_chat_completion_req(messages):
      global server
diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp

index 2ef9a164513c39a8c221ea1b66d29af1c89d0d26..6c2e91359a66379072ad3be250f5316499c5d72e 100644 (file)
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -792,7 +792,13 @@ static json oaicompat_chat_params_parse(
  
      /* Append assistant prefilled message */
      if (prefill_assistant_message) {
-         chat_params.prompt += last_message.content;
+        if (!last_message.content_parts.empty()) {
+            for (auto & p : last_message.content_parts) {
+                chat_params.prompt += p.text;
+            }
+        } else {
+            chat_params.prompt += last_message.content;
+        }
      }
  
      llama_params["chat_format"]      = static_cast<int>(chat_params.format);
author	Sigbjørn Skjæret <redacted>
	Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)
committer	GitHub <redacted>
	Sat, 5 Jul 2025 07:17:14 +0000 (09:17 +0200)
tools/server/tests/unit/test_chat_completion.py		patch | blob | history
tools/server/utils.hpp		patch | blob | history