server : fix --jinja when there are no tools or schema (typo was forcing JSON) (#11531)

author Olivier Chafik <redacted>

Fri, 31 Jan 2025 08:12:40 +0000 (08:12 +0000)

committer GitHub <redacted>

Fri, 31 Jan 2025 08:12:40 +0000 (10:12 +0200)
author Olivier Chafik <redacted>
Fri, 31 Jan 2025 08:12:40 +0000 (08:12 +0000)
committer GitHub <redacted>
Fri, 31 Jan 2025 08:12:40 +0000 (10:12 +0200)
diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py

index 0be04bab5037b803b9f5e64bf2748a6360f92cb8..f5d8b0572dbeddb1f2d1957989690d0c3d1056b1 100644 (file)
--- a/examples/server/tests/unit/test_chat_completion.py
+++ b/examples/server/tests/unit/test_chat_completion.py
@@ -14,11 +14,10 @@ def create_server():
      "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template",
      [
          (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None),
+        (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
+        (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"),
          ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None),
-        # TODO: fix testing of non-tool jinja mode
-        # (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None),
-        # (None, "Book", "What is the best book", 8, "I want to play with", 23, 8, "length", True, "This is not a chat template, it is"),
-        # ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None),
+        ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None),
      ]
  )
  def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason, jinja, chat_template):
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp

index 3d2c04666853fd0c88e980fce4068647f0662fb7..70bd6a42cb60811ebd835c0d8c063d5cce866c50 100644 (file)
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -642,7 +642,7 @@ static json oaicompat_completion_params_parse(
          inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
          inputs.stream = stream;
          // TODO: support mixing schema w/ tools beyond generic format.
-        inputs.json_schema = json_value(llama_params, "json_schema", json::object());
+        inputs.json_schema = json_value(llama_params, "json_schema", json());
          auto chat_params = common_chat_params_init(tmpl, inputs);
  
          llama_params["chat_format"] = static_cast<int>(chat_params.format);
author	Olivier Chafik <redacted>
	Fri, 31 Jan 2025 08:12:40 +0000 (08:12 +0000)
committer	GitHub <redacted>
	Fri, 31 Jan 2025 08:12:40 +0000 (10:12 +0200)
examples/server/tests/unit/test_chat_completion.py		patch \| blob \| history
examples/server/utils.hpp		patch \| blob \| history