* extract & return thoughts in reasoning_content field (unless --reasoning-format none) for DeepSeek R1 & Command R7B
* tool-calls: add deepseek r1 template (models/templates/llama-cpp-deepseek-r1.jinja) + work around broken official template
* tool-calls: accommodate variety of wrong tool call opening tags both R1 Qwen 32B and 7B distills like to spit out
* server/oai: ensure content is null when there are tool calls, and reasoning_content appears before content for readability
* tool-calls: add DeepSeek R1 Qwen distills to server/README.md & server tests
---------
Co-authored-by: Georgi Gerganov <redacted>
params.use_jinja = true;
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
+ add_opt(common_arg(
+ {"--reasoning-format"}, "FORMAT",
+ "reasoning format (default: deepseek; allowed values: deepseek, none)\n"
+ "controls whether thought tags are extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only).\n"
+ "only supported for non-streamed responses",
+ [](common_params & params, const std::string & value) {
+ /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
+ else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
+ else { throw std::invalid_argument("invalid value"); }
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
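For illustration, a minimal standalone sketch (not part of this diff) of what each `--reasoning-format` value does to a completion that starts with a `<think>…</think>` block, matching the help text above:

```cpp
#include <cassert>
#include <string>

int main() {
    const std::string raw = "<think>Add 102 and 7.</think>The sum is 109.";

    // --reasoning-format none: thoughts stay unparsed in message.content
    const std::string content_none = raw;

    // --reasoning-format deepseek: thoughts move to message.reasoning_content
    const std::string reasoning = raw.substr(7, raw.find("</think>") - 7);
    const std::string content   = raw.substr(raw.find("</think>") + 8);

    assert(content_none == raw);
    assert(reasoning == "Add 102 and 7.");
    assert(content   == "The sum is 109.");
}
```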
add_opt(common_arg(
{"--chat-template"}, "JINJA_TEMPLATE",
string_format(
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
+ case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING: return "DeepSeek R1 (extract reasoning)";
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
+ case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
default:
throw std::runtime_error("Unknown chat format");
}
std::sregex_iterator rend;
std::sregex_iterator rit(it, end, function_regex);
if (rit == rend) {
- fprintf(stderr, "No more tool calls found\n");
result.content += std::string(it, end);
break;
}
json arguments;
if (!parse_json(it, end, arguments)) {
- throw std::runtime_error("Failed to parse json tool call arguments");
+ throw std::runtime_error("Failed to parse json tool call arguments: " + input);
}
if (!std::regex_search(it, end, match, close_regex)) {
- throw std::runtime_error("Malformed input, missing closing pattern");
+ throw std::runtime_error("Malformed input, missing closing pattern: " + input);
}
it = match.suffix().first;
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
}
+
+ if (!result.tool_calls.empty()) {
+ if (!string_strip(result.content).empty()) {
+ LOG_WRN("Content found with tool calls: %s\n", result.content.c_str());
+ }
+ result.content = "";
+ }
return result;
}
result.role = "assistant";
const auto process_tool_calls = [&](const json & tool_calls) {
for (const auto & tool_call : tool_calls) {
- const auto & arguments = tool_call["arguments"];
+ const auto & arguments = tool_call.at("arguments");
result.tool_calls.push_back({
- tool_call["name"],
+ tool_call.at("name"),
arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
- tool_call.contains("id") ? tool_call["id"] : "",
+ tool_call.contains("id") ? tool_call.at("id") : "",
});
}
};
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
for (const auto & tool : tools) {
- if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) {
+ if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
continue;
}
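An aside on the pervasive `operator[]` → `.at()` switch throughout these hunks: on `nlohmann::json`, `.at()` throws `json::out_of_range` when a key is missing, whereas `operator[]` on a `const json` object is undefined behavior for an absent key. A minimal sketch of the failure mode being avoided (standalone, not part of this diff):

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    const json tool = json::parse(R"({"type": "function"})"); // no "function" key

    // tool["function"] on this const object would be undefined behavior;
    // .at() fails loudly with a descriptive exception instead:
    try {
        const auto & fn = tool.at("function");
        std::cout << fn.dump() << "\n";
    } catch (const json::out_of_range & e) {
        std::cout << "caught: " << e.what() << "\n"; // key 'function' not found
    }
}
```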
auto tool_call_schemas = json::array();
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
+ const auto & function = tool.at("function");
auto tool_schema = json {
{"type", "object"},
{"properties", {
{"name", {
{"type", "string"},
- {"const", function["name"]},
+ {"const", function.at("name")},
}},
- {"arguments", function["parameters"]},
+ {"arguments", function.at("parameters")},
}},
{"required", json::array({"name", "arguments"})},
};
if (function.contains("description")) {
- tool_schema["description"] = function["description"];
+ tool_schema["description"] = function.at("description");
}
if (inputs.parallel_tool_calls) {
- tool_schema["properties"]["id"] = {
+ tool_schema.at("properties")["id"] = {
{"type", "string"},
{"minLength", 4},
};
- tool_schema["required"].push_back("id");
+ tool_schema.at("required").push_back("id");
}
tool_call_schemas.emplace_back(tool_schema);
});
common_chat_msg result;
result.role = "assistant";
if (data.contains("tool_calls")) {
- for (const auto & tool_call : data["tool_calls"]) {
+ for (const auto & tool_call : data.at("tool_calls")) {
result.tool_calls.push_back({
- tool_call["name"],
- tool_call["arguments"].dump(),
- tool_call.contains("id") ? tool_call["id"] : "",
+ tool_call.at("name"),
+ tool_call.at("arguments").dump(),
+ tool_call.contains("id") ? tool_call.at("id") : "",
});
}
} else if (data.contains("tool_call")) {
result.tool_calls.push_back({
- data["tool_call"]["name"],
- data["tool_call"]["arguments"].dump(),
+ data.at("tool_call").at("name"),
+ data.at("tool_call").at("arguments").dump(),
/* id= */ "",
});
} else if (data.contains("response")) {
- const auto & response = data["response"];
+ const auto & response = data.at("response");
result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
}
return result;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
auto schemas = json::array();
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
+ const auto & function = tool.at("function");
schemas.push_back({
{"type", "object"},
{"properties", {
// It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
{"name", {
{"type", "string"},
- {"const", function["name"]},
+ {"const", function.at("name")},
}},
- {"arguments", function["parameters"]},
+ {"arguments", function.at("parameters")},
{"id", {
{"type", "string"},
// Nemo's template expects a 9-character alphanumeric ID.
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
auto schemas = json::array();
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
+ const auto & function = tool.at("function");
schemas.push_back({
{"type", "object"},
{"properties", {
}},
{"tool_name", {
{"type", "string"},
- {"const", function["name"]},
+ {"const", function.at("name")},
}},
- {"parameters", function["parameters"]},
+ {"parameters", function.at("parameters")},
}},
{"required", json::array({"tool_call_id", "tool_name", "parameters"})},
});
"<|END_THINKING|>",
"<|END_ACTION|>",
};
- data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
- data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
+ auto adjusted_messages = json::array();
+ for (const auto & msg : inputs.messages) {
+ auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+ auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+ if (has_reasoning_content && has_tool_calls) {
+ auto adjusted_message = msg;
+ adjusted_message["tool_plan"] = msg.at("reasoning_content");
+ adjusted_message.erase("reasoning_content");
+ adjusted_messages.push_back(adjusted_message);
+ } else {
+ adjusted_messages.push_back(msg);
+ }
+ }
+ data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {});
+ data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING : COMMON_CHAT_FORMAT_COMMAND_R7B;
return data;
}
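To make the round-trip concrete, a sketch of the adjustment above (assumed message shapes, not part of this diff): an assistant turn that came back through the API with `reasoning_content` next to `tool_calls` is re-fed to the Command R7B template with the thoughts under `tool_plan`, the field its official template expects:

```cpp
#include <cassert>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // Assistant turn as returned by the API (tool_calls elided).
    json msg = {
        {"role", "assistant"},
        {"reasoning_content", "Check the weather first."},
        {"tool_calls", json::array()},
    };

    // Same rename as the adjustment loop above.
    json adjusted = msg;
    adjusted["tool_plan"] = msg.at("reasoning_content");
    adjusted.erase("reasoning_content");

    assert(adjusted.at("tool_plan") == "Check the weather first.");
    assert(!adjusted.contains("reasoning_content"));
}
```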
-static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
- static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
- static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
+static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
+ static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>)([\\s\\S\\n\\r]*)");
+ static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
+ static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
+
std::smatch match;
common_chat_msg result;
result.role = "assistant";
- if (std::regex_match(input, match, response_regex)) {
- result.content = match[1].str();
- } else if (std::regex_match(input, match, thought_action_regex)) {
- result.tool_plan = match[1].str();
- auto actions_str = match[2].str();
+
+ std::string rest = input;
+
+ if (std::regex_match(rest, match, thought_regex)) {
+ if (extract_reasoning) {
+ result.reasoning_content = match[2].str();
+ } else if (!match[2].str().empty()) {
+ // Let the unparsed thinking tags through in content only if their insides aren't empty.
+ result.content = match[1].str();
+ }
+ rest = match[3].str();
+ }
+ if (std::regex_match(rest, match, action_regex)) {
+ auto actions_str = match[1].str();
auto actions = json::parse(actions_str);
for (const auto & action : actions) {
result.tool_calls.push_back({
- /* .name = */ action["tool_name"],
- /* .arguments = */ action["parameters"].dump(),
- /* .id = */ action["tool_call_id"],
+ /* .name = */ action.at("tool_name"),
+ /* .arguments = */ action.at("parameters").dump(),
+ /* .id = */ action.at("tool_call_id"),
});
}
+ } else if (std::regex_match(rest, match, response_regex)) {
+ auto response = match[1].str();
+ result.content += response;
} else {
- LOG_ERR("Failed to parse command_r output");
- result.content = input;
+ result.content += rest;
}
return result;
}
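A hypothetical usage of the parser above, with an input handcrafted to exercise the thinking + action path (not part of this diff):

```cpp
common_chat_msg msg = common_chat_parse_command_r7b(
    "<|START_THINKING|>I should call the calculator.<|END_THINKING|>"
    "<|START_ACTION|>[{\"tool_call_id\": \"0\", \"tool_name\": \"calculate\","
    " \"parameters\": {\"expression\": \"1+1\"}}]<|END_ACTION|>",
    /* extract_reasoning= */ true);
// msg.reasoning_content == "I should call the calculator."
// msg.tool_calls[0].name == "calculate", msg.tool_calls[0].id == "0"
```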
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
- if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
+ if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
}
const auto & parameters_properties = parameters.at("properties");
};
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
- std::string name = function["name"];
- auto parameters = function["parameters"];
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
common_chat_params data;
- data.grammar_lazy = inputs.tool_choice != "required";
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
- std::string name = function["name"];
- auto parameters = function["parameters"];
- auto args_rule = builder.add_schema(name + "-args", parameters);
- tool_rules.push_back(builder.add_rule(name + "-call",
- "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
- });
- data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
- data.preserved_tokens = {
- "<|tool▁sep|>",
- "<|tool▁call▁end|>",
- };
- builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
- }, grammar_options);
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
+ data.grammar_lazy = inputs.tool_choice != "required" && inputs.json_schema.is_null();
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ std::vector<std::string> tool_rules;
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
+ auto args_rule = builder.add_schema(name + "-args", parameters);
+ tool_rules.push_back(builder.add_rule(name + "-call",
+ "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n"
+ "```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
+ });
+ // Distill Qwen 7B & 32B models seem confused about the syntax of their tool call opening tag,
+ // so we accept common variants (the rest is fully constrained by the grammar anyway)
+ builder.add_rule("root",
+ "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" ) "
+ "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
+ "\"<|tool▁calls▁end|>\""
+ " space");
+ data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
+ data.grammar_triggers.push_back({"<|tool_calls_begin|>", /* .at_start = */ false});
+ data.grammar_triggers.push_back({"<|tool calls begin|>", /* .at_start = */ false});
+ data.grammar_triggers.push_back({"<|tool\\_calls\\_begin|>", /* .at_start = */ false});
+ data.preserved_tokens = {
+ "<think>",
+ "</think>",
+ "<|tool▁sep|>",
+ "<|tool▁calls▁end|",
+ "<|tool▁call▁end|>",
+ };
+ }, grammar_options);
+ }
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+
+ // Hacks to fix the official (broken) prompt.
+ // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
+ // until the official template is fixed.
+ if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
+ // Don't leave the chat dangling after tool results
+ if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
+ prompt += "<|end▁of▁sentence|>";
+ if (inputs.add_generation_prompt) {
+ prompt += "<|Assistant|>";
+ }
+ }
+ // Fix up tool call delta example added by Minja
+ prompt = std::regex_replace(
+ prompt,
+ std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
+ "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
+ }
data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
+ data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
return data;
}
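The second fix-up above in effect does the following (standalone sketch with an assumed prompt fragment, not actual template output):

```cpp
#include <cassert>
#include <regex>
#include <string>

int main() {
    // Assumed prompt fragment: a tool-call turn left unterminated by the
    // official template right before the next user turn.
    std::string prompt = "<｜tool▁call▁end｜>\n<｜User｜>";

    // Same rewrite as the fix-up above (the fullwidth ｜ is a plain literal
    // in the pattern, so it needs no regex escaping).
    prompt = std::regex_replace(
        prompt,
        std::regex("(<｜tool▁call▁end｜>)[\\s\\r\\n]*(<｜tool▁outputs▁begin｜>|<｜User｜>)"),
        "$1<｜tool▁calls▁end｜><｜end▁of▁sentence｜>$2");

    assert(prompt == "<｜tool▁call▁end｜><｜tool▁calls▁end｜><｜end▁of▁sentence｜><｜User｜>");
}
```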
-static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
- static std::regex trigger_regex("<｜tool▁calls▁begin｜>");
+static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
static std::regex function_regex("<｜tool▁call▁begin｜>function<｜tool▁sep｜>([^\n]+)\n```json\n");
- static std::regex close_regex("```<|tool▁call▁end|>");
- return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
+ static std::regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");
+ static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
+ static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>)([\\s\\S\\r\\n]*?)<｜tool▁calls▁end｜>");
+ common_chat_msg msg;
+ msg.role = "assistant";
+ std::smatch match;
+ if (std::regex_match(input, match, reasoning_content_regex)) {
+ std::string rest;
+ if (extract_reasoning) {
+ msg.reasoning_content = string_strip(match[2].str());
+ } else {
+ msg.content = match[1].str();
+ }
+ rest = match[3].str();
+
+ if (std::regex_search(rest, match, tool_calls_regex)) {
+ auto tool_calls = match[1].str();
+ auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
+ msg.tool_calls = std::move(msg2.tool_calls);
+ } else {
+ auto pos = rest.find_first_not_of(" \r\n");
+ msg.content += pos == std::string::npos ? "" : rest.substr(pos);
+ }
+ } else {
+ msg.content = input;
+ }
+ return msg;
}
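And a hypothetical usage of this parser (the `fence` variable stands in for a literal triple backtick, which would clash with this page's code fences; not part of this diff):

```cpp
const std::string fence(3, '`');
common_chat_msg msg = common_chat_parse_deepseek_r1(
    "<think>I need the calculator.</think>\n"
    "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>calculate\n"
    + fence + "json\n{\"expression\": \"1+1\"}" + fence + "<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
    /* extract_reasoning= */ true);
// msg.reasoning_content == "I need the calculator."
// msg.tool_calls[0].name == "calculate"
// msg.tool_calls[0].arguments == "{\"expression\":\"1+1\"}"
```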
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
{"datetime", "Jan 29 2025 13:00:00 GMT"},
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
});
- if (!inputs.tools.is_null() && !inputs.tools.empty()) {
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
data.grammar_lazy = inputs.tool_choice != "required";
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
auto schemas = json::array();
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
+ const auto & function = tool.at("function");
schemas.push_back({
{"type", "object"},
{"properties", {
{"name", {
{"type", "string"},
- {"const", function["name"]},
+ {"const", function.at("name")},
}},
- {"arguments", function["parameters"]},
+ {"arguments", function.at("parameters")},
}},
{"required", json::array({"name", "arguments", "id"})},
});
common_chat_params data;
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
- if (!inputs.tools.is_null() && !inputs.tools.empty()) {
+ if (inputs.tools.is_array() && !inputs.tools.empty()) {
data.grammar_lazy = inputs.tool_choice != "required";
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector<std::string> first_tool_rules;
std::vector<std::string> subsequent_tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
- std::string name = function["name"];
- auto parameters = function["parameters"];
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
auto args_rule = builder.add_schema(name + "-args", parameters);
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector<std::string> tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
- const auto & parameters = function["parameters"];
- std::string name = function["name"];
+ const auto & function = tool.at("function");
+ const auto & parameters = function.at("parameters");
+ std::string name = function.at("name");
if (name == "python" || name == "ipython") {
if (!parameters.contains("type")) {
throw std::runtime_error("Missing type in python tool");
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector<std::string> tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool["function"];
- std::string name = function["name"];
- auto parameters = function["parameters"];
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
tool_rules.push_back(builder.add_schema(name + "-call", {
{"type", "object"},
if (!parse_json(it, end, call)) {
throw std::runtime_error("Failed to parse json tool call");
}
- const auto & arguments = call["arguments"];
+ const auto & arguments = call.at("arguments");
result.tool_calls.push_back({
- call["name"],
+ call.at("name"),
arguments.dump(),
// arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
/* id= */ "",
}
common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
- auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
- LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");
+ const auto & src = tmpl.source();
+ const auto & caps = tmpl.original_caps();
- if (has_tools && !inputs.grammar.empty()) {
- throw std::runtime_error("Cannot specify grammar with tools");
+ if (inputs.tools.is_array()) {
+ if (inputs.tool_choice != "none" && !inputs.grammar.empty()) {
+ throw std::runtime_error("Cannot specify grammar with tools");
+ }
+ if (caps.supports_tool_calls && !caps.supports_tools) {
+ LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+ }
}
- const auto & src = tmpl.source();
+ // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
+ if (src.find("<|tool▁calls▁begin|>") != std::string::npos && inputs.json_schema.is_null()) {
+ return common_chat_params_init_deepseek_r1(tmpl, inputs);
+ }
+
+ // Command R7B: use handler in all cases except json schema (thinking / tools).
+ if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && inputs.json_schema.is_null()) {
+ return common_chat_params_init_command_r7b(tmpl, inputs);
+ }
+
+ // Use generic handler when mixing tools + JSON schema.
+ // TODO: support that mix in handlers below.
+ if ((!inputs.tools.is_array() && inputs.json_schema.is_object())) {
+ return common_chat_params_init_generic(tmpl, inputs);
+ }
+
+ // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
if (src.find(">>>all") != std::string::npos) {
- // Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when
return common_chat_params_init_functionary_v3_2(tmpl, inputs);
}
+
+ // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
if (src.find(" functools[") != std::string::npos) {
- // Firefunction v2 requires datetime and functions in the context, even w/o tools.
return common_chat_params_init_firefunction_v2(tmpl, inputs);
}
- if (!has_tools) {
+ // Plain handler (no tools)
+ if (inputs.tools.is_null() || inputs.tool_choice == "none") {
return common_chat_params_init_without_tools(tmpl, inputs);
}
+ // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
if (src.find("<tool_call>") != std::string::npos) {
return common_chat_params_init_hermes_2_pro(tmpl, inputs);
}
+
+ // Functionary v3.1 (w/ tools)
if (src.find("<|start_header_id|>") != std::string::npos
&& src.find("<function=") != std::string::npos) {
return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
}
+
+ // Llama 3.1, 3.2, 3.3 (w/ tools)
if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
}
- if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
- return common_chat_params_init_deepseek_r1(tmpl, inputs);
- }
+
+ // Mistral Nemo (w/ tools)
if (src.find("[TOOL_CALLS]") != std::string::npos) {
return common_chat_params_init_mistral_nemo(tmpl, inputs);
}
- if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
- return common_chat_params_init_command_r7b(tmpl, inputs);
- }
+
+ // Generic fallback
return common_chat_params_init_generic(tmpl, inputs);
}
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
- return common_chat_parse_deepseek_r1(input);
+ return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ false);
+ case COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING:
+ return common_chat_parse_deepseek_r1(input, /* extract_reasoning= */ true);
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
return common_chat_parse_functionary_v3_2(input);
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
return common_chat_parse_firefunction_v2(input);
case COMMON_CHAT_FORMAT_COMMAND_R7B:
- return common_chat_parse_command_r7b(input);
+ return common_chat_parse_command_r7b(input, /* extract_reasoning= */ false);
+ case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
+ return common_chat_parse_command_r7b(input, /* extract_reasoning= */ true);
default:
throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
}
bool stream;
std::string grammar;
bool add_generation_prompt = true;
+ bool extract_reasoning = true;
};
enum common_chat_format {
COMMON_CHAT_FORMAT_LLAMA_3_X,
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
COMMON_CHAT_FORMAT_HERMES_2_PRO,
COMMON_CHAT_FORMAT_COMMAND_R7B,
+ COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
};
+enum common_reasoning_format {
+ COMMON_REASONING_FORMAT_NONE,
+ COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`
+};
+
struct common_params {
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 4096; // context size
std::string chat_template = ""; // NOLINT
bool use_jinja = false; // NOLINT
bool enable_chat_template = true;
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
std::vector<std::string> api_keys;
std::string role;
std::string content;
std::vector<common_tool_call> tool_calls;
- std::string tool_plan = "";
+ std::string reasoning_content = "";
};
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
lparams.no_perf = params.no_perf;
- std::vector<const char *> trigger_words;
- trigger_words.reserve(params.grammar_trigger_words.size());
- for (const auto & str : params.grammar_trigger_words) {
- trigger_words.push_back(str.word.c_str());
- }
-
struct llama_sampler * grmr;
if (params.grammar.compare(0, 11, "%llguidance") == 0) {
#ifdef LLAMA_USE_LLGUIDANCE
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
#endif // LLAMA_USE_LLGUIDANCE
} else {
+ std::vector<const char *> trigger_words;
+ trigger_words.reserve(params.grammar_trigger_words.size());
+ for (const auto & str : params.grammar_trigger_words) {
+ trigger_words.push_back(str.word.c_str());
+ }
+
grmr = params.grammar_lazy
? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
trigger_words.data(), trigger_words.size(),
| `--grammar-file FNAME` | file to read grammar from |
| `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object<br/>For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead |
| `--jinja` | Enable experimental Jinja templating engine (required for tool use) |
+| `--reasoning-format FORMAT` | Controls extraction of model thinking traces and the format / field in which they are returned (default: `deepseek`; allowed values: `deepseek`, `none`; requires `--jinja`). `none` will leave thinking traces inline in `message.content` in a model-specific format, while `deepseek` will return them separately under `message.reasoning_content` |
**Example-specific params**
| Template | Format |
|----------|--------|
- | CohereForAI-c4ai-command-r-plus-default.jinja | generic tool calls |
- | CohereForAI-c4ai-command-r-plus-rag.jinja | generic tool calls |
- | CohereForAI-c4ai-command-r-plus-tool_use.jinja | generic tool calls |
- | MiniMaxAI-MiniMax-Text-01.jinja | generic tool calls |
- | NexaAIDev-Octopus-v2.jinja | generic tool calls |
- | NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | generic tool calls |
- | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | hermes 2 pro tool calls |
- | NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | generic tool calls |
- | NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | hermes 2 pro tool calls |
- | NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | generic tool calls |
- | NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | hermes 2 pro tool calls |
- | OrionStarAI-Orion-14B-Chat.jinja | generic tool calls |
- | Qwen-QwQ-32B-Preview.jinja | hermes 2 pro tool calls |
- | Qwen-Qwen2-7B-Instruct.jinja | generic tool calls |
- | Qwen-Qwen2-VL-7B-Instruct.jinja | generic tool calls |
- | Qwen-Qwen2.5-7B-Instruct.jinja | hermes 2 pro tool calls |
- | Qwen-Qwen2.5-Math-7B-Instruct.jinja | hermes 2 pro tool calls |
- | TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | generic tool calls |
- | abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | generic tool calls |
- | bofenghuang-vigogne-2-70b-chat.jinja | generic tool calls |
- | databricks-dbrx-instruct.jinja | generic tool calls |
- | deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | generic tool calls |
- | deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | deepseek r1 tool calls |
- | deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | deepseek r1 tool calls |
- | deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | deepseek r1 tool calls |
- | deepseek-ai-DeepSeek-V2.5.jinja | deepseek r1 tool calls |
- | deepseek-ai-deepseek-coder-33b-instruct.jinja | generic tool calls |
- | google-gemma-2-2b-it.jinja | generic tool calls |
- | google-gemma-7b-it.jinja | generic tool calls |
- | indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | generic tool calls |
- | mattshumer-Reflection-Llama-3.1-70B.jinja | generic tool calls |
- | meetkai-functionary-medium-v3.2.jinja | functionary v3.2 tool calls |
- | meta-llama-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
- | meta-llama-Llama-3.2-3B-Instruct.jinja | llama 3.x tool calls |
- | meta-llama-Llama-3.3-70B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
- | meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) |
- | microsoft-Phi-3-medium-4k-instruct.jinja | generic tool calls |
- | microsoft-Phi-3-mini-4k-instruct.jinja | generic tool calls |
- | microsoft-Phi-3-small-8k-instruct.jinja | generic tool calls |
- | microsoft-Phi-3.5-mini-instruct.jinja | generic tool calls |
- | microsoft-Phi-3.5-vision-instruct.jinja | generic tool calls |
- | mistralai-Mistral-7B-Instruct-v0.2.jinja | generic tool calls |
- | mistralai-Mistral-Large-Instruct-2407.jinja | mistral nemo tool calls |
- | mistralai-Mistral-Large-Instruct-2411.jinja | generic tool calls |
- | mistralai-Mistral-Nemo-Instruct-2407.jinja | mistral nemo tool calls |
- | mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | generic tool calls |
- | mlabonne-AlphaMonarch-7B.jinja | generic tool calls |
- | nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | llama 3.x tool calls (w/ builtin tools) |
- | openchat-openchat-3.5-0106.jinja | generic tool calls |
- | teknium-OpenHermes-2.5-Mistral-7B.jinja | generic tool calls |
+ | Almawave-Velvet-14B.jinja | Hermes 2 Pro |
+ | AtlaAI-Selene-1-Mini-Llama-3.1-8B.jinja | Llama 3.x |
+ | CohereForAI-aya-expanse-8b.jinja | Generic |
+ | CohereForAI-c4ai-command-r-plus-default.jinja | Generic |
+ | CohereForAI-c4ai-command-r-plus-rag.jinja | Generic |
+ | CohereForAI-c4ai-command-r-plus-tool_use.jinja | Generic |
+ | CohereForAI-c4ai-command-r7b-12-2024-default.jinja | Command R7B (extract reasoning) |
+ | CohereForAI-c4ai-command-r7b-12-2024-rag.jinja | Command R7B (extract reasoning) |
+ | CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja | Command R7B (extract reasoning) |
+ | CohereForAI-c4ai-command-r7b-12-2024.jinja | Generic |
+ | DavieLion-Llama-3.2-1B-SPIN-iter3.jinja | Generic |
+ | Delta-Vector-Rei-12B.jinja | Mistral Nemo |
+ | EpistemeAI-Mistral-Nemo-Instruct-12B-Philosophy-Math.jinja | Mistral Nemo |
+ | FlofloB-83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit.jinja | Hermes 2 Pro |
+ | FlofloB-test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit.jinja | Generic |
+ | HelpingAI-HAI-SER.jinja | Generic |
+ | HuggingFaceTB-SmolLM2-1.7B-Instruct.jinja | Generic |
+ | HuggingFaceTB-SmolLM2-135M-Instruct.jinja | Generic |
+ | HuggingFaceTB-SmolLM2-360M-Instruct.jinja | Generic |
+ | INSAIT-Institute-BgGPT-Gemma-2-27B-IT-v1.0.jinja | Generic |
+ | Ihor-Text2Graph-R1-Qwen2.5-0.5b.jinja | Hermes 2 Pro |
+ | Infinigence-Megrez-3B-Instruct.jinja | Generic |
+ | Josephgflowers-TinyLlama_v1.1_math_code-world-test-1.jinja | Generic |
+ | LGAI-EXAONE-EXAONE-3.5-2.4B-Instruct.jinja | Generic |
+ | LGAI-EXAONE-EXAONE-3.5-7.8B-Instruct.jinja | Generic |
+ | LatitudeGames-Wayfarer-12B.jinja | Generic |
+ | Magpie-Align-Llama-3-8B-Magpie-Align-v0.1.jinja | Generic |
+ | Magpie-Align-Llama-3.1-8B-Magpie-Align-v0.1.jinja | Generic |
+ | MaziyarPanahi-calme-3.2-instruct-78b.jinja | Generic |
+ | MiniMaxAI-MiniMax-Text-01.jinja | Generic |
+ | MiniMaxAI-MiniMax-VL-01.jinja | Generic |
+ | NaniDAO-deepseek-r1-qwen-2.5-32B-ablated.jinja | DeepSeek R1 (extract reasoning) |
+ | NexaAIDev-Octopus-v2.jinja | Generic |
+ | NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | Generic |
+ | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | Hermes 2 Pro |
+ | NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | Generic |
+ | NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | Hermes 2 Pro |
+ | NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | Generic |
+ | NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | Hermes 2 Pro |
+ | NovaSky-AI-Sky-T1-32B-Flash.jinja | Hermes 2 Pro |
+ | NovaSky-AI-Sky-T1-32B-Preview.jinja | Hermes 2 Pro |
+ | OnlyCheeini-greesychat-turbo.jinja | Generic |
+ | Orenguteng-Llama-3.1-8B-Lexi-Uncensored-V2.jinja | Llama 3.x |
+ | OrionStarAI-Orion-14B-Chat.jinja | Generic |
+ | PowerInfer-SmallThinker-3B-Preview.jinja | Generic |
+ | PrimeIntellect-INTELLECT-1-Instruct.jinja | Generic |
+ | Qwen-QVQ-72B-Preview.jinja | Generic |
+ | Qwen-QwQ-32B-Preview.jinja | Hermes 2 Pro |
+ | Qwen-Qwen1.5-7B-Chat.jinja | Generic |
+ | Qwen-Qwen2-7B-Instruct.jinja | Generic |
+ | Qwen-Qwen2-VL-72B-Instruct.jinja | Generic |
+ | Qwen-Qwen2-VL-7B-Instruct.jinja | Generic |
+ | Qwen-Qwen2.5-0.5B.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-1.5B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-14B-Instruct-1M.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-14B.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-32B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-32B.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-3B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-72B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-7B-Instruct-1M.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-7B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-7B.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-Coder-32B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-Coder-7B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-Math-1.5B.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-Math-7B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-VL-3B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-VL-72B-Instruct.jinja | Hermes 2 Pro |
+ | Qwen-Qwen2.5-VL-7B-Instruct.jinja | Hermes 2 Pro |
+ | RWKV-Red-Team-ARWKV-7B-Preview-0.1.jinja | Hermes 2 Pro |
+ | SakanaAI-TinySwallow-1.5B-Instruct.jinja | Hermes 2 Pro |
+ | SakanaAI-TinySwallow-1.5B.jinja | Hermes 2 Pro |
+ | Sao10K-70B-L3.3-Cirrus-x1.jinja | Llama 3.x |
+ | SentientAGI-Dobby-Mini-Leashed-Llama-3.1-8B.jinja | Llama 3.x |
+ | SentientAGI-Dobby-Mini-Unhinged-Llama-3.1-8B.jinja | Llama 3.x |
+ | Steelskull-L3.3-Damascus-R1.jinja | Llama 3.x |
+ | Steelskull-L3.3-MS-Nevoria-70b.jinja | Llama 3.x |
+ | Steelskull-L3.3-Nevoria-R1-70b.jinja | Llama 3.x |
+ | THUDM-glm-4-9b-chat.jinja | Generic |
+ | THUDM-glm-edge-1.5b-chat.jinja | Generic |
+ | Tarek07-Progenitor-V1.1-LLaMa-70B.jinja | Llama 3.x |
+ | TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | Generic |
+ | TinyLlama-TinyLlama-1.1B-Chat-v1.0.jinja | Generic |
+ | UCLA-AGI-Mistral7B-PairRM-SPPO-Iter3.jinja | Generic |
+ | ValiantLabs-Llama3.1-8B-Enigma.jinja | Llama 3.x |
+ | abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | Generic |
+ | ai21labs-AI21-Jamba-1.5-Large.jinja | Generic |
+ | allenai-Llama-3.1-Tulu-3-405B-SFT.jinja | Generic |
+ | allenai-Llama-3.1-Tulu-3-405B.jinja | Generic |
+ | allenai-Llama-3.1-Tulu-3-8B.jinja | Generic |
+ | arcee-ai-Virtuoso-Lite.jinja | Hermes 2 Pro |
+ | arcee-ai-Virtuoso-Medium-v2.jinja | Hermes 2 Pro |
+ | arcee-ai-Virtuoso-Small-v2.jinja | Hermes 2 Pro |
+ | avemio-GRAG-NEMO-12B-ORPO-HESSIAN-AI.jinja | Generic |
+ | bespokelabs-Bespoke-Stratos-7B.jinja | Hermes 2 Pro |
+ | bfuzzy1-acheron-m1a-llama.jinja | Generic |
+ | bofenghuang-vigogne-2-70b-chat.jinja | Generic |
+ | bytedance-research-UI-TARS-72B-DPO.jinja | Generic |
+ | bytedance-research-UI-TARS-7B-DPO.jinja | Generic |
+ | bytedance-research-UI-TARS-7B-SFT.jinja | Generic |
+ | carsenk-phi3.5_mini_exp_825_uncensored.jinja | Generic |
+ | cyberagent-DeepSeek-R1-Distill-Qwen-14B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
+ | cyberagent-DeepSeek-R1-Distill-Qwen-32B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
+ | databricks-dbrx-instruct.jinja | Generic |
+ | deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | Generic |
+ | deepseek-ai-DeepSeek-Coder-V2-Lite-Base.jinja | Generic |
+ | deepseek-ai-DeepSeek-Coder-V2-Lite-Instruct.jinja | Generic |
+ | deepseek-ai-DeepSeek-R1-Distill-Llama-70B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Distill-Qwen-14B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1-Zero.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-V2-Lite.jinja | Generic |
+ | deepseek-ai-DeepSeek-V2.5.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-DeepSeek-V3.jinja | DeepSeek R1 (extract reasoning) |
+ | deepseek-ai-deepseek-coder-33b-instruct.jinja | Generic |
+ | deepseek-ai-deepseek-coder-6.7b-instruct.jinja | Generic |
+ | deepseek-ai-deepseek-coder-7b-instruct-v1.5.jinja | Generic |
+ | deepseek-ai-deepseek-llm-67b-chat.jinja | Generic |
+ | deepseek-ai-deepseek-llm-7b-chat.jinja | Generic |
+ | dicta-il-dictalm2.0-instruct.jinja | Generic |
+ | ehristoforu-Falcon3-8B-Franken-Basestruct.jinja | Hermes 2 Pro |
+ | fireworks-ai-llama-3-firefunction-v2.jinja | FireFunction v2 |
+ | godlikehhd-alpaca_data_sampled_ifd_new_5200.jinja | Hermes 2 Pro |
+ | godlikehhd-alpaca_data_score_max_0.7_2600.jinja | Hermes 2 Pro |
+ | google-gemma-2-27b-it.jinja | Generic |
+ | google-gemma-2-2b-it.jinja | Generic |
+ | google-gemma-2-2b-jpn-it.jinja | Generic |
+ | google-gemma-7b-it.jinja | Generic |
+ | huihui-ai-DeepSeek-R1-Distill-Llama-70B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
+ | huihui-ai-DeepSeek-R1-Distill-Llama-8B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
+ | huihui-ai-DeepSeek-R1-Distill-Qwen-14B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) |
+ | huihui-ai-DeepSeek-R1-Distill-Qwen-32B-abliterated.jinja | DeepSeek R1 (extract reasoning) |
+ | huihui-ai-DeepSeek-R1-Distill-Qwen-7B-abliterated-v2.jinja | DeepSeek R1 (extract reasoning) |
+ | huihui-ai-Qwen2.5-14B-Instruct-1M-abliterated.jinja | Hermes 2 Pro |
+ | ibm-granite-granite-3.1-8b-instruct.jinja | Generic |
+ | indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | Generic |
+ | inflatebot-MN-12B-Mag-Mell-R1.jinja | Generic |
+ | jinaai-ReaderLM-v2.jinja | Generic |
+ | kms7530-chemeng_qwen-math-7b_24_1_100_1_nonmath.jinja | Hermes 2 Pro |
+ | knifeayumu-Cydonia-v1.3-Magnum-v4-22B.jinja | Mistral Nemo |
+ | langgptai-qwen1.5-7b-chat-sa-v0.1.jinja | Generic |
+ | lightblue-DeepSeek-R1-Distill-Qwen-7B-Japanese.jinja | DeepSeek R1 (extract reasoning) |
+ | mattshumer-Reflection-Llama-3.1-70B.jinja | Generic |
+ | meetkai-functionary-medium-v3.1.jinja | Functionary v3.1 Llama 3.1 |
+ | meetkai-functionary-medium-v3.2.jinja | Functionary v3.2 |
+ | meta-llama-Llama-2-7b-chat-hf.jinja | Generic |
+ | meta-llama-Llama-3.1-8B-Instruct.jinja | Llama 3.x |
+ | meta-llama-Llama-3.2-11B-Vision-Instruct.jinja | Llama 3.x |
+ | meta-llama-Llama-3.2-1B-Instruct.jinja | Llama 3.x |
+ | meta-llama-Llama-3.2-3B-Instruct.jinja | Llama 3.x |
+ | meta-llama-Llama-3.3-70B-Instruct.jinja | Llama 3.x |
+ | meta-llama-Meta-Llama-3-8B-Instruct.jinja | Generic |
+ | meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | Llama 3.x |
+ | microsoft-Phi-3-medium-4k-instruct.jinja | Generic |
+ | microsoft-Phi-3-mini-4k-instruct.jinja | Generic |
+ | microsoft-Phi-3-small-8k-instruct.jinja | Generic |
+ | microsoft-Phi-3.5-mini-instruct.jinja | Generic |
+ | microsoft-Phi-3.5-vision-instruct.jinja | Generic |
+ | microsoft-phi-4.jinja | Generic |
+ | migtissera-Tess-3-Mistral-Nemo-12B.jinja | Generic |
+ | ministral-Ministral-3b-instruct.jinja | Generic |
+ | mistralai-Codestral-22B-v0.1.jinja | Generic |
+ | mistralai-Mistral-7B-Instruct-v0.1.jinja | Generic |
+ | mistralai-Mistral-7B-Instruct-v0.2.jinja | Generic |
+ | mistralai-Mistral-7B-Instruct-v0.3.jinja | Mistral Nemo |
+ | mistralai-Mistral-Large-Instruct-2407.jinja | Mistral Nemo |
+ | mistralai-Mistral-Large-Instruct-2411.jinja | Generic |
+ | mistralai-Mistral-Nemo-Instruct-2407.jinja | Mistral Nemo |
+ | mistralai-Mistral-Small-24B-Instruct-2501.jinja | Generic |
+ | mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | Generic |
+ | mkurman-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
+ | mlabonne-AlphaMonarch-7B.jinja | Generic |
+ | mlx-community-Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1-float32.jinja | Hermes 2 Pro |
+ | mlx-community-Qwen2.5-VL-7B-Instruct-8bit.jinja | Hermes 2 Pro |
+ | mobiuslabsgmbh-DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1.jinja | DeepSeek R1 (extract reasoning) |
+ | netcat420-MFANNv0.20.jinja | Generic |
+ | netcat420-MFANNv0.24.jinja | Generic |
+ | netease-youdao-Confucius-o1-14B.jinja | Hermes 2 Pro |
+ | nvidia-AceMath-7B-RM.jinja | Hermes 2 Pro |
+ | nvidia-Eagle2-1B.jinja | Hermes 2 Pro |
+ | nvidia-Eagle2-9B.jinja | Hermes 2 Pro |
+ | nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | Llama 3.x |
+ | onnx-community-DeepSeek-R1-Distill-Qwen-1.5B-ONNX.jinja | DeepSeek R1 (extract reasoning) |
+ | open-thoughts-OpenThinker-7B.jinja | Hermes 2 Pro |
+ | openchat-openchat-3.5-0106.jinja | Generic |
+ | pankajmathur-orca_mini_v6_8b.jinja | Generic |
+ | princeton-nlp-Mistral-7B-Base-SFT-RDPO.jinja | Generic |
+ | princeton-nlp-Mistral-7B-Instruct-DPO.jinja | Generic |
+ | princeton-nlp-Mistral-7B-Instruct-RDPO.jinja | Generic |
+ | prithivMLmods-Bellatrix-Tiny-1.5B-R1.jinja | Hermes 2 Pro |
+ | prithivMLmods-Bellatrix-Tiny-1B-R1.jinja | Llama 3.x |
+ | prithivMLmods-Bellatrix-Tiny-1B-v3.jinja | Generic |
+ | prithivMLmods-Bellatrix-Tiny-3B-R1.jinja | Llama 3.x |
+ | prithivMLmods-Blaze-14B-xElite.jinja | Generic |
+ | prithivMLmods-Calcium-Opus-14B-Elite2-R1.jinja | Hermes 2 Pro |
+ | prithivMLmods-Calme-Ties-78B.jinja | Generic |
+ | prithivMLmods-Calme-Ties2-78B.jinja | Generic |
+ | prithivMLmods-Calme-Ties3-78B.jinja | Generic |
+ | prithivMLmods-ChemQwen2-vL.jinja | Generic |
+ | prithivMLmods-GWQ2b.jinja | Generic |
+ | prithivMLmods-LatexMind-2B-Codec.jinja | Generic |
+ | prithivMLmods-Llama-3.2-6B-AlgoCode.jinja | Llama 3.x |
+ | prithivMLmods-Megatron-Opus-14B-Exp.jinja | Hermes 2 Pro |
+ | prithivMLmods-Megatron-Opus-14B-Stock.jinja | Hermes 2 Pro |
+ | prithivMLmods-Megatron-Opus-7B-Exp.jinja | Hermes 2 Pro |
+ | prithivMLmods-Omni-Reasoner-Merged.jinja | Hermes 2 Pro |
+ | prithivMLmods-Omni-Reasoner4-Merged.jinja | Hermes 2 Pro |
+ | prithivMLmods-Primal-Opus-14B-Optimus-v1.jinja | Hermes 2 Pro |
+ | prithivMLmods-QwQ-Math-IO-500M.jinja | Hermes 2 Pro |
+ | prithivMLmods-Qwen-7B-Distill-Reasoner.jinja | DeepSeek R1 (extract reasoning) |
+ | prithivMLmods-Qwen2.5-1.5B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro |
+ | prithivMLmods-Qwen2.5-14B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
+ | prithivMLmods-Qwen2.5-32B-DeepSeek-R1-Instruct.jinja | Hermes 2 Pro |
+ | prithivMLmods-Qwen2.5-7B-DeepSeek-R1-1M.jinja | Hermes 2 Pro |
+ | prithivMLmods-Triangulum-v2-10B.jinja | Hermes 2 Pro |
+ | qingy2024-Falcon3-2x10B-MoE-Instruct.jinja | Hermes 2 Pro |
+ | rubenroy-Zurich-14B-GCv2-5m.jinja | Hermes 2 Pro |
+ | rubenroy-Zurich-7B-GCv2-5m.jinja | Hermes 2 Pro |
+ | silma-ai-SILMA-Kashif-2B-Instruct-v1.0.jinja | Generic |
+ | simplescaling-s1-32B.jinja | Hermes 2 Pro |
+ | sometimesanotion-Lamarck-14B-v0.7.jinja | Hermes 2 Pro |
+ | sonthenguyen-zephyr-sft-bnb-4bit-DPO-mtbr-180steps.jinja | Generic |
+ | sthenno-tempesthenno-icy-0130.jinja | Generic |
+ | sumink-qwft.jinja | Hermes 2 Pro |
+ | teknium-OpenHermes-2.5-Mistral-7B.jinja | Generic |
+ | thirdeyeai-elevate360m.jinja | Generic |
+ | tiiuae-Falcon3-10B-Instruct.jinja | Hermes 2 Pro |
+ | unsloth-DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit.jinja | DeepSeek R1 (extract reasoning) |
+ | unsloth-DeepSeek-R1-Distill-Llama-8B.jinja | DeepSeek R1 (extract reasoning) |
+ | unsloth-DeepSeek-R1.jinja | DeepSeek R1 (extract reasoning) |
+ | unsloth-Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit.jinja | Generic |
+ | upstage-solar-pro-preview-instruct.jinja | Generic |
+ | whyhow-ai-PatientSeek.jinja | Generic |
+ | xwen-team-Xwen-72B-Chat.jinja | Hermes 2 Pro |
+ | xwen-team-Xwen-7B-Chat.jinja | Hermes 2 Pro |
This table can be generated with:
```bash
./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
+ ```
</details>
```shell
# Native support:
+
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
+ # Native support for DeepSeek R1 works best w/ our own template (official template buggy)
+
+ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
+ --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
+
+ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M \
+ --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
+
# Native support requires the right template for these GGUFs:
llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
{"grammar_trigger_words", grammar_trigger_words},
{"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
{"preserved_tokens", sampling.preserved_tokens},
+ {"chat_format", common_chat_format_name(oaicompat_chat_format)},
{"samplers", samplers},
{"speculative.n_max", speculative.n_max},
{"speculative.n_min", speculative.n_min},
msg.content = content;
}
- json tool_calls;
+ json message {
+ {"role", "assistant"},
+ };
+ if (!msg.reasoning_content.empty()) {
+ message["reasoning_content"] = msg.reasoning_content;
+ }
+ if (msg.content.empty() && !msg.tool_calls.empty()) {
+ message["content"] = json();
+ } else {
+ message["content"] = msg.content;
+ }
if (!msg.tool_calls.empty()) {
- tool_calls = json::array();
+ auto tool_calls = json::array();
for (const auto & tc : msg.tool_calls) {
tool_calls.push_back({
{"type", "function"},
{"id", tc.id},
});
}
- }
-
- json message {
- {"content", msg.content},
- {"tool_calls", tool_calls},
- {"role", "assistant"},
- };
- if (!msg.tool_plan.empty()) {
- message["tool_plan"] = msg.tool_plan;
+ message["tool_calls"] = tool_calls;
}
json choice {
}
auto body = json::parse(req.body);
- json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+ json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
return handle_completions_impl(
SERVER_TASK_TYPE_COMPLETION,
// same with handle_chat_completions, but without inference part
const auto handle_apply_template = [&ctx_server, ¶ms, &res_ok](const httplib::Request & req, httplib::Response & res) {
auto body = json::parse(req.body);
- json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates);
+ json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
};
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
+ assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
assert expected_function_name == tool_call["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
(TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
- (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
+ # (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
- (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
+ # (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
- (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
+ # (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
# TODO: fix these
# (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
# (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
+ assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
assert expected_function_name == tool_call["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
@pytest.mark.slow
@pytest.mark.parametrize("hf_repo,template_override", [
- ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
+ ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
+
+ ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
# ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
- # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
-def test_weather(hf_repo: str, template_override: Tuple[str, str | None] | None):
+def test_weather(hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
n_predict = 512
server.n_slots = 1
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": n_predict,
"messages": [
+ {"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things."},
{"role": "user", "content": "What is the weather in Istanbul?"},
],
"tools": [WEATHER_TOOL],
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
+ assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"]
actual_arguments = json.loads(tool_call["function"]["arguments"])
assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}"
assert re.match('^Istanbul(, (TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}'
+@pytest.mark.slow
+@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [
+ (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
+ (None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
+ (None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
+ (None, 128, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+ (None, 128, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+ (None, 128, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
+ (None, 128, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+ (None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
+ ("^> 0.56$", 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
+ (None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+
+ # TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
+ ("^The y-coordinate [\\s\\S]*?\\*\\*0.5\\*\\*", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+ ("[\\s\\S]*?\\*\\*0\\.5\\*\\*", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
+])
+def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
+ global server
+ # n_predict = 512
+ server.n_slots = 1
+ server.jinja = True
+ server.n_ctx = 8192 * 2
+ server.n_predict = n_predict
+ server.model_hf_repo = hf_repo
+ server.model_hf_file = None
+ if isinstance(template_override, tuple):
+ (template_hf_repo, template_variant) = template_override
+ server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
+ assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
+ elif isinstance(template_override, str):
+ server.chat_template = template_override
+ server.start(timeout_seconds=TIMEOUT_SERVER_START)
+ res = server.make_request("POST", "/chat/completions", data={
+ "max_tokens": n_predict,
+ "messages": [
+ {"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things, and provide very concise answers. Do not explain your reasoning to the user. Provide any numerical values back to the user with at most two decimals."},
+ {"role": "user", "content": "What's the y coordinate of a point on the unit sphere at angle 30 degrees?"},
+ {
+ "role": "assistant",
+ "content": None,
+ "tool_calls": [
+ {
+ "id": "call_6789",
+ "type": "function",
+ "function": {
+ "name": "calculate",
+ "arguments": "{\"expression\":\"sin(30 * pi / 180)\"}"
+ }
+ }
+ ]
+ },
+ {
+ "role": "tool",
+ "name": "calculate",
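+                # Deliberately different from the exact value (sin(30°) = 0.5) so the test can tell whether the model relays the tool's result (~0.56) or substitutes its own computation.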
+ "content": 0.55644242476,
+ "tool_call_id": "call_6789"
+ }
+ ],
+ "tools": [
+ {
+ "type":"function",
+ "function":{
+ "name":"calculate",
+ "description":"A calculator function that computes values of arithmetic expressions in the Python syntax",
+ "parameters":{
+ "type":"object",
+ "properties":{
+ "expression":{
+ "type":"string",
+                                "description":"An arithmetic expression to compute the value of (Python syntax, assuming all floats)"
+ }
+ },
+ "required":["expression"]
+ }
+ }
+ }
+ ]
+ }, timeout=TIMEOUT_HTTP_REQUEST)
+ assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
+ choice = res.body["choices"][0]
+ tool_calls = choice["message"].get("tool_calls")
+ assert tool_calls is None, f'Expected no tool call in {choice["message"]}'
+ content = choice["message"].get("content")
+ assert content is not None, f'Expected content in {choice["message"]}'
+ if result_override is not None:
+ assert re.match(result_override, content), f'Expected {result_override}, got {content}'
+ else:
+ assert re.match('^[\\s\\S]*?The (y[ -])?coordinate [\\s\\S]*?is (approximately )?0\\.56\\b|^0\\.56$', content), \
+ f'Expected something like "The y coordinate is 0.56.", got {content}'
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("n_predict,reasoning_format,expect_content,expect_reasoning_content,hf_repo,template_override", [
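+    # reasoning_format='deepseek' moves <think>...</think> into message.reasoning_content; 'none' leaves the raw tags in message.content.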
+ (128, 'deepseek', "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+ (128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+
+ (1024, 'deepseek', "To find the sum of.*", "I need to calculate the sum of 102 and 7.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+ (1024, 'none', "<think>\n?I need[\\s\\S]*?</think>\n?To find.*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+
+ (1024, 'deepseek', "To find the sum of.*", "First, I [\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
+])
+def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none'] | None, expect_content: str | None, expect_reasoning_content: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
+ global server
+ server.n_slots = 1
+ server.reasoning_format = reasoning_format
+ server.jinja = True
+ server.n_ctx = 8192 * 2
+ server.n_predict = n_predict
+ server.model_hf_repo = hf_repo
+ server.model_hf_file = None
+ if isinstance(template_override, tuple):
+ (template_hf_repo, template_variant) = template_override
+ server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
+ assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
+ elif isinstance(template_override, str):
+ server.chat_template = template_override
+ server.start(timeout_seconds=TIMEOUT_SERVER_START)
+ res = server.make_request("POST", "/chat/completions", data={
+ "max_tokens": n_predict,
+ "messages": [
+ {"role": "user", "content": "What's the sum of 102 and 7?"},
+ ]
+ }, timeout=TIMEOUT_HTTP_REQUEST)
+ assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
+ choice = res.body["choices"][0]
+ assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}'
+
+ content = choice["message"].get("content")
+ if expect_content is None:
+ assert content is None, f'Expected no content in {choice["message"]}'
+ else:
+ assert re.match(expect_content, content), f'Expected {expect_content}, got {content}'
+
+ reasoning_content = choice["message"].get("reasoning_content")
+ if expect_reasoning_content is None:
+ assert reasoning_content is None, f'Expected no reasoning content in {choice["message"]}'
+ else:
+ assert re.match(expect_reasoning_content, reasoning_content), f'Expected {expect_reasoning_content}, got {reasoning_content}'
+
+
@pytest.mark.slow
@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
+ (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+ # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", "chatml"),
+
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
- (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
- ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
+ ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+ (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
- ('{"code":"print("}', "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
+ (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
- ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
+ (None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
(None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
-
- # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
-def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
+def test_hello_world(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
server.n_slots = 1
server.jinja = True
server.n_ctx = 8192
- server.n_predict = 128
+ server.n_predict = 512 # High because of DeepSeek R1
server.model_hf_repo = hf_repo
server.model_hf_file = None
if isinstance(template_override, tuple):
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
+ assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
if expected_arguments_override is not None:
draft_max: int | None = None
no_webui: bool | None = None
jinja: bool | None = None
+ reasoning_format: Literal['deepseek', 'none'] | None = None
chat_template: str | None = None
chat_template_file: str | None = None
server_args.append("--no-webui")
if self.jinja:
server_args.append("--jinja")
+ if self.reasoning_format is not None:
+ server_args.extend(("--reasoning-format", self.reasoning_format))
if self.chat_template:
server_args.extend(["--chat-template", self.chat_template])
if self.chat_template_file:
static json oaicompat_completion_params_parse(
const json & body, /* openai api json semantics */
bool use_jinja,
+ common_reasoning_format reasoning_format,
const common_chat_templates & chat_templates)
{
json llama_params;
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
}
common_chat_inputs inputs;
- inputs.messages = body.at("messages");
- inputs.tools = tools;
- inputs.tool_choice = tool_choice;
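+    // Extract reasoning into message.reasoning_content unless --reasoning-format none was requested.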
+ inputs.extract_reasoning = reasoning_format != COMMON_REASONING_FORMAT_NONE;
+ inputs.messages = body.at("messages");
+ inputs.tools = tools;
+ inputs.tool_choice = tool_choice;
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
--- /dev/null
+These templates can be updated with the following commands:
+
+```bash
+./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use > models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja
+./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 default > models/templates/CohereForAI-c4ai-command-r7b-12-2024-default.jinja
+./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 rag > models/templates/CohereForAI-c4ai-command-r7b-12-2024-rag.jinja
+./scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use > models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+./scripts/get_chat_template.py deepseek-ai/DeepSeek-R1-Distill-Llama-8B > models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+./scripts/get_chat_template.py deepseek-ai/DeepSeek-R1-Distill-Qwen-32B > models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+./scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 > models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
+./scripts/get_chat_template.py google/gemma-2-2b-it > models/templates/google-gemma-2-2b-it.jinja
+./scripts/get_chat_template.py meetkai/functionary-medium-v3.1 > models/templates/meetkai-functionary-medium-v3.1.jinja
+./scripts/get_chat_template.py meetkai/functionary-medium-v3.2 > models/templates/meetkai-functionary-medium-v3.2.jinja
+./scripts/get_chat_template.py meta-llama/Llama-3.1-8B-Instruct > models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja
+./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct > models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+./scripts/get_chat_template.py meta-llama/Llama-3.3-70B-Instruct > models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja
+./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct > models/templates/microsoft-Phi-3.5-mini-instruct.jinja
+./scripts/get_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 > models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja
+./scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use > models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
+./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use > models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
+./scripts/get_chat_template.py Qwen/Qwen2.5-7B-Instruct > models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
+```
\ No newline at end of file
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
\ No newline at end of file
-{% if not add_generation_prompt is defined %}
-{% set add_generation_prompt = false %}
-{% endif %}
-{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}
-{%- for message in messages %}
-{%- if message['role'] == 'system' %}
-{% set ns.system_prompt = message['content'] %}
-{%- endif %}
-{%- endfor %}
-{{bos_token}}
-{{ns.system_prompt}}
-{%- for message in messages %}
-{%- if message['role'] == 'user' %}
-{%- set ns.is_tool = false -%}
-{{'<|User|>' + message['content']}}
-{%- endif %}
-{%- if message['role'] == 'assistant' and message['content'] is none %}
-{%- set ns.is_tool = false -%}
-{%- for tool in message['tool_calls']%}
-{%- if not ns.is_first %}
-{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
-{%- set ns.is_first = true -%}
-{%- else %}
-{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
-{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
-{%- endif %}
-{%- endfor %}
-{%- endif %}
-{%- if message['role'] == 'assistant' and message['content'] is not none %}
-{%- if ns.is_tool %}
-{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
-{%- set ns.is_tool = false -%}
-{%- else %}
-{% set content = message['content'] %}
-{% if '</think>' in content %}
-{% set content = content.split('</think>')[-1] %}
-{% endif %}
-{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
-{%- endif %}
-{%- endif %}
-{%- if message['role'] == 'tool' %}
-{%- set ns.is_tool = true -%}
-{%- if ns.is_output_first %}
-{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
-{%- set ns.is_output_first = false %}
-{%- else %}
-{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
-{%- endif %}
-{%- endif %}
-{%- endfor -%}
-{% if ns.is_tool %}
-{{'<|tool▁outputs▁end|>'}}
-{% endif %}
-{% if add_generation_prompt and not ns.is_tool %}
-{{'<|Assistant|>'}}
-{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
\ No newline at end of file
--- /dev/null
+{%- if not add_generation_prompt is defined -%}
+ {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool_outputs=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+ {%- if message['role'] == 'system' -%}
+ {%- set ns.system_prompt = message['content'] -%}
+ {%- endif -%}
+{%- endfor -%}
+{{bos_token}}
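+{#- When tools are provided, advertise them up front: the R1 distills call tools more reliably when shown this explicit syntax example. -#}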
+{%- if tools %}
+You can call any of the following function tools to satisfy the user's requests: {{tools | map(attribute='function') | tojson(indent=2)}}
+
+Example function tool call syntax:
+
+<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>example_function_name
+```json
+{
+ "arg1": "some_value"
+ ...
+}
+```
+<|tool▁call▁end|><|tool▁calls▁end|>
+
+{% endif -%}
+{{ns.system_prompt}}
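+{#- Closes a pending tool-outputs block, if any, before the next non-tool message is emitted. -#}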
+{%- macro flush_tool_outputs() -%}
+ {%- if ns.is_tool_outputs -%}
+ {{- '<|tool▁outputs▁end|><|end▁of▁sentence|>' -}}
+ {%- set ns.is_tool_outputs = false -%}
+ {%- endif -%}
+{%- endmacro -%}
+{{- flush_tool_outputs() -}}
+{%- for message in messages -%}
+ {%- if message['role'] != 'tool' -%}
+ {{- flush_tool_outputs() -}}
+ {%- endif -%}
+ {%- if message['role'] == 'user' -%}
+ {{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>' -}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is none -%}
+ {{- '<|Assistant|><|tool▁calls▁begin|>' -}}
+ {%- set ns.is_first = true -%}
+ {%- for tc in message['tool_calls'] -%}
+ {%- if ns.is_first -%}
+ {%- set ns.is_first = false -%}
+ {%- else -%}
+ {{- '\n' -}}
+ {%- endif -%}
+ {%- set tool_name = tc['function']['name'] -%}
+ {%- set tool_args = tc['function']['arguments'] -%}
+ {{- '<|tool▁call▁begin|>' + tc['type'] + '<|tool▁sep|>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<|tool▁call▁end|>' -}}
+ {%- endfor -%}
+ {{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+ {{- flush_tool_outputs() -}}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>')[-1] -%}
+ {%- endif -%}
+ {{- '<|Assistant|>' + content + '<|end▁of▁sentence|>' -}}
+ {%- endif -%}
+ {%- if message['role'] == 'tool' -%}
+ {%- set ns.is_tool_outputs = true -%}
+ {%- if ns.is_output_first -%}
+ {{- '<|tool▁outputs▁begin|>' -}}
+ {%- set ns.is_output_first = false -%}
+ {%- endif -%}
+ {{- '\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>' -}}
+ {%- endif -%}
+{%- endfor -%}
+{{- flush_tool_outputs() -}}
+{%- if add_generation_prompt and not ns.is_tool_outputs -%}
+ {{- '<|Assistant|><think>\n' -}}
+{%- endif -%}
\ No newline at end of file
./scripts/get_chat_template.py model_id [variant]
Examples:
- ./scripts/get_chat_template.py NousResearch/Meta-Llama-3-8B-Instruct
- ./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use
- ./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct
+ ./scripts/get_chat_template.py CohereForAI/c4ai-command-r-plus tool_use
+ ./scripts/get_chat_template.py microsoft/Phi-3.5-mini-instruct
'''
import json
return;
}
}
- LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`) (buffer: `%s`)\n", token, piece.c_str(), grammar.trigger_buffer.c_str());
+ LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
return;
}
}
ret.content = message.at("content");
}
if (message.contains("tool_plan")) {
- ret.tool_plan = message.at("tool_plan");
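+        // Command R7B's tool_plan is surfaced through the same reasoning_content field.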
+ ret.reasoning_content = message.at("tool_plan");
+ }
+ if (message.contains("reasoning_content")) {
+ ret.reasoning_content = message.at("reasoning_content");
}
auto has_tool_calls = message.contains("tool_calls");
if (has_tool_calls) {
static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) {
assert_equals(expected.role, actual.role);
assert_equals(expected.content, actual.content);
+ assert_equals(expected.reasoning_content, actual.reasoning_content);
assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
for (size_t i = 0; i < expected.tool_calls.size(); i++) {
const auto & expected_tool_call = expected.tool_calls[i];
static delta_data init_delta(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
const json & user_message, const json & delta_message, const json & tools,
- const json & tool_choice) {
+ const json & tool_choice,
+ bool think = false) {
common_chat_inputs inputs;
inputs.parallel_tool_calls = true;
inputs.messages = json::array();
inputs.messages.push_back(user_message);
inputs.tools = tools;
inputs.tool_choice = tool_choice;
+ inputs.extract_reasoning = think;
auto params_prefix = common_chat_params_init(tmpl, inputs);
inputs.messages.push_back(delta_message);
std::string prefix = params_prefix.prompt;
std::string full = params_full.prompt;
- // Check full starts with prefix
- if (full.find(prefix) != 0) {
- fprintf(stderr, "Full:\n%s\n\nPrefix:\n%s\n\n", full.c_str(), prefix.c_str());
- throw std::runtime_error("Full message does not start with prefix");
- }
-
if (full == prefix) {
throw std::runtime_error("Full message is the same as the prefix");
}
- auto delta = full.substr(prefix.size());
+ size_t common_prefix_length = 0;
+ for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
+ if (prefix[i] != full[i]) {
+ break;
+ }
+ if (prefix[i] == '<') {
+ // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
+ // but it removes thinking tags for past messages.
+            // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, so we avoid consuming the leading <.
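+            // e.g. prefix ends with "<|Assistant|><think>\n" while full continues with "<|Assistant|><|tool▁calls▁begin|>...".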
+ continue;
+ }
+ common_prefix_length = i + 1;
+ }
+ auto delta = full.substr(common_prefix_length);
// Strip end tokens
for (const auto & end_token : end_tokens) {
*/
static void test_template(const common_chat_template & tmpl, const std::vector<std::string> & end_tokens,
const json & test_message, const json & tools = {}, const std::string & expected_delta = "",
- bool expect_grammar_triggered = true) {
+ bool expect_grammar_triggered = true,
+ bool test_grammar_if_triggered = true,
+ bool think = false) {
common_chat_msg expected_msg = msg_from_json(test_message);
auto user_message = json{
};
for (const auto & tool_choice : json({ "auto", "required" })) {
- auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice);
+ auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice, think);
if (!expected_delta.empty()) {
assert_equals(expected_delta, data.delta);
}
assert_equals(expect_grammar_triggered, grammar_triggered);
}
- if (grammar_triggered && !match_string(constrained, grammar.get())) {
+ if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
"\n\nGrammar: " + data.params.grammar);
}
}
static void test_template_output_parsers() {
- json text_message {
+ json message_user {
+ { "role", "user" },
+ { "content", "Hey there!" },
+ };
+ json message_assist {
+ { "role", "assistant" },
+ { "content", "Hello, world!\nWhat's up?" },
+ };
+ json message_assist_thoughts_unparsed_think {
+ { "role", "assistant" },
+ { "content", "<think>I'm thinking</think>Hello, world!\nWhat's up?" },
+ };
+ json message_assist_thoughts_unparsed_r7b {
+ { "role", "assistant" },
+ { "content", "<|START_THINKING|>I'm thinking<|END_THINKING|>Hello, world!\nWhat's up?" },
+ };
+ json message_assist_thoughts {
{ "role", "assistant" },
{ "content", "Hello, world!\nWhat's up?" },
+ { "reasoning_content", "I'm thinking" },
};
json tool_calls = json::array({{
{ "type", "function" },
{ "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } },
}});
- json tool_call_message {
+ json message_assist_call {
{ "role", "assistant"},
{ "content", {}},
{ "tool_calls", {
},
}},
};
- json tool_call_message_with_id {
+ json message_assist_call_thoughts = {
+ { "role", "assistant" },
+ { "content", nullptr },
+ { "reasoning_content", "I'm\nthinking" },
+ { "tool_calls", {
+ {
+ { "type", "function" },
+ { "function", {
+ { "name", "special_function" },
+ { "arguments", "{\"arg1\": 1}" },
+ }},
+ },
+ }},
+ };
+ json message_assist_call_thoughts_unparsed = {
+ { "role", "assistant" },
+ { "content", "<think>I'm\nthinking</think>" },
+ { "tool_calls", {
+ {
+ { "type", "function" },
+ { "function", {
+ { "name", "special_function" },
+ { "arguments", "{\"arg1\": 1}" },
+ }},
+ },
+ }},
+ };
+ json message_assist_call_id {
{ "role", "assistant"},
{ "content", {}},
{ "tool_calls", {
{ "content", {} },
{ "tool_calls", tool_calls }
};
- json tool_call_plan_message_with_idx {
+ json message_assist_call_idx {
{ "role", "assistant"},
{ "content", {}},
- { "tool_plan", "I'm not so sure"},
{ "tool_calls", {
{
{ "type", "function" },
{ "content", {} },
{ "tool_calls", tool_calls }
};
+ json message_assist_call_tool_plan_idx = message_assist_call_idx;
+ message_assist_call_tool_plan_idx["tool_plan"] = "I'm thinking";
- auto python_tool_call_message = json{
+ auto python_message_assist_call = json{
{ "role", "assistant" },
{ "content", {} },
{ "tool_calls", json{ {
} },
} } }
};
- auto code_interpreter_tool_call_message = json{
+ auto code_interpreter_message_assist_call = json{
{ "role", "assistant" },
{ "content", {} },
{ "tool_calls", json{ {
};
common_chat_inputs inputs_no_tools;
- inputs_no_tools.messages = {
- { { "role", "user" }, { "content", "Hey\nThere" } }
- };
+ inputs_no_tools.messages = json::array({message_user});
+ inputs_no_tools.extract_reasoning = false;
- common_chat_inputs inputs_tools = inputs_no_tools;
- inputs_tools.tools = json::array();
- inputs_tools.tools.push_back(special_function_tool);
+ common_chat_inputs inputs_no_tools_think;
+ inputs_no_tools_think.messages = json::array({message_user});
+ inputs_no_tools_think.extract_reasoning = true;
- common_chat_inputs inputs_tools_builtin = inputs_no_tools;
- inputs_tools_builtin.tools = json::array();
- inputs_tools_builtin.tools.push_back(python_tool);
+ common_chat_inputs inputs_tools;
+ inputs_tools.messages = json::array({message_user});
+ inputs_tools.tools = json::array({special_function_tool});
+ inputs_tools.extract_reasoning = false;
+
+ common_chat_inputs inputs_tools_think;
+ inputs_tools_think.messages = json::array({message_user});
+ inputs_tools_think.tools = json::array({special_function_tool});
+ inputs_tools_think.extract_reasoning = true;
+
+ common_chat_inputs inputs_tools_builtin;
+ inputs_tools_builtin.messages = json::array({message_user});
+ inputs_tools_builtin.tools = json::array({python_tool});
+ inputs_tools_builtin.extract_reasoning = false;
{
const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"), "<s>", "</s>");
std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_params_init(tmpl, inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format);
-
- test_template(tmpl, end_tokens, tool_call_plan_message_with_idx, tools,
- "<|START_THINKING|>I'm not so sure<|END_THINKING|>"
+ assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_no_tools).format);
+ assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format);
+ assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+ assert_msg_equals(msg_from_json(message_assist),
+ common_chat_parse(
+ "Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_COMMAND_R7B));
+ assert_msg_equals(msg_from_json(message_assist),
+ common_chat_parse(
+ "Hello, world!\nWhat's up?<|END_RESPONSE|>",
+ COMMON_CHAT_FORMAT_COMMAND_R7B));
+ assert_msg_equals(msg_from_json(message_assist),
+ common_chat_parse(
+ "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+ COMMON_CHAT_FORMAT_COMMAND_R7B));
+ assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
+ common_chat_parse(
+ "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+ "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+ COMMON_CHAT_FORMAT_COMMAND_R7B));
+ assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_r7b),
+ common_chat_parse(
+ "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+ "Hello, world!\nWhat's up?<|END_RESPONSE|>",
+ COMMON_CHAT_FORMAT_COMMAND_R7B));
+
+ assert_msg_equals(msg_from_json(message_assist_thoughts),
+ common_chat_parse(
+ "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+ "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
+ COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING));
+
+ test_template(tmpl, end_tokens, message_assist_call_idx, tools,
+ "<|START_THINKING|><|END_THINKING|>"
"<|START_ACTION|>[\n"
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
"]<|END_ACTION|>");
- test_template(tmpl, end_tokens, text_message, tools,
+ test_template(tmpl, end_tokens, message_assist_call_tool_plan_idx, tools,
+ "<|START_THINKING|>I'm thinking<|END_THINKING|>"
+ "<|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+ "]<|END_ACTION|>",
+ /* expect_grammar_triggered= */ true,
+ /* test_grammar_if_triggered= */ true,
+ /* think= */ true);
+ test_template(tmpl, end_tokens, message_assist, tools,
"<|START_RESPONSE|>Hello, world!\n"
"What's up?<|END_RESPONSE|>",
/* expect_grammar_triggered= */ false);
// Generic tool calls doesn't generate / parse content-only messages symmetrically.
- assert_msg_equals(msg_from_json(text_message),
+ assert_msg_equals(msg_from_json(message_assist),
common_chat_parse("{\n"
" \"response\": \"Hello, world!\\nWhat's up?\"\n"
"}",
common_chat_params_init(tmpl, inputs_tools).format));
- test_template(tmpl, end_tokens, tool_call_message_with_id, tools,
+ test_template(tmpl, end_tokens, message_assist_call_id, tools,
"{\n"
" \"tool_calls\": [\n"
" {\n"
assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format);
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
test_template(
- tmpl, end_tokens, tool_call_message_with_id, tools,
+ tmpl, end_tokens, message_assist_call_id, tools,
"[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
}
{
inputs_tools)
.format);
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_call, tools,
"<tool_call>\n"
"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
"</tool_call>");
- test_template(tmpl, end_tokens, python_tool_call_message, tools,
+ test_template(tmpl, end_tokens, python_message_assist_call, tools,
"<tool_call>\n"
"{\"name\": \"python\", \"arguments\": {\"code\": \"print('hey')\"}}\n"
"</tool_call>");
inputs_tools_builtin)
.format);
- // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, code_interpreter_tool_call_message, llama_3_1_tools,
+ // test_template(tmpl, end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, code_interpreter_message_assist_call, llama_3_1_tools,
"<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
- test_template(tmpl, end_tokens, python_tool_call_message, tools,
+ test_template(tmpl, end_tokens, python_message_assist_call, tools,
"<|python_tag|>python.call(code=\"print('hey')\")");
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist_call, tools,
"{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
}
{
assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format);
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_call, tools,
"{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
}
{
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
common_chat_params_init(tmpl, inputs_tools).format);
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_call, tools,
"<function=special_function>{\"arg1\": 1}</function>");
}
{
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_tools).format);
- test_template(tmpl, end_tokens, text_message, {},
+ test_template(tmpl, end_tokens, message_assist, {},
"all\n"
"Hello, world!\n"
"What's up?",
/* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist_call, tools,
"special_function\n"
"{\"arg1\": 1}");
}
assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format);
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_call, tools,
" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
}
{
+ // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt.
const common_chat_template tmpl(read_file("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"),
"<s>", "</s>");
std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
- assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
+ assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
+ assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
+ common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+ assert_msg_equals(msg_from_json(message_assist_thoughts),
+ common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+ assert_msg_equals(msg_from_json(message_assist_thoughts),
+        // Latest template update (as of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
+ common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+ // test_template(tmpl, end_tokens, message_assist_call, tools,
+ // "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ // "```json\n"
+ // "{\"arg1\": 1}\n"
+ // // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic)
+ // "```<|tool▁call▁end|>",
+ // /* expect_grammar_triggered= */ true,
+ // /* test_grammar_if_triggered= */ false);
+ }
+ {
+        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools.
+ const common_chat_template tmpl(read_file("models/templates/llama-cpp-deepseek-r1.jinja"),
+ "<s>", "</s>");
+ std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
- test_template(tmpl, end_tokens, text_message, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_template(tmpl, end_tokens, tool_call_message, tools,
- "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
- "```json\n"
- "{\"arg1\": 1}\n"
- "```<|tool▁call▁end|>");
+ assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format);
+ assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_params_init(tmpl, inputs_tools_think).format);
+
+ test_template(tmpl, end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ test_template(tmpl, end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+ assert_msg_equals(msg_from_json(message_assist_thoughts_unparsed_think),
+ common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+ assert_msg_equals(msg_from_json(message_assist_thoughts),
+ common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+
+ assert_msg_equals(msg_from_json(message_assist_call_thoughts_unparsed),
+ common_chat_parse(
+ "<think>I'm\nthinking</think>\n\n"
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ "```json\n"
+ "{\"arg1\": 1}\n"
+ "```<|tool▁call▁end|><|tool▁calls▁end|>",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1));
+ assert_msg_equals(msg_from_json(message_assist_call_thoughts),
+ common_chat_parse(
+ "<think>I'm\nthinking</think>\n\n"
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ "```json\n"
+ "{\"arg1\": 1}\n"
+ "```<|tool▁call▁end|><|tool▁calls▁end|>",
+ COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
+ test_template(tmpl, end_tokens, message_assist_call, tools,
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ "```json\n"
+ "{\"arg1\": 1}\n"
+ "```<|tool▁call▁end|><|tool▁calls▁end|>");
}
}
std::cout << "|----------|--------|\n";
for (int i = 1; i < argc; i++) {
- std::string path = argv[i];
- if (path.rfind(".jinja") != path.size() - 6) {
- std::cerr << "Skipping non-jinja file: " << path << std::endl;
- continue;
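+        // Process each template independently so one bad file doesn't abort the whole table.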
+ try {
+ std::string path = argv[i];
+ if (path.rfind(".jinja") != path.size() - 6) {
+ std::cerr << "Skipping non-jinja file: " << path << std::endl;
+ continue;
+ }
+ common_chat_template tmpl(read_file(path), "", "");
+ auto parts = string_split(path, "/");
+ auto name = parts[parts.size() - 1];
+ auto format = common_chat_format_name(common_chat_params_init(tmpl, inputs).format);
+ std::cout << "| " << name << " | " << format << " |\n";
+ } catch (const std::exception & e) {
+ std::cerr << "Failed to process " << argv[i] << ": " << e.what() << std::endl;
}
- common_chat_template tmpl(read_file(path), "", "");
- auto parts = string_split(path, "/");
- auto name = parts[parts.size() - 1];
- std::cout << "| " << name << " | " << common_chat_format_name(common_chat_params_init(tmpl, inputs).format)
- << " |\n";
}
} else
#endif