arg.cpp
arg.h
base64.hpp
- chat-parser.cpp
- chat-parser.h
- chat-parser-xml-toolcall.h
- chat-parser-xml-toolcall.cpp
+ chat-auto-parser-generator.cpp
+ chat-auto-parser-helpers.cpp
+ chat-auto-parser.h
+ chat-diff-analyzer.cpp
chat-peg-parser.cpp
chat-peg-parser.h
chat.cpp
--- /dev/null
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "json-schema-to-grammar.h"
+#include "nlohmann/json.hpp"
+
+#include <stdexcept>
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+// Helper to iterate over tools/functions
+static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
+ for (const auto & tool : tools) {
+ if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
+ continue;
+ }
+ fn(tool);
+ }
+}
+
+namespace autoparser {
+
+parser_build_context::parser_build_context(common_chat_peg_builder & p, const templates_params & inputs) :
+ p(p),
+ inputs(inputs),
+ reasoning_parser(p.eps()) {}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
+ const struct templates_params & inputs) {
+ // Run differential analysis to extract template structure
+ struct autoparser autoparser;
+ autoparser.analyze_template(tmpl);
+ return generate_parser(tmpl, inputs, autoparser);
+}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template & tmpl,
+ const struct templates_params & inputs,
+ const autoparser & autoparser) {
+ // Build the parser using the analysis results
+ auto parser = autoparser.build_parser(inputs);
+
+ // Create the result structure
+ common_chat_params data;
+ data.prompt = common_chat_template_direct_apply(tmpl, inputs);
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+ data.preserved_tokens = autoparser.preserved_tokens;
+ data.parser = parser.save();
+
+ // Build grammar if tools are present
+ bool has_tools =
+ autoparser.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
+ std::string trigger_marker = !autoparser.tools.format.section_start.empty() ? autoparser.tools.format.section_start :
+ autoparser.tools.format.per_call_start;
+ bool include_grammar =
+ has_tools && ((inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO && !trigger_marker.empty()) ||
+ inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
+
+ if (include_grammar) {
+ data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ auto schema = function.at("parameters");
+ builder.resolve_refs(schema);
+ });
+ parser.build_grammar(builder, data.grammar_lazy);
+ });
+
+ // Set grammar triggers based on tool section markers (fall back to per-call markers)
+ if (data.grammar_lazy) { // only do triggers on lazy grammar
+ data.grammar_triggers = {
+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
+ };
+ }
+ }
+
+ return data;
+}
+
+common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
+ if (!analysis_complete) {
+ throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
+ }
+ return build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ // If the template uses Python dict format (single-quoted strings in JSON structures),
+ // pre-register a json-string rule that accepts both quote styles. This must happen
+ // before any call to p.json() so that all JSON parsing inherits the flexible rule.
+ if (tools.format.uses_python_dicts) {
+ p.rule("json-string", [&]() { return p.choice({ p.double_quoted_string(), p.single_quoted_string() }); });
+ }
+
+ parser_build_context ctx(p, inputs);
+ bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+ bool enable_thinking = inputs.enable_thinking;
+
+ ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
+ ctx.content = &content;
+
+ // Build reasoning parser
+ ctx.reasoning_parser = reasoning.build_parser(ctx);
+
+ bool has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+
+ if (has_response_format) {
+ return ctx.reasoning_parser + p.space() +
+ p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
+ }
+
+ if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
+ return tools.build_parser(ctx);
+ }
+
+ return content.build_parser(ctx);
+ });
+}
+
+common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+
+ if (!ctx.extracting_reasoning) {
+ return p.eps();
+ }
+
+ bool thinking_forced_open = (mode == reasoning_mode::FORCED_OPEN);
+ bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);
+
+ if (thinking_forced_open || thinking_forced_closed) {
+ // Thinking is forced open OR forced closed with enable_thinking=true
+ // In both cases, expect only the closing tag (opening was in template)
+ return p.reasoning(p.until(end)) + end;
+ }
+ if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
+ // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
+ // Both use the same tag-based pattern if markers are available
+ if (!start.empty() && !end.empty()) {
+ return p.optional(start + p.reasoning(p.until(end)) + end);
+ }
+ } else if (mode == reasoning_mode::DELIMITER) {
+ return p.optional(p.reasoning(p.until(end)) + end);
+ }
+
+ return p.eps();
+}
+
+common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+
+ if (is_always_wrapped()) {
+ if (ctx.extracting_reasoning) {
+ return ctx.reasoning_parser + start + p.content(p.until(end)) + end + p.end();
+ }
+ return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
+ }
+ return ctx.reasoning_parser + p.content(p.rest()) + p.end();
+}
+
+common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+
+ if (is_always_wrapped()) {
+ return p.optional(start + p.content(p.until(end)) + end);
+ }
+ return p.eps();
+}
+
+common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const {
+ switch (format.mode) {
+ case tool_format::JSON_NATIVE:
+ return build_tool_parser_json_native(ctx);
+ case tool_format::TAG_WITH_JSON:
+ return build_tool_parser_tag_json(ctx);
+ case tool_format::TAG_WITH_TAGGED:
+ return build_tool_parser_tag_tagged(ctx);
+ default:
+ GGML_ABORT("Unable to create tool parser");
+ }
+}
+
+common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+ const auto & inputs = ctx.inputs;
+ bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ // Build effective field names with dot notation if function_field is set
+ std::string name_field = format.name_field;
+ std::string args_field = format.args_field;
+
+ if (!format.function_field.empty() && format.function_field != "function" &&
+ name_field.find('.') == std::string::npos) {
+ name_field = format.function_field + "." + name_field;
+ args_field = format.function_field + "." + args_field;
+ }
+
+ auto tools_parser = p.standard_json_tools(
+ format.section_start, format.section_end, inputs.tools, inputs.parallel_tool_calls,
+ inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
+ format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
+
+ // Handle content wrappers if present
+ if (ctx.content && ctx.content->is_always_wrapped()) {
+ auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
+ return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
+ }
+
+ std::string tool_start = "{";
+ if (!format.section_start.empty()) {
+ tool_start = format.section_start;
+ } else if (!format.per_call_start.empty()) {
+ tool_start = format.per_call_start;
+ }
+
+ return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser +
+ p.end();
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+ const auto & inputs = ctx.inputs;
+ bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ common_peg_parser tool_choice = p.choice();
+
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & func = tool.at("function");
+ std::string name = func.at("name");
+ const auto & schema = func.at("parameters");
+
+ // Build call_id parser based on position (if supported)
+ common_peg_parser call_id_section = p.eps();
+ if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+ !call_id.suffix.empty()) {
+ call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
+ }
+
+ auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
+ call_id_section + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
+ if (!function.close.empty()) {
+ func_parser = func_parser + function.close;
+ }
+ func_parser = p.atomic(func_parser);
+
+ tool_choice |= p.rule("tool-" + name, func_parser);
+ });
+
+ auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ common_peg_parser tool_calls = p.eps();
+
+ if (!format.per_call_start.empty()) {
+ auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end;
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+ } else {
+ tool_calls = p.trigger_rule("tool-call", wrapped_call);
+ }
+ if (!format.section_start.empty()) {
+ tool_calls = p.trigger_rule("tool-calls",
+ p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+ (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+ }
+ } else {
+ std::string separator = ", "; // Default
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice +
+ p.zero_or_more(separator + tool_choice) + format.section_end);
+ } else {
+ tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice + format.section_end);
+ }
+ }
+
+ if (!require_calls) {
+ tool_calls = p.optional(tool_calls);
+ }
+
+ std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
+ auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+ return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+ p.end();
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
+ auto & p = ctx.p;
+ const auto & inputs = ctx.inputs;
+ bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ common_peg_parser tool_choice = p.choice();
+
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & func = tool.at("function");
+ std::string name = func.at("name");
+ const auto & params = func.at("parameters");
+
+ if (!params.contains("properties") || !params.at("properties").is_object()) {
+ return;
+ }
+
+ const auto & properties = params.at("properties");
+ std::set<std::string> required;
+ if (params.contains("required") && params.at("required").is_array()) {
+ params.at("required").get_to(required);
+ }
+
+ // Build parser for each argument
+ std::vector<common_peg_parser> arg_parsers;
+ for (const auto & [param_name, param_schema] : properties.items()) {
+ bool is_required = required.find(param_name) != required.end();
+ std::string type = "object";
+ auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object();
+ if (type_obj.is_string()) {
+ type_obj.get_to(type);
+ } else if (type_obj.is_object()) {
+ if (type_obj.contains("type") && type_obj.at("type").is_string()) {
+ type_obj.at("type").get_to(type);
+ }
+ }
+
+ auto arg = p.tool_arg(
+ p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
+ arguments.name_suffix) +
+ arguments.value_prefix +
+ (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
+ "tool-" + name + "-arg-" + param_name + "-schema",
+ param_schema, true)) :
+ p.tool_arg_json_value(p.schema(
+ p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
+ p.space()) +
+ p.tool_arg_close(p.literal(arguments.value_suffix)));
+
+ if (is_required) {
+ arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
+ } else {
+ arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+ }
+ }
+
+ // Build arg sequence with space() between consecutive args
+ common_peg_parser args_seq = p.eps();
+ for (size_t i = 0; i < arg_parsers.size(); i++) {
+ if (i > 0) {
+ args_seq = args_seq + p.space();
+ }
+ args_seq = args_seq + arg_parsers[i];
+ }
+
+ // Build call_id parser based on position (if supported)
+ common_peg_parser call_id_section = p.eps();
+ if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+ !call_id.suffix.empty()) {
+ call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
+ }
+
+ auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
+ call_id_section + p.space() + args_seq;
+
+ if (!function.close.empty()) {
+ func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close));
+ } else if (!format.per_call_end.empty()) {
+ // When there's no func_close but there is a per_call_end marker, use peek() to ensure
+ // we only emit tool_close when we can actually see the closing marker. This prevents
+ // premature closing during partial parsing when we've seen e.g. "</" which could be
+ // either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
+ func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end)));
+ } else {
+ func_parser =
+ func_parser + p.tool_close(p.space()); // force this to process tool closing callbacks in mapper
+ }
+
+ func_parser = p.atomic(func_parser);
+ tool_choice |= p.rule("tool-" + name, func_parser);
+ });
+
+ auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+ common_peg_parser tool_calls = p.eps();
+
+ if (!format.per_call_start.empty()) {
+ auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+ } else {
+ tool_calls = p.trigger_rule("tool-call", wrapped_call);
+ }
+ if (!format.section_start.empty()) {
+ tool_calls = p.trigger_rule("tool-calls",
+ p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+ (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+ }
+ } else {
+ std::string separator = ", "; // Default
+
+ if (inputs.parallel_tool_calls) {
+ tool_calls = p.trigger_rule("tool-call", format.section_start + p.space() + tool_choice +
+ p.zero_or_more(separator + tool_choice) + p.space() +
+ format.section_end);
+ } else {
+ tool_calls = p.trigger_rule(
+ "tool-call", format.section_start + p.space() + tool_choice + p.space() + format.section_end);
+ }
+ }
+
+ if (!require_tools) {
+ tool_calls = p.optional(tool_calls);
+ }
+
+ std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
+ auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+ return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+ p.end();
+}
+
+} // namespace autoparser
--- /dev/null
+#include "chat-auto-parser-helpers.h"
+
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+#include <cctype>
+#include <numeric>
+
+using json = nlohmann::ordered_json;
+
+std::string trim_whitespace(const std::string & str) {
+ size_t start = 0;
+ while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+ start++;
+ }
+
+ if (start == str.length()) {
+ return "";
+ }
+
+ size_t end = str.length() - 1;
+ while (end > start && std::isspace(static_cast<unsigned char>(str[end]))) {
+ end--;
+ }
+
+ return str.substr(start, end - start + 1);
+}
+
+std::string trim_leading_whitespace(const std::string & str) {
+ size_t start = 0;
+ while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+ start++;
+ }
+
+ return str.substr(start);
+}
+
+std::string trim_trailing_whitespace(const std::string & str) {
+ if (str.empty()) {
+ return "";
+ }
+
+ size_t end = str.length() - 1;
+ while (end > 0 && std::isspace(static_cast<unsigned char>(str[end]))) {
+ end--;
+ }
+
+ // If first char is also whitespace, return empty string
+ if (end == 0 && std::isspace(static_cast<unsigned char>(str[0]))) {
+ return "";
+ }
+
+ return str.substr(0, end + 1);
+}
+
+std::string trim_trailing_newlines(const std::string & str) {
+ size_t end = str.length();
+ while (end > 0 && str[end - 1] == '\n') {
+ end--;
+ }
+
+ return str.substr(0, end);
+}
+
+static size_t common_prefix_len(const std::string & left, const std::string & right) {
+ size_t prefix_len = 0;
+ size_t min_len = std::min(left.length(), right.length());
+ while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) {
+ prefix_len++;
+ }
+ return prefix_len;
+}
+
+static size_t common_suffix_len(const std::string & left, const std::string & right) {
+ size_t suffix_len = 0;
+ size_t min_len = std::min(left.length(), right.length());
+ while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) {
+ suffix_len++;
+ }
+ return suffix_len;
+}
+
+diff_split calculate_diff_split(const std::string & left, const std::string & right) {
+ diff_split result;
+
+ auto left_seg = segmentize_markers(left);
+ auto right_seg = segmentize_markers(right);
+
+ if (left_seg.empty()) {
+ result.right = right;
+ return result;
+ }
+ if (right_seg.empty()) {
+ result.left = left;
+ return result;
+ }
+
+ auto left_start = left_seg.begin();
+ auto left_end = --left_seg.end();
+ auto right_start = right_seg.begin();
+ auto right_end = --right_seg.end();
+
+ auto test = [&] () {
+ return left_start != left_end && right_start != right_end;
+ };
+
+ bool left_fully_consumed = false;
+ bool right_fully_consumed = false;
+
+ while (test()) {
+ bool advanced = false;
+ if (*left_start == *right_start) {
+ result.prefix.append(left_start->value);
+ left_start++;
+ right_start++;
+ advanced = true;
+ }
+ if (*left_end == *right_end) {
+ result.suffix = left_end->value + result.suffix;
+ if (left_start != left_end) {
+ left_end--;
+ } else {
+ left_fully_consumed = true;
+ }
+ if (right_start != right_end) {
+ right_end--;
+ } else {
+ right_fully_consumed = true;
+ }
+ advanced = true;
+ }
+ if (!advanced) {
+ break;
+ }
+ }
+
+ if (left_start == left_end && right_start != right_end) {
+ if (*left_start == *right_end) {
+ result.suffix = right_end->value + result.suffix;
+ right_end--;
+ left_fully_consumed = true;
+ } else if (*left_start == *right_start) {
+ result.prefix.append(right_start->value);
+ right_start++;
+ left_fully_consumed = true;
+ }
+ } else if (right_start == right_end && left_start != left_end) {
+ if (*left_end == *right_start) {
+ result.suffix = left_end->value + result.suffix;
+ left_end--;
+ right_fully_consumed = true;
+ } else if (*left_start == *right_start) {
+ result.prefix.append(left_start->value);
+ left_start++;
+ right_fully_consumed = true;
+ }
+ } else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) {
+ result.prefix.append(right_start->value);
+ left_fully_consumed = true;
+ right_fully_consumed = true;
+ }
+
+ auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); };
+
+ bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT;
+ bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT;
+
+ std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment);
+ std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment);
+
+ size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0;
+ // avoid overlaps between prefix and suffix
+ size_t prefix_len = can_have_text_prefix ? common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len),
+ remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0;
+
+ result.prefix.append(remainder_left.substr(0, prefix_len));
+ result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix;
+ result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len);
+ result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len);
+
+ if (result.left == "" && result.right == "") {
+ // degenerate case, no diff
+ result.prefix = left;
+ result.suffix = "";
+ // pick prefix = all as representation
+ }
+ return result;
+}
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) {
+ // Find the common prefix of left and right
+ size_t common_prefix_len = 0;
+ size_t min_len = std::min(left.length(), right.length());
+ while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) {
+ common_prefix_len++;
+ }
+
+ // If there's no common prefix, return empty string
+ if (common_prefix_len == 0) {
+ return "";
+ }
+
+ // Find the common prefix in the full string
+ std::string common_prefix = left.substr(0, common_prefix_len);
+ size_t pos = full.find(common_prefix);
+
+ // If not found, return empty string
+ if (pos == std::string::npos) {
+ return "";
+ }
+
+ // Return everything before the common prefix
+ return full.substr(0, pos);
+}
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) {
+ // Find the common suffix of left and right (compare from the end)
+ size_t common_suffix_len = 0;
+ size_t min_len = std::min(left.length(), right.length());
+ while (common_suffix_len < min_len &&
+ left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) {
+ common_suffix_len++;
+ }
+
+ // If there's no common suffix, return empty string
+ if (common_suffix_len == 0) {
+ return "";
+ }
+
+ // Extract the common suffix
+ std::string common_suffix = left.substr(left.length() - common_suffix_len);
+
+ // Find the last occurrence of the common suffix in the full string
+ size_t pos = full.rfind(common_suffix);
+
+ // If not found, return empty string
+ if (pos == std::string::npos) {
+ return "";
+ }
+
+ // Return everything after the common suffix
+ return full.substr(pos + common_suffix_len);
+}
+
+// TODO: segmentize will treat a JSON array inside tags as a tag: <calls>[{ "fun": { ... } }]</calls> will be three markers
+// not too worried about that because it hasn't turned out as a problem anywhere, but noting here in case it will
+// Might have to put some restrictions on tag contents as well (like "no { }")
+std::vector<segment> segmentize_markers(const std::string & text) {
+ std::vector<segment> retval;
+ bool in_marker = false;
+ char marker_opener = '\0';
+
+ auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; };
+ auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); };
+
+ size_t last_border = 0;
+
+ for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) {
+ if (!in_marker && is_marker_opener(text[cur_pos])) {
+ if (last_border < cur_pos) {
+ retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border)));
+ }
+ last_border = cur_pos;
+ in_marker = true;
+ marker_opener = text[cur_pos];
+ } else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) {
+ // no need to check because last_border will always be smaller
+ retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1)));
+ last_border = cur_pos + 1;
+ in_marker = false;
+ marker_opener = '\0';
+ }
+ }
+ if (last_border < text.length()) {
+ retval.push_back(segment(segment_type::TEXT, text.substr(last_border)));
+ }
+ return retval;
+}
+
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments) {
+ std::vector<segment> result;
+ for (const auto & seg : segments) {
+ if (!trim_whitespace(seg.value).empty()) {
+ result.push_back(seg);
+ }
+ }
+ return result;
+}
+
+namespace autoparser {
+
+std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
+ templates_params tmpl_params;
+ tmpl_params.messages = params.messages;
+ tmpl_params.tools = params.tools;
+ tmpl_params.add_generation_prompt = params.add_generation_prompt;
+ tmpl_params.enable_thinking = params.enable_thinking;
+
+ if (params.extra_context) {
+ tmpl_params.extra_context = *params.extra_context;
+ }
+ tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
+
+ try {
+ return common_chat_template_direct_apply(tmpl, tmpl_params);
+ } catch (const std::exception & e) {
+ LOG_DBG("Template application failed: %s\n", e.what());
+ return "";
+ }
+}
+
+std::optional<compare_variants_result> compare_variants(
+ const common_chat_template & tmpl,
+ const template_params & params_A,
+ const std::function<void(template_params &)> & params_modifier) {
+ // Create variant B by copying A
+ template_params params_B = params_A;
+
+ // Apply modifier to create variant B
+ if (params_modifier) {
+ params_modifier(params_B);
+ }
+
+ // Apply template to both variants
+ std::string output_A = apply_template(tmpl, params_A);
+ std::string output_B = apply_template(tmpl, params_B);
+
+ // Check for template application failures
+ if (output_A.empty() || output_B.empty()) {
+ return std::nullopt;
+ }
+
+ // Calculate diff and return result with both outputs
+ compare_variants_result result;
+ result.diff = calculate_diff_split(output_A, output_B);
+ result.output_A = output_A;
+ result.output_B = output_B;
+
+ return result;
+}
+
+} // namespace autoparser
+
--- /dev/null
+#pragma once
+
+#include "chat-auto-parser.h"
+#include <functional>
+#include <optional>
+#include <string>
+
+std::string trim_whitespace(const std::string & str);
+std::string trim_leading_whitespace(const std::string & str);
+std::string trim_trailing_whitespace(const std::string & str);
+std::string trim_trailing_newlines(const std::string & str);
+
+// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
+// mismatched part on the left, mismatched part on the right) between two strings
+// account for markers - align prefix and suffix endings so that they end on markers
+// * eg.:
+// calculate_diff_split("<html><body><div></div></body></html>", "<html><body><p>Something</p></body><html>") ->
+// { "prefix": "<html><body>" (not: "<html><body><"), "suffix": "</body></html>", "left": "<div></div>", "right": "<p>Something</p>" }
+// calculate_diff_split("<html><body>Something</body></html>", "<html><body></body><html>") ->
+// { "prefix": "<html><body>", "suffix": "</body></html>", "left": "Something", "right": "" }
+diff_split calculate_diff_split(const std::string & left, const std::string & right);
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+// Returns empty string if there's no common prefix
+// * eg.:
+// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a "
+// until_common_prefix("<tool_call>", "<something>", "<something_else>") -> ""
+// until_common_prefix("some text", "1234", "abcd") -> ""
+// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one ""
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+// Returns empty string if there's no common suffix
+// Mirror function of `until_common_prefix`
+// * eg.:
+// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call"
+// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four"
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);
+
+// Segmentize text into markers and non-marker fragments
+// * eg.:
+// segmentize_markers("<html><head><title>The site title</title><body><div>Here's some <b>content</b></div></body></html>" ->
+// [ (MARKER, "<html>"), (MARKER, "<head>"), (MARKER, "<title>"), (TEXT, "The site title"), (MARKER, "</title>"),
+// (MARKER, "<body>"), (MARKER, "<div>"), (TEXT, "Here's some "), (MARKER, "<b>"), (TEXT, "content"), (MARKER, "</b>"),
+// (MARKER, "</div>"), (MARKER, "</body>"), (MARKER, "</html>")
+// ]
+// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") ->
+// [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ]
+std::vector<segment> segmentize_markers(const std::string & text);
+
+// Prune whitespace-only segments from a vector of segments
+// * eg.:
+// segmentize_markers("<tool_call>\n<function=foo>\n<arg=bar>\n \n</arg>\n</function>\n</tool_call>") ->
+// X = [ (MARKER, "<tool_call>"), (TEXT, "\n"), (MARKER, "<function=foo>"), (TEXT, "\n"), (MARKER, "<arg=bar>"), (TEXT, "\n \n"),
+// (MARKER, "</arg>"), (TEXT, "\n"), (MARKER, "</function>"), (TEXT, "\n"), (MARKER, "</tool_call>") ]
+// prune_whitespace_segments(X) -> [ (MARKER, "<tool_call>"), (MARKER, "<function=foo>"), (MARKER, "<arg=bar>"), (MARKER, "</arg>"),
+// (MARKER, "</function>"), (MARKER, "</tool_call>") ]
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
+
+namespace autoparser {
+
+// Apply a template with the given parameters, returning the rendered string (empty on failure)
+std::string apply_template(const common_chat_template & tmpl, const template_params & params);
+
+// Factorized differential comparison function
+// Takes base params and a single modifier lambda to create variant B
+// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
+std::optional<compare_variants_result> compare_variants(
+ const common_chat_template & tmpl,
+ const template_params & params_A,
+ const std::function<void(template_params &)> & params_modifier);
+
+} // namespace autoparser
--- /dev/null
+#pragma once
+
+#include "chat.h"
+#include "common.h"
+#include "jinja/caps.h"
+#include "peg-parser.h"
+
+#include <chrono>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+class common_chat_peg_builder;
+
+// ============================================================================
+// Parameters for template application (low-level, used by diff analysis)
+// ============================================================================
+struct template_params {
+ json messages;
+ json tools;
+ bool add_generation_prompt = false;
+ bool enable_thinking = true;
+ std::optional<json> extra_context = std::nullopt;
+};
+
+struct diff_split {
+ std::string prefix;
+ std::string suffix;
+ std::string left;
+ std::string right;
+
+ bool operator==(struct diff_split & other) const {
+ return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
+ }
+};
+
+// Result of compare_variants containing diff and original outputs
+struct compare_variants_result {
+ diff_split diff;
+ std::string output_A;
+ std::string output_B;
+};
+
+namespace autoparser {
+
+// ============================================================================
+// High-level params for parser generation
+// ============================================================================
+
+struct templates_params {
+ json messages;
+ json tools;
+ common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+ json json_schema;
+ bool parallel_tool_calls = true;
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+ bool stream = true;
+ std::string grammar;
+ bool add_generation_prompt = false;
+ bool enable_thinking = true;
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+ json extra_context;
+ bool add_bos = false;
+ bool add_eos = false;
+ bool is_inference = true;
+ bool add_inference = false;
+ bool mark_input = true; // whether to mark input strings in the jinja context
+};
+
+// ============================================================================
+// Analysis Result Enums
+// ============================================================================
+
+// Reasoning handling mode (derived from R1-R3 comparisons)
+enum class reasoning_mode {
+ NONE, // No reasoning markers detected
+ TAG_BASED, // Standard tag-based: <think>...</think>
+ DELIMITER, // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
+ FORCED_OPEN, // Template ends with open reasoning tag (empty start, non-empty end)
+ FORCED_CLOSED, // Template ends with open reasoning tag on enabled thinking but
+ // with both opened and closed tag for disabled thinking
+ TOOLS_ONLY // Only reason on tool calls, not on normal content
+};
+
+inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
+ switch (mode) {
+ case reasoning_mode::NONE:
+ return os << "NONE";
+ case reasoning_mode::TAG_BASED:
+ return os << "TAG_BASED";
+ case reasoning_mode::DELIMITER:
+ return os << "DELIMITER";
+ case reasoning_mode::FORCED_OPEN:
+ return os << "FORCED_OPEN";
+ case reasoning_mode::FORCED_CLOSED:
+ return os << "FORCED_CLOSED";
+ case reasoning_mode::TOOLS_ONLY:
+ return os << "TOOLS_ONLY";
+ default:
+ return os << "UNKNOWN";
+ }
+}
+
+// Content wrapping mode (derived from C1 comparison)
+enum class content_mode {
+ PLAIN, // No content markers
+ ALWAYS_WRAPPED, // Content always wrapped with markers
+ WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
+};
+
+inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
+ switch (mode) {
+ case content_mode::PLAIN:
+ return os << "PLAIN";
+ case content_mode::ALWAYS_WRAPPED:
+ return os << "ALWAYS_WRAPPED";
+ case content_mode::WRAPPED_WITH_REASONING:
+ return os << "WRAPPED_WITH_REASONING";
+ default:
+ return os << "UNKNOWN";
+ }
+}
+
+// Call ID position in tool calls (for non-JSON formats)
+enum class call_id_position {
+ NONE, // No call ID support detected
+ PRE_FUNC_NAME, // Call ID before function name: [CALL_ID]id[FUNC]name{args}
+ BETWEEN_FUNC_AND_ARGS, // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
+ POST_ARGS, // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
+};
+
+inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
+ switch (pos) {
+ case call_id_position::NONE:
+ return os << "NONE";
+ case call_id_position::PRE_FUNC_NAME:
+ return os << "PRE_FUNC_NAME";
+ case call_id_position::BETWEEN_FUNC_AND_ARGS:
+ return os << "BETWEEN_FUNC_AND_ARGS";
+ case call_id_position::POST_ARGS:
+ return os << "POST_ARGS";
+ default:
+ return os << "UNKNOWN";
+ }
+}
+
+// Tool call format classification (derived from T1-T5, A1-A3 comparisons)
+enum class tool_format {
+ NONE, // No tool support detected
+ JSON_NATIVE, // Pure JSON: {"name": "X", "arguments": {...}}
+ TAG_WITH_JSON, // Tag-based with JSON args: <function=X>{...}</function>
+ TAG_WITH_TAGGED, // Tag-based with tagged args: <param=key>value</param>
+};
+
+inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
+ switch (format) {
+ case tool_format::NONE:
+ return os << "NONE";
+ case tool_format::JSON_NATIVE:
+ return os << "JSON_NATIVE";
+ case tool_format::TAG_WITH_JSON:
+ return os << "TAG_WITH_JSON";
+ case tool_format::TAG_WITH_TAGGED:
+ return os << "TAG_WITH_TAGGED";
+ default:
+ return os << "UNKNOWN";
+ }
+}
+
+// ============================================================================
+// Sub-structs for tool analysis
+// ============================================================================
+
+struct tool_format_analysis {
+ tool_format mode = tool_format::NONE;
+
+ std::string section_start; // e.g., "<tool_call>", "[TOOL_CALLS]", ""
+ std::string section_end; // e.g., "</tool_call>", ""
+ std::string per_call_start; // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
+ std::string per_call_end; // e.g., "<|tool_call_end|>", ""
+
+ bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
+ bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
+ bool uses_python_dicts = false; // Tool call args use Python dict format (single-quoted strings)
+
+ std::string function_field = "function";
+ std::string name_field = "name";
+ std::string args_field = "arguments";
+ std::string id_field;
+ std::string gen_id_field;
+ std::vector<std::string> parameter_order;
+};
+
+struct tool_function_analysis {
+ std::string name_prefix; // e.g., "<function=", "\"name\": \"", "functions."
+ std::string name_suffix; // e.g., ">", "\"", ":0"
+ std::string close; // e.g., "</function>", "" (for tag-based)
+};
+
+struct tool_arguments_analysis {
+ std::string start; // e.g., "<|tool_call_argument_begin|>", "<args>"
+ std::string end; // e.g., "<|tool_call_argument_end|>", "</args>"
+ std::string name_prefix; // e.g., "<param=", "<arg_key>", "\""
+ std::string name_suffix; // e.g., ">", "</arg_key>", "\":"
+ std::string value_prefix; // e.g., "", "<arg_value>", ""
+ std::string value_suffix; // e.g., "</param>", "</arg_value>", ""
+ std::string separator; // e.g., "", "\n", ","
+};
+
+struct tool_id_analysis {
+ call_id_position pos = call_id_position::NONE;
+
+ std::string prefix; // e.g., "[CALL_ID]" (marker before call ID value)
+ std::string suffix; // e.g., "" (marker after call ID value, before next section)
+};
+
+// ============================================================================
+// Parser build context (shared interface for build_parser methods)
+// ============================================================================
+
+struct analyze_content;
+
+struct parser_build_context {
+ common_chat_peg_builder & p;
+ const templates_params & inputs;
+ common_peg_parser reasoning_parser;
+ bool extracting_reasoning = false;
+ const analyze_content * content = nullptr;
+
+ parser_build_context(common_chat_peg_builder & p, const templates_params & inputs);
+};
+
+// ============================================================================
+// Base class for analyzers with parser building
+// ============================================================================
+
+struct analyze_base {
+ virtual ~analyze_base() = default;
+ virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0;
+
+ protected:
+ const common_chat_template * tmpl = nullptr;
+
+ analyze_base() = default;
+ explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {}
+};
+
+// ============================================================================
+// Reasoning analyzer
+// ============================================================================
+
+struct analyze_reasoning : analyze_base {
+ reasoning_mode mode = reasoning_mode::NONE;
+
+ std::string start; // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
+ std::string end; // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
+
+ analyze_reasoning() = default;
+ analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
+
+ common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+ private:
+ // Look for reasoning markers in rendered content
+ void compare_reasoning_presence();
+
+ // Compare generation prompt with enable_thinking=true vs false
+ void compare_thinking_enabled();
+
+ // Check if reasoning is always possible or only in tool calls
+ void compare_reasoning_scope();
+};
+
+// ============================================================================
+// Content analyzer
+// ============================================================================
+
+struct analyze_content : analyze_base {
+ content_mode mode = content_mode::PLAIN;
+
+ std::string start; // e.g., "<response>", ">>>all\n", ""
+ std::string end; // e.g., "</response>", ""
+
+ bool requires_nonnull_content = false;
+
+ analyze_content() = default;
+ analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning);
+
+ common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+ bool is_always_wrapped() const;
+ common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
+};
+
+// ============================================================================
+// Tool analyzer
+// ============================================================================
+
+struct analyze_tools : analyze_base {
+ tool_format_analysis format;
+ tool_function_analysis function;
+ tool_arguments_analysis arguments;
+ tool_id_analysis call_id;
+
+ analyze_tools() = default;
+ analyze_tools(const common_chat_template & tmpl,
+ const jinja::caps & caps,
+ const analyze_reasoning & reasoning);
+
+ common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+ private:
+ // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
+ void analyze_tool_calls(const analyze_reasoning & reasoning);
+
+ // Analyze format based on position of function and argument name in needle
+ void analyze_tool_call_format(const std::string & haystack,
+ const std::string & fun_name_needle,
+ const std::string & arg_name_needle,
+ const analyze_reasoning & reasoning);
+
+ // Analyze specifics of JSON native format (entire tool call is a JSON object)
+ void analyze_tool_call_format_json_native(const std::string & clean_haystack,
+ const std::string & fun_name_needle,
+ const std::string & arg_name_needle);
+
+ // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
+ void analyze_tool_call_format_non_json(const std::string & clean_haystack,
+ const std::string & fun_name_needle);
+
+ // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
+ void check_per_call_markers();
+
+ // Extract function name markers
+ void extract_function_markers();
+
+ // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
+ void analyze_arguments();
+
+ // Extract argument name markers
+ void extract_argument_name_markers();
+
+ // Extract argument value markers
+ void extract_argument_value_markers();
+
+ // Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
+ void extract_argument_separator();
+
+ // Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
+ void extract_args_markers();
+
+ // Extract call ID markers, if present
+ void extract_call_id_markers();
+
+ // Per-format tool parser builders
+ common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const;
+ common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const;
+ common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const;
+};
+
+// ============================================================================
+// Main autoparser class
+// ============================================================================
+
+struct autoparser {
+ jinja::caps jinja_caps;
+ analyze_reasoning reasoning;
+ analyze_content content;
+ analyze_tools tools;
+ bool analysis_complete = false;
+
+ // Preserved tokens for tokenizer (union of all non-empty markers)
+ std::vector<std::string> preserved_tokens;
+
+ autoparser() = default;
+
+ // Run full differential analysis on a template
+ void analyze_template(const common_chat_template & tmpl);
+
+ // Build the PEG parser for this template
+ common_peg_arena build_parser(const templates_params & inputs) const;
+
+ private:
+ // Collect tokens from entire analysis to preserve
+ void collect_preserved_tokens();
+};
+
+// ============================================================================
+// Parser generator
+// ============================================================================
+
+class peg_generator {
+ public:
+ static common_chat_params generate_parser(const common_chat_template & tmpl,
+ const struct templates_params & inputs);
+
+ static common_chat_params generate_parser(const common_chat_template & tmpl,
+ const struct templates_params & inputs,
+ const autoparser & autoparser);
+};
+
+} // namespace autoparser
+
+enum segment_type { TEXT, MARKER };
+
+inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
+ switch (type) {
+ case segment_type::TEXT:
+ return os << "TEXT";
+ case segment_type::MARKER:
+ return os << "MARKER";
+ default:
+ return os << "UNKNOWN";
+ }
+}
+
+struct segment {
+ segment_type type;
+ std::string value;
+
+ segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
+
+ bool operator==(const segment & other) const {
+ return type == other.type && value == other.value;
+ }
+
+ bool operator!=(const segment & other) const {
+ return !(*this == other);
+ }
+};
--- /dev/null
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <algorithm>
+
+#define ANSI_RESET "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m"
+#define ANSI_ORANGE "\033[1m\x1b[38;5;214m"
+#define ANSI_RED "\033[1m\x1b[38;5;196m"
+
+using json = nlohmann::ordered_json;
+
+namespace autoparser {
+
+static const std::string FUN_FIRST = "FFF_FIRST_FUN_F";
+static const std::string FUN_SECOND = "SSS_SECOND_FUN_S";
+static const std::string ARG_FIRST = "AA_ARG_FST_AA";
+static const std::string ARG_SECOND = "BB_ARG_SND_BB";
+static const std::string USER_MSG = "U_USER_MSG Hello END_U";
+static const std::string ASSISTANT_MSG = "A_ASST_MSG I can help END_A";
+static const std::string THINKING_CONTENT = "REASON_PART I am thinking END_R";
+
+static std::vector<std::function<void(const common_chat_template & tmpl, autoparser &)>> workarounds(
+ { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
+ // support reasoning on them
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+ if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
+ tmpl.src.find("reasoning_content") == std::string::npos &&
+ analysis.reasoning.mode == reasoning_mode::NONE) {
+ analysis.reasoning.mode = reasoning_mode::FORCED_OPEN;
+ analysis.reasoning.start = "<think>";
+ analysis.reasoning.end = "</think>";
+ analysis.preserved_tokens.push_back("<think>");
+ analysis.preserved_tokens.push_back("</think>");
+ LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
+ }
+ },
+ // Granite 3.3, with separate reasoning and content markers
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+ if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
+ "<response></response>") != std::string::npos) {
+ analysis.reasoning.mode = reasoning_mode::TAG_BASED;
+ analysis.reasoning.start = "<think>";
+ analysis.reasoning.end = "</think>";
+ analysis.preserved_tokens.push_back("<think>");
+ analysis.preserved_tokens.push_back("</think>");
+ analysis.content.mode = content_mode::WRAPPED_WITH_REASONING;
+ analysis.content.start = "<response>";
+ analysis.content.end = "</response>";
+ analysis.preserved_tokens.push_back("<response>");
+ analysis.preserved_tokens.push_back("</response>");
+ LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET);
+ }
+ },
+ // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+ if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
+ tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) {
+ analysis.content.mode = content_mode::ALWAYS_WRAPPED;
+ analysis.content.start = "<|CHATBOT_TOKEN|>";
+ analysis.content.end = "<|END_OF_TURN_TOKEN|>";
+ analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>");
+ analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>");
+ LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET);
+ }
+ },
+ // Functionary - no tool call section delimiter
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+ if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", "
+ "\"code_interpreter\") | list | length > 0") != std::string::npos) {
+ analysis.content.mode = content_mode::PLAIN;
+ analysis.content.end = "";
+ analysis.tools.function.name_prefix = "";
+ analysis.tools.format.section_start = "";
+ analysis.tools.format.section_end = "";
+ analysis.tools.format.per_call_start = "<function=";
+ analysis.tools.format.per_call_end = "</function>";
+ analysis.tools.function.close = "";
+ analysis.preserved_tokens.clear();
+ analysis.preserved_tokens.push_back("<|eot_id|>");
+ analysis.preserved_tokens.push_back("<|eom_id|>");
+ analysis.preserved_tokens.push_back("<function=");
+ analysis.preserved_tokens.push_back(">");
+ analysis.preserved_tokens.push_back("</function>");
+ LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET);
+ }
+ },
+ // DeepSeek-R1-Distill-Qwen
+ [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+ if (tmpl.src.find(
+ "{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>'") !=
+ std::string::npos) {
+ analysis.tools.format.section_start = "<|tool▁calls▁begin|>";
+ analysis.tools.format.section_end = "<|tool▁calls▁end|>";
+ analysis.tools.format.per_call_start = "<|tool▁call▁begin|>function";
+ analysis.tools.function.name_prefix = "<|tool▁sep|>";
+ analysis.tools.format.per_call_end = "<|tool▁call▁end|>";
+ analysis.tools.function.close = "```";
+ }
+ }
+ });
+
+// Common JSON structures
+static json params_schema = {
+ { "type", "object" },
+ { "properties",
+ { { ARG_FIRST, { { "type", "string" }, { "description", "First argument" } } },
+ { ARG_SECOND, { { "type", "string" }, { "description", "Second argument" } } } } },
+ { "required", json::array({}) }
+};
+
+static json tools = json::array({
+ { { "type", "function" },
+ { "function",
+ json{ { "name", FUN_FIRST }, { "description", "Test function foo" }, { "parameters", params_schema } } } },
+ { { "type", "function" },
+ { "function",
+ json{ { "name", FUN_SECOND }, { "description", "Test function bar" }, { "parameters", params_schema } } } }
+});
+
+static json user_msg = json{
+ { "role", "user" },
+ { "content", USER_MSG }
+};
+
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call00001") {
+ return json{
+ { "id", id },
+ { "type", "function" },
+ { "function", json{ { "name", name }, { "arguments", args } } }
+ };
+}
+
+static json first_tool_call_zero_args = build_tool_call(FUN_FIRST, json::object(), "call00001");
+static json first_tool_call_one_arg = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "XXXX" }}, "call00001");
+static json first_tool_call_one_arg_other_val = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "YYYY" }}, "call00001");
+static json first_tool_call_other_arg = build_tool_call(FUN_FIRST, {{ ARG_SECOND, "YYYY" }}, "call00001");
+
+static json first_tool_call =
+ build_tool_call(FUN_FIRST, json{{ ARG_FIRST, "XXXX" }, { ARG_SECOND, "YYYY" }}, "call00001");
+static json second_tool_call =
+ build_tool_call(FUN_SECOND, json{ { ARG_FIRST, "XXXX" }, { ARG_SECOND, "YYYY" }}, "call00002");
+static json first_tool_call_alt_id =
+ build_tool_call(FUN_FIRST, json{{ ARG_FIRST, "XXXX" }, { ARG_SECOND, "YYYY" }}, "call99999");
+
+template <typename T>
+static std::string mode_to_str(T mode) {
+ std::ostringstream os;
+ os << mode;
+ return os.str();
+}
+
+void autoparser::analyze_template(const common_chat_template & tmpl) {
+ jinja_caps = tmpl.original_caps();
+ reasoning = analyze_reasoning(tmpl, jinja_caps.supports_tool_calls);
+ content = analyze_content(tmpl, reasoning);
+ tools = analyze_tools(jinja_caps.supports_tool_calls ? analyze_tools(tmpl, jinja_caps, reasoning) : analyze_tools());
+ collect_preserved_tokens();
+
+ for (auto & workaround : workarounds) {
+ workaround(tmpl, *this);
+ }
+
+ LOG_DBG("\n--- Reasoning & Content Structure ---\n");
+ LOG_DBG("reasoning_mode: %s\n", mode_to_str(reasoning.mode).c_str());
+ LOG_DBG("reasoning_start: '%s'\n", reasoning.start.c_str());
+ LOG_DBG("reasoning_end: '%s'\n", reasoning.end.c_str());
+ LOG_DBG("content_mode: %s\n", mode_to_str(content.mode).c_str());
+ LOG_DBG("content_start: '%s'\n", content.start.c_str());
+ LOG_DBG("content_end: '%s'\n", content.end.c_str());
+
+ LOG_DBG("\n--- Tool Call Structure ---\n");
+ LOG_DBG("tool_mode: %s\n", mode_to_str(tools.format.mode).c_str());
+ LOG_DBG("supports_tools: %s\n", jinja_caps.supports_tools ? "true" : "false");
+ LOG_DBG("supports_parallel_calls: %s\n", jinja_caps.supports_parallel_tool_calls ? "true" : "false");
+ LOG_DBG("tool_section_start: '%s'\n", tools.format.section_start.c_str());
+ LOG_DBG("tool_section_end: '%s'\n", tools.format.section_end.c_str());
+ LOG_DBG("per_call_start: '%s'\n", tools.format.per_call_start.c_str());
+ LOG_DBG("per_call_end: '%s'\n", tools.format.per_call_end.c_str());
+ LOG_DBG("func_name_prefix: '%s'\n", tools.function.name_prefix.c_str());
+ LOG_DBG("func_name_suffix: '%s'\n", tools.function.name_suffix.c_str());
+ LOG_DBG("func_close: '%s'\n", tools.function.close.c_str());
+ LOG_DBG("python_dict_format: %s\n", tools.format.uses_python_dicts ? "true" : "false");
+ LOG_DBG("arg_name_prefix: '%s'\n", tools.arguments.name_prefix.c_str());
+ LOG_DBG("arg_name_suffix: '%s'\n", tools.arguments.name_suffix.c_str());
+ LOG_DBG("arg_value_prefix: '%s'\n", tools.arguments.value_prefix.c_str());
+ LOG_DBG("arg_value_suffix: '%s'\n", tools.arguments.value_suffix.c_str());
+ LOG_DBG("name_field: '%s'\n", tools.format.name_field.c_str());
+ LOG_DBG("args_field: '%s'\n", tools.format.args_field.c_str());
+ LOG_DBG("id_field: '%s'\n", tools.format.id_field.c_str());
+ LOG_DBG("gen_id_field: '%s'\n", tools.format.gen_id_field.c_str());
+ LOG_DBG("parameter_order: '%s'\n", std::accumulate(tools.format.parameter_order.begin(), tools.format.parameter_order.end(),
+ std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
+ ).c_str());
+
+ LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET);
+ analysis_complete = true;
+}
+
+void autoparser::collect_preserved_tokens() {
+ auto add_token = [this](const std::string & org_token) {
+ std::string token = trim_whitespace(org_token);
+ if (!token.empty()) {
+ // Avoid duplicates
+ if (std::find(preserved_tokens.begin(), preserved_tokens.end(), token) == preserved_tokens.end()) {
+ preserved_tokens.push_back(token);
+ }
+ }
+ };
+
+ add_token(reasoning.start);
+ add_token(reasoning.end);
+ add_token(content.start);
+ add_token(content.end);
+ add_token(tools.format.section_start);
+ add_token(tools.format.section_end);
+ add_token(tools.format.per_call_start);
+ add_token(tools.format.per_call_end);
+ add_token(tools.function.name_prefix);
+ add_token(tools.function.name_suffix);
+ add_token(tools.function.close);
+ add_token(tools.arguments.start);
+ add_token(tools.arguments.end);
+ add_token(tools.arguments.name_prefix);
+ add_token(tools.arguments.name_suffix);
+ add_token(tools.arguments.separator);
+ add_token(tools.arguments.value_prefix);
+ add_token(tools.arguments.value_suffix);
+ add_token(tools.call_id.prefix);
+ add_token(tools.call_id.suffix);
+}
+
+analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools)
+ : analyze_base(tmpl) {
+ LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET);
+ LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET);
+
+ compare_reasoning_presence();
+ compare_thinking_enabled();
+ if (supports_tools) {
+ compare_reasoning_scope();
+ }
+}
+
+void analyze_reasoning::compare_reasoning_presence() {
+ json user_msg = json{
+ { "role", "user" },
+ { "content", USER_MSG }
+ };
+
+ json assistant_no_reasoning = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG }
+ };
+
+ json assistant_with_reasoning = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG },
+ { "reasoning_content", THINKING_CONTENT }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_no_reasoning });
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ const std::string reasoning_content = THINKING_CONTENT;
+
+ if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
+ auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
+ });
+ auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+ });
+ // try the more aggressive parse first, if it fails, fall back to the delimiter one
+ auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+ if (!result.result.success()) {
+ result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+ }
+ if (result.result.success()) {
+ if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
+ if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close
+ mode = reasoning_mode::TAG_BASED;
+ } else {
+ mode = reasoning_mode::FORCED_CLOSED;
+ }
+ start = trim_whitespace(result.tags["pre"]);
+ end = result.tags["post"];
+ } else if (!result.tags["post"].empty()) {
+ mode = reasoning_mode::DELIMITER;
+ end = result.tags["post"];
+ }
+ }
+ }
+}
+
+void analyze_reasoning::compare_thinking_enabled() {
+ json user_msg = json{
+ { "role", "user" },
+ { "content", USER_MSG }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg });
+ params.add_generation_prompt = true;
+ params.enable_thinking = false;
+
+ auto comparison = compare_variants(*tmpl, params, [&](template_params & p) { p.enable_thinking = true; });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ std::string left_trimmed = trim_whitespace(diff.left);
+
+ if (left_trimmed.empty() && !diff.right.empty()) {
+ std::string right_trimmed = trim_whitespace(diff.right);
+
+ if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
+ if (start.empty()) {
+ start = right_trimmed;
+ mode = reasoning_mode::FORCED_OPEN;
+ }
+ }
+ }
+
+ if (start.empty() && !end.empty()) {
+ mode = reasoning_mode::DELIMITER;
+ }
+
+ // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
+ // but enable_thinking=true produces only the start marker
+ if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
+ auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.literal(start) + p.space() + p.literal(end) + p.rest();
+ });
+ auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
+ });
+ if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
+ parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
+ mode = reasoning_mode::FORCED_CLOSED;
+ } else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
+ auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
+ if (result.result.success()) {
+ start = result.tags["pre"];
+ mode = reasoning_mode::FORCED_CLOSED;
+ }
+ }
+ }
+
+ if (start.empty() && end.empty()) { // we might still have the case of "just open" and "just close"
+ if (!diff.left.empty() && !diff.right.empty()) {
+ auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
+ auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
+ if (seg_A.size() == 1 && seg_B.size() == 1) {
+ mode = reasoning_mode::FORCED_CLOSED;
+ start = seg_B[0].value;
+ end = seg_A[0].value;
+ }
+ }
+ }
+}
+
+void analyze_reasoning::compare_reasoning_scope() {
+ json assistant_reasoning_content = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG },
+ { "reasoning_content", THINKING_CONTENT }
+ };
+
+ json assistant_reasoning_tools = json{
+ { "role", "assistant" },
+ { "content", nullptr },
+ { "reasoning_content", THINKING_CONTENT },
+ { "tool_calls",
+ json::array({ build_tool_call(FUN_FIRST, json{ { ARG_FIRST, "VVVV" }, { ARG_SECOND, "XXXX" } }) }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_reasoning_content });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ std::string reasoning_content = THINKING_CONTENT;
+
+ // Check if reasoning only appears in variant B (with tools)
+ bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos;
+ bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos;
+
+ if (!reasoning_in_A && reasoning_in_B) {
+ mode = reasoning_mode::TOOLS_ONLY;
+ LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__);
+
+ auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space()));
+ });
+ auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+ if (result.result.success()) {
+ start = result.tags["pre"];
+ end = result.tags["post"];
+ } else {
+ auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
+ });
+ result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+ if (result.result.success()) {
+ end = result.tags["post"];
+ } else {
+ LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
+ mode = reasoning_mode::NONE;
+ }
+ }
+ }
+}
+
+analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning)
+ : analyze_base(tmpl) {
+ LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET);
+
+ json assistant_content_only = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG }
+ };
+
+ json assistant_with_tools = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) }
+ };
+
+ json assistant_with_reasoning = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "reasoning_content", THINKING_CONTENT }
+ };
+
+ template_params params_content_only;
+ params_content_only.messages = json::array({ user_msg, assistant_content_only });
+ params_content_only.add_generation_prompt = false;
+ params_content_only.enable_thinking = true;
+ params_content_only.tools = tools;
+
+ auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+ p.messages = json::array({ user_msg, assistant_with_tools });
+ });
+
+ auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+ p.messages = json::array({ user_msg, assistant_with_reasoning });
+ });
+
+ if (!comparison_with_tools || !comparison_with_reasoning) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ }
+
+ const auto & diff_tools = comparison_with_tools->diff;
+ const auto & diff_reasoning = comparison_with_reasoning->diff;
+
+ std::string response = ASSISTANT_MSG;
+
+ bool found_plain_content = false;
+ if (trim_whitespace(diff_tools.left) == response) {
+ auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.space() + diff_reasoning.left + p.space() + p.optional(p.marker()) + p.space() + p.end();
+ });
+ if (parser.parse_and_extract(diff_reasoning.left).result.success()) {
+ // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
+ mode = content_mode::PLAIN;
+ found_plain_content = true;
+ } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) {
+ auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.literal(reasoning.end) + p.space() + p.literal(response);
+ });
+ if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) {
+ mode = content_mode::PLAIN;
+ found_plain_content = true;
+ }
+ }
+ }
+ if (!found_plain_content) {
+ std::string rdiff = diff_reasoning.left;
+ if (!reasoning.end.empty() && rdiff.find(reasoning.end) != std::string::npos) {
+ rdiff = rdiff.substr(rdiff.find(reasoning.end) + reasoning.end.length());
+ }
+ // Take the more promising diff
+ std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
+ auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.marker()) + p.space() + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+ });
+ auto result = parser_wrapped.parse_anywhere_and_extract(pure_content);
+ start = result.tags["pre"];
+ end = result.tags["post"];
+ // TODO: WRAPPED_WITH_REASONING
+ }
+
+ // Determine content mode
+ if (!start.empty() || !end.empty()) {
+ mode = content_mode::ALWAYS_WRAPPED;
+ // TODO: END_DELIMITED content mode - delimited at end but not at start?
+ }
+}
+
+bool analyze_content::is_always_wrapped() const {
+ return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
+}
+
+analyze_tools::analyze_tools(const common_chat_template & tmpl,
+ const jinja::caps & caps,
+ const analyze_reasoning & reasoning)
+ : analyze_base(tmpl) {
+ LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET);
+
+ analyze_tool_calls(reasoning);
+
+ if (format.mode != tool_format::NONE && format.mode != tool_format::JSON_NATIVE) {
+ if (caps.supports_parallel_tool_calls) {
+ check_per_call_markers();
+ }
+ extract_function_markers();
+ if (format.mode == tool_format::TAG_WITH_TAGGED) {
+ analyze_arguments();
+ }
+ extract_argument_separator();
+ extract_args_markers();
+ extract_call_id_markers();
+ }
+}
+
+void analyze_tools::analyze_tool_calls(const analyze_reasoning & reasoning) {
+ json assistant_no_tools = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG }
+ };
+
+ json assistant_with_tools = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_no_tools });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ std::string tool_section = diff.right;
+
+ if (tool_section.empty()) {
+ return;
+ }
+
+ analyze_tool_call_format(tool_section, FUN_FIRST, ARG_FIRST, reasoning);
+}
+
+void analyze_tools::analyze_tool_call_format(const std::string & haystack,
+ const std::string & fun_name_needle,
+ const std::string & arg_name_needle,
+ const analyze_reasoning & reasoning) {
+ if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) {
+ return;
+ }
+
+ enum class json_quote_style { NONE, DOUBLE_QUOTES, SINGLE_QUOTES };
+
+ auto in_json_haystack = [&haystack](const std::string & needle) -> json_quote_style {
+ auto parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.choice({ p.literal("{"), p.literal(":") }) << p.choice({
+ p.tag("sq", p.literal("'") + p.literal(needle) + p.literal("'")),
+ p.tag("dq", p.literal("\"") + p.literal(needle) + p.literal("\"")) });
+ });
+ auto result = parser.parse_anywhere_and_extract(haystack);
+ if (!result.result.success()) {
+ return json_quote_style::NONE;
+ }
+ return result.tags.count("sq") && !result.tags["sq"].empty()
+ ? json_quote_style::SINGLE_QUOTES
+ : json_quote_style::DOUBLE_QUOTES;
+ };
+
+ auto fun_quote = in_json_haystack(fun_name_needle);
+ auto arg_quote = in_json_haystack(arg_name_needle);
+
+ if (fun_quote != json_quote_style::NONE) {
+ // no need to check further, we're in JSON land
+ format.mode = tool_format::JSON_NATIVE;
+ format.uses_python_dicts = (fun_quote == json_quote_style::SINGLE_QUOTES);
+ } else if (arg_quote != json_quote_style::NONE) {
+ format.mode = tool_format::TAG_WITH_JSON;
+ format.uses_python_dicts = (arg_quote == json_quote_style::SINGLE_QUOTES);
+ } else {
+ format.mode = tool_format::TAG_WITH_TAGGED;
+ }
+
+ // first, remove any reasoning markers
+ std::string clean_haystack = haystack;
+ if (!reasoning.start.empty()) {
+ auto pos = haystack.find(reasoning.start);
+ if (pos != std::string::npos) {
+ clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + reasoning.start.length());
+ }
+ }
+ if (!reasoning.end.empty()) {
+ auto pos = clean_haystack.find(reasoning.end);
+ if (pos != std::string::npos) {
+ clean_haystack = clean_haystack.substr(0, pos) + clean_haystack.substr(pos + reasoning.end.length());
+ }
+ }
+
+ if (format.mode == tool_format::JSON_NATIVE) {
+ analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle);
+ } else {
+ analyze_tool_call_format_non_json(clean_haystack, fun_name_needle);
+ }
+ // always relax whitespace requirements on ending markers since they don't influence content
+ format.section_end = trim_whitespace(format.section_end);
+ format.per_call_end = trim_whitespace(format.per_call_end);
+}
+
+void analyze_tools::analyze_tool_call_format_json_native(const std::string & clean_haystack,
+ const std::string & fun_name_needle,
+ const std::string & arg_name_needle) {
+ // we might not have the typical OpenAI tool calling structure
+ int json_start = clean_haystack.find_first_of('{');
+ int json_end = clean_haystack.find_last_of('}');
+ std::string cut = clean_haystack.substr(json_start, json_end - json_start + 1);
+ json call_struct = json::parse(cut);
+ auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
+ if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) {
+ format.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+ } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) {
+ format.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+ } else if (subel.value().dump().find(arg_name_needle) !=
+ std::string::npos) { // handle both string and JSON obj variants
+ format.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+ } else if (subel.key().find("id") != std::string::npos) {
+ // heuristics for generated id field
+ format.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+ }
+ };
+ for (const auto & el : call_struct.items()) {
+ if (el.key() == fun_name_needle) {
+ format.fun_name_is_key = true;
+ // When function name is the key, there's no name field and args are direct
+ format.name_field.clear();
+ format.args_field.clear();
+ // Don't register this element - the function name IS the key, not a field
+ } else {
+ if (el.value().is_object() &&
+ el.value().dump().find(arg_name_needle) == std::string::npos) { // not the args object
+ format.function_field = el.key();
+ for (const auto & subel : el.value().items()) {
+ register_field(el.key(), subel);
+ }
+ }
+ // Register this element as a potential field
+ register_field("", el);
+ }
+ }
+ auto array_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.literal("[") + p.space()) + p.literal(cut) + p.tag("post", p.space() + p.literal("]"));
+ });
+
+ auto ar_parse_res = array_parser.parse_anywhere_and_extract(clean_haystack);
+ if (ar_parse_res.result.success()) {
+ format.tools_array_wrapped = true;
+ json_start -= ar_parse_res.tags["pre"].length();
+ json_end += ar_parse_res.tags["post"].length();
+ }
+ json_end++; // we want to move past the closing char for end marker extraction
+
+ std::vector<std::pair<size_t, std::string>> located_params;
+ if (!format.name_field.empty()) {
+ located_params.push_back({ clean_haystack.find(format.name_field), format.name_field });
+ }
+ if (!format.args_field.empty()) {
+ located_params.push_back({ clean_haystack.find(format.args_field), format.args_field });
+ }
+ if (!format.id_field.empty()) {
+ located_params.push_back({ clean_haystack.find(format.id_field), format.id_field });
+ }
+ if (!format.gen_id_field.empty()) {
+ located_params.push_back({ clean_haystack.find(format.gen_id_field), format.gen_id_field });
+ }
+ std::sort(located_params.begin(), located_params.end());
+ for (auto & pair : located_params) {
+ format.parameter_order.push_back(pair.second);
+ }
+ // we can immediately extract tool calling markers too
+ format.section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start));
+ format.section_end = trim_whitespace(clean_haystack.substr(json_end));
+ // When tools_array_wrapped is true, the closing bracket is part of the array structure,
+ // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets.
+ if (format.tools_array_wrapped && format.section_end == "]") {
+ format.section_end.clear();
+ }
+}
+
+void analyze_tools::analyze_tool_call_format_non_json(const std::string & clean_haystack,
+ const std::string & fun_name_needle) {
+ // first, let's find out if the function is inside a tag or standalone
+ auto fun_marker_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("fun_marker", p.choice({
+ p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", fun_name_needle })) + p.literal(fun_name_needle) +
+ p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+ p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", fun_name_needle })) + p.literal(fun_name_needle) +
+ p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+ });
+ auto fun_res = fun_marker_parser.parse_anywhere_and_extract(clean_haystack);
+ std::string fun_marker = fun_name_needle;
+ if (fun_res.result.success()) {
+ fun_marker = fun_res.tags["fun_marker"];
+ }
+ // now, consume up to two markers, then treat everything up to the function marker as function name prefix
+ auto per_tool_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("sec_start", p.marker() + p.space()) + p.tag("call_start", p.marker() + p.space()) +
+ p.tag("fun_pre", p.until(fun_marker)) + fun_marker + p.tag("rest", p.rest());
+ });
+ auto section_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("sec_start", p.marker() + p.space()) + fun_marker + p.tag("rest", p.rest());
+ });
+ auto result = per_tool_parser.parse_anywhere_and_extract(clean_haystack);
+ tagged_parse_result result_end;
+ if (result.result.success()) {
+ auto double_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("call_end", p.marker() + p.space()) + p.tag("sec_end", p.marker() + p.space()) + p.end();
+ });
+ result_end = double_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+ function.name_prefix = fun_res.tags["fun_pre"] + function.name_prefix;
+ } else {
+ result = section_parser.parse_anywhere_and_extract(clean_haystack);
+ auto single_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("sec_end", p.marker() + p.space()) + p.end();
+ });
+ result_end = single_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+ }
+ format.per_call_start = result.tags["call_start"];
+ format.per_call_end = result_end.tags["call_end"];
+ format.section_start = result.tags["sec_start"];
+ format.section_end = result_end.tags["sec_end"];
+}
+
+void analyze_tools::check_per_call_markers() {
+ json assistant_one_tool = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call }) }
+ };
+
+ json assistant_two_tools = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call, second_tool_call }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_one_tool });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto one_vs_two = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); });
+
+ if (!one_vs_two) {
+ LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right);
+
+ std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right);
+ if (!format.section_start.empty() &&
+ second_tool_content.find(format.section_start) == 0) {
+ format.per_call_start = format.section_start;
+ format.per_call_end = format.section_end;
+ format.section_start.clear();
+ format.section_end.clear();
+ }
+}
+
+void analyze_tools::extract_function_markers() {
+ json assistant_nocall = json{
+ { "role", "assistant" },
+ { "content", ASSISTANT_MSG },
+ };
+
+ json assistant_foofoo = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call }) }
+ };
+
+ json assistant_barbar = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ second_tool_call }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_foofoo });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (diff.left.find(FUN_FIRST) != std::string::npos && diff.right.find(FUN_SECOND) != std::string::npos) {
+ std::string prefix_marker;
+ if (!format.per_call_start.empty()) {
+ prefix_marker = format.per_call_start;
+ } else {
+ prefix_marker = format.section_start;
+ }
+ if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) {
+ function.name_prefix =
+ diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size());
+ }
+
+ // Extract name prefix/suffix from diff.left (stop at the next marker boundary)
+ auto name_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("pre", p.until(FUN_FIRST)) + p.literal(FUN_FIRST) +
+ p.tag("post", p.zero_or_more(p.negate(p.marker()) + p.any()));
+ });
+ auto name_result = name_parser.parse_and_extract(diff.left);
+ if (name_result.result.success()) {
+ function.name_prefix += name_result.tags["pre"];
+ function.name_suffix = name_result.tags["post"];
+ }
+
+ // Extend name_suffix with content from diff.suffix before args begin
+ if (format.mode == tool_format::TAG_WITH_JSON) {
+ // For JSON: name_suffix extends to the first non-marker { or [, including any
+ // markers along the way. Only applies if there's at least one marker after
+ // the JSON content (matching the original "stop < seg_suf.size() - 1" guard).
+ auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ auto non_json = p.marker() | (p.negate(p.literal("{")) + p.negate(p.literal("[")) + p.any());
+ auto after_json = p.zero_or_more(p.negate(p.marker()) + p.any()) + p.marker();
+ return p.tag("ext", p.zero_or_more(non_json)) + after_json;
+ });
+ auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+ if (suf_result.result.success()) {
+ function.name_suffix += suf_result.tags["ext"];
+ }
+ } else {
+ // For tagged: name_suffix extends to the first marker (arg marker)
+ auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.tag("ext", p.zero_or_more(p.negate(p.marker()) + p.any()));
+ });
+ auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+ if (suf_result.result.success()) {
+ function.name_suffix += suf_result.tags["ext"];
+ }
+ }
+
+ // Extract the closer (between last arg and call/section end marker)
+ std::string suffix_marker;
+ if (!format.per_call_end.empty()) {
+ suffix_marker = format.per_call_end;
+ } else {
+ suffix_marker = format.section_end;
+ }
+ std::string closer_suffix;
+ if (suffix_marker.empty()) {
+ // we'll have to rely on an extra diff with no-calls version
+ auto notool_comp = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
+ auto nt_diff = notool_comp->diff;
+ closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
+ } else {
+ closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
+ }
+ if (!closer_suffix.empty()) {
+ if (format.mode == tool_format::TAG_WITH_TAGGED) {
+ // After last arg value, skip the closing arg marker, rest is closer
+ auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.until("YYYY") + p.literal("YYYY") + p.space() +
+ p.marker() + p.space() +
+ p.tag("close", p.rest());
+ });
+ auto close_result = closer_parser.parse_and_extract(closer_suffix);
+ if (close_result.result.success()) {
+ function.close = close_result.tags["close"];
+ }
+ } else if (format.mode == tool_format::TAG_WITH_JSON) {
+ // After last arg value, find end of JSON args, rest is closer
+ auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+ return p.until("YYYY") + p.literal("YYYY") + p.tag("post_val", p.rest());
+ });
+ auto close_result = closer_parser.parse_and_extract(closer_suffix);
+ if (close_result.result.success()) {
+ const auto & post = close_result.tags["post_val"];
+ size_t pos = post.find_last_of("}]");
+ if (pos != std::string::npos && pos < post.size() - 1) {
+ function.close = trim_leading_whitespace(post.substr(pos + 1));
+ }
+ }
+ }
+ }
+ function.close = trim_leading_whitespace(function.close);
+ }
+}
+
+void analyze_tools::analyze_arguments() {
+ LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET);
+
+ extract_argument_name_markers();
+ extract_argument_value_markers();
+}
+
+void analyze_tools::extract_argument_name_markers() {
+ json assistant_first_arg = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_one_arg }) }
+ };
+
+ json assistant_second_arg = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_other_arg }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_first_arg });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (!diff.left.empty() && !diff.right.empty()) {
+ // Parse both sides to find ARG_FIRST/ARG_SECOND and extract the surrounding structure
+ auto left_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.tag("pre", p.until(ARG_FIRST)) + p.literal(ARG_FIRST) +
+ p.tag("suffix", p.until_one_of({"\"", "X"}));
+ });
+ auto right_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.tag("pre", p.until(ARG_SECOND)) + p.literal(ARG_SECOND) +
+ p.tag("suffix", p.until_one_of({"\"", "Y"}));
+ });
+ auto left_result = left_parser.parse_anywhere_and_extract(diff.left);
+ auto right_result = right_parser.parse_anywhere_and_extract(diff.right);
+
+ if (left_result.result.success() && right_result.result.success() &&
+ !left_result.tags["pre"].empty() &&
+ left_result.tags["pre"] == right_result.tags["pre"] &&
+ left_result.tags["suffix"] == right_result.tags["suffix"]) {
+ // Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
+ arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
+ arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
+ } else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
+ // Name is directly in the diff: prefix comes from last marker in diff.prefix
+ auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ auto last_marker = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+ return p.zero_or_more(p.negate(last_marker) + p.any()) + p.tag("name_prefix", last_marker);
+ });
+ auto pre_result = pre_parser.parse_and_extract(diff.prefix);
+ arguments.name_prefix = pre_result.result.success()
+ ? pre_result.tags["name_prefix"] : diff.prefix;
+
+ // Suffix extends from after ARG_FIRST to the first marker (+ optional whitespace).
+ // The marker could be in diff.left itself or in diff.suffix, so we concatenate.
+ std::string after_first = diff.left.substr(ARG_FIRST.length()) + diff.suffix;
+ auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.tag("suffix", p.zero_or_more(p.negate(p.marker()) + p.any()) +
+ p.marker() + p.space());
+ });
+ auto suf_result = suffix_parser.parse_anywhere_and_extract(after_first);
+ if (suf_result.result.success()) {
+ arguments.name_suffix = suf_result.tags["suffix"];
+ }
+ }
+ }
+}
+
+void analyze_tools::extract_argument_value_markers() {
+ json assistant_val_X = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_one_arg }) }
+ };
+
+ json assistant_val_Y = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_val_X });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (diff.left == "XXXX" && diff.right == "YYYY") {
+ std::string arg_name_ending = ARG_FIRST + arguments.name_suffix;
+ std::string prefix = diff.prefix;
+ if (prefix.rfind(arg_name_ending) != std::string::npos) {
+ prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size());
+ }
+ if (!prefix.empty()) {
+ // Find the last marker + any trailing non-marker text to end
+ auto prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ auto last_marker = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+ return p.zero_or_more(p.negate(last_marker) + p.any()) + p.tag("val_prefix", last_marker);
+ });
+ auto pre_result = prefix_parser.parse_and_extract(prefix);
+ arguments.value_prefix = pre_result.result.success() ? pre_result.tags["val_prefix"] : prefix;
+ }
+
+ std::string value_suffix = diff.suffix;
+ if (!function.close.empty()) {
+ size_t func_close_pos = value_suffix.find(function.close);
+ if (func_close_pos != std::string::npos) {
+ value_suffix = value_suffix.substr(0, func_close_pos);
+ }
+ } else if (!format.per_call_end.empty() || !format.section_end.empty()) {
+ std::string end_marker =
+ !format.per_call_end.empty() ? format.per_call_end : format.section_end;
+ size_t end_marker_pos = value_suffix.find(end_marker);
+ if (end_marker_pos != std::string::npos) {
+ value_suffix = value_suffix.substr(0, end_marker_pos);
+ }
+ }
+ value_suffix = trim_leading_whitespace(value_suffix);
+ if (!value_suffix.empty()) {
+ arguments.value_suffix = value_suffix;
+ }
+ }
+}
+
+void analyze_tools::extract_argument_separator() {
+ json assistant_one_arg = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_one_arg }) }
+ };
+
+ json assistant_two_args = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_one_arg });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (!diff.right.empty()) {
+ std::string separator = until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND);
+ arguments.separator = separator;
+ }
+}
+
+void analyze_tools::extract_args_markers() {
+ json assistant_no_args = json{
+ { "role", "assistant"},
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_zero_args }) }
+ };
+
+ json assistant_with_args = json{
+ { "role", "assistant"},
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_one_arg }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_no_args });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (format.mode != tool_format::JSON_NATIVE) {
+ std::string prefix_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
+ std::string suffix_marker = !format.section_end.empty() ? format.section_end : format.per_call_end;
+ // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones
+ size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker);
+ size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker);
+ if (prefix_pos == std::string::npos) {
+ prefix_pos = 0;
+ }
+ if (suffix_pos == std::string::npos) {
+ suffix_pos = diff.suffix.size();
+ }
+ std::string prefix_cut = diff.prefix.substr(prefix_pos + prefix_marker.size());
+ std::string suffix_cut = diff.suffix.substr(0, suffix_pos);
+ std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":");
+ std::string args_end = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}");
+
+ if (!args_start.empty() || !args_end.empty()) {
+ size_t find_fun = args_start.find(FUN_FIRST);
+ if (find_fun != std::string::npos) {
+ args_start = args_start.substr(find_fun + FUN_FIRST.size(), args_start.size() - find_fun - FUN_FIRST.size());
+ }
+ arguments.start = args_start;
+ arguments.end = args_end;
+ }
+ }
+}
+
+void analyze_tools::extract_call_id_markers() {
+ json assistant_id1 = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call }) }
+ };
+
+ json assistant_id2 = json{
+ { "role", "assistant" },
+ { "content", "" },
+ { "tool_calls", json::array({ first_tool_call_alt_id }) }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_id1 });
+ params.tools = tools;
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ auto comparison = compare_variants(
+ *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); });
+
+ if (!comparison) {
+ LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__);
+ return;
+ }
+
+ const auto & diff = comparison->diff;
+
+ if (diff.left.empty() && diff.right.empty()) {
+ return;
+ }
+
+ std::string id_value_1 = "call00001";
+ std::string id_value_2 = "call99999";
+
+ size_t common_id_prefix_len = 0;
+ for (size_t i = 0; i < std::min(id_value_1.length(), id_value_2.length()); i++) {
+ if (id_value_1[i] == id_value_2[i]) {
+ common_id_prefix_len++;
+ } else {
+ break;
+ }
+ }
+ std::string common_id_part = id_value_1.substr(0, common_id_prefix_len);
+
+ // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS)
+ // or in the suffix (call_id is PRE_FUNC_NAME)
+ std::string func_name = FUN_FIRST;
+ size_t func_name_in_prefix = diff.prefix.rfind(func_name);
+ size_t func_name_in_suffix = diff.suffix.find(func_name);
+
+ // Helper: find the last marker in a string (returns just the marker, not trailing text)
+ auto find_last_marker = [](const std::string & str) -> std::string {
+ auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ auto last = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+ return p.zero_or_more(p.negate(last) + p.any()) + p.tag("m", p.marker());
+ });
+ auto res = parser.parse_anywhere_and_extract(str);
+ return res.result.success() ? res.tags["m"] : "";
+ };
+
+ // Helper: find the first marker in a string
+ auto find_first_marker = [](const std::string & str) -> std::string {
+ auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.tag("m", p.marker());
+ });
+ auto res = parser.parse_anywhere_and_extract(str);
+ return res.result.success() ? res.tags["m"] : "";
+ };
+
+ if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) {
+ // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS
+ // Check if args indicator "{" is in prefix or suffix
+ size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix);
+ size_t args_in_suffix = diff.suffix.find('{');
+
+ if (args_in_suffix != std::string::npos &&
+ (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) {
+ // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS
+ call_id.pos = call_id_position::BETWEEN_FUNC_AND_ARGS;
+
+ // Find call_id_prefix: marker immediately preceding common_id_part (no intervening markers)
+ std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length());
+ auto id_prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.tag("prefix", p.marker()) +
+ p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal(common_id_part)) + p.any()) +
+ p.literal(common_id_part);
+ });
+ auto id_res = id_prefix_parser.parse_anywhere_and_extract(after_func);
+ if (id_res.result.success()) {
+ call_id.prefix = id_res.tags["prefix"];
+ } else {
+ // Fallback: use the last marker in after_func
+ call_id.prefix = find_last_marker(after_func);
+ }
+
+ // Extract call_id_suffix: the first marker in the suffix before args "{"
+ auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+ return p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal("{")) + p.any()) +
+ p.tag("suffix", p.marker());
+ });
+ auto suf_res = suffix_parser.parse_anywhere_and_extract(diff.suffix);
+ if (suf_res.result.success()) {
+ call_id.suffix = suf_res.tags["suffix"];
+ }
+ } else if (args_in_prefix != std::string::npos) {
+ // Args are in prefix, so call_id is POST_ARGS
+ call_id.pos = call_id_position::POST_ARGS;
+
+ // Extract last marker between args closing brace and the ID
+ std::string after_args = diff.prefix.substr(args_in_prefix);
+ size_t closing_brace = after_args.rfind('}');
+ if (closing_brace != std::string::npos) {
+ std::string between_args_and_id = after_args.substr(closing_brace + 1);
+ call_id.prefix = find_last_marker(between_args_and_id);
+ }
+
+ // call_id_suffix: first marker in diff.suffix
+ call_id.suffix = find_first_marker(diff.suffix);
+ }
+ } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) {
+ // Function name is only in suffix - call_id is PRE_FUNC_NAME
+ call_id.pos = call_id_position::PRE_FUNC_NAME;
+
+ // call_id_prefix: last marker in diff.prefix
+ call_id.prefix = find_last_marker(diff.prefix);
+
+ // call_id_suffix: first marker in the portion of diff.suffix before func_name
+ std::string before_func = diff.suffix.substr(0, func_name_in_suffix);
+ call_id.suffix = find_first_marker(before_func);
+ }
+
+ // When call_id is detected, per_call_end may have been incorrectly set to include
+ // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix.
+ if (call_id.pos != call_id_position::NONE && !call_id.suffix.empty() &&
+ format.per_call_end.find(call_id.suffix) == 0) {
+ format.per_call_end.clear();
+ }
+}
+
+} // namespace autoparser
+++ /dev/null
-#include "chat.h"
-#include "chat-parser.h"
-#include "common.h"
-#include "json-partial.h"
-#include "json-schema-to-grammar.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-class xml_toolcall_syntax_exception : public std::runtime_error {
- public:
- xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-template<typename T>
-inline void sort_uniq(std::vector<T> &vec) {
- std::sort(vec.begin(), vec.end());
- vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
-}
-
-template<typename T>
-inline bool all_space(const T &str) {
- return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
-}
-
-static size_t utf8_truncate_safe(const std::string_view s) {
- size_t len = s.size();
- if (len == 0) return 0;
- size_t i = len;
- for (size_t back = 0; back < 4 && i > 0; ++back) {
- --i;
- unsigned char c = s[i];
- if ((c & 0x80) == 0) {
- return len;
- } else if ((c & 0xC0) == 0xC0) {
- size_t expected_len = 0;
- if ((c & 0xE0) == 0xC0) expected_len = 2;
- else if ((c & 0xF0) == 0xE0) expected_len = 3;
- else if ((c & 0xF8) == 0xF0) expected_len = 4;
- else return i;
- if (len - i >= expected_len) {
- return len;
- } else {
- return i;
- }
- }
- }
- return len - std::min(len, size_t(3));
-}
-
-inline void utf8_truncate_safe_resize(std::string &s) {
- s.resize(utf8_truncate_safe(s));
-}
-
-inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
- return s.substr(0, utf8_truncate_safe(s));
-}
-
-static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
- if (literal1.size() == 0) return builder.try_find_literal(literal2);
- const auto saved_pos = builder.pos();
- while (auto res = builder.try_find_literal(literal1)) {
- builder.consume_spaces();
- const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
- if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
- if (res->prelude.size() != res->groups[0].begin - saved_pos) {
- res->prelude = builder.str({saved_pos, res->groups[0].begin});
- }
- builder.move_to(builder.pos() + match_len);
- res->groups[0].end = builder.pos();
- GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
- return res;
- }
- builder.move_to(res->groups[0].begin + 1);
- }
- builder.move_to(saved_pos);
- return std::nullopt;
-}
-
-/**
- * make a GBNF that accept any strings except those containing any of the forbidden strings.
- */
-std::string make_gbnf_excluding(std::vector<std::string> forbids) {
- constexpr auto charclass_escape = [](unsigned char c) -> std::string {
- if (c == '\\' || c == ']' || c == '^' || c == '-') {
- std::string s = "\\";
- s.push_back((char)c);
- return s;
- }
- if (isprint(c)) {
- return std::string(1, (char)c);
- }
- char buf[16];
- snprintf(buf, 15, "\\x%02X", c);
- return std::string(buf);
- };
- constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
- std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
- int i = l;
- while (i < r) {
- const std::string &s = forbids[i];
- if ((int)s.size() == depth) {
- ++i;
- continue;
- }
- unsigned char c = (unsigned char)s[depth];
- int j = i;
- while (j < r && (int)forbids[j].size() > depth &&
- (unsigned char)forbids[j][depth] == c) {
- ++j;
- }
- children.push_back({c, {i, j}});
- i = j;
- }
- std::vector<std::string> alts;
- if (!children.empty()) {
- std::string cls;
- for (auto &ch : children) cls += charclass_escape(ch.first);
- alts.push_back(std::string("[^") + cls + "]");
- }
- for (auto &ch : children) {
- std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
- if (!childExpr.empty()) {
- std::string quoted_ch = "\"";
- if (ch.first == '\\') quoted_ch += "\\\\";
- else if (ch.first == '"') quoted_ch += "\\\"";
- else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
- else {
- char buf[16];
- snprintf(buf, 15, "\\x%02X", ch.first);
- quoted_ch += buf;
- }
- quoted_ch += "\"";
- std::string branch = quoted_ch + std::string(" ") + childExpr;
- alts.push_back(branch);
- }
- }
- if (alts.empty()) return "";
- std::ostringstream oss;
- oss << "( ";
- for (size_t k = 0; k < alts.size(); ++k) {
- if (k) oss << " | ";
- oss << alts[k];
- }
- oss << " )";
- return oss.str();
- };
- if (forbids.empty()) return "( . )*";
- sort(forbids.begin(), forbids.end());
- std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
- if (expr.empty()) {
- std::string cls;
- for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
- expr = std::string("( [^") + cls + "] )";
- }
- if (forbids.size() == 1)
- return expr + "*";
- else
- return std::string("( ") + expr + " )*";
-}
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
- GGML_ASSERT(!form.tool_start.empty());
- GGML_ASSERT(!form.tool_sep.empty());
- GGML_ASSERT(!form.key_start.empty());
- GGML_ASSERT(!form.val_end.empty());
- GGML_ASSERT(!form.tool_end.empty());
-
- std::string key_val_sep = form.key_val_sep;
- if (form.key_val_sep2) {
- key_val_sep += "\n";
- key_val_sep += *form.key_val_sep2;
- }
- GGML_ASSERT(!key_val_sep.empty());
-
- if (tools.is_array() && !tools.empty()) {
- data.grammar = build_grammar([&](const common_grammar_builder &builder) {
- auto string_arg_val = form.last_val_end ?
- builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
- builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
-
- std::vector<std::string> tool_rules;
- for (const auto & tool : tools) {
- if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
- LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
- continue;
- }
- const auto & function = tool.at("function");
- if (!function.contains("name") || !function.at("name").is_string()) {
- LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
- continue;
- }
- if (!function.contains("parameters") || !function.at("parameters").is_object()) {
- LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
- continue;
- }
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
-
- struct parameter_rule {
- std::string symbol_name;
- bool is_required;
- };
- std::vector<parameter_rule> arg_rules;
- if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
- LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
- continue;
- } else {
- std::vector<std::string> requiredParameters;
- if (parameters.contains("required")) {
- try { parameters.at("required").get_to(requiredParameters); }
- catch (const std::runtime_error&) {
- LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
- }
- }
- sort_uniq(requiredParameters);
- for (const auto & [key, value] : parameters.at("properties").items()) {
- std::string quoted_key = key;
- bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
- if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
- quoted_key = gbnf_format_literal(key);
- quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
- }
- arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
- gbnf_format_literal(form.key_start) + " " +
- gbnf_format_literal(quoted_key) + " " +
- gbnf_format_literal(key_val_sep) + " " +
- ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
- (form.raw_argval ?
- string_arg_val :
- "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
- ) :
- builder.add_schema(name + "-arg-" + key, value)
- )
- ), required});
- }
- }
-
- auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
- decltype(next_arg_with_sep) next_arg = "\"\"";
- for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
- std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
- next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
- include_this_arg : "( " + include_this_arg + " ) | " + next_arg
- );
- include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
- next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
- include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
- );
- }
-
- std::string quoted_name = name;
- if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
- quoted_name = gbnf_format_literal(name);
- quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
- }
- quoted_name = gbnf_format_literal(quoted_name);
- // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
- if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
- quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
- }
- tool_rules.push_back(builder.add_rule(name + "-call",
- gbnf_format_literal(form.tool_start) + " " +
- quoted_name + " " +
- gbnf_format_literal(form.tool_sep) + " " +
- next_arg
- ));
- }
-
- auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
- auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
- auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
- auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
- builder.add_rule("root",
- (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
- tool_call_multiple_with_end + "?" +
- (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
- );
- });
-
- // grammar trigger for tool call
- data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
- }
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
- GGML_ASSERT(!form.tool_start.empty());
- GGML_ASSERT(!form.key_start.empty());
- GGML_ASSERT(!form.key_val_sep.empty());
- GGML_ASSERT(!form.val_end.empty());
- GGML_ASSERT(!form.tool_end.empty());
-
- // Helper to choose return false or throw error
- constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
- LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
- if (recovery) {
- builder.move_to(start_pos);
- return false;
- } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
- };
- // Drop substring from needle to end from a JSON
- constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
- auto pos = json_str.rfind(needle);
- if (pos == std::string::npos) {
- return false;
- }
- for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
- unsigned char ch = static_cast<unsigned char>(json_str[i]);
- if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
- return false;
- }
- }
- if (pos != 0 && json_str[pos - 1] == '"') {
- --pos;
- }
- json_str.resize(pos);
- return true;
- };
- // Helper to generate a partial argument JSON
- constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
- auto rest = builder.consume_rest();
- utf8_truncate_safe_resize(rest);
- set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
- auto tool_str = arguments.dump();
- if (partial_json(tool_str)) {
- if (builder.add_tool_call(function_name, "", tool_str)) {
- return;
- }
- }
- LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
- };
- // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
- constexpr auto try_find_close = [](
- common_chat_msg_parser & builder,
- const std::string & end,
- const std::optional<std::string> & alt_end,
- const std::string & end_next,
- const std::optional<std::string> & alt_end_next
- ) {
- auto saved_pos = builder.pos();
- auto tc = builder.try_find_literal(end);
- auto val_end_size = end.size();
- if (alt_end) {
- auto pos_1 = builder.pos();
- builder.move_to(saved_pos);
- auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
- if (alt_end_next) {
- builder.move_to(saved_pos);
- auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
- if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
- tc2 = tc3;
- }
- }
- if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
- tc = tc2;
- tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
- builder.move_to(tc->groups[0].end);
- val_end_size = alt_end->size();
- } else {
- builder.move_to(pos_1);
- }
- }
- return std::make_pair(val_end_size, tc);
- };
- // Helper to find a val_end or last_val_end, returns matched pattern size
- const auto try_find_val_end = [try_find_close, &builder, &form]() {
- return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
- };
- // Helper to find a tool_end or last_tool_end, returns matched pattern size
- const auto try_find_tool_end = [try_find_close, &builder, &form]() {
- return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
- };
-
- bool recovery = true;
- const auto start_pos = builder.pos();
- if (!all_space(form.scope_start)) {
- if (auto tc = builder.try_find_literal(form.scope_start)) {
- if (all_space(tc->prelude)) {
- if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
- throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
- } else {
- builder.move_to(start_pos);
- return false;
- }
- } else return false;
- }
- while (auto tc = builder.try_find_literal(form.tool_start)) {
- if (!all_space(tc->prelude)) {
- LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
- gbnf_format_literal(form.tool_start).c_str(),
- gbnf_format_literal(tc->prelude).c_str()
- );
- builder.move_to(tc->groups[0].begin - tc->prelude.size());
- break;
- }
-
- // Find tool name
- auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
- if (!func_name) {
- auto [sz, tc] = try_find_tool_end();
- func_name = tc;
- }
- if (!func_name) {
- // Partial tool name not supported
- throw common_chat_msg_partial_exception("incomplete tool_call");
- }
- // If the model generate multiple tool call and the first tool call has no argument
- if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
- builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
- auto [sz, tc] = try_find_tool_end();
- func_name = tc;
- }
-
- // Parse tool name
- builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
- std::string function_name = string_strip(func_name->prelude);
- // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
- if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
- if (string_starts_with(function_name, "functions.")) {
- static const std::regex re(":\\d+$");
- if (std::regex_search(function_name, re)) {
- function_name = function_name.substr(10, function_name.rfind(":") - 10);
- }
- }
- }
-
- // Argument JSON
- json arguments = json::object();
-
- // Helper to generate a partial argument JSON
- const auto gen_partial_args = [&](auto set_partial_arg) {
- gen_partial_json(set_partial_arg, arguments, builder, function_name);
- };
-
- // Parse all arg_key/arg_value pairs
- while (auto tc = builder.try_find_literal(form.key_start)) {
- if (!all_space(tc->prelude)) {
- LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
- gbnf_format_literal(form.key_start).c_str(),
- gbnf_format_literal(tc->prelude).c_str()
- );
- builder.move_to(tc->groups[0].begin - tc->prelude.size());
- break;
- }
- if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
- auto tool_call_arg = arguments.dump();
- if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
- tool_call_arg.resize(tool_call_arg.size() - 1);
- }
- builder.add_tool_call(function_name, "", tool_call_arg);
- throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
- }
-
- // Parse arg_key
- auto key_res = builder.try_find_literal(form.key_val_sep);
- if (!key_res) {
- gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
- throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
- }
- if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
- gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
- throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
- }
- auto &key = key_res->prelude;
- recovery = false;
-
- // Parse arg_value
- if (form.key_val_sep2) {
- if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
- if (!all_space(tc->prelude)) {
- LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
- gbnf_format_literal(tc->prelude).c_str(),
- gbnf_format_literal(form.key_val_sep).c_str(),
- gbnf_format_literal(*form.key_val_sep2).c_str()
- );
- return return_error(builder, start_pos, false);
- }
- if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
- gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
- throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
- }
- } else {
- gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
- throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
- }
- }
- auto val_start = builder.pos();
-
- // Test if arg_val is a partial JSON
- std::optional<common_json> value_json = std::nullopt;
- if (!form.raw_argval || !*form.raw_argval) {
- try { value_json = builder.try_consume_json(); }
- catch (const std::runtime_error&) { builder.move_to(val_start); }
- // TODO: Delete this when json_partial adds top-level support for null/true/false
- if (builder.pos() == val_start) {
- const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
- builder.consume_spaces();
- std::string_view sv = utf8_truncate_safe_view(builder.input());
- sv.remove_prefix(builder.pos());
- std::string rest = "a";
- if (sv.size() < 6) rest = sv;
- if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
- value_json = {123, {"123", "123"}};
- builder.consume_rest();
- } else {
- builder.move_to(val_start);
- }
- }
- }
-
- // If it is a JSON and followed by </arg_value>, parse as json
- // cannot support streaming because it may be a plain text starting with JSON
- if (value_json) {
- auto json_end = builder.pos();
- builder.consume_spaces();
- if (builder.pos() == builder.input().size()) {
- if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
- arguments[key] = value_json->json;
- auto json_str = arguments.dump();
- if (!value_json->healing_marker.json_dump_marker.empty()) {
- GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
- json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
- } else {
- GGML_ASSERT(json_str.back() == '}');
- json_str.resize(json_str.size() - 1);
- }
- builder.add_tool_call(function_name, "", json_str);
- } else {
- gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
- }
- LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
- throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
- }
- builder.move_to(json_end);
- auto [val_end_size, tc] = try_find_val_end();
- if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
- if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
- gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
- LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
- throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
- } else arguments[key] = value_json->json;
- } else builder.move_to(val_start);
- }
-
- // If not, parse as plain text
- if (val_start == builder.pos()) {
- if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
- auto &value_str = value_plain->prelude;
- if (form.trim_raw_argval) value_str = string_strip(value_str);
- if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
- gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
- throw common_chat_msg_partial_exception(
- "Expected " + gbnf_format_literal(form.val_end) +
- " after " + gbnf_format_literal(form.key_val_sep) +
- (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
- );
- }
- arguments[key] = value_str;
- } else {
- if (form.trim_raw_argval) {
- gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
- } else {
- gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
- }
- throw common_chat_msg_partial_exception(
- "Expected " + gbnf_format_literal(form.val_end) +
- " after " + gbnf_format_literal(form.key_val_sep) +
- (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
- );
- }
- }
- }
-
- // Consume closing tag
- if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
- if (!all_space(tc->prelude)) {
- LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
- gbnf_format_literal(form.tool_end).c_str(),
- gbnf_format_literal(tc->prelude).c_str()
- );
- return return_error(builder, start_pos, recovery);
- }
- if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
- // Add the parsed tool call
- if (!builder.add_tool_call(function_name, "", arguments.dump())) {
- throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
- }
- recovery = false;
- continue;
- }
- }
-
- auto tool_call_arg = arguments.dump();
- if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
- tool_call_arg.resize(tool_call_arg.size() - 1);
- }
- builder.add_tool_call(function_name, "", tool_call_arg);
- throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
- }
- if (auto tc = builder.try_find_literal(form.scope_end)) {
- if (!all_space(tc->prelude)) {
- LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
- gbnf_format_literal(form.scope_end).c_str(),
- gbnf_format_literal(tc->prelude).c_str()
- );
- return return_error(builder, start_pos, recovery);
- }
- } else {
- if (all_space(form.scope_end)) return true;
- builder.consume_spaces();
- if (builder.pos() == builder.input().size())
- throw common_chat_msg_partial_exception("incomplete tool calls");
- LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
- gbnf_format_literal(form.scope_end).c_str(),
- gbnf_format_literal(builder.consume_rest()).c_str()
- );
- return return_error(builder, start_pos, recovery);
- }
-
- return true;
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
- auto pos = pos_;
- auto tsize = result_.tool_calls.size();
- try { return parse_xml_tool_calls(*this, form); }
- catch (const xml_toolcall_syntax_exception&) {}
- move_to(pos);
- result_.tool_calls.resize(tsize);
- return false;
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
- constexpr auto rstrip = [](std::string &s) {
- s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
- };
- // Erase substring from l to r, along with additional spaces nearby
- constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
- while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
- ++l;
- while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
- if (l < r) str[l] = '\n';
- if (l + 1 < r) str[l + 1] = '\n';
- if (l != 0) l += 2;
- str.erase(l, r - l);
- return l;
- };
- constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
- auto best_match = content.size();
- for (auto pattern: list) {
- if (pattern.size() == 0) continue;
- for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
- auto match_len = content.size() - match_idx;
- if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
- best_match = match_idx;
- }
- }
- }
- if (content.size() > best_match) {
- content.erase(best_match);
- }
- };
- const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
- return trim_suffix(content, {
- start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
- form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
- form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
- form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
- form.scope_end
- });
- };
-
-
- // Trim leading spaces without affecting keyword matching
- static const common_regex spaces_regex("\\s*");
- {
- auto tc = builder.consume_regex(spaces_regex);
- auto spaces = builder.str(tc.groups[0]);
- auto s1 = spaces.size();
- trim_potential_partial_word(spaces);
- auto s2 = spaces.size();
- builder.move_to(builder.pos() - (s1 - s2));
- }
-
- // Parse content
- bool reasoning_unclosed = builder.syntax().thinking_forced_open;
- std::string unclosed_reasoning_content("");
- for (;;) {
- auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
- std::string content;
- std::string tool_call_start;
-
- if (tc) {
- content = std::move(tc->prelude);
- tool_call_start = builder.str(tc->groups[0]);
- LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
- } else {
- content = builder.consume_rest();
- utf8_truncate_safe_resize(content);
- }
-
- // Handle unclosed think block
- if (reasoning_unclosed) {
- if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
- unclosed_reasoning_content += content;
- if (!(form.allow_toolcall_in_think && tc)) {
- unclosed_reasoning_content += tool_call_start;
- continue;
- }
- } else {
- reasoning_unclosed = false;
- std::string reasoning_content;
- if (pos == std::string::npos) {
- reasoning_content = std::move(content);
- } else {
- reasoning_content = content.substr(0, pos);
- content.erase(0, pos + end_think.size());
- }
- if (builder.pos() == builder.input().size() && all_space(content)) {
- rstrip(reasoning_content);
- trim_potential_partial_word(reasoning_content);
- rstrip(reasoning_content);
- if (reasoning_content.empty()) {
- rstrip(unclosed_reasoning_content);
- trim_potential_partial_word(unclosed_reasoning_content);
- rstrip(unclosed_reasoning_content);
- if (unclosed_reasoning_content.empty()) continue;
- }
- }
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
- builder.add_content(start_think);
- builder.add_content(unclosed_reasoning_content);
- builder.add_content(reasoning_content);
- if (builder.pos() != builder.input().size() || !all_space(content))
- builder.add_content(end_think);
- } else {
- builder.add_reasoning_content(unclosed_reasoning_content);
- builder.add_reasoning_content(reasoning_content);
- }
- unclosed_reasoning_content.clear();
- }
- }
-
- // Handle multiple think block
- bool toolcall_in_think = false;
- for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
- if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
- if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
- auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
- builder.add_reasoning_content(reasoning_content);
- think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
- } else {
- think_start = think_end + end_think.size() - 1;
- }
- } else {
- // This <tool_call> start is in thinking block, skip this tool call
- // This <tool_call> start is in thinking block
- if (form.allow_toolcall_in_think) {
- unclosed_reasoning_content = content.substr(think_start + start_think.size());
- } else {
- unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
- }
- reasoning_unclosed = true;
- content.resize(think_start);
- toolcall_in_think = true;
- }
- }
-
- if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
- rstrip(content);
- // Handle unclosed </think> token from content: delete all </think> token
- if (auto pos = content.rfind(end_think); pos != std::string::npos) {
- while (pos != std::string::npos) {
- pos = erase_spaces(content, pos, pos + end_think.size() - 1);
- pos = content.rfind(end_think, pos);
- }
- }
- // Strip if needed
- if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
- content = string_strip(content);
- }
- }
-
- // remove potential partial suffix
- if (builder.pos() == builder.input().size() && builder.is_partial()) {
- if (unclosed_reasoning_content.empty()) {
- rstrip(content);
- trim_potential_partial_word(content);
- rstrip(content);
- } else {
- rstrip(unclosed_reasoning_content);
- trim_potential_partial_word(unclosed_reasoning_content);
- rstrip(unclosed_reasoning_content);
- }
- }
-
- // consume unclosed_reasoning_content if allow_toolcall_in_think is set
- if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
- if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
- builder.add_reasoning_content(unclosed_reasoning_content);
- } else {
- if (content.empty()) {
- content = start_think + unclosed_reasoning_content;
- } else {
- content += "\n\n" + start_think;
- content += unclosed_reasoning_content;
- }
- }
- unclosed_reasoning_content.clear();
- }
-
- // Add content
- if (!content.empty()) {
- // If there are multiple content blocks
- if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
- builder.add_content("\n\n");
- }
- builder.add_content(content);
- }
-
- // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
- if (toolcall_in_think && !form.allow_toolcall_in_think) {
- continue;
- }
-
- // There is no tool call and all content is parsed
- if (!tc) {
- GGML_ASSERT(builder.pos() == builder.input().size());
- GGML_ASSERT(unclosed_reasoning_content.empty());
- if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
- break;
- }
-
- builder.move_to(tc->groups[0].begin);
- if (builder.try_consume_xml_tool_calls(form)) {
- auto end_of_tool = builder.pos();
- builder.consume_spaces();
- if (builder.pos() != builder.input().size()) {
- builder.move_to(end_of_tool);
- if (!builder.result().content.empty()) {
- builder.add_content("\n\n");
- }
- }
- } else {
- static const common_regex next_char_regex(".");
- auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
- rstrip(c);
- builder.add_content(c);
- }
- }
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- */
-void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
- parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
-}
+++ /dev/null
-#pragma once
-
-#include "chat.h"
-
-#include <nlohmann/json.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-
-// Sample config:
-// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
-// GLM 4.5 (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
-struct xml_tool_call_format {
- std::string scope_start; // <minimax:tool_call>\n // \n // can be empty
- std::string tool_start; // <invoke name=\" // <tool_call>
- std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls
- std::string key_start; // <parameter name=\" // <arg_key>
- std::string key_val_sep; // \"> // </arg_key>\n<arg_value>
- std::string val_end; // </parameter>\n // </arg_value>\n
- std::string tool_end; // </invoke>\n // </tool_call>\n
- std::string scope_end; // </minimax:tool_call> // // can be empty
- // Set this if there can be dynamic spaces inside key_val_sep.
- // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
- std::optional<std::string> key_val_sep2 = std::nullopt;
- // Set true if argval should only be raw string. e.g. Hello "world" hi
- // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
- // Defaults to std::nullopt, both will be allowed.
- std::optional<bool> raw_argval = std::nullopt;
- std::optional<std::string> last_val_end = std::nullopt;
- std::optional<std::string> last_tool_end = std::nullopt;
- bool trim_raw_argval = false;
- bool allow_toolcall_in_think = false;
-};
-
-// make a GBNF that accept any strings except those containing any of the forbidden strings.
-std::string make_gbnf_excluding(std::vector<std::string> forbids);
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
+++ /dev/null
-#include "chat-parser.h"
-#include "chat-peg-parser.h"
-#include "common.h"
-#include "log.h"
-#include "peg-parser.h"
-#include "regex-partial.h"
-
-#include <algorithm>
-#include <cctype>
-#include <optional>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <vector>
-
-using json = nlohmann::ordered_json;
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
- const common_regex & prefix,
- size_t rstrip_prefix = 0) {
- static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
- if (auto res = builder.try_find_regex(prefix)) {
- builder.move_back(rstrip_prefix);
- auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
- if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call array");
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
- std::string arguments;
- if (builder.is_partial()) {
- arguments = (json{
- { "code", code + builder.healing_marker() }
- })
- .dump();
- auto idx = arguments.find(builder.healing_marker());
- if (idx != std::string::npos) {
- arguments.resize(idx);
- }
- } else {
- arguments = (json{
- { "code", code }
- })
- .dump();
- }
- return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
- common_chat_msg_parser & builder,
- const std::optional<common_regex> & block_open,
- const std::optional<common_regex> & function_regex_start_only,
- const std::optional<common_regex> & function_regex,
- const common_regex & close_regex,
- const std::optional<common_regex> & block_close,
- bool allow_raw_python = false,
- const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
- nullptr) {
- auto parse_tool_calls = [&]() {
- size_t from = std::string::npos;
- auto first = true;
- while (true) {
- auto start_pos = builder.pos();
- auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
- function_regex ? builder.try_find_regex(*function_regex, from) :
- std::nullopt;
-
- if (res) {
- std::string name;
- if (get_function_name) {
- name = get_function_name(*res);
- } else {
- GGML_ASSERT(res->groups.size() == 2);
- name = builder.str(res->groups[1]);
- }
- first = false;
- if (name.empty()) {
- // get_function_name signalled us that we should skip this match and treat it as content.
- from = res->groups[0].begin + 1;
- continue;
- }
- from = std::string::npos;
-
- auto maybe_raw_python = name == "python" && allow_raw_python;
- if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
- if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
- if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_regex(close_regex);
- }
- continue;
- }
- if (maybe_raw_python) {
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
- if (!builder.add_tool_call(name, "", arguments)) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- return;
- }
- throw common_chat_msg_partial_exception("incomplete tool call");
- } else {
- builder.move_to(start_pos);
- }
- break;
- }
- if (block_close) {
- builder.consume_regex(*block_close);
- }
- builder.consume_spaces();
- builder.add_content(builder.consume_rest());
- };
- if (block_open) {
- if (auto res = builder.try_find_regex(*block_open)) {
- parse_tool_calls();
- } else {
- builder.add_content(builder.consume_rest());
- }
- } else {
- parse_tool_calls();
- }
-}
-
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
- : input_(input), is_partial_(is_partial), syntax_(syntax)
-{
- result_.role = "assistant";
-
- while (true) {
- std::string id = std::to_string(std::rand());
- if (input.find(id) == std::string::npos) {
- healing_marker_ = id;
- break;
- }
- }
-}
-
-std::string common_chat_msg_parser::str(const common_string_range & rng) const {
- GGML_ASSERT(rng.begin <= rng.end);
- return input_.substr(rng.begin, rng.end - rng.begin);
-}
-
-void common_chat_msg_parser::add_content(const std::string &content) {
- result_.content += content;
-}
-
-void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
- result_.reasoning_content += reasoning_content;
-}
-
-bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
- if (name.empty()) {
- return false;
- }
-
- common_chat_tool_call tool_call;
- tool_call.name = name;
- tool_call.arguments = arguments;
- tool_call.id = id;
-
- // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
- result_.tool_calls.emplace_back(tool_call);
-
- return true;
-}
-bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
- std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
- std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
- std::string arguments = "";
- if (tool_call.contains("arguments")) {
- if (tool_call.at("arguments").is_object()) {
- arguments = tool_call.at("arguments").dump();
- } else {
- arguments = tool_call.at("arguments");
- }
- }
-
- return add_tool_call(name, id, arguments);
-}
-
-bool common_chat_msg_parser::add_tool_calls(const json & arr) {
- for (const auto & item : arr) {
- if (!add_tool_call(item)) {
- return false;
- }
- }
- return true;
-}
-
-bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
- if (!tool_call.is_object() || tool_call.size() != 1) {
- return false;
- }
-
- // Get the tool name (the single key in the object)
- auto it = tool_call.begin();
- std::string name = it.key();
-
- if (name.empty()) {
- return false;
- }
-
- // Get the arguments (the nested object)
- const json & args_json = it.value();
- std::string arguments = "";
-
- if (args_json.is_object()) {
- arguments = args_json.dump();
- } else if (args_json.is_string()) {
- arguments = args_json;
- } else if (!args_json.is_null()) {
- // For other types, convert to string representation
- arguments = args_json.dump();
- }
-
- return add_tool_call(name, "", arguments);
-}
-void common_chat_msg_parser::finish() {
- if (!is_partial_ && pos_ != input_.size()) {
- throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
- }
-}
-
-bool common_chat_msg_parser::consume_spaces() {
- const auto length = input_.size();
- auto consumed = false;
- while (pos_ < length && std::isspace(input_[pos_])) {
- ++pos_;
- consumed = true;
- }
- return consumed;
-}
-
-bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
- auto pos = pos_;
- for (auto i = 0u; i < literal.size(); ++i) {
- if (pos >= input_.size()) {
- return false;
- }
- if (input_[pos] != literal[i]) {
- return false;
- }
- ++pos;
- }
- pos_ = pos;
- return true;
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal(const std::string & literal) {
- auto idx = input_.find(literal, pos_);
- if (idx != std::string::npos) {
- find_regex_result res;
- res.prelude = input_.substr(pos_, idx - pos_);
- auto end = idx + literal.size();
- res.groups.emplace_back(common_string_range{idx, end});
- move_to(end);
- return res;
- }
- if (is_partial_) {
- idx = string_find_partial_stop(input_, literal);
- if (idx != std::string::npos && idx >= pos_) {
- find_regex_result res;
- res.prelude = input_.substr(pos_, idx - pos_);
- auto end = input_.size();
- res.groups.emplace_back(common_string_range{idx, end});
- move_to(end);
- return res;
- }
- }
- return std::nullopt;
-}
-
-void common_chat_msg_parser::consume_literal(const std::string & literal) {
- if (!try_consume_literal(literal)) {
- throw common_chat_msg_partial_exception(literal);
- }
-}
-
-bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
- std::string pending_reasoning_prefix;
-
- if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
- return false;
- }
-
- auto set_reasoning_prefix = [&](size_t prefix_pos) {
- if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
- return;
- }
- if (prefix_pos + start_think.size() > input_.size()) {
- pending_reasoning_prefix.clear();
- return;
- }
- // Capture the exact literal that opened the reasoning section so we can
- // surface it back to callers. This ensures formats that force the
- // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
- // instead of dropping it during parsing.
- pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
- };
-
- auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
- auto stripped_reasoning = string_strip(reasoning);
- if (stripped_reasoning.empty()) {
- return;
- }
- if (syntax_.reasoning_in_content) {
- add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
- add_content(stripped_reasoning);
- if (closed) {
- add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
- }
- } else {
- if (!pending_reasoning_prefix.empty()) {
- add_reasoning_content(pending_reasoning_prefix);
- pending_reasoning_prefix.clear();
- }
- add_reasoning_content(stripped_reasoning);
- }
- };
-
- const size_t saved_pos = pos_;
- const size_t saved_content_size = result_.content.size();
- const size_t saved_reasoning_size = result_.reasoning_content.size();
-
- auto restore_state = [&]() {
- move_to(saved_pos);
- result_.content.resize(saved_content_size);
- result_.reasoning_content.resize(saved_reasoning_size);
- };
-
- // Allow leading whitespace to be preserved as content when reasoning is present at the start
- size_t cursor = pos_;
- size_t whitespace_end = cursor;
- while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
- ++whitespace_end;
- }
-
- if (whitespace_end >= input_.size()) {
- restore_state();
- if (syntax_.thinking_forced_open) {
- auto rest = input_.substr(saved_pos);
- if (!rest.empty()) {
- handle_reasoning(rest, /* closed */ !is_partial());
- }
- move_to(input_.size());
- return true;
- }
- return false;
- }
-
- cursor = whitespace_end;
- const size_t remaining = input_.size() - cursor;
- const size_t start_prefix = std::min(start_think.size(), remaining);
- const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
-
- if (has_start_tag && start_prefix < start_think.size()) {
- move_to(input_.size());
- return true;
- }
-
- if (has_start_tag) {
- if (whitespace_end > pos_) {
- add_content(input_.substr(pos_, whitespace_end - pos_));
- }
- set_reasoning_prefix(cursor);
- cursor += start_think.size();
- } else if (syntax_.thinking_forced_open) {
- cursor = whitespace_end;
- } else {
- restore_state();
- return false;
- }
- while (true) {
- if (cursor >= input_.size()) {
- move_to(input_.size());
- return true;
- }
-
- size_t end_pos = input_.find(end_think, cursor);
- if (end_pos == std::string::npos) {
- std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
- size_t partial_off = string_find_partial_stop(remaining_view, end_think);
- size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
- if (reasoning_end > cursor) {
- handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
- }
- move_to(input_.size());
- return true;
- }
-
- if (end_pos > cursor) {
- handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
- } else {
- handle_reasoning("", /* closed */ true);
- }
-
- cursor = end_pos + end_think.size();
-
- while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
- ++cursor;
- }
-
- const size_t next_remaining = input_.size() - cursor;
- if (next_remaining == 0) {
- move_to(cursor);
- return true;
- }
-
- const size_t next_prefix = std::min(start_think.size(), next_remaining);
- if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
- if (next_prefix < start_think.size()) {
- move_to(input_.size());
- return true;
- }
- set_reasoning_prefix(cursor);
- cursor += start_think.size();
- continue;
- }
-
- move_to(cursor);
- return true;
- }
-}
-
-std::string common_chat_msg_parser::consume_rest() {
- auto rest = input_.substr(pos_);
- pos_ = input_.size();
- return rest;
-}
-
-// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
- auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
- if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
- return std::nullopt;
- }
- auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
- pos_ = m.groups[0].end;
-
- if (add_prelude_to_content) {
- add_content(prelude);
- }
- if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
- if (is_partial()) {
- throw common_chat_msg_partial_exception(regex.str());
- }
- return std::nullopt;
- }
- return find_regex_result{prelude, m.groups};
-}
-
-common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
- if (auto result = try_consume_regex(regex)) {
- return *result;
- }
- throw common_chat_msg_partial_exception(regex.str());
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
- auto m = regex.search(input_, pos_);
- if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
- return std::nullopt;
- }
- if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
- if (is_partial()) {
- throw common_chat_msg_partial_exception(regex.str());
- }
- return std::nullopt;
- }
- if (m.groups[0].begin != pos_) {
- // Didn't match at the current position.
- return std::nullopt;
- }
- pos_ = m.groups[0].end;
-
- return find_regex_result {
- /* .prelude = */ "",
- m.groups,
- };
-}
-
-std::optional<common_json> common_chat_msg_parser::try_consume_json() {
- auto it = input_.cbegin() + pos_;
- const auto end = input_.cend();
- common_json result;
- if (!common_json_parse(it, end, healing_marker_, result)) {
- return std::nullopt;
- }
- pos_ = std::distance(input_.cbegin(), it);
- if (result.healing_marker.marker.empty()) {
- // No healing marker, just return the parsed json
- return result;
- }
- if (!is_partial()) {
- throw common_chat_msg_partial_exception("JSON");
- }
- return result;
-}
-
-common_json common_chat_msg_parser::consume_json() {
- if (auto result = try_consume_json()) {
- return *result;
- }
- throw common_chat_msg_partial_exception("JSON");
-}
-
-common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
- const std::vector<std::vector<std::string>> & args_paths,
- const std::vector<std::vector<std::string>> & content_paths
-) {
- if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
- return *result;
- }
- throw common_chat_msg_partial_exception("JSON");
-}
-
-std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
- const std::vector<std::vector<std::string>> & args_paths,
- const std::vector<std::vector<std::string>> & content_paths
-) {
- auto partial = try_consume_json();
- if (!partial) {
- return std::nullopt;
- }
- auto is_arguments_path = [&](const std::vector<std::string> & path) {
- return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
- };
- auto is_content_path = [&](const std::vector<std::string> & path) {
- return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
- };
-
- if (partial->healing_marker.marker.empty()) {
- if (args_paths.empty()) {
- // No arguments to dump, and JSON was parsed fully.
- return consume_json_result {
- partial->json,
- /* .is_partial = */ false,
- };
- }
- if (is_arguments_path({})) {
- // Entire JSON is the arguments and was parsed fully.
- return consume_json_result {
- partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
- /* .is_partial = */ false,
- };
- }
- }
-
- LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-
- auto found_healing_marker = false;
- std::vector<std::string> path;
- std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
- if (is_arguments_path(path)) {
- auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
- if (is_partial() && !partial->healing_marker.marker.empty()) {
- auto idx = arguments.find(partial->healing_marker.json_dump_marker);
- if (idx != std::string::npos) {
- arguments.resize(idx);
- found_healing_marker = true;
- }
- if (arguments == "\"") {
- // This happens because of completing `:"$magic` after `"arguments"`
- arguments = "";
- }
- }
- return arguments;
- }
- if (is_content_path(path)) {
- if (!j.is_string()) {
- throw std::runtime_error("Content path must be a string");
- }
- std::string str = j;
- auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
- if (idx != std::string::npos) {
- str.resize(idx);
- found_healing_marker = true;
- }
- return str;
- }
- if (j.is_object()) {
- auto obj = json::object();
- for (const auto & p : j.items()) {
- const auto & key = p.key();
- const auto & value = p.value();
- const std::string key_str = key; // NOLINT
- auto idx = key_str.find(healing_marker_);
- if (idx != std::string::npos) {
- found_healing_marker = true;
- break;
- }
- path.push_back(key_str);
- if (value.is_string()) {
- const std::string value_str = value;
- if (value_str.find(healing_marker_) != std::string::npos) {
- found_healing_marker = true;
- if (is_content_path(path)) {
- if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
- // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
- obj[key] = remove_unsupported_healings_and_dump_args(value);
- }
- }
- break;
- }
- obj[key] = value;
- } else {
- obj[key] = remove_unsupported_healings_and_dump_args(value);
- }
- path.pop_back();
- }
- return obj;
- }
- if (j.is_array()) {
- auto arr = json::array();
- for (const auto & value : j) {
- if (value.is_string()) {
- std::string str = value;
- auto idx = str.find(healing_marker_);
- if (idx != std::string::npos) {
- // Don't heal array values that aren't in the arguments.
- found_healing_marker = true;
- break;
- }
- }
- arr.push_back(remove_unsupported_healings_and_dump_args(value));
- }
- return arr;
- }
- return j;
- };
-
- auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
- LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
- return consume_json_result {
- cleaned,
- /* .is_partial = */ found_healing_marker,
- };
-}
-
-void common_chat_msg_parser::clear_tools() {
- result_.tool_calls.clear();
-}
-
-/**
- * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
- * to reduce incremental compile time for parser changes.
- */
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- static const std::vector<std::vector<std::string>> content_paths = {
- {"response"},
- };
- static const std::vector<std::vector<std::string>> args_paths = {
- {"tool_call", "arguments"},
- {"tool_calls", "arguments"},
- };
- auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
- if (data.value.contains("tool_calls")) {
- if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool calls");
- }
- } else if (data.value.contains("tool_call")) {
- if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- } else if (data.value.contains("response")) {
- const auto & response = data.value.at("response");
- builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
- if (data.is_partial) {
- throw common_chat_msg_partial_exception("incomplete response");
- }
- } else {
- throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
- }
-}
-
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
- parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
- parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
- static const common_regex start_action_regex("<\\|START_ACTION\\|>");
- static const common_regex end_action_regex("<\\|END_ACTION\\|>");
- static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
- static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
- if (auto res = builder.try_find_regex(start_action_regex)) {
- // If we didn't extract thoughts, prelude includes them.
- auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
- for (const auto & tool_call : tool_calls.value) {
- std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
- std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
- std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
- if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- if (tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_regex(end_action_regex);
- } else if (auto res = builder.try_find_regex(start_response_regex)) {
- if (!builder.try_find_regex(end_response_regex)) {
- builder.add_content(builder.consume_rest());
- throw common_chat_msg_partial_exception(end_response_regex.str());
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
- builder.try_parse_reasoning("<think>", "</think>");
-
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex function_regex(
- "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
- static const common_regex close_regex("\\}\\s*");
-
- static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
- static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
- if (with_builtin_tools) {
- static const common_regex builtin_call_regex("<\\|python_tag\\|>");
- if (auto res = builder.try_find_regex(builtin_call_regex)) {
- auto fun_res = builder.consume_regex(function_name_regex);
- auto function_name = builder.str(fun_res.groups[1]);
-
- common_healing_marker healing_marker;
- json args = json::object();
- while (true) {
- if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
- auto arg_name = builder.str(arg_res->groups[1]);
- auto partial = builder.consume_json();
- args[arg_name] = partial.json;
- healing_marker.marker = partial.healing_marker.marker;
- healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
- builder.consume_spaces();
- if (!builder.try_consume_literal(",")) {
- break;
- }
- } else {
- break;
- }
- }
- builder.consume_literal(")");
- builder.consume_spaces();
-
- auto arguments = args.dump();
- if (!builder.add_tool_call(function_name, "", arguments)) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- return;
- }
- }
- parse_json_tool_calls(
- builder,
- /* block_open= */ std::nullopt,
- /* function_regex_start_only= */ function_regex,
- /* function_regex= */ std::nullopt,
- close_regex,
- std::nullopt);
-
-}
-
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<think>", "</think>");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
- static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ tool_calls_begin,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
- static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
-
- static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
- static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
- static const common_regex tool_calls_end("<|tool▁calls▁end|>");
-
- if (!builder.syntax().parse_tool_calls) {
- LOG_DBG("%s: not parse_tool_calls\n", __func__);
- builder.add_content(builder.consume_rest());
- return;
- }
-
- LOG_DBG("%s: parse_tool_calls\n", __func__);
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ tool_calls_begin,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
- // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
- // First try to parse using the standard reasoning parsing method
- LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
- auto start_pos = builder.pos();
- auto found_end_think = builder.try_find_literal("</think>");
- builder.move_to(start_pos);
-
- if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
- LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
- common_chat_parse_deepseek_v3_1_content(builder);
- } else if (builder.try_parse_reasoning("<think>", "</think>")) {
- // If reasoning was parsed successfully, the remaining content is regular content
- LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
- // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
- common_chat_parse_deepseek_v3_1_content(builder);
- } else {
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
- LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
- common_chat_parse_deepseek_v3_1_content(builder);
- return;
- }
- // If no reasoning tags found, check if we should treat everything as reasoning
- if (builder.syntax().thinking_forced_open) {
- // If thinking is forced open but no tags found, treat everything as reasoning
- LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
- builder.add_reasoning_content(builder.consume_rest());
- } else {
- LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
- // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
- common_chat_parse_deepseek_v3_1_content(builder);
- }
- }
-}
-
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "<minimax:tool_call>",
- /* form.tool_start = */ "<invoke name=\"",
- /* form.tool_sep = */ "\">",
- /* form.key_start = */ "<parameter name=\"",
- /* form.key_val_sep = */ "\">",
- /* form.val_end = */ "</parameter>",
- /* form.tool_end = */ "</invoke>",
- /* form.scope_end = */ "</minimax:tool_call>",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "<|tool_calls_section_begin|>";
- form.tool_start = "<|tool_call_begin|>";
- form.tool_sep = "<|tool_call_argument_begin|>{";
- form.key_start = "\"";
- form.key_val_sep = "\":";
- form.val_end = ",";
- form.tool_end = "}<|tool_call_end|>";
- form.scope_end = "<|tool_calls_section_end|>";
- form.raw_argval = false;
- form.last_val_end = "";
- form.allow_toolcall_in_think = true;
- return form;
- })();
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "<tool_calls>[";
- form.tool_start = "{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}, ";
- form.scope_end = "]</tool_calls>";
- form.raw_argval = false;
- form.last_val_end = "";
- form.last_tool_end = "}";
- return form;
- })();
- builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
-}
-
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "";
- form.tool_start = "<tool_call>\n{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}\n</tool_call>";
- form.scope_end = "";
- form.raw_argval = false;
- form.last_val_end = "";
- return form;
- })();
- builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
- static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
- static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
- static const common_regex start_regex("<\\|start\\|>assistant");
- static const common_regex analysis_regex("<\\|channel\\|>analysis");
- static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
- static const common_regex preamble_regex("<\\|channel\\|>commentary");
- static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
- static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
- auto consume_end = [&](bool include_end = false) {
- if (auto res = builder.try_find_literal("<|end|>")) {
- return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
- }
- return builder.consume_rest();
- };
-
- auto handle_tool_call = [&](const std::string & name) {
- if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
- if (builder.syntax().parse_tool_calls) {
- if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- } else if (args->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- };
-
- auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
- auto match = regex.search(input, 0, true);
- if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
- return match;
- }
- return std::nullopt;
- };
-
- do {
- auto header_start_pos = builder.pos();
- auto content_start = builder.try_find_literal("<|message|>");
- if (!content_start) {
- throw common_chat_msg_partial_exception("incomplete header");
- }
-
- auto header = content_start->prelude;
-
- if (auto match = regex_match(tool_call1_regex, header)) {
- auto group = match->groups[1];
- auto name = header.substr(group.begin, group.end - group.begin);
- handle_tool_call(name);
- continue;
- }
-
- if (auto match = regex_match(tool_call2_regex, header)) {
- auto group = match->groups[2];
- auto name = header.substr(group.begin, group.end - group.begin);
- handle_tool_call(name);
- continue;
- }
-
- if (regex_match(analysis_regex, header)) {
- builder.move_to(header_start_pos);
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
- builder.add_content(consume_end(true));
- } else {
- builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
- }
- continue;
- }
-
- if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
- builder.add_content(consume_end());
- continue;
- }
-
- // Possibly a malformed message, attempt to recover by rolling
- // back to pick up the next <|start|>
- LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
- builder.move_to(header_start_pos);
- } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
- auto remaining = builder.consume_rest();
- if (!remaining.empty()) {
- LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
- }
-}
-
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "<tool_call>",
- /* form.tool_sep = */ "",
- /* form.key_start = */ "<arg_key>",
- /* form.key_val_sep = */ "</arg_key>",
- /* form.val_end = */ "</arg_value>",
- /* form.tool_end = */ "</tool_call>",
- /* form.scope_end = */ "",
- /* form.key_val_sep2 = */ "<arg_value>",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- static const common_regex prefix(regex_escape(" functools["));
- parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
-
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
- static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
- static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
- static const common_regex close_regex(R"(\s*)");
-
- parse_json_tool_calls(
- builder,
- std::nullopt,
- function_regex_start_only,
- function_regex,
- close_regex,
- std::nullopt,
- /* allow_raw_python= */ true,
- /* get_function_name= */ [&](const auto & res) -> std::string {
- auto at_start = res.groups[0].begin == 0;
- auto name = builder.str(res.groups[1]);
- if (!name.empty() && name.back() == '{') {
- // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
- builder.move_back(1);
- }
- auto idx = name.find_last_not_of("\n{");
- name = name.substr(0, idx + 1);
- if (at_start && name == "all") {
- return "";
- }
- return name;
- });
-}
-
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
- // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
- static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
- static const common_regex function_regex(R"(<function=(\w+)>)");
- static const common_regex close_regex(R"(</function>)");
-
- parse_json_tool_calls(
- builder,
- /* block_open= */ std::nullopt,
- /* function_regex_start_only= */ std::nullopt,
- function_regex,
- close_regex,
- std::nullopt);
-
- if (auto res = builder.try_find_regex(python_tag_regex)) {
- auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
- builder.add_tool_call("python", "", arguments);
- return;
- }
-}
-
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<think>", "</think>");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- static const common_regex open_regex(
- "(?:"
- "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
- "(" // match 2 (open_tag)
- "<tool_call>"
- "|<function_call>"
- "|<tool>"
- "|<tools>"
- "|<response>"
- "|<json>"
- "|<xml>"
- "|<JSON>"
- ")?"
- "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
- ")"
- "|<function=([^>]+)>" // match 4 (function name)
- "|<function name=\"([^\"]+)\">" // match 5 (function name again)
- );
-
- while (auto res = builder.try_find_regex(open_regex)) {
- const auto & block_start = res->groups[1];
- std::string block_end = block_start.empty() ? "" : "```";
-
- const auto & open_tag = res->groups[2];
- std::string close_tag;
-
- if (!res->groups[3].empty()) {
- builder.move_to(res->groups[3].begin);
- close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
-
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
- if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_spaces();
- builder.consume_literal(close_tag);
- builder.consume_spaces();
- if (!block_end.empty()) {
- builder.consume_literal(block_end);
- builder.consume_spaces();
- }
- } else {
- throw common_chat_msg_partial_exception("failed to parse tool call");
- }
- } else {
- auto function_name = builder.str(res->groups[4]);
- if (function_name.empty()) {
- function_name = builder.str(res->groups[5]);
- }
- GGML_ASSERT(!function_name.empty());
-
- close_tag = "</function>";
-
- if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
- if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_spaces();
- builder.consume_literal(close_tag);
- builder.consume_spaces();
- if (!block_end.empty()) {
- builder.consume_literal(block_end);
- builder.consume_spaces();
- }
- }
- }
- }
-
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
- // Parse thinking tags
- static const common_regex start_think_regex(regex_escape("<think>"));
- static const common_regex end_think_regex(regex_escape("</think>"));
- // Granite models output partial tokens such as "<" and "<think".
- // By leveraging try_consume_regex()/try_find_regex() throwing
- // common_chat_msg_partial_exception for these partial tokens,
- // processing is interrupted and the tokens are not passed to add_content().
- if (auto res = builder.try_consume_regex(start_think_regex)) {
- // Restore position for try_parse_reasoning()
- builder.move_to(res->groups[0].begin);
- builder.try_find_regex(end_think_regex, std::string::npos, false);
- // Restore position for try_parse_reasoning()
- builder.move_to(res->groups[0].begin);
- }
- builder.try_parse_reasoning("<think>", "</think>");
-
- // Parse response tags
- static const common_regex start_response_regex(regex_escape("<response>"));
- static const common_regex end_response_regex(regex_escape("</response>"));
- // Granite models output partial tokens such as "<" and "<response".
- // Same hack as reasoning parsing.
- if (builder.try_consume_regex(start_response_regex)) {
- builder.try_find_regex(end_response_regex);
- }
-
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // Look for tool calls
- static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- // Expect JSON array of tool calls
- if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
- if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
- // Parse thinking tags
- builder.try_parse_reasoning("<think>", "</think>");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // Look for tool calls
- static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- // Expect JSON array of tool calls
- auto tool_calls_data = builder.consume_json();
- if (tool_calls_data.json.is_array()) {
- if (!builder.try_consume_literal("</TOOLCALL>")) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- builder.add_tool_calls(tool_calls_data.json);
- } else {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
- // Parse thinking tags
- builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // Look for tool calls
- static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
- if (auto res = builder.try_find_regex(tool_call_regex)) {
- builder.move_to(res->groups[0].end);
-
- auto tool_calls_data = builder.consume_json();
- if (tool_calls_data.json.is_array()) {
- builder.consume_spaces();
- if (!builder.try_consume_literal("<|tools_suffix|>")) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- for (const auto & value : tool_calls_data.json) {
- if (value.is_object()) {
- builder.add_tool_call_short_form(value);
- }
- }
- } else {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
- if (!builder.syntax().parse_tool_calls) {
- builder.add_content(builder.consume_rest());
- return;
- }
-
- // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
- static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
- static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
- // Loop through all tool calls
- while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
- builder.move_to(res->groups[0].end);
-
- // Parse JSON array format: [{"name": "...", "arguments": {...}}]
- auto tool_calls_data = builder.consume_json();
-
- // Consume end marker
- builder.consume_spaces();
- if (!builder.try_consume_regex(tool_call_end_regex)) {
- throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
- }
-
- // Process each tool call in the array
- if (tool_calls_data.json.is_array()) {
- for (const auto & tool_call : tool_calls_data.json) {
- if (!tool_call.is_object()) {
- throw common_chat_msg_partial_exception("Tool call must be an object");
- }
-
- if (!tool_call.contains("name")) {
- throw common_chat_msg_partial_exception("Tool call missing 'name' field");
- }
-
- std::string function_name = tool_call.at("name");
- std::string arguments = "{}";
-
- if (tool_call.contains("arguments")) {
- if (tool_call.at("arguments").is_object()) {
- arguments = tool_call.at("arguments").dump();
- } else if (tool_call.at("arguments").is_string()) {
- arguments = tool_call.at("arguments");
- }
- }
-
- if (!builder.add_tool_call(function_name, "", arguments)) {
- throw common_chat_msg_partial_exception("Incomplete tool call");
- }
- }
- } else {
- throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
- }
-
- // Consume any trailing whitespace after this tool call
- builder.consume_spaces();
- }
-
- // Consume any remaining content after all tool calls
- auto remaining = builder.consume_rest();
- if (!string_strip(remaining).empty()) {
- builder.add_content(remaining);
- }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
- static const xml_tool_call_format form {
- /* form.scope_start = */ "<seed:tool_call>",
- /* form.tool_start = */ "<function=",
- /* form.tool_sep = */ ">",
- /* form.key_start = */ "<parameter=",
- /* form.key_val_sep = */ ">",
- /* form.val_end = */ "</parameter>",
- /* form.tool_end = */ "</function>",
- /* form.scope_end = */ "</seed:tool_call>",
- };
- builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
-}
-
-static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
-
- // TODO: Tool calling
-
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
- // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
- // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
- static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
-
- if (!builder.syntax().parse_tool_calls) {
- LOG_DBG("%s: not parse_tool_calls\n", __func__);
- builder.add_content(builder.consume_rest());
- return;
- }
-
- LOG_DBG("%s: parse_tool_calls\n", __func__);
-
- // Find all <tool_call></tool_call> blocks
- while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
- builder.move_to(first->groups[0].end);
- builder.consume_spaces();
-
- builder.try_consume_literal("```json");
- builder.try_consume_literal("```");
- builder.consume_spaces();
-
- // Consume JSON object
- auto data = builder.consume_json();
-
- builder.consume_spaces();
- builder.try_consume_literal("```");
- builder.consume_spaces();
-
- if (!builder.try_consume_literal("</tool_call>")) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_spaces();
-
- // Extract name and arguments
- std::string name;
- std::string id;
- nlohmann::ordered_json arguments;
-
- const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
- if (!obj.contains("name") || !obj.contains("arguments")) {
- return false;
- }
- name = obj.at("name").get<std::string>();
- arguments = obj.at("arguments");
- if (obj.contains("id") && obj.at("id").is_string()) {
- id = obj.at("id").get<std::string>();
- }
- return true;
- };
-
- if (!extract_args(data.json)) {
- if (data.json.contains("function") && data.json.at("function").is_object()) {
- auto fn = data.json.at("function");
- extract_args(fn);
- if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
- id = data.json.at("id").get<std::string>();
- }
- }
- }
-
- // If name is empty, treat the JSON object as content
- if (name.empty()) {
- LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
- builder.add_content(data.json.dump());
- continue;
- }
-
- std::string args_str = arguments.dump();
- if (!builder.add_tool_call(name, id, args_str)) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
-
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
- LOG_DBG("%s: parsing exaone_moe\n", __func__);
- // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
- // First try to parse using the standard reasoning parsing method
- LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
- auto start_pos = builder.pos();
- auto found_end_think = builder.try_find_literal("</think>");
- builder.move_to(start_pos);
-
- if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
- LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
- common_chat_parse_exaone_moe_content(builder);
- } else if (builder.try_parse_reasoning("<think>", "</think>")) {
- // If reasoning was parsed successfully, the remaining content is regular content
- LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
- common_chat_parse_exaone_moe_content(builder);
- } else {
- if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
- LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
- common_chat_parse_exaone_moe_content(builder);
- return;
- }
- // If no reasoning tags found, check if we should treat everything as reasoning
- if (builder.syntax().thinking_forced_open) {
- // If thinking is forced open but no tags found, treat everything as reasoning
- LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
- builder.add_reasoning_content(builder.consume_rest());
- } else {
- LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
- common_chat_parse_exaone_moe_content(builder);
- }
- }
-}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
- builder.try_parse_reasoning("<think>", "</think>");
- builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
- LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
- switch (builder.syntax().format) {
- case COMMON_CHAT_FORMAT_CONTENT_ONLY:
- common_chat_parse_content_only(builder);
- break;
- case COMMON_CHAT_FORMAT_GENERIC:
- common_chat_parse_generic(builder);
- break;
- case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
- common_chat_parse_mistral_nemo(builder);
- break;
- case COMMON_CHAT_FORMAT_MAGISTRAL:
- common_chat_parse_magistral(builder);
- break;
- case COMMON_CHAT_FORMAT_LLAMA_3_X:
- common_chat_parse_llama_3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
- common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
- break;
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
- common_chat_parse_deepseek_r1(builder);
- break;
- case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
- common_chat_parse_deepseek_v3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
- common_chat_parse_functionary_v3_2(builder);
- break;
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
- common_chat_parse_functionary_v3_1_llama_3_1(builder);
- break;
- case COMMON_CHAT_FORMAT_HERMES_2_PRO:
- common_chat_parse_hermes_2_pro(builder);
- break;
- case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
- common_chat_parse_firefunction_v2(builder);
- break;
- case COMMON_CHAT_FORMAT_COMMAND_R7B:
- common_chat_parse_command_r7b(builder);
- break;
- case COMMON_CHAT_FORMAT_GRANITE:
- common_chat_parse_granite(builder);
- break;
- case COMMON_CHAT_FORMAT_GPT_OSS:
- common_chat_parse_gpt_oss(builder);
- break;
- case COMMON_CHAT_FORMAT_SEED_OSS:
- common_chat_parse_seed_oss(builder);
- break;
- case COMMON_CHAT_FORMAT_NEMOTRON_V2:
- common_chat_parse_nemotron_v2(builder);
- break;
- case COMMON_CHAT_FORMAT_APERTUS:
- common_chat_parse_apertus(builder);
- break;
- case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
- common_chat_parse_lfm2(builder);
- break;
- case COMMON_CHAT_FORMAT_MINIMAX_M2:
- common_chat_parse_minimax_m2(builder);
- break;
- case COMMON_CHAT_FORMAT_GLM_4_5:
- common_chat_parse_glm_4_5(builder);
- break;
- case COMMON_CHAT_FORMAT_KIMI_K2:
- common_chat_parse_kimi_k2(builder);
- break;
- case COMMON_CHAT_FORMAT_APRIEL_1_5:
- common_chat_parse_apriel_1_5(builder);
- break;
- case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
- common_chat_parse_xiaomi_mimo(builder);
- break;
- case COMMON_CHAT_FORMAT_SOLAR_OPEN:
- common_chat_parse_solar_open(builder);
- break;
- case COMMON_CHAT_FORMAT_EXAONE_MOE:
- common_chat_parse_exaone_moe(builder);
- break;
- default:
- throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
- }
- builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
- if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
- syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
- syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
- return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
- }
- common_chat_msg_parser builder(input, is_partial, syntax);
- try {
- common_chat_parse(builder);
- } catch (const common_chat_msg_partial_exception & ex) {
- LOG_DBG("Partial parse: %s\n", ex.what());
- if (!is_partial) {
- builder.clear_tools();
- builder.move_to(0);
- common_chat_parse_content_only(builder);
- }
- }
- auto msg = builder.result();
- if (!is_partial) {
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
- }
- return msg;
-}
-
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
- if (parser.empty()) {
- throw std::runtime_error("Failed to parse due to missing parser definition.");
- }
-
- LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
-
- common_peg_parse_context ctx(input, is_partial);
- auto result = parser.parse(ctx);
- if (result.fail()) {
- throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
- }
-
- common_chat_msg msg;
- msg.role = "assistant";
-
- if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
- auto mapper = common_chat_peg_native_mapper(msg);
- mapper.from_ast(ctx.ast, result);
- } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
- auto mapper = common_chat_peg_constructed_mapper(msg);
- mapper.from_ast(ctx.ast, result);
- } else {
- // Generic mapper
- auto mapper = common_chat_peg_mapper(msg);
- mapper.from_ast(ctx.ast, result);
- }
- if (!is_partial) {
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
- }
- return msg;
-}
+++ /dev/null
-#pragma once
-
-#include "chat.h"
-#include "chat-parser-xml-toolcall.h"
-#include "json-partial.h"
-#include "regex-partial.h"
-
-#include <nlohmann/json_fwd.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-class common_chat_msg_partial_exception : public std::runtime_error {
- public:
- common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-class common_chat_msg_parser {
- std::string input_;
- bool is_partial_;
- common_chat_parser_params syntax_; // TODO: rename to params
- std::string healing_marker_;
-
- size_t pos_ = 0;
- common_chat_msg result_;
-
- public:
- common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
- const std::string & input() const { return input_; }
- size_t pos() const { return pos_; }
- const std::string & healing_marker() const { return healing_marker_; }
- const bool & is_partial() const { return is_partial_; }
- const common_chat_msg & result() const { return result_; }
- const common_chat_parser_params & syntax() const { return syntax_; }
-
- void move_to(size_t pos) {
- if (pos > input_.size()) {
- throw std::runtime_error("Invalid position!");
- }
- pos_ = pos;
- }
- void move_back(size_t n) {
- if (pos_ < n) {
- throw std::runtime_error("Can't move back that far!");
- }
- pos_ -= n;
- }
-
- // Get the substring of the input at the given range
- std::string str(const common_string_range & rng) const;
-
- // Appends to the result.content field
- void add_content(const std::string & content);
-
- // Appends to the result.reasoning_content field
- void add_reasoning_content(const std::string & reasoning_content);
-
- // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
- bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
-
- // Adds a tool call using the "name", "id" and "arguments" fields of the json object
- bool add_tool_call(const nlohmann::ordered_json & tool_call);
-
- // Adds an array of tool calls using their "name", "id" and "arguments" fields.
- bool add_tool_calls(const nlohmann::ordered_json & arr);
-
- // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
- bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
-
- void finish();
-
- bool consume_spaces();
-
- void consume_literal(const std::string & literal);
-
- bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
-
- std::string consume_rest();
-
- struct find_regex_result {
- std::string prelude;
- std::vector<common_string_range> groups;
- };
-
- std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
-
- bool try_consume_literal(const std::string & literal);
-
- std::optional<find_regex_result> try_find_literal(const std::string & literal);
-
- find_regex_result consume_regex(const common_regex & regex);
-
- std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
-
- std::optional<common_json> try_consume_json();
- common_json consume_json();
-
- struct consume_json_result {
- nlohmann::ordered_json value;
- bool is_partial;
- };
-
- /*
- Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
-
- By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
- e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
-
- But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
- - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
- - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
- */
- consume_json_result consume_json_with_dumped_args(
- const std::vector<std::vector<std::string>> & args_paths = {},
- const std::vector<std::vector<std::string>> & content_paths = {}
- );
- std::optional<consume_json_result> try_consume_json_with_dumped_args(
- const std::vector<std::vector<std::string>> & args_paths = {},
- const std::vector<std::vector<std::string>> & content_paths = {}
- );
-
- /**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
- bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
-
- // Parse content uses reasoning and XML-Style tool call
- void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
-
- void clear_tools();
-};
#include "chat-peg-parser.h"
+#include "chat-auto-parser.h"
+#include "ggml.h"
+#include "peg-parser.h"
+
#include <nlohmann/json.hpp>
-using json = nlohmann::json;
+using json = nlohmann::ordered_json;
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
int count = 0;
while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
- if (max != -1 && count <= max) {
+ if (max != -1 && count >= max) {
break;
}
sv.remove_suffix(1);
return sv;
}
-void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
- arena.visit(result, [this](const common_peg_ast_node & node) {
- map(node);
- });
+static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
+ int count = 0;
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
+ if (max != -1 && count >= max) {
+ break;
+ }
+ sv.remove_prefix(1);
+ count++;
+ }
+ return sv;
}
-void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
- bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
- bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+static std::string_view trim(std::string_view sv) {
+ return trim_trailing_space(trim_leading_space(sv, 1));
+}
- if (is_reasoning) {
- result.reasoning_content = std::string(trim_trailing_space(node.text));
+// Count the number of unclosed '{' braces in a JSON-like string,
+// properly skipping braces inside quoted strings.
+static int json_brace_depth(const std::string & s) {
+ int depth = 0;
+ bool in_string = false;
+ bool escaped = false;
+ for (char c : s) {
+ if (escaped) {
+ escaped = false;
+ continue;
+ }
+ if (c == '\\' && in_string) {
+ escaped = true;
+ continue;
+ }
+ if (c == '"') {
+ in_string = !in_string;
+ continue;
+ }
+ if (!in_string) {
+ if (c == '{') {
+ depth++;
+ } else if (c == '}') {
+ depth--;
+ }
+ }
}
+ return depth;
+}
- if (is_content) {
- result.content = std::string(trim_trailing_space(node.text));
+// JSON-escape a string and return the inner content (without surrounding quotes).
+static std::string escape_json_string_inner(const std::string & s) {
+ std::string escaped = json(s).dump();
+ if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
+ return escaped.substr(1, escaped.size() - 2);
}
+ return escaped;
}
-void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
- common_chat_peg_mapper::map(node);
+// Convert Python-style single-quoted strings to JSON double-quoted strings
+// Only converts outer string delimiters, properly handling escape sequences:
+// - {'key': 'value'} -> {"key": "value"}
+// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
+// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
+static std::string normalize_quotes_to_json(const std::string & input) {
+ std::string result;
+ result.reserve(input.size() + 16); // May need extra space for escaping
- bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
- bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
- bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
- bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+ bool in_single_quoted = false;
+ bool in_double_quoted = false;
- if (is_tool_open) {
- result.tool_calls.emplace_back();
- current_tool = &result.tool_calls.back();
+ for (size_t i = 0; i < input.size(); ++i) {
+ char c = input[i];
+
+ // Handle escape sequences
+ if (c == '\\' && i + 1 < input.size()) {
+ char next = input[i + 1];
+
+ if (in_single_quoted) {
+ // Inside a single-quoted string being converted to double quotes
+ if (next == '\'') {
+ // \' -> ' (escaped single quote becomes unescaped in double-quoted string)
+ result += '\'';
+ ++i;
+ continue;
+ }
+ if (next == '"') {
+ // \" stays as \" (already escaped, works in double-quoted string)
+ result += "\\\"";
+ ++i;
+ continue;
+ }
+ // Other escapes (\n, \\, etc.): pass through both characters
+ result += c;
+ result += next;
+ ++i;
+ continue;
+ }
+
+ if (in_double_quoted) {
+ // Inside a double-quoted string - pass through escape sequences as-is
+ result += c;
+ result += next;
+ ++i;
+ continue;
+ }
+
+ // Outside any string - just pass through the backslash
+ result += c;
+ continue;
+ }
+
+ // Handle quote characters
+ if (c == '"') {
+ if (in_single_quoted) {
+ // Unescaped double quote inside single-quoted string -> must escape for JSON
+ result += "\\\"";
+ } else {
+ // Double quote as string delimiter or outside strings
+ in_double_quoted = !in_double_quoted;
+ result += c;
+ }
+ } else if (c == '\'') {
+ if (in_double_quoted) {
+ // Single quote inside double-quoted string -> pass through
+ result += c;
+ } else if (in_single_quoted) {
+ // Closing single quote -> convert to double quote
+ in_single_quoted = false;
+ result += '"';
+ } else {
+ // Opening single quote -> convert to double quote
+ in_single_quoted = true;
+ result += '"';
+ }
+ } else {
+ result += c;
+ }
}
- if (is_tool_id && current_tool) {
- current_tool->id = std::string(trim_trailing_space(node.text));
+ return result;
+}
+
+void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+ arena.visit(result, [this](const common_peg_ast_node & node) {
+ if (!node.tag.empty()) {
+ tags[node.tag] = std::string(node.text);
+ }
+ });
+}
+
+tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const {
+ common_peg_parse_context ctx(input, is_partial);
+ auto parse_result = arena.parse(ctx);
+
+ tag_based_peg_mapper mapper;
+ mapper.from_ast(ctx.ast, parse_result);
+
+ return { std::move(parse_result), std::move(mapper.tags) };
+}
+
+tagged_parse_result tagged_peg_parser::parse_anywhere_and_extract(const std::string & input) const {
+ if (input.empty()) {
+ return parse_and_extract(input, false);
+ }
+ for (size_t i = 0; i < input.size(); i++) {
+ common_peg_parse_context ctx(input, false);
+ ctx.debug = debug;
+ auto parse_result = arena.parse(ctx, i);
+ if (parse_result.success() || i == input.size() - 1) {
+ tag_based_peg_mapper mapper;
+ mapper.from_ast(ctx.ast, parse_result);
+ return { std::move(parse_result), std::move(mapper.tags) };
+ }
}
+ GGML_ABORT("Should not happen");
+}
- if (is_tool_name && current_tool) {
- current_tool->name = std::string(trim_trailing_space(node.text));
+tagged_peg_parser build_tagged_peg_parser(
+ const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn) {
+ common_peg_parser_builder builder;
+ builder.set_root(fn(builder));
+ return { builder.build() };
+}
+
+common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string & tag_name,
+ const std::string & marker,
+ const common_peg_parser & p) {
+ if (marker.empty()) {
+ return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
}
+ auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
+ return zero_or_more(choice({ p, content_chunk }));
+}
- if (is_tool_args && current_tool) {
- current_tool->arguments = std::string(trim_trailing_space(node.text));
+std::string & common_chat_peg_mapper::args_target() {
+ return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer;
+}
+
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena,
+ const common_peg_parse_result & parse_result_arg) {
+ arena.visit(parse_result_arg, [this](const common_peg_ast_node & node) { map(node); });
+ // Flush any pending tool call that was started but never got a name
+ // This happens during partial parsing when the tool call is incomplete
+ if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
+ if (!args_buffer.empty()) {
+ pending_tool_call->arguments = args_buffer;
+ }
+ if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
+ pending_tool_call->arguments += "\"";
+ }
+ result.tool_calls.push_back(pending_tool_call.value());
+ pending_tool_call.reset();
}
}
-void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
- common_chat_peg_mapper::map(node);
+void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+ // Handle reasoning/content tags
+ bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+ bool is_content = node.tag == common_chat_peg_builder::CONTENT;
- bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
- bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
- bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
- bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
- bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
- bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
- bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
- bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+ if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
+ result.reasoning_content += std::string(node.text);
+ }
+
+ if (is_content) {
+ // Concatenate content from multiple content nodes (e.g., when reasoning markers
+ // are preserved before content markers in reasoning_format=NONE mode)
+ result.content += std::string(node.text);
+ }
+
+ // Handle tool-related tags (supporting both JSON and tagged formats)
+ bool is_tool_open = node.tag == common_chat_peg_builder::TOOL_OPEN;
+ bool is_tool_close = node.tag == common_chat_peg_builder::TOOL_CLOSE;
+ bool is_tool_name = node.tag == common_chat_peg_builder::TOOL_NAME;
+ bool is_tool_id = node.tag == common_chat_peg_builder::TOOL_ID;
+ bool is_tool_args = node.tag == common_chat_peg_builder::TOOL_ARGS;
+ bool is_arg_open = node.tag == common_chat_peg_builder::TOOL_ARG_OPEN;
+ bool is_arg_close = node.tag == common_chat_peg_builder::TOOL_ARG_CLOSE;
+ bool is_arg_name = node.tag == common_chat_peg_builder::TOOL_ARG_NAME;
+ bool is_arg_value = node.tag == common_chat_peg_builder::TOOL_ARG_VALUE;
+ bool is_arg_string_value = node.tag == common_chat_peg_builder::TOOL_ARG_STRING_VALUE;
if (is_tool_open) {
- result.tool_calls.emplace_back();
- current_tool = &result.tool_calls.back();
- arg_count = 0;
+ pending_tool_call = common_chat_tool_call();
+ current_tool = &pending_tool_call.value();
+ arg_count = 0;
+ args_buffer.clear();
+ closing_quote_pending = false;
+ }
+
+ if (is_tool_id && current_tool) {
+ auto text = trim_trailing_space(node.text);
+ if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
+ text = text.substr(1, text.size() - 2);
+ }
+ current_tool->id = std::string(text);
+ }
+
+ if (is_tool_name && current_tool) {
+ current_tool->name = std::string(trim_trailing_space(node.text));
+ // Now that we have the name, populate the arguments from the buffer
+ if (!args_buffer.empty()) {
+ current_tool->arguments = args_buffer;
+ args_buffer.clear();
+ } else if (current_tool->arguments.empty()) {
+ current_tool->arguments = "{";
+ }
+ // Add the tool call to results so streaming can see it
+ if (pending_tool_call.has_value()) {
+ result.tool_calls.push_back(pending_tool_call.value());
+ pending_tool_call.reset();
+ current_tool = &result.tool_calls.back();
+ }
}
- if (is_tool_name) {
- current_tool->name = std::string(node.text);
- current_tool->arguments = "{";
+ if (is_tool_args && current_tool) {
+ // For JSON format: arguments come as a complete JSON object
+ // For tagged format: built up from individual arg_name/arg_value nodes
+ auto text = trim_trailing_space(node.text);
+ if (!text.empty() && text.front() == '{') {
+ args_target() = std::string(text);
+ }
}
if (is_arg_open) {
- needs_closing_quote = false;
+ closing_quote_pending = false;
}
if (is_arg_name && current_tool) {
+ std::string arg_entry;
if (arg_count > 0) {
- current_tool->arguments += ",";
+ arg_entry = ",";
}
- current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+ arg_entry += json(trim(node.text)).dump() + ":";
++arg_count;
+
+ auto & target = args_target();
+ if (target.empty()) {
+ target = "{";
+ }
+ target += arg_entry;
}
- if (is_arg_string && current_tool) {
- // Serialize to JSON, but exclude the end quote
- std::string dumped = json(trim_trailing_space(node.text)).dump();
- current_tool->arguments += dumped.substr(0, dumped.size() - 1);
- needs_closing_quote = true;
+ if ((is_arg_value || is_arg_string_value) && current_tool) {
+ std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+
+ std::string value_to_add;
+ if (value_content.empty() && is_arg_string_value) {
+ // Empty string value - arg_close will add the closing quote
+ value_to_add = "\"";
+ closing_quote_pending = true;
+ } else if (!value_content.empty() && is_arg_string_value) {
+ // Schema declares this as string type - always treat as literal string value
+ if (!closing_quote_pending) {
+ value_to_add = "\"";
+ closing_quote_pending = true;
+ }
+ value_to_add += escape_json_string_inner(value_content);
+ } else if (!value_content.empty()) {
+ // For potential containers, normalize Python-style single quotes to JSON double quotes
+ bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
+ if (is_potential_container) {
+ value_content = normalize_quotes_to_json(value_content);
+ }
+
+ // Try to parse as JSON value (number, bool, null, object, array)
+ try {
+ json parsed = json::parse(value_content);
+ if (parsed.is_string()) {
+ // Don't add closing quote yet (added by arg_close) for monotonic streaming
+ std::string escaped = parsed.dump();
+ if (!escaped.empty() && escaped.back() == '"') {
+ escaped.pop_back();
+ }
+ value_to_add = escaped;
+ closing_quote_pending = true;
+ } else {
+ // Non-string values: use raw content to preserve whitespace for monotonicity
+ value_to_add = value_content;
+ }
+ } catch (...) {
+ if (node.is_partial && is_potential_container) {
+ // Partial container: pass through the already-normalized content
+ value_to_add = value_content;
+ } else {
+ // Not valid JSON - treat as string value
+ if (!closing_quote_pending) {
+ value_to_add = "\"";
+ closing_quote_pending = true;
+ }
+ value_to_add += escape_json_string_inner(value_content);
+ }
+ }
+ }
+
+ args_target() += value_to_add;
}
if (is_arg_close && current_tool) {
- if (needs_closing_quote) {
+ if (closing_quote_pending) {
+ args_target() += "\"";
+ closing_quote_pending = false;
+ }
+ }
+
+ if (is_tool_close && current_tool) {
+ // Flush buffer to arguments if tool name was never seen
+ if (current_tool->name.empty() && !args_buffer.empty()) {
+ current_tool->arguments = args_buffer;
+ args_buffer.clear();
+ }
+ // Close any pending string quote
+ if (closing_quote_pending) {
current_tool->arguments += "\"";
- needs_closing_quote = false;
+ closing_quote_pending = false;
+ }
+ // Close any unclosed braces (accounts for nested objects)
+ for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
+ current_tool->arguments += "}";
+ }
+ // Add tool call to results if named; otherwise discard
+ if (pending_tool_call.has_value()) {
+ if (!current_tool->name.empty()) {
+ result.tool_calls.push_back(pending_tool_call.value());
+ }
+ pending_tool_call.reset();
+ }
+ }
+}
+
+common_peg_parser common_chat_peg_builder::standard_constructed_tools(
+ const std::map<std::string, std::string> & markers,
+ const nlohmann::json & tools,
+ bool parallel_tool_calls,
+ bool force_tool_calls) {
+ if (!tools.is_array() || tools.empty()) {
+ return eps();
+ }
+
+ // Extract markers with defaults
+ auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
+ auto it = markers.find(key);
+ return it != markers.end() ? it->second : default_val;
+ };
+
+ std::string section_start = get_marker("tool_call_start_marker", "<tool_call>");
+ std::string section_end = get_marker("tool_call_end_marker", "</tool_call>");
+ std::string func_opener = get_marker("function_opener", "<function=");
+ std::string func_name_suffix = get_marker("function_name_suffix", ">");
+ std::string func_closer = get_marker("function_closer", "</function>");
+ std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
+ std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
+ std::string param_closer = get_marker("parameter_closer", "</param>");
+
+ // Build tool choices for tagged format
+ auto tool_choices = choice();
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
}
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ // Build argument parsers
+ auto args = eps();
+ if (params.contains("properties") && !params["properties"].empty()) {
+ auto arg_choice = choice();
+ for (const auto & el : params["properties"].items()) {
+ const std::string & prop_name = el.key();
+
+ auto arg_name_parser =
+ choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+ auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
+ literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
+ tool_arg_close(literal(param_closer)));
+ arg_choice |= arg_rule;
+ }
+ args = zero_or_more(arg_choice + space());
+ }
+
+ // Build function parser: <function=name>args</function>
+ auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
+ space() + tool_args(args) + space() + tool_close(literal(func_closer)));
+
+ tool_choices |= rule("tool-" + name, tool_parser);
}
- if (is_arg_json && current_tool) {
- current_tool->arguments += std::string(trim_trailing_space(node.text));
+ // Build the section with markers
+ auto section =
+ parallel_tool_calls ?
+ trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
+ literal(section_end)) :
+ trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));
+
+ return force_tool_calls ? section : optional(section);
+}
+
+// Helper: Parse dot notation key into prefix and field name
+static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
+ auto dot_pos = key.find('.');
+ if (dot_pos == std::string::npos) {
+ return {"", key}; // Top-level field
}
+ return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
+}
- if (is_tool_close && current_tool) {
- if (needs_closing_quote) {
- current_tool->arguments += "\"";
- needs_closing_quote = false;
+// Mode 1: function_is_key — parse {"function_name": {...}}
+common_peg_parser common_chat_peg_builder::build_json_tools_function_is_key(
+ const nlohmann::json & tools,
+ const std::string & args_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key) {
+
+ auto tool_choices = choice();
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
+ }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ // Build inner object fields
+ std::vector<common_peg_parser> inner_fields;
+
+ if (!call_id_key.empty()) {
+ auto id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_id(json_string_content()) + literal("\"")
+ );
+ inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
+ }
+
+ if (!gen_call_id_key.empty()) {
+ auto gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
+ }
+
+ // Arguments — either wrapped in args_key or parsed directly
+ common_peg_parser args_parser = eps();
+ if (args_key.empty()) {
+ args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
+ } else {
+ args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+ }
+ inner_fields.push_back(args_parser);
+
+ // Build inner object parser
+ common_peg_parser inner_object = eps();
+ if (args_key.empty() && inner_fields.size() == 1) {
+ inner_object = inner_fields[0];
+ } else {
+ inner_object = literal("{") + space();
+ for (size_t i = 0; i < inner_fields.size(); i++) {
+ inner_object = inner_object + inner_fields[i];
+ if (i < inner_fields.size() - 1) {
+ inner_object = inner_object + space();
+ }
+ }
+ inner_object = inner_object + space() + literal("}");
+ }
+
+ auto tool_parser = tool(
+ tool_open(literal("{")) + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"") +
+ space() + literal(":") + space() +
+ inner_object +
+ space() + tool_close(literal("}"))
+ );
+
+ tool_choices |= rule("tool-" + name, tool_parser);
+ }
+
+ return tool_choices;
+}
+
+// Mode 2: Nested keys (dot notation like "function.name")
+common_peg_parser common_chat_peg_builder::build_json_tools_nested_keys(
+ const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key) {
+
+ auto tool_choices = choice();
+
+ auto name_spec = parse_key_spec(effective_name_key);
+ auto args_spec = parse_key_spec(effective_args_key);
+
+ std::string nested_prefix = !name_spec.first.empty() ? name_spec.first : args_spec.first;
+ std::string nested_name_field = !name_spec.first.empty() ? name_spec.second : effective_name_key;
+ std::string nested_args_field = !args_spec.first.empty() ? args_spec.second : effective_args_key;
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
}
- current_tool->arguments += "}";
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"");
+ auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+ auto nested_object = literal("{") + space() +
+ nested_name + space() + literal(",") + space() +
+ nested_args +
+ space() + literal("}");
+
+ // Format: { id?, "function": {...} }
+ auto tool_parser_body = tool_open(literal("{")) + space();
+
+ if (!call_id_key.empty()) {
+ auto id_spec = parse_key_spec(call_id_key);
+ if (id_spec.first.empty()) {
+ auto id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ literal("\"") + tool_id(json_string_content()) + literal("\"")
+ );
+ tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
+ }
+ }
+
+ if (!gen_call_id_key.empty()) {
+ auto gen_id_spec = parse_key_spec(gen_call_id_key);
+ if (gen_id_spec.first.empty()) {
+ auto gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
+ }
+ }
+
+ auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
+ tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
+
+ tool_choices |= rule("tool-" + name, tool(tool_parser_body));
+ }
+
+ return tool_choices;
+}
+
+// Mode 3: Flat keys with optional ID fields and parameter ordering
+common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
+ const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key,
+ const std::vector<std::string> & parameters_order) {
+
+ auto tool_choices = choice();
+ auto name_key_parser = literal("\"" + effective_name_key + "\"");
+ auto args_key_parser = literal("\"" + effective_args_key + "\"");
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) {
+ continue;
+ }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+ auto tool_name_ = name_key_parser + space() + literal(":") + space() +
+ literal("\"") + tool_name(literal(name)) + literal("\"");
+ auto tool_args_ = args_key_parser + space() + literal(":") + space() +
+ tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+ // Build ID parsers if keys are provided
+ common_peg_parser id_parser = eps();
+ if (!call_id_key.empty()) {
+ id_parser = atomic(
+ literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ }
+
+ common_peg_parser gen_id_parser = eps();
+ if (!gen_call_id_key.empty()) {
+ gen_id_parser = atomic(
+ literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+ choice({
+ literal("\"") + tool_id(json_string_content()) + literal("\""),
+ tool_id(json_number())
+ })
+ );
+ }
+
+ // Create (parser, key) pairs for all fields, then sort by parameters_order
+ std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
+ parser_pairs.emplace_back(tool_name_, effective_name_key);
+ parser_pairs.emplace_back(tool_args_, effective_args_key);
+ if (!call_id_key.empty()) {
+ parser_pairs.emplace_back(optional(id_parser), call_id_key);
+ }
+ if (!gen_call_id_key.empty()) {
+ parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
+ }
+
+ std::sort(parser_pairs.begin(), parser_pairs.end(),
+ [¶meters_order](const auto & a, const auto & b) {
+ auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+ auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+ size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+ size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+ return idx_a < idx_b;
+ });
+
+ auto ordered_body = tool_open(literal("{")) + space();
+ for (size_t i = 0; i < parser_pairs.size(); i++) {
+ ordered_body = ordered_body + parser_pairs[i].first;
+ if (i < parser_pairs.size() - 1) {
+ ordered_body = ordered_body + space() + literal(",") + space();
+ }
+ }
+ ordered_body = ordered_body + space() + tool_close(literal("}"));
+
+ tool_choices |= rule("tool-" + name, tool(ordered_body));
+ }
+
+ return tool_choices;
+}
+
+common_peg_parser common_chat_peg_builder::standard_json_tools(
+ const std::string & section_start,
+ const std::string & section_end,
+ const nlohmann::json & tools,
+ bool parallel_tool_calls,
+ bool force_tool_calls,
+ const std::string & name_key,
+ const std::string & args_key,
+ bool array_wrapped,
+ bool function_is_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key,
+ const std::vector<std::string> & parameters_order) {
+ if (!tools.is_array() || tools.empty()) {
+ return eps();
}
+
+ std::string effective_name_key = name_key.empty() ? "name" : name_key;
+ std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
+
+ // Dispatch to the appropriate builder based on the JSON layout mode
+ common_peg_parser tool_choices = eps();
+ if (function_is_key) {
+ tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
+ } else {
+ auto name_spec = parse_key_spec(effective_name_key);
+ auto args_spec = parse_key_spec(effective_args_key);
+ if (!name_spec.first.empty() || !args_spec.first.empty()) {
+ tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
+ } else {
+ tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
+ }
+ }
+
+ // Build the section with markers
+ auto tool_calls = tool_choices;
+ if (parallel_tool_calls) {
+ tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
+ }
+
+ if (array_wrapped) {
+ tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
+ }
+
+ auto section =
+ trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
+
+ return force_tool_calls ? section : optional(section);
}
#include "chat.h"
#include "peg-parser.h"
-class common_chat_peg_builder : public common_peg_parser_builder {
- public:
- static constexpr const char * REASONING_BLOCK = "reasoning-block";
- static constexpr const char * REASONING = "reasoning";
- static constexpr const char * CONTENT = "content";
-
- common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
- common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
- common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
-};
-
-inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
- common_chat_peg_builder builder;
- builder.set_root(fn(builder));
- return builder.build();
-}
+#include <map>
+#include <optional>
+#include <vector>
class common_chat_peg_mapper {
public:
common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
+ virtual ~common_chat_peg_mapper() = default;
+
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
virtual void map(const common_peg_ast_node & node);
+ private:
+ // Tool call handling state
+ std::optional<common_chat_tool_call> pending_tool_call; // Tool call waiting for name
+ common_chat_tool_call * current_tool = nullptr;
+ int arg_count = 0;
+ bool closing_quote_pending = false;
+ std::string args_buffer; // Buffer to delay arguments until tool name is known
+
+ // Returns a reference to the active argument destination string.
+ // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
+ std::string & args_target();
};
-class common_chat_peg_native_builder : public common_chat_peg_builder {
- public:
- static constexpr const char * TOOL = "tool";
- static constexpr const char * TOOL_OPEN = "tool-open";
- static constexpr const char * TOOL_CLOSE = "tool-close";
- static constexpr const char * TOOL_ID = "tool-id";
- static constexpr const char * TOOL_NAME = "tool-name";
- static constexpr const char * TOOL_ARGS = "tool-args";
-
- common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
- common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
- common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
- common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
- common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
- common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
-};
-
-class common_chat_peg_native_mapper : public common_chat_peg_mapper {
- common_chat_tool_call * current_tool;
+struct content_structure;
+struct tool_call_structure;
+class common_chat_peg_builder : public common_peg_parser_builder {
public:
- common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+ // Tag constants (from former common_chat_peg_base_builder)
+ static constexpr const char * REASONING_BLOCK = "reasoning-block";
+ static constexpr const char * REASONING = "reasoning";
+ static constexpr const char * CONTENT = "content";
+
+ // Tag constants
+ static constexpr const char * TOOL = "tool";
+ static constexpr const char * TOOL_OPEN = "tool-open";
+ static constexpr const char * TOOL_CLOSE = "tool-close";
+ static constexpr const char * TOOL_ID = "tool-id";
+ static constexpr const char * TOOL_NAME = "tool-name";
+ static constexpr const char * TOOL_ARGS = "tool-args";
+ static constexpr const char * TOOL_ARG = "tool-arg";
+ static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
+ static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+ static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
+ static constexpr const char * TOOL_ARG_VALUE = "tool-arg-value";
+ static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value"; // For schema-declared string types
- void map(const common_peg_ast_node & node) override;
-};
+ // Low-level tag methods (from former common_chat_peg_base_builder)
+ common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
-inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
- common_chat_peg_native_builder builder;
- builder.set_root(fn(builder));
- return builder.build();
-}
+ common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
-class common_chat_peg_constructed_builder : public common_chat_peg_builder {
- public:
- static constexpr const char * TOOL = "tool";
- static constexpr const char * TOOL_OPEN = "tool-open";
- static constexpr const char * TOOL_CLOSE = "tool-close";
- static constexpr const char * TOOL_NAME = "tool-name";
- static constexpr const char * TOOL_ARG = "tool-arg";
- static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
- static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
- static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
- static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
- static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+ common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+
+ common_peg_parser tag_with_safe_content(const std::string & tag_name,
+ const std::string & marker,
+ const common_peg_parser & p);
+ // Low-level tag methods
common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+ common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+ common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+ common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+ // Use for schema-declared string types - won't be treated as potential JSON container
common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
- common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+ common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }
+
+ // Legacy-compatible helper for building standard JSON tool calls
+ // Used by tests and manual parsers
+ // name_key/args_key: JSON key names for function name and arguments
+ // Empty or "name"/"arguments" will accept both common variations
+ // Supports dot notation for nested objects (e.g., "function.name")
+ // array_wrapped: if true, tool calls are wrapped in JSON array [...]
+ // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
+ // call_id_key: JSON key for string call ID (e.g., "id")
+ // gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
+ // parameters_order: order in which JSON fields should be parsed
+ common_peg_parser standard_json_tools(const std::string & section_start,
+ const std::string & section_end,
+ const nlohmann::json & tools,
+ bool parallel_tool_calls,
+ bool force_tool_calls,
+ const std::string & name_key = "",
+ const std::string & args_key = "",
+ bool array_wrapped = false,
+ bool function_is_key = false,
+ const std::string & call_id_key = "",
+ const std::string & gen_call_id_key = "",
+ const std::vector<std::string> & parameters_order = {});
+
+ // Legacy-compatible helper for building XML/tagged style tool calls
+ // Used by tests and manual parsers
+ common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
+ const nlohmann::json & tools,
+ bool parallel_tool_calls,
+ bool force_tool_calls);
+
+ private:
+ // Implementation helpers for standard_json_tools — one per JSON tool call layout mode
+ common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
+ const std::string & args_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key);
+
+ common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key);
+
+ common_peg_parser build_json_tools_flat_keys(const nlohmann::json & tools,
+ const std::string & effective_name_key,
+ const std::string & effective_args_key,
+ const std::string & call_id_key,
+ const std::string & gen_call_id_key,
+ const std::vector<std::string> & parameters_order);
};
-class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
- common_chat_tool_call * current_tool;
- int arg_count = 0;
- bool needs_closing_quote = false;
+inline common_peg_arena build_chat_peg_parser(
+ const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+ common_chat_peg_builder builder;
+ builder.set_root(fn(builder));
+ return builder.build();
+}
+class tag_based_peg_mapper {
public:
- common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+ std::map<std::string, std::string> tags;
- void map(const common_peg_ast_node & node) override;
+ void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
};
-inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
- common_chat_peg_constructed_builder builder;
- builder.set_root(fn(builder));
- return builder.build();
-}
+struct tagged_parse_result {
+ common_peg_parse_result result;
+ std::map<std::string, std::string> tags;
+};
+
+struct tagged_peg_parser {
+ common_peg_arena arena;
+ bool debug = false;
+
+ tagged_peg_parser & withDebug() {
+ debug = true;
+ return *this;
+ }
+
+ tagged_peg_parser & withoutDebug() {
+ debug = false;
+ return *this;
+ }
+
+ tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const;
+ tagged_parse_result parse_anywhere_and_extract(const std::string & input) const;
+};
+
+tagged_peg_parser build_tagged_peg_parser(
+ const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
+
#include "chat.h"
-#include "chat-parser.h"
+
+#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "common.h"
-#include "json-partial.h"
+#include "ggml.h"
#include "json-schema-to-grammar.h"
#include "log.h"
-#include "regex-partial.h"
-#include "jinja/parser.h"
#include "jinja/value.h"
#include "jinja/runtime.h"
#include "jinja/caps.h"
+#include "peg-parser.h"
-#include <algorithm>
#include <cstdio>
-#include <cctype>
+#include <cstdlib>
+#include <ctime>
#include <exception>
#include <functional>
-#include <iostream>
+
#include <optional>
+#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
using json = nlohmann::ordered_json;
static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
- auto time = std::chrono::system_clock::to_time_t(now);
- auto local_time = *std::localtime(&time);
+ auto time = std::chrono::system_clock::to_time_t(now);
+ auto local_time = *std::localtime(&time);
std::ostringstream ss;
ss << std::put_time(&local_time, format.c_str());
auto res = ss.str();
return res;
}
+static json safe_args_parse(const std::string & to_parse) {
+ std::string stripped = to_parse;
+ if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') {
+ stripped = to_parse.substr(1, to_parse.length() - 1);
+ }
+ try {
+ return json::parse(stripped);
+ } catch (json::exception & e) {
+ return stripped;
+ }
+}
+
static std::string string_diff(const std::string & last, const std::string & current) {
if (last.empty()) {
return current;
{"type", "function"},
{"function", {
{"name", tool_call.name},
- {"arguments", tool_call.arguments},
+ {"arguments", json::parse(tool_call.arguments)},
}},
};
if (!tool_call.id.empty()) {
return jmsg;
}
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv,
+ const common_chat_msg & msg_new) {
std::vector<common_chat_msg_diff> diffs;
if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
// TODO: these can become expensive for long messages - how to optimize?
if (msg_prv.reasoning_content != msg_new.reasoning_content) {
- auto & diff = diffs.emplace_back();
+ auto & diff = diffs.emplace_back();
diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
}
if (msg_prv.content != msg_new.content) {
- auto & diff = diffs.emplace_back();
+ auto & diff = diffs.emplace_back();
diff.content_delta = string_diff(msg_prv.content, msg_new.content);
}
if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
- throw std::runtime_error("Invalid diff: now finding less tool calls!");
+ std::string err = "Invalid diff: now finding less tool calls!\n";
+ err += " Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n";
+ for (const auto & tc : msg_prv.tool_calls) {
+ err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+ }
+ err += " Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n";
+ for (const auto & tc : msg_new.tool_calls) {
+ err += " - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+ }
+ err += " Current msg text content:\n" + msg_new.content + "\n";
+ throw std::runtime_error(err);
}
if (!msg_prv.tool_calls.empty()) {
- const auto idx = msg_prv.tool_calls.size() - 1;
+ const auto idx = msg_prv.tool_calls.size() - 1;
const auto & pref = msg_prv.tool_calls[idx];
const auto & newf = msg_new.tool_calls[idx];
- if (pref.name != newf.name) {
- throw std::runtime_error("Invalid diff: tool call mismatch!");
+ // Allow tool name to change during incremental parsing:
+ // - empty -> non-empty (initial discovery)
+ // - prefix -> longer string (name grows as more input is parsed)
+ if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) {
+ // Check if one is a prefix of the other (for incremental parsing where names grow or shrink)
+ bool is_prefix = (newf.name.rfind(pref.name, 0) == 0);
+ if (!is_prefix) {
+ LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str());
+ throw std::runtime_error("Invalid diff: tool call mismatch!");
+ }
}
const auto args_diff = string_diff(pref.arguments, newf.arguments);
- if (!args_diff.empty() || pref.id != newf.id) {
- auto & diff = diffs.emplace_back();
+ if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) {
+ auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
- if (pref.id != newf.id) {
- diff.tool_call_delta.id = newf.id;
+ if (pref.id != newf.id || pref.name != newf.name) {
+ diff.tool_call_delta.id = newf.id;
diff.tool_call_delta.name = newf.name;
}
diff.tool_call_delta.arguments = args_diff;
}
}
for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
- auto & diff = diffs.emplace_back();
+ auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
diff.tool_call_delta = msg_new.tool_calls[idx];
}
using chat_template_caps = jinja::caps;
-struct common_chat_template {
- jinja::program prog;
- std::string bos_tok;
- std::string eos_tok;
- std::string src;
- chat_template_caps caps;
-
- common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
- jinja::lexer lexer;
- auto lexer_res = lexer.tokenize(src);
- this->prog = jinja::parse_from_tokens(lexer_res);
-
- this->src = lexer_res.source;
- this->bos_tok = bos_token;
- this->eos_tok = eos_token;
-
- this->caps = jinja::caps_get(prog);
- // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
- }
-
- const std::string & source() const { return src; }
- const std::string & bos_token() const { return bos_tok; }
- const std::string & eos_token() const { return eos_tok; }
-
- // TODO: this is ugly, refactor it somehow
- json add_system(const json & messages, const std::string & system_prompt) const {
- GGML_ASSERT(messages.is_array());
- auto msgs_copy = messages;
- if (!caps.supports_system_role) {
- if (msgs_copy.empty()) {
- msgs_copy.insert(msgs_copy.begin(), json{
- {"role", "user"},
- {"content", system_prompt}
- });
- } else {
- auto & first_msg = msgs_copy[0];
- if (!first_msg.contains("content")) {
- first_msg["content"] = "";
- }
- first_msg["content"] = system_prompt + "\n\n"
- + first_msg["content"].get<std::string>();
- }
- } else {
- if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
- msgs_copy.insert(msgs_copy.begin(), json{
- {"role", "system"},
- {"content", system_prompt}
- });
- } else if (msgs_copy[0].at("role") == "system") {
- msgs_copy[0]["content"] = system_prompt;
- }
- }
- return msgs_copy;
- }
-
- chat_template_caps original_caps() const {
- return caps;
- }
-
-};
-
struct common_chat_templates {
bool add_bos;
bool add_eos;
- bool has_explicit_template; // Model had builtin template or template overridde was specified.
- std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+ bool has_explicit_template; // Model had builtin template or template overridde was specified.
+ std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
std::unique_ptr<common_chat_template> template_tool_use;
};
-struct templates_params {
- json messages;
- json tools;
- common_chat_tool_choice tool_choice;
- json json_schema;
- bool parallel_tool_calls;
- common_reasoning_format reasoning_format;
- bool stream;
- std::string grammar;
- bool add_generation_prompt = true;
- bool enable_thinking = true;
- std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
- json extra_context;
- bool add_bos;
- bool add_eos;
- bool is_inference = true;
- bool mark_input = true; // whether to mark input strings in the jinja context
-};
-
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
if (tool_choice == "auto") {
return COMMON_CHAT_TOOL_CHOICE_AUTO;
}
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
- common_chat_templates_inputs dummy_inputs;
+ common_chat_templates_inputs inputs;
+ inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
common_chat_msg msg;
- msg.role = "user";
+ msg.role = "user";
msg.content = "test";
- dummy_inputs.messages = {msg};
- dummy_inputs.enable_thinking = false;
- const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
- dummy_inputs.enable_thinking = true;
- const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
- return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+ inputs.messages = { msg };
+ inputs.enable_thinking = true;
+ inputs.add_generation_prompt = true;
+ inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+
+ auto params = common_chat_templates_apply(chat_templates, inputs);
+ return params.supports_thinking;
}
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
std::vector<common_chat_msg> msgs;
try {
-
if (!messages.is_array()) {
throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
}
}
msg.role = message.at("role");
- auto has_content = message.contains("content");
+ auto has_content = message.contains("content");
auto has_tool_calls = message.contains("tool_calls");
if (has_content) {
const auto & content = message.at("content");
msg.content_parts.push_back(msg_part);
}
} else if (!content.is_null()) {
- throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+ throw std::invalid_argument("Invalid 'content' type: expected string or array, got " +
+ content.dump() +
+ " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
}
}
if (has_tool_calls) {
if (!fc.contains("name")) {
throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
}
- tc.name = fc.at("name");
- tc.arguments = fc.at("arguments");
+ tc.name = fc.at("name");
+ const auto & args = fc.at("arguments");
+ if (args.is_string()) {
+ tc.arguments = args;
+ } else {
+ tc.arguments = args.dump();
+ }
if (tool_call.contains("id")) {
tc.id = tool_call.at("id");
}
}
}
if (!has_content && !has_tool_calls) {
- throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+ throw std::invalid_argument(
+ "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & "
+ "https://github.com/ggml-org/llama.cpp/issues/12279)");
}
if (message.contains("reasoning_content")) {
msg.reasoning_content = message.at("reasoning_content");
auto result = json::array();
for (const auto & tool : tools) {
result.push_back({
- {"type", "function"},
- {"function", {
- {"name", tool.name},
- {"description", tool.description},
- {"parameters", json::parse(tool.parameters)},
- }},
+ { "type", "function" },
+ { "function",
+ {
+ { "name", tool.name },
+ { "description", tool.description },
+ { "parameters", json::parse(tool.parameters) },
+ } },
});
}
return result;
json tool_call;
tool_call["index"] = diff.tool_call_index;
if (!diff.tool_call_delta.id.empty()) {
- tool_call["id"] = diff.tool_call_delta.id;
+ tool_call["id"] = diff.tool_call_delta.id;
tool_call["type"] = "function";
}
- json function = json::object();
- if (!diff.tool_call_delta.name.empty()) {
- function["name"] = diff.tool_call_delta.name;
+ if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
+ json function = json::object();
+ if (!diff.tool_call_delta.name.empty()) {
+ function["name"] = diff.tool_call_delta.name;
+ }
+ if (!diff.tool_call_delta.arguments.empty()) {
+ function["arguments"] = diff.tool_call_delta.arguments;
+ }
+ tool_call["function"] = function;
}
- function["arguments"] = diff.tool_call_delta.arguments;
- tool_call["function"] = function;
- delta["tool_calls"] = json::array({tool_call});
+ delta["tool_calls"] = json::array({ tool_call });
}
return delta;
}
if (use_jinja) {
try {
common_chat_msg msg;
- msg.role = "user";
+ msg.role = "user";
msg.content = "test";
auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
common_chat_templates_inputs inputs;
- inputs.messages = {msg};
+ inputs.messages = { msg };
common_chat_templates_apply(tmpls.get(), inputs);
return true;
return false;
}
}
- llama_chat_message chat[] = {{"user", "test"}};
+ llama_chat_message chat[] = {
+ { "user", "test" }
+ };
const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
return res >= 0;
}
-std::string common_chat_format_single(
- const struct common_chat_templates * tmpls,
- const std::vector<common_chat_msg> & past_msg,
- const common_chat_msg & new_msg,
- bool add_ass,
- bool use_jinja) {
-
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+ const std::vector<common_chat_msg> & past_msg,
+ const common_chat_msg & new_msg,
+ bool add_ass,
+ bool use_jinja) {
common_chat_templates_inputs inputs;
inputs.use_jinja = use_jinja;
- inputs.add_bos = tmpls->add_bos;
- inputs.add_eos = tmpls->add_eos;
+ inputs.add_bos = tmpls->add_bos;
+ inputs.add_eos = tmpls->add_eos;
std::string fmt_past_msg;
if (!past_msg.empty()) {
- inputs.messages = past_msg;
+ inputs.messages = past_msg;
inputs.add_generation_prompt = false;
- fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+ fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
}
std::ostringstream ss;
// if the past_msg ends with a newline, we must preserve it in the formatted version
// format chat with new_msg
inputs.messages.push_back(new_msg);
inputs.add_generation_prompt = add_ass;
- auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+ auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
// get the diff part
ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
return ss.str();
}
-std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
+std::string common_chat_format_example(const struct common_chat_templates * tmpls,
+ bool use_jinja,
+ const std::map<std::string, std::string> & chat_template_kwargs) {
common_chat_templates_inputs inputs;
- inputs.use_jinja = use_jinja;
- inputs.add_bos = tmpls->add_bos;
- inputs.add_eos = tmpls->add_eos;
+ inputs.use_jinja = use_jinja;
+ inputs.add_bos = tmpls->add_bos;
+ inputs.add_eos = tmpls->add_eos;
inputs.chat_template_kwargs = chat_template_kwargs;
- auto add_simple_msg = [&](auto role, auto content) {
+ auto add_simple_msg = [&](auto role, auto content) {
common_chat_msg msg;
- msg.role = role;
+ msg.role = role;
msg.content = content;
inputs.messages.push_back(msg);
};
- add_simple_msg("system", "You are a helpful assistant");
- add_simple_msg("user", "Hello");
+ add_simple_msg("system", "You are a helpful assistant");
+ add_simple_msg("user", "Hello");
add_simple_msg("assistant", "Hi there");
- add_simple_msg("user", "How are you?");
+ add_simple_msg("user", "How are you?");
return common_chat_templates_apply(tmpls, inputs).prompt;
}
-#define CHATML_TEMPLATE_SRC \
- "{%- for message in messages -%}\n" \
+#define CHATML_TEMPLATE_SRC \
+ "{%- for message in messages -%}\n" \
" {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
- "{%- endfor -%}\n" \
- "{%- if add_generation_prompt -%}\n" \
- " {{- '<|im_start|>assistant\n' -}}\n" \
+ "{%- endfor -%}\n" \
+ "{%- if add_generation_prompt -%}\n" \
+ " {{- '<|im_start|>assistant\n' -}}\n" \
"{%- endif -%}"
void common_chat_templates_free(struct common_chat_templates * tmpls) {
return tmpls->template_tool_use->source();
}
return "";
- } else {
- LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
}
+ LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
}
return tmpls->template_default->source();
}
-common_chat_templates_ptr common_chat_templates_init(
- const struct llama_model * model,
- const std::string & chat_template_override,
- const std::string & bos_token_override,
- const std::string & eos_token_override)
-{
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+ const std::string & chat_template_override,
+ const std::string & bos_token_override,
+ const std::string & eos_token_override) {
std::string default_template_src;
std::string template_tool_use_src;
GGML_ASSERT(model != nullptr);
const auto * str = llama_model_chat_template(model, /* name */ nullptr);
if (str) {
- default_template_src = str;
+ default_template_src = str;
has_explicit_template = true;
}
str = llama_model_chat_template(model, /* name */ "tool_use");
// TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
// Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
if (default_template_src.find("<|channel|>") != std::string::npos
- // search for the error message and patch it
- && default_template_src.find("in message.content or") != std::string::npos) {
+ // search for the error message and patch it
+ && default_template_src.find("in message.content or") != std::string::npos) {
string_replace_all(default_template_src,
- "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
- "{%- if false %}");
+ "{%- if \"<|channel|>analysis<|message|>\" in message.content or "
+ "\"<|channel|>final<|message|>\" in message.content %}",
+ "{%- if false %}");
}
// TODO @aldehir : this is a temporary fix, pending Minja changes
// Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
- // search for the error message and patch it
- && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+ // search for the error message and patch it
+ && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
string_replace_all(default_template_src,
- "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
- "{%- if false %}");
+ "{%- if (message['content'] is none or message['content'] == '' or "
+ "message['content']|length == 0) and (message['tool_calls'] is not defined or "
+ "message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+ "{%- if false %}");
}
std::string token_bos = bos_token_override;
std::string token_eos = eos_token_override;
- bool add_bos = false;
- bool add_eos = false;
+ bool add_bos = false;
+ bool add_eos = false;
if (model) {
- const auto * vocab = llama_model_get_vocab(model);
- const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
+ const auto * vocab = llama_model_get_vocab(model);
+ const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
if (token == LLAMA_TOKEN_NULL) {
- if (default_template_src.find(jinja_variable_name) != std::string::npos
- || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
- LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
+ if (default_template_src.find(jinja_variable_name) != std::string::npos ||
+ template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
+ LOG_WRN(
+ "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't "
+ "work as intended.\n",
+ name);
}
return std::string();
}
};
token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
- add_bos = llama_vocab_get_add_bos(vocab);
- add_eos = llama_vocab_get_add_eos(vocab);
+ add_bos = llama_vocab_get_add_bos(vocab);
+ add_eos = llama_vocab_get_add_eos(vocab);
}
common_chat_templates_ptr tmpls(new common_chat_templates());
tmpls->has_explicit_template = has_explicit_template;
- tmpls->add_bos = add_bos;
- tmpls->add_eos = add_eos;
+ tmpls->add_bos = add_bos;
+ tmpls->add_eos = add_eos;
try {
tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
} catch (const std::exception & e) {
const char * common_chat_format_name(common_chat_format format) {
switch (format) {
- case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
- case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
- case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
- case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
- case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
- case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
- case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
- case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
- case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
- case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
- case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
- case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
- case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
- case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
- case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
- case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
- case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
- case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
- case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
- case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
- case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
- case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
- case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
- case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
- case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
- case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
- case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
- case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
+ case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+ return "Content-only";
+ case COMMON_CHAT_FORMAT_PEG_SIMPLE:
+ return "peg-simple";
+ case COMMON_CHAT_FORMAT_PEG_NATIVE:
+ return "peg-native";
default:
throw std::runtime_error("Unknown chat format");
}
const char * common_reasoning_format_name(common_reasoning_format format) {
switch (format) {
- case COMMON_REASONING_FORMAT_NONE: return "none";
- case COMMON_REASONING_FORMAT_AUTO: return "auto";
- case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
- case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
+ case COMMON_REASONING_FORMAT_NONE:
+ return "none";
+ case COMMON_REASONING_FORMAT_AUTO:
+ return "auto";
+ case COMMON_REASONING_FORMAT_DEEPSEEK:
+ return "deepseek";
+ case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY:
+ return "deepseek-legacy";
default:
throw std::runtime_error("Unknown reasoning format");
}
common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
if (format == "none") {
return COMMON_REASONING_FORMAT_NONE;
- } else if (format == "auto") {
+ }
+ if (format == "auto") {
return COMMON_REASONING_FORMAT_AUTO;
- } else if (format == "deepseek") {
+ }
+ if (format == "deepseek") {
return COMMON_REASONING_FORMAT_DEEPSEEK;
- } else if (format == "deepseek-legacy") {
+ }
+ if (format == "deepseek-legacy") {
return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
}
throw std::runtime_error("Unknown reasoning format: " + format);
}
}
-static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+static void foreach_parameter(const json & function,
+ const std::function<void(const std::string &, const json &, bool)> & fn) {
if (!function.contains("parameters") || !function.at("parameters").is_object()) {
return;
}
if (!params.contains("properties") || !params.at("properties").is_object()) {
return;
}
- const auto & props = params.at("properties");
+ const auto & props = params.at("properties");
std::set<std::string> required;
if (params.contains("required") && params.at("required").is_array()) {
params.at("required").get_to(required);
}
}
-static std::string apply(
+std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
- const struct templates_params & inputs,
- const std::optional<json> & messages_override = std::nullopt,
- const std::optional<json> & tools_override = std::nullopt,
- const std::optional<json> & additional_context = std::nullopt)
-{
+ const autoparser::templates_params & inputs,
+ const std::optional<json> & messages_override,
+ const std::optional<json> & tools_override,
+ const std::optional<json> & additional_context) {
jinja::context ctx(tmpl.source());
nlohmann::ordered_json inp = nlohmann::ordered_json{
{"messages", messages_override.has_value() ? *messages_override : inputs.messages},
{"bos_token", tmpl.bos_token()},
{"eos_token", tmpl.eos_token()},
+ {"enable_thinking", inputs.enable_thinking},
};
if (tools_override.has_value() || !inputs.tools.empty()) {
inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
// render
jinja::runtime runtime(ctx);
const jinja::value results = runtime.execute(tmpl.prog);
- auto parts = runtime.gather_string_parts(results);
+ auto parts = jinja::runtime::gather_string_parts(results);
std::string result = parts->as_string().str();
return result;
}
-static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- auto tool_call_schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- auto tool_schema = json {
- {"type", "object"},
- {"properties", {
- {"name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"arguments", function.at("parameters")},
- }},
- {"required", json::array({"name", "arguments"})},
- };
- if (function.contains("description")) {
- tool_schema["description"] = function.at("description");
- }
- if (inputs.parallel_tool_calls) {
- tool_schema.at("properties")["id"] = {
- {"type", "string"},
- {"minLength", 4},
- };
- tool_schema.at("required").push_back("id");
- }
- tool_call_schemas.emplace_back(tool_schema);
- });
- const auto tool_call =
- inputs.parallel_tool_calls
- ? json {
- {"type", "object"},
- {"properties", {
- {"tool_calls", {
- {"type", "array"},
- {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
- {"anyOf", tool_call_schemas},
- }},
- {"minItems", 1},
- }},
- }},
- {"required", json::array({"tool_calls"})},
- }
- : json {
- {"type", "object"},
- {"properties", {
- {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
- {"anyOf", tool_call_schemas},
- }},
- }},
- {"required", json::array({"tool_call"})},
- };
- const auto schema =
- inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
- ? json {
- {"anyOf", json::array({
- tool_call,
- {
- {"type", "object"},
- {"properties", {
- {"response", inputs.json_schema.is_null()
- ? json {{"type", "string"}}
- : inputs.json_schema
- },
- }},
- {"required", json::array({"response"})},
- },
- })}
- }
- : tool_call;
-
- data.grammar_lazy = false;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- builder.add_schema("root", schema);
- });
-
- auto tweaked_messages = tmpl.add_system(
- inputs.messages,
- "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
-
- // ensure all messages has "content" field
- for (auto & message : tweaked_messages) {
- if (!message.contains("content") || message["content"].is_null()) {
- message["content"] = "";
- }
- }
-
- data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
- data.format = COMMON_CHAT_FORMAT_GENERIC;
- return data;
-}
-
-static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- {"type", "object"},
- {"properties", {
- // Important note: the model is probably trained to take a JSON stringified arguments value.
- // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
- {"name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"arguments", function.at("parameters")},
- {"id", {
- {"type", "string"},
- // Nemo's template expects a 9-character alphanumeric ID.
- {"pattern", "^[a-zA-Z0-9]{9}$"},
- }},
- }},
- {"required", json::array({"name", "arguments", "id"})},
- });
- });
- auto schema = json {
- {"type", "array"},
- {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
- {"minItems", 1},
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
- });
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
- data.preserved_tokens = {
- "[TOOL_CALLS]",
- };
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
- return data;
-}
-
-
-// Case-insensitive find
-static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
- auto it = std::search(
- haystack.begin() + pos, haystack.end(),
- needle.begin(), needle.end(),
- [](char a, char b) { return std::tolower(a) == std::tolower(b); }
- );
- return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
-}
-
-static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- const auto is_json_schema_provided = !inputs.json_schema.is_null();
- const auto is_grammar_provided = !inputs.grammar.empty();
- const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
-
- // the logic requires potentially modifying the messages
- auto tweaked_messages = inputs.messages;
-
- auto replace_json_schema_marker = [](json & messages) -> bool {
- static std::string marker1 = "force json schema.\n";
- static std::string marker2 = "force json schema.";
-
- if (messages.empty() || messages.at(0).at("role") != "system") {
- return false;
- }
-
- std::string content = messages.at(0).at("content");
-
- for (const auto & marker : {marker1, marker2}) {
- const auto pos = ifind_string(content, marker);
- if (pos != std::string::npos) {
- content.replace(pos, marker.length(), "");
- // inject modified content back into the messages
- messages.at(0).at("content") = content;
- return true;
- }
- }
-
- return false;
- };
-
- // Lfm2 model does not natively work with json, but can generally understand the tools structure
- //
- // Example of the pytorch dialog structure:
- // <|startoftext|><|im_start|>system
- // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
- // <|im_start|>user
- // What is the current status of candidate ID 12345?<|im_end|>
- // <|im_start|>assistant
- // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
- // <|im_start|>tool
- // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
- // <|im_start|>assistant
- // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
- //
- // For the llama server compatibility with json tools semantic,
- // the client can add "Follow json schema." line into the system message prompt to force the json output.
- //
- if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
- // server/utils.hpp prohibits that branch for the custom grammar anyways
- throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
- } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
- LOG_INF("%s: Using tools to build a grammar\n", __func__);
-
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- {"type", "object"},
- {"properties", {
- {"name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"arguments", function.at("parameters")},
- }},
- {"required", json::array({"name", "arguments", "id"})},
- });
- });
- auto schema = json {
- {"type", "array"},
- {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
- {"minItems", 1},
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
-
- builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
- });
- // model has no concept of tool selection mode choice,
- // if the system prompt rendered correctly it will produce a tool call
- // the grammar goes inside the tool call body
- data.grammar_lazy = true;
- data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
- data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
- data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
- } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
- LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
- // output those tokens
- data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
- } else if (is_json_schema_provided) {
- LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
- data.grammar = json_schema_to_grammar(inputs.json_schema);
- } else if (is_grammar_provided) {
- LOG_INF("%s: Using provided grammar\n", __func__);
- data.grammar = inputs.grammar;
- } else {
- LOG_INF("%s: Using content relying on the template\n", __func__);
- }
-
- data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
- LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl,
+ const autoparser::templates_params & inputs) {
common_chat_params data;
// Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
// If message contains `reasoning_content`, add it as a block of type `thinking`
if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
content.push_back({
- {"type", "thinking"},
- {"thinking", msg.at("reasoning_content").get<std::string>()},
+ { "type", "thinking" },
+ { "thinking", msg.at("reasoning_content").get<std::string>() },
});
}
if (msg.contains("content")) {
if (msg.at("content").is_string()) {
content.push_back({
- {"type", "text"},
- {"text", msg.at("content").get<std::string>()},
+ { "type", "text" },
+ { "text", msg.at("content").get<std::string>() },
});
} else if (msg.at("content").is_array()) {
auto blocks = msg.at("content");
}
}
- auto adjusted = msg;
+ auto adjusted = msg;
adjusted["content"] = content;
adjusted.erase("reasoning_content");
adjusted_messages.push_back(adjusted);
}
- auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
- auto include_grammar = true;
+ auto include_grammar = true;
- data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
- data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
- data.preserved_tokens = {
+ data.supports_thinking = true;
+ data.prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+ data.preserved_tokens = {
"[THINK]",
"[/THINK]",
"[TOOL_CALLS]",
"[ARGS]",
};
- auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
- auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ auto reasoning =
+ extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
// Response format parser
if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
// Ministral wants to emit json surrounded by code fences
- return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+ return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema))
+ << "```";
}
// Tool call parser
auto tool_choice = p.choice();
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- std::string name = function.at("name");
- const auto & schema = function.at("parameters");
+ std::string name = function.at("name");
+ const auto & schema = function.at("parameters");
- tool_choice |= p.rule("tool-" + name,
- p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
- + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
- );
+ tool_choice |=
+ p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") +
+ p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
});
- auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
- auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+ auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+ auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- auto schema = function.at("parameters");
+ auto schema = function.at("parameters");
builder.resolve_refs(schema);
});
parser.build_grammar(builder, data.grammar_lazy);
});
data.grammar_triggers = {
- {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" }
};
}
return data;
}
-static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
- data.preserved_tokens = {
- "[THINK]",
- "[/THINK]",
- };
-
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- {"type", "object"},
- {"properties", {
- {"name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"arguments", function.at("parameters")},
- {"id", {
- {"type", "string"},
- {"pattern", "^[a-zA-Z0-9]{9}$"},
- }},
- }},
- {"required", json::array({"name", "arguments", "id"})},
- });
- });
- auto schema = json {
- {"type", "array"},
- {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
- {"minItems", 1},
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
- });
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
- data.preserved_tokens.push_back("[TOOL_CALLS]");
- } else {
- data.grammar_lazy = false;
- if (!inputs.json_schema.is_null()) {
- if (!inputs.grammar.empty()) {
- throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
- }
- data.grammar = json_schema_to_grammar(inputs.json_schema);
- } else {
- data.grammar = inputs.grammar;
- }
- }
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl,
+ const autoparser::templates_params & inputs) {
common_chat_params data;
+ // Copy reasoning to the "thinking" field as expected by the gpt-oss template
auto adjusted_messages = json::array();
for (const auto & msg : inputs.messages) {
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
- auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+ auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
if (has_reasoning_content && has_tool_calls) {
- auto adjusted_message = msg;
- adjusted_message["tool_plan"] = msg.at("reasoning_content");
- adjusted_message.erase("reasoning_content");
+ auto adjusted_message = msg;
+ adjusted_message["thinking"] = msg.at("reasoning_content");
adjusted_messages.push_back(adjusted_message);
} else {
adjusted_messages.push_back(msg);
}
}
- data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
- data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
- if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
- if (!inputs.enable_thinking) {
- data.prompt += "<|END_THINKING|>";
- } else {
- data.thinking_forced_open = true;
- }
- } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
- data.prompt += "<|START_THINKING|><|END_THINKING|>";
- }
-
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- {"type", "object"},
- {"properties", {
- {"tool_call_id", {
- {"type", "string"},
- // Command-R's template expects an integer string.
- {"pattern", "^[0-9]{1,10}$"},
- }},
- {"tool_name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"parameters", function.at("parameters")},
- }},
- {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
- });
- });
- auto schema = json {
- {"type", "array"},
- {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
- {"minItems", 1},
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
- "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
- });
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
- "(<\\|START_ACTION\\|>)[\\s\\S]*"
- });
- data.preserved_tokens = {
- "<|START_ACTION|>",
- "<|END_ACTION|>",
- "<|START_RESPONSE|>",
- "<|END_RESPONSE|>",
- "<|START_THINKING|>",
- "<|END_THINKING|>",
- };
- return data;
-}
-
-static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
- if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
- throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
- }
- const auto & parameters_properties = parameters.at("properties");
- const auto & parameters_required = parameters.at("required");
- for (const auto & prop : expected_properties) {
- if (!parameters_properties.contains(prop)) {
- throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
- }
- if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
- throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
- }
- }
- if (parameters_properties.size() != expected_properties.size()) {
- throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
- }
-}
-
-static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
- auto builtin_tools = json::array();
- common_chat_params data;
- if (!inputs.tools.is_null()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
-
- auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
- if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
- expect_tool_parameters(name, parameters, {"query"});
- } else if (name == "python" || name == "code_interpreter") {
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
- expect_tool_parameters(name, parameters, {"code"});
- } else {
- return false;
- }
-
- std::vector<std::string> kvs;
- for (const auto & [key, value] : parameters.at("properties").items()) {
- kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
- }
-
- tool_rules.push_back(
- builder.add_rule(
- name + "-call",
- "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
- builtin_tools.push_back(name);
-
- return true;
- };
-
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
-
- // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
- if (allow_python_tag_builtin_tools) {
- handle_builtin_tool(name, parameters);
- }
- tool_rules.push_back(
- builder.add_rule(
- name + "-call",
- "\"{\" space "
- "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
- " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
- " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
- "\"}\" space"));
- });
- // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
- });
- if (!builtin_tools.empty()) {
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
- data.preserved_tokens.push_back("<|python_tag|>");
- }
- // Allow a few empty lines on top of the usual constrained json schema space rule.
- builder.add_rule("root", string_join(tool_rules, " | "));
- data.additional_stops.push_back("<|eom_id|>");
- });
- data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
- ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
- : COMMON_CHAT_FORMAT_LLAMA_3_X;
- } else {
- data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- }
- data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
- {"date_string", format_time(inputs.now, "%d %b %Y")},
- {"tools_in_user_message", false},
- {"builtin_tools", builtin_tools},
- });
- return data;
-}
-
-static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- // Generate the prompt using the apply() function with the template
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
+ auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
- // Handle thinking tags appropriately based on inputs.enable_thinking
- if (string_ends_with(data.prompt, "<think>\n")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
+ // Check if we need to replace the return token with end token during
+ // inference and without generation prompt. For more details see:
+ // https://github.com/ggml-org/llama.cpp/issues/15417
+ if (inputs.is_inference && !inputs.add_generation_prompt) {
+ static constexpr std::string_view return_token = "<|return|>";
+ static constexpr std::string_view end_token = "<|end|>";
+ if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
+ prompt.replace(pos, return_token.length(), end_token);
}
}
- // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
- if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = true;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- { "type", "object" },
- { "properties",
- {
- { "name",
- {
- { "type", "string" },
- { "const", function.at("name") },
- } },
- { "arguments", function.at("parameters") },
- } },
- { "required", json::array({ "name", "arguments" }) },
- });
- });
- auto schema = json{
- { "type", "array" },
- { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
- { "minItems", 1 },
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
- "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
- " \"</TOOLCALL>\"");
- });
- data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ?
- "[\\s\\S]*?(</think>\\s*)" :
- "(?:<think>[\\s\\S]*?</think>\\s*)?") +
- "(<TOOLCALL>)[\\s\\S]*" });
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_qwen3_coder(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
-
- // Nemotron Nano 3 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
- bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
-
- // Handle thinking tags appropriately based on inputs.enable_thinking
- if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
+ data.prompt = prompt;
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+ data.supports_thinking = true;
+ // These special tokens are required to parse properly, so we include them
+ // even if parse_tool_calls is false.
data.preserved_tokens = {
- "<tool_call>",
- "</tool_call>",
+ "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
};
- if (supports_reasoning) {
- data.preserved_tokens.insert(data.preserved_tokens.end(), {"<think>", "</think>"});
- }
-
- auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
- auto include_grammar = true;
-
- auto parser = build_chat_peg_constructed_parser([&](auto & p) {
- auto reasoning = p.eps();
- if (supports_reasoning && inputs.enable_thinking && extract_reasoning) {
- auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
- if (data.thinking_forced_open) {
- reasoning = reasoning_content;
- }
- }
+ auto include_grammar = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools;
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ const std::string END = "<|end|>";
+ const std::string START = "<|start|>";
+ const std::string MESSAGE = "<|message|>";
+ const std::string CHANNEL = "<|channel|>";
+ const std::string CONSTRAIN = "<|constrain|>";
+ const std::string START_ASSISTANT = START + "assistant";
+ const std::string CHANNEL_ANALYSIS = CHANNEL + "analysis";
+ const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary";
+ const std::string CHANNEL_FINAL = CHANNEL + "final";
+
+ auto the_end = END | p.end();
+
+ const std::string analysis_header = CHANNEL_ANALYSIS + MESSAGE;
+ auto segment_content = p.until(END);
+ auto analysis_segment = extract_reasoning ?
+ p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end :
+ p.content(analysis_header + p.until(END) + the_end);
+
+ auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE });
+ auto content_header = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) });
+ auto content_segment = p.rule("content-segment", content_header + channel_header_content + MESSAGE +
+ p.content(segment_content) + the_end);
- // Response format parser
- if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
- return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+ if (!inputs.json_schema.is_null()) {
+ auto final_header = p.literal(CHANNEL_FINAL);
+ auto constraint = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content);
+ return p.optional(analysis_segment) + final_header + constraint + MESSAGE +
+ p.content(p.schema(p.json(), "response-format", inputs.json_schema));
}
+ auto segment = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment });
+ auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end();
+
// Tool call parser
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
auto tool_choice = p.choice();
+
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
-
- auto schema_info = common_schema_info();
- schema_info.resolve_refs(parameters);
-
- auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
- auto tool_close = p.literal("</function>\n");
- auto args = p.sequence();
- auto arg_string = p.rule("xml-arg-string", p.until_one_of({
- "\n</parameter>",
- "\n<parameter=",
- "\n</function>"
- }));
-
- foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
- auto rule_name = "tool-" + name + "-arg-" + param_name;
-
- auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
- auto arg_close = p.literal("</parameter>\n");
- auto arg_value = p.eps();
-
- if (schema_info.resolves_to_string(param_schema)) {
- arg_value = p.tool_arg_string_value(arg_string) + "\n";
- } else {
- arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
- }
+ std::string name = function.at("name");
+ const auto & params = function.at("parameters");
- // Model may or my not close with </parameter>
- auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
- args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
- });
+ // Tool call can appear as:
+ // 1. In role header: " to=functions.NAME<|channel|>..."
+ // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..."
+ auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name));
+
+ auto channel = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS);
+ auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content);
+ auto args = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
- tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
+ // Pattern 1: recipient in role header
+ // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS"
+ auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args);
+
+ // Pattern 2: recipient in channel header
+ // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS"
+ auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args);
+
+ tool_choice |= tool_in_role | tool_in_channel;
});
auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
- auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
- auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
- return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
+ auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT));
+ auto tool_call = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end());
+
+ return p.choice({ p.trigger_rule("single-tool", tool_call), p.trigger_rule("tools", p.one_or_more(segment) + tool_call) });
}
- // Content only parser
- include_grammar = false;
- return reasoning << p.content(p.rest());
+ return contents;
});
data.parser = parser.save();
if (include_grammar) {
data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- auto schema = function.at("parameters");
+ auto schema = function.at("parameters");
builder.resolve_refs(schema);
});
parser.build_grammar(builder, data.grammar_lazy);
});
data.grammar_triggers = {
- {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
+ { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+ { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+ { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+ "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)" }
};
}
return data;
}
-
-static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
+static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl,
+ const autoparser::templates_params & inputs) {
common_chat_params data;
- // Generate the prompt using the apply() function with the template
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_APERTUS;
+ data.prompt = common_chat_template_direct_apply(tmpl, inputs);
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+ data.preserved_tokens = {
+ ">>>all",
+ };
- // Handle thinking tags appropriately based on inputs.enable_thinking
- if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
- if (!inputs.enable_thinking) {
- data.prompt += "<|inner_suffix|>";
- } else {
- data.thinking_forced_open = true;
- }
- }
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
- // When tools are present, build grammar for the <|tools_prefix|> format
- if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = true;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- { "type", "object" },
- { "properties",
- {
- { function.at("name"), function.at("parameters") }
- } },
- { "required", json::array({ function.at("name") }) },
- });
- });
- auto schema = json{
- { "type", "array" },
- { "items", schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
- { "minItems", 1 },
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
- "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
- });
- data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ?
- "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
- "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
- "(<\\|tools_prefix\\|>)[\\s\\S]*" });
- data.preserved_tokens = {
- "<|system_start|>",
- "<|system_end|>",
- "<|developer_start|>",
- "<|developer_end|>",
- "<|user_start|>",
- "<|user_end|>",
- "<|assistant_start|>",
- "<|assistant_end|>",
- "<|inner_prefix|>",
- "<|inner_suffix|>",
- "<|tools_prefix|>",
- "<|tools_suffix|>",
- };
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- auto prompt = apply(tmpl, inputs);
-
- // Hacks to fix the official (broken) prompt.
- // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
- // until the official template is fixed.
- if (tmpl.source().find("{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}") != std::string::npos) {
- // Don't leave the chat dangling after tool results
- if (string_ends_with(prompt, "<|tool▁outputs▁end|>")) {
- prompt += "<|end▁of▁sentence|>";
- if (inputs.add_generation_prompt) {
- prompt += "<|Assistant|>";
- }
- }
- // Fix up tool call delta example added by Minja
- prompt = std::regex_replace(
- prompt,
- std::regex("(<|tool▁call▁end|>)[\\s\\r\\n]*(<|tool▁outputs▁begin|>|<|User|>)"),
- "$1<|tool▁calls▁end|><|end▁of▁sentence|>$2");
- }
- data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
- if (string_ends_with(data.prompt, "<think>\n")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
-
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- tool_rules.push_back(builder.add_rule(name + "-call",
- "( \"<|tool▁call▁begin|>\" )? \"function<|tool▁sep|>" + name + "\\n"
- "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
- "\"```<|tool▁call▁end|>\""));
- });
- // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
- // so we accept common variants (then it's all constrained)
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
- "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
- "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
- "\"<|tool▁calls▁end|>\""
- " space");
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
- "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
- });
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<|tool▁calls▁begin|>",
- "<|tool▁call▁begin|>",
- "<|tool▁sep|>",
- "<|tool▁call▁end|>",
- "<|tool▁calls▁end|",
- };
- });
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- // Pass thinking context for DeepSeek V3.1 template
- json additional_context = {
- {"thinking", inputs.enable_thinking},
- };
-
- auto prompt = apply(tmpl, inputs,
- /* messages_override= */ inputs.messages,
- /* tools_override= */ std::nullopt,
- additional_context);
- data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- if (string_ends_with(data.prompt, "<think>")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- tool_rules.push_back(builder.add_rule(name + "-call",
- "( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
- "\" " + builder.add_schema(name + "-args", parameters) + " "
- "\"<|tool▁call▁end|>\""));
- });
- // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
- // so we accept common variants (then it's all constrained)
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
- "( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
- "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
- "\"<|tool▁calls▁end|>\""
- " space");
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
- "(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
- });
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<|tool▁calls▁begin|>",
- "<|tool▁call▁begin|>",
- "<|tool▁sep|>",
- "<|tool▁call▁end|>",
- "<|tool▁calls▁end|>",
- };
- });
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
- common_chat_params data;
- data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- data.prompt = apply(tmpl, params);
- data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
-
- // Handle thinking tags based on prompt ending
- if (string_ends_with(data.prompt, "<think>\n")) {
- if (!params.enable_thinking) {
- // Close the thinking tag immediately if thinking is disabled
- data.prompt += "</think>\n\n";
- } else {
- // Mark thinking as forced open (template started with <think>)
- data.thinking_forced_open = true;
- }
- }
-
- // Preserve MiniMax-M2 special tokens
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<minimax:tool_call>",
- "</minimax:tool_call>",
- };
-
- // build grammar for tool call
- static const xml_tool_call_format form {
- /* form.scope_start = */ "<minimax:tool_call>\n",
- /* form.tool_start = */ "<invoke name=\"",
- /* form.tool_sep = */ "\">\n",
- /* form.key_start = */ "<parameter name=\"",
- /* form.key_val_sep = */ "\">",
- /* form.val_end = */ "</parameter>\n",
- /* form.tool_end = */ "</invoke>\n",
- /* form.scope_end = */ "</minimax:tool_call>",
- };
- build_grammar_xml_tool_call(data, params.tools, form);
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
- common_chat_params data;
- data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- data.prompt = apply(tmpl, params);
- data.format = COMMON_CHAT_FORMAT_KIMI_K2;
-
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<|tool_calls_section_begin|>",
- "<|tool_call_begin|>",
- "<|tool_call_argument_begin|>",
- "<|tool_call_end|>",
- "<|tool_calls_section_end|>",
- "<|im_end|>",
- "<|im_system|>",
- "<|im_middle|>",
- };
-
- data.additional_stops.insert(data.additional_stops.end(), {
- "<|im_end|>",
- "<|im_middle|>"
- });
- // build grammar for tool call
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "<|tool_calls_section_begin|>";
- form.tool_start = "<|tool_call_begin|>";
- form.tool_sep = "<|tool_call_argument_begin|>{";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}<|tool_call_end|>";
- form.scope_end = "<|tool_calls_section_end|>";
- form.raw_argval = false;
- form.last_val_end = "";
- return form;
- })();
- build_grammar_xml_tool_call(data, params.tools, form);
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
- common_chat_params data;
- data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- data.prompt = apply(tmpl, params);
- data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
-
- data.preserved_tokens = {
- "<thinking>",
- "</thinking>",
- "<tool_calls>",
- "</tool_calls>",
- };
-
- // build grammar for tool call
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "<tool_calls>[";
- form.tool_start = "{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}, ";
- form.scope_end = "]</tool_calls>";
- form.raw_argval = false;
- form.last_val_end = "";
- form.last_tool_end = "}";
- return form;
- })();
- build_grammar_xml_tool_call(data, params.tools, form);
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
- common_chat_params data;
- data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- data.prompt = apply(tmpl, params);
- data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
-
- data.preserved_tokens = {
- "<tool_call>",
- "</tool_call>",
- };
-
- // build grammar for tool call
- static const xml_tool_call_format form = ([]() {
- xml_tool_call_format form {};
- form.scope_start = "\n";
- form.tool_start = "<tool_call>\n{\"name\": \"";
- form.tool_sep = "\", \"arguments\": {";
- form.key_start = "\"";
- form.key_val_sep = "\": ";
- form.val_end = ", ";
- form.tool_end = "}\n</tool_call>";
- form.scope_end = "";
- form.raw_argval = false;
- form.last_val_end = "";
- return form;
- })();
- build_grammar_xml_tool_call(data, params.tools, form);
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- // Copy reasoning to the "thinking" field as expected by the gpt-oss template
- auto adjusted_messages = json::array();
- for (const auto & msg : inputs.messages) {
- auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
- auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-
- if (has_reasoning_content && has_tool_calls) {
- auto adjusted_message = msg;
- adjusted_message["thinking"] = msg.at("reasoning_content");
- adjusted_message.erase("content");
- adjusted_messages.push_back(adjusted_message);
- } else {
- adjusted_messages.push_back(msg);
- }
- }
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ // Functionary v3.2 format:
+ // - Normal content: >>>all\n{content}
+ // - Tool calls: >>>function_name\n{json_args}
+ // Generation prompt ends with ">>>" so model outputs recipient immediately
- auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
+ // Build content parser for >>>all\n{content}
+ // When tools are present, content stops before the next ">>>" (tool call)
+ // When no tools, content goes until end
+ auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
+ auto content_until_end = p.literal(">>>all\n") + p.content(p.rest());
- // Check if we need to replace the return token with end token during
- // inference and without generation prompt. For more details see:
- // https://github.com/ggml-org/llama.cpp/issues/15417
- if (inputs.is_inference && !inputs.add_generation_prompt) {
- static constexpr std::string_view return_token = "<|return|>";
- static constexpr std::string_view end_token = "<|end|>";
- if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
- prompt.replace(pos, return_token.length(), end_token);
+ // If no tools or tool_choice is NONE, just parse content
+ if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+ // When no tools, just match the prefix and capture everything after
+ return content_until_end + p.end();
}
- }
-
- data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_GPT_OSS;
- // These special tokens are required to parse properly, so we include them
- // even if parse_tool_calls is false.
- data.preserved_tokens = {
- "<|channel|>",
- "<|constrain|>",
- "<|message|>",
- "<|start|>",
- "<|end|>",
- };
+ // Build tool call parsers for each available function
+ auto tool_choice = p.choice();
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ const auto & schema = function.at("parameters");
- if (!inputs.json_schema.is_null()) {
- data.grammar_lazy = false;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schema = inputs.json_schema;
- builder.resolve_refs(schema);
-
- auto not_end = builder.add_rule("not-end",
- "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
- auto analysis = builder.add_rule("analysis",
- "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
- auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
- auto final = builder.add_rule("final",
- "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
- builder.add_schema("response", schema)
+ // Tool format: >>>function_name\n{json_args}
+ auto tool_parser = p.tool(
+ p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
+ p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
);
- builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
+ tool_choice |= p.rule("tool-" + name, tool_parser);
});
- }
-
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- // tool calls can appear in commentary or analysis channels
- auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
- std::vector<std::string> tool_rules_recipient_in_role;
- std::vector<std::string> tool_rules_recipient_in_channel;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
-
- tool_rules_recipient_in_role.push_back(
- builder.add_rule(name + "-call",
- "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
- builder.add_schema(name + "-args", parameters)
- )
- );
-
- tool_rules_recipient_in_channel.push_back(
- builder.add_rule(name + "-call",
- "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
- builder.add_schema(name + "-args", parameters)
- )
- );
- });
-
- auto recipient_in_channel = builder.add_rule("recipient_in_channel",
- channel + " \" to=functions.\" ( " +
- string_join(tool_rules_recipient_in_channel, " | ") + " )"
- );
+ auto content_only = content_until_end;
+ auto tools_only = p.trigger_rule("tools", p.one_or_more(tool_choice));
+ auto content_and_tools = content_until_tool + tools_only;
- if (data.grammar_lazy) {
- auto recipient_in_role = builder.add_rule("recipient_in_role",
- "\"<|start|>assistant\"? \" to=functions.\" ( " +
- string_join(tool_rules_recipient_in_role, " | ") + " )"
- );
-
- builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
- } else {
- auto not_end = builder.add_rule("not-end",
- "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
- auto analysis = builder.add_rule("analysis",
- "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
- auto commentary = builder.add_rule("commentary",
- "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-
- auto recipient_in_role = builder.add_rule("recipient_in_role",
- "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
- );
-
- builder.add_rule("root",
- "( " + analysis + " \"<|start|>assistant\" )? " +
- "( " + commentary + " \"<|start|>assistant\" )? " +
- "( " + recipient_in_role + " | " + recipient_in_channel + " )"
- );
+ if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+ if (inputs.parallel_tool_calls) {
+ return p.choice({ content_and_tools, tools_only }) + p.end();
}
-
- // Trigger on tool calls that appear in the commentary channel
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
- "<\\|channel\\|>(?:commentary|analysis) to"
- });
-
- // Trigger tool calls that appear in the role section, either at the
- // start or in the middle.
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- "^ to"
- });
-
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
- "<\\|start\\|>assistant to"
- });
- });
- }
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
- std::string prompt = apply(tmpl, inputs);
-
- // match the existing trimming behavior
- if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
- prompt.erase(0, tmpl.bos_token().size());
- }
- if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
- prompt.erase(prompt.size() - tmpl.eos_token().size());
- }
- if (string_ends_with(prompt, "<think>")) {
- if (!inputs.enable_thinking) {
- prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
+ return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
}
- }
-
- // add GLM preserved tokens
- data.preserved_tokens = {
- "<|endoftext|>",
- "[MASK]",
- "[gMASK]",
- "[sMASK]",
- "<sop>",
- "<eop>",
- "<|system|>",
- "<|user|>",
- "<|assistant|>",
- "<|observation|>",
- "<|begin_of_image|>",
- "<|end_of_image|>",
- "<|begin_of_video|>",
- "<|end_of_video|>",
- "<|begin_of_audio|>",
- "<|end_of_audio|>",
- "<|begin_of_transcription|>",
- "<|end_of_transcription|>",
- "<|code_prefix|>",
- "<|code_middle|>",
- "<|code_suffix|>",
- "/nothink",
- "<think>",
- "</think>",
- "<tool_call>",
- "</tool_call>",
- "<arg_key>",
- "</arg_key>",
- "<arg_value>",
- "</arg_value>"
- };
-
- // extra GLM 4.5 stop word
- data.additional_stops.insert(data.additional_stops.end(), {
- "<|user|>",
- "<|observation|>"
+ if (inputs.parallel_tool_calls) {
+ return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
+ }
+ auto content_and_tool = content_until_tool + tool_choice;
+ return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
});
- // build grammar for tool call
- static const xml_tool_call_format form {
- /* form.scope_start = */ "",
- /* form.tool_start = */ "\n<tool_call>",
- /* form.tool_sep = */ "\n",
- /* form.key_start = */ "<arg_key>",
- /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
- /* form.val_end = */ "</arg_value>\n",
- /* form.tool_end = */ "</tool_call>\n",
- /* form.scope_end = */ "",
- };
- build_grammar_xml_tool_call(data, inputs.tools, form);
-
- data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_GLM_4_5;
- return data;
-}
-
-static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
- LOG_DBG("%s\n", __func__);
- common_chat_params data;
- const std::optional<json> additional_context = json {
- {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
- {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
- };
- data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- auto schemas = json::array();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- schemas.push_back({
- {"type", "object"},
- {"properties", {
- {"name", {
- {"type", "string"},
- {"const", function.at("name")},
- }},
- {"arguments", function.at("parameters")},
- }},
- {"required", json::array({"name", "arguments", "id"})},
- });
- });
- auto schema = json {
- {"type", "array"},
- {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
- {"minItems", 1},
- };
- if (!inputs.parallel_tool_calls) {
- schema["maxItems"] = 1;
- }
- builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
- });
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
- data.preserved_tokens = {
- " functools[",
- };
- data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
- } else {
- data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
- // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
- // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
- // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
- common_chat_params data;
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> first_tool_rules;
- std::vector<std::string> subsequent_tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- std::string args_pattern = "[\\s\\S]*";
- auto args_rule = builder.add_schema(name + "-args", parameters);
- if (name == "python") {
- args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
- } else {
- args_pattern = "\\{" + args_pattern;
- }
- auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
- first_tool_rules.push_back(call_rule);
- if (inputs.parallel_tool_calls) {
- subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
- }
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
- });
- });
- data.preserved_tokens = {
- "<|end_header_id|>",
- };
- auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
- if (inputs.parallel_tool_calls) {
- auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
- builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
- } else {
- builder.add_rule("root", first_rule);
- }
-
- });
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
- // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
- common_chat_params data;
+ data.parser = parser.save();
- if (!inputs.tools.is_null()) {
- std::string python_code_argument_name;
- auto has_raw_python = false;
+ if (include_grammar) {
+ data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- const auto & parameters = function.at("parameters");
- std::string name = function.at("name");
- if (name == "python" || name == "ipython") {
- if (!parameters.contains("type")) {
- throw std::runtime_error("Missing type in python tool");
- }
- has_raw_python = true;
- const auto & type = parameters.at("type");
- if (type == "object") {
- auto properties = parameters.at("properties");
- for (auto it = properties.begin(); it != properties.end(); ++it) {
- if (it.value().at("type") == "string") {
- if (!python_code_argument_name.empty()) {
- throw std::runtime_error("Multiple string arguments found in python tool");
- }
- python_code_argument_name = it.key();
- }
- }
- if (python_code_argument_name.empty()) {
- throw std::runtime_error("No string argument found in python tool");
- }
- } else if (type != "string") {
- throw std::runtime_error("Invalid type in python tool: " + type.dump());
- }
- }
- tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
+ auto schema = function.at("parameters");
+ builder.resolve_refs(schema);
});
- if (has_raw_python) {
- tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
- data.preserved_tokens.push_back("<|python_tag|>");
- }
- auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
- builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
+ parser.build_grammar(builder, data.grammar_lazy);
});
- data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
- } else {
- data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- }
-
- data.prompt = apply(tmpl, inputs);
- // TODO: if (has_raw_python)
- return data;
-}
-static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- json extra_context = json {
- {"enable_thinking", inputs.enable_thinking},
- };
- extra_context.update(inputs.extra_context);
-
- data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
- data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
- if (string_ends_with(data.prompt, "<think>\n")) {
- if (!extra_context["enable_thinking"]) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
-
- if (!inputs.tools.is_null()) {
- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- std::vector<std::string> tool_call_alts;
- std::vector<std::string> escaped_names;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- tool_rules.push_back(builder.add_schema(name + "-call", {
- {"type", "object"},
- {"properties", json {
- {"name", json {{"const", name}}},
- {"arguments", parameters},
- }},
- {"required", json::array({"name", "arguments"})},
- }));
- tool_call_alts.push_back(builder.add_rule(
- name + "-function-tag",
- "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
- builder.add_schema(name + "-args", parameters) + " "
- "\"</function>\" space"));
-
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
- "<function=" + name + ">",
- });
- auto escaped_name = regex_escape(name);
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
- "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
- });
- escaped_names.push_back(escaped_name);
- });
- auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
- std::vector<std::string> alt_tags {
- any_tool_call,
- "\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
- // The rest is just to accommodate common "good bad" outputs.
- "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
- "\"<response>\" space " + any_tool_call + " \"</response>\"",
- "\"<tools>\" space " + any_tool_call + " \"</tools>\"",
- "\"<json>\" space " + any_tool_call + " \"</json>\"",
- "\"<xml>\" space " + any_tool_call + " \"</xml>\"",
- "\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
- };
- auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
- tool_call_alts.push_back(wrappable_tool_call);
- tool_call_alts.push_back(
- "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
- auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
- (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
- // If thinking_forced_open, then we capture the </think> tag in the grammar,
- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
- std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
- "\\s*("
- "(?:<tool_call>"
- "|<function"
- "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
- "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
- ")"
- ")"
- ),
- });
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<tool_call>",
- "</tool_call>",
- "<function",
- "<tools>",
- "</tools>",
- "<response>",
- "</response>",
- "<function_call>",
- "</function_call>",
- "<json>",
- "</json>",
- "<JSON>",
- "</JSON>",
- "```",
- "```json",
- "```xml",
- };
- });
+ // Grammar trigger for when the model starts outputting a tool call
+ // (after the initial ">>>" in the generation prompt but recipient other than "all")
+ data.grammar_triggers = {
+ { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, ">>>(?!all)" }
+ };
}
return data;
}
-static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+// The ID contains both the function name and an incrementing counter
+static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl,
+ const autoparser::templates_params & inputs) {
common_chat_params data;
- // Pass thinking context for Granite template
- json additional_context = {
- {"thinking", inputs.enable_thinking},
+ data.prompt = common_chat_template_direct_apply(tmpl, inputs);
+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+ data.supports_thinking = true;
+ data.preserved_tokens = {
+ "<|tool_calls_section_begin|>",
+ "<|tool_calls_section_end|>",
+ "<|tool_call_begin|>",
+ "<|tool_call_argument_begin|>",
+ "<|tool_call_end|>",
+ "<think>",
+ "</think>",
};
- data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
- data.format = COMMON_CHAT_FORMAT_GRANITE;
-
- if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
-
- if (!inputs.tools.is_null()) {
- // Granite uses <|tool_call|> followed by JSON list
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
-"-args", {
- {"type", "object"},
- {"properties", {
- {"name", {{"const", name}}},
- {"arguments", parameters},
- }},
- {"required", json::array({"name", "arguments"})},
- })));
- });
-
- auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
- auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
-
- if (data.thinking_forced_open) {
- builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
- } else {
- builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
- }
-
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
- "<|tool_call|>"
- });
+ auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+ auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ // Kimi K2 Thinking format:
+ // - Reasoning: <think>{reasoning}</think>
+ // - Content: text after reasoning
+ // - Tool calls section:
+ // <|tool_calls_section_begin|>
+ // <|tool_call_begin|>functions.<name>:<index><|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+ // ...
+ // <|tool_calls_section_end|>
+ // The ID format is: functions.<function_name>:<counter> where counter is 0, 1, 2, ...
+
+ // Tool call markers
+ const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
+ const std::string SECTION_END = "<|tool_calls_section_end|>";
+ const std::string CALL_BEGIN = "<|tool_call_begin|>";
+ const std::string ARGS_BEGIN = "<|tool_call_argument_begin|>";
+ const std::string CALL_END = "<|tool_call_end|>";
+
+ const std::string THINK_START = "<think>";
+ const std::string THINK_END = "</think>";
+
+ auto end = p.end();
+
+ // Note: this model is CRAZY. It can diverge from its supposed tool calling pattern in so many ways it's not funny.
+ // For example, it can call tools at the end of reasoning without closing reasoning...
+ auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning(
+ p.until_one_of({ THINK_END, "<|tool_calls_section_begin|>", "<|tool_call_begin|>" })) +
+ p.optional(p.literal(THINK_END))) : p.eps();
+
+
+ // Content only parser (no tools)
+ if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+ return reasoning + p.content(p.rest()) + end;
+ }
+
+ // Build tool call parsers for each available function
+ // The ID format is: functions.<name>:<index>
+ // We need to match: functions.<name>:<digits>
+ auto tool_choice = p.choice();
+ foreach_function(inputs.tools, [&](const json & tool) {
+ const auto & function = tool.at("function");
+ std::string name = function.at("name");
+ const auto & schema = function.at("parameters");
+
+ // Match: functions.<name>:<digits>
+ // Capture the full call id (functions.<name>:<digits>) using tool_id tag
+ auto tool_id = p.tool_id(p.literal("functions.") + p.tool_name(p.literal(name)) + p.literal(":") + p.chars("[0-9]", 1, -1));
+ auto tool_parser = p.tool(
+ p.tool_open(tool_id + p.literal(ARGS_BEGIN)) +
+ p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) +
+ p.tool_close(p.optional((p.literal(CALL_END))))
+ );
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<response>",
- "</response>",
- "<|tool_call|>",
- };
+ tool_choice |= p.rule("tool-" + name, tool_parser);
});
- } else {
- // Handle thinking tags for non-tool responses
- if (data.thinking_forced_open && inputs.enable_thinking) {
- data.grammar_lazy = false;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
- });
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<response>",
- "</response>",
- };
- }
- }
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- // Copy `reasoning_content` to `reasoning`
- auto adjusted_messages = json::array();
- for (const auto & msg : inputs.messages) {
- if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
- auto adjusted_message = msg;
- adjusted_message["reasoning"] = msg.at("reasoning_content");
- adjusted_message.erase("reasoning_content");
- adjusted_messages.push_back(adjusted_message);
- } else {
- adjusted_messages.push_back(msg);
- }
- }
-
- auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
- auto include_grammar = true;
-
- auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
- // Check if we need to replace the flush token with end token during inference and without generation prompt.
- if (inputs.is_inference && !inputs.add_generation_prompt) {
- static constexpr std::string_view return_token = "<|flush|>";
- static constexpr std::string_view end_token = "<|end|>";
- if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
- prompt.replace(pos, return_token.length(), end_token);
- }
- }
-
- data.prompt = prompt;
- data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
- data.preserved_tokens = {
- "<|think|>",
- "<|content|>",
- "<|begin|>",
- "<|end|>",
- "<|tool_calls|>",
- "<|tool_call:begin|>",
- "<|tool_call:end|>",
- "<|tool_call:name|>",
- "<|tool_call:args|>",
- };
-
- auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
- auto lit_think = p.atomic(p.literal("<|think|>"));
- auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
- auto lit_content = p.atomic(p.literal("<|content|>"));
- auto lit_end = p.atomic(p.literal("<|end|>"));
- auto parser_until_end = p.until("<|end|>");
-
- // reasoning <- "<|think|>" (!"<|end|>" .)*
- auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));
-
- // content <- "<|content|>" (!"<|end|>" .)*
- auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));
-
- // wrap_choice(items) <- item-choice wrapped*
- // item-choice <- items[0] / ... / items[n]
- // wrapped <- "<|end|><|begin|>assistant" item-choice
- auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
- auto choice = p.choice(items);
- return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
- };
-
- // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
- auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
- auto seq = p.sequence();
- for (auto i = 0u; i < items.size(); i++) {
- if (i == 0) {
- seq += items[i];
- continue;
- }
- seq += lit_end + lit_assistant_begin + items[i];
- }
- return seq;
- };
-
- // Response format parser
- if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
- auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
- return p.choice({
- wrap_seq({parser_reasoning, parser_response_format}),
- wrap_seq({parser_response_format})
- });
- }
- auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
- auto lit_tool_call_name = p.literal("<|tool_call:name|>");
- auto lit_tool_call_args = p.literal("<|tool_call:args|>");
- auto lit_tool_call_end = p.literal("<|tool_call:end|>");
+ // Tool calls section: <|tool_calls_section_begin|> tool_calls <|tool_calls_section_end|>
+ auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+ auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+ // Use trigger_rule so grammar generator knows where to start generating rules
+ auto tool_calls = p.rule("tool-calls",
+ p.optional(p.literal(SECTION_BEGIN)) +
+ p.trigger_rule("tool-call", p.repeat(CALL_BEGIN + tool_choice, min_calls, max_calls) +
+ p.optional(p.literal(SECTION_END)))
+ );
- // Tool call parser
- if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
- auto parser_tool_call = p.choice();
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- const auto & schema = function.at("parameters");
+ auto content_before_tools = p.content(p.until_one_of({ SECTION_BEGIN, CALL_BEGIN }));
- // tool(name, schema) <- name "<|tool_call:args|>" schema
- parser_tool_call |= p.rule("tool-" + name,
- p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
- + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
- });
-
- auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
- auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-
- // tool-calls <- "<|tool_calls|>" tool-call+
- // tool-call <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
- // call-id <- [a-zA-Z0-9_-]+
- // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
- auto parser_tool_calls = p.trigger_rule("tool-calls",
- p.atomic(p.literal("<|tool_calls|>"))
- + p.repeat(
- p.tool_open(
- lit_tool_call_begin
- + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
- + lit_tool_call_name
- + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
- + parser_tool_call
- + p.tool_close(lit_tool_call_end),
- /* min = */ 1,
- /* max = */ max_calls));
-
- if (min_calls == 1) {
- // If required, then try any combination of the reasoning, content, and tool call
- return p.choice({
- wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
- wrap_seq({parser_reasoning, parser_tool_calls}),
- wrap_seq({parser_content, parser_tool_calls}),
- wrap_seq({parser_tool_calls})
- });
- }
-
- return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
- }
-
- // Content only parser
- include_grammar = false;
- return wrap_choice({parser_reasoning, parser_content});
+ return reasoning + content_before_tools + tool_calls + end;
});
data.parser = parser.save();
if (include_grammar) {
- data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+ data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
- auto schema = function.at("parameters");
+ auto schema = function.at("parameters");
builder.resolve_refs(schema);
});
parser.build_grammar(builder, data.grammar_lazy);
});
data.grammar_triggers = {
- {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call_begin|>" }
};
}
return data;
}
-static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
- if (string_ends_with(data.prompt, "<think>\n")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</think>\n\n";
- } else {
- data.thinking_forced_open = true;
- }
- }
-
- if (inputs.tools.is_array() && !inputs.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(inputs.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
- // Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
- tool_rules.push_back(builder.add_rule(
- name + "-call",
- "\"<tool_call>\" space " +
- builder.add_schema(name + "-obj", json{
- {"type", "object"},
- {"properties", {
- {"name", json{{"const", name}}},
- {"arguments", parameters},
- }},
- {"required", json::array({"name", "arguments"})},
- }) +
- " space \"</tool_call>\" space"));
- });
-
- auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
- builder.add_rule("root",
- std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
- (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-
- data.grammar_triggers.push_back({
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
- std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
- "(<tool_call>)[\\s\\S]*"
- });
- data.preserved_tokens = {
- "<think>",
- "</think>",
- "<tool_call>",
- "</tool_call>",
- };
- });
- }
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
-
- // This template does not support tools or reasoning
- // we just need to transform the messages into the correct schema
-
- templates_params inputs_new = inputs;
- json & messages = inputs_new.messages;
-
- // default to chat_template_kwargs, or en-GB if not specified
- std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
- std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
-
- GGML_ASSERT(messages.is_array());
- for (auto & message : messages) {
- if (message.contains("role") && message["role"].get<std::string>() != "user") {
- continue;
- }
- if (!message.contains("content")) {
- message["content"] = json::array();
- }
- if (message.contains("content") && !message["content"].is_array()) {
- auto content_str = message["content"].get<std::string>();
- // default to en-GB if not specified (to make common_chat_format_example works)
- auto src_lang = message.contains("source_lang_code")
- ? message["source_lang_code"].get<std::string>() : default_src_lang;
- auto tgt_lang = message.contains("target_lang_code")
- ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
- message["content"] = json::array({
- json{
- {"type", "text"},
- {"text", content_str},
- {"source_lang_code", src_lang},
- {"target_lang_code", tgt_lang},
- }
- });
- }
- }
-
- data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
- data.format = COMMON_CHAT_FORMAT_GENERIC;
-
- return data;
-}
-
-static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
- common_chat_params data;
- data.prompt = apply(tmpl, inputs);
- data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- data.grammar_lazy = false;
- if (!inputs.json_schema.is_null()) {
- if (!inputs.grammar.empty()) {
- throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
- }
- data.grammar = json_schema_to_grammar(inputs.json_schema);
- } else {
- data.grammar = inputs.grammar;
- }
- return data;
-}
-
-static common_chat_params common_chat_params_init_seed_oss(
- const common_chat_template & tmpl,
- templates_params & params,
- const common_chat_templates_inputs & inputs)
-{
- common_chat_params data;
- data.prompt = apply(tmpl, params);
- data.format = COMMON_CHAT_FORMAT_SEED_OSS;
- if (string_ends_with(data.prompt, "<seed:think>")) {
- if (!inputs.enable_thinking) {
- data.prompt += "</seed:think>";
- } else {
- data.thinking_forced_open = true;
- }
- }
-
- if (params.tools.is_array() && !params.tools.empty()) {
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
- std::vector<std::string> tool_rules;
- foreach_function(params.tools, [&](const json & tool) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- builder.resolve_refs(parameters);
-
- // Create rule for Seed-OSS function call format
- std::string param_rules;
- if (parameters.contains("properties")) {
- for (const auto & [key, value] : parameters.at("properties").items()) {
- param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
- "\"</parameter>\"";
- }
- }
-
- tool_rules.push_back(builder.add_rule(name + "-call",
- "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
- param_rules +
- " \"</function>\" space \"</seed:tool_call>\""));
- });
-
- data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });
-
- data.preserved_tokens = {
- "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
- "<function=", "</function>", "<parameter=", "</parameter>",
- };
-
- builder.add_rule("root", string_join(tool_rules, " | "));
- });
- }
- return data;
-}
-
-// various workarounds for known issues with certain templates or model behaviors
-// TODO @ngxson : improve this (how?)
namespace workaround {
// if first message is system and template does not support it, merge it with next message
}
}
+static void requires_non_null_content(json & messages) {
+ GGML_ASSERT(messages.is_array());
+ for (auto & message : messages) {
+ if (message.contains("tool_calls") && !message.contains("content")) {
+ message["content"] = "";
+ }
+ }
+}
+
static void func_args_not_string(json & messages) {
GGML_ASSERT(messages.is_array());
for (auto & message : messages) {
}
}
-static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
- GGML_ASSERT(messages.is_array());
- for (auto & message : messages) {
- if (message.contains("tool_calls")) {
- auto tool_calls_new = json{
- {"tool_calls", message.at("tool_calls")}
- };
- message.erase("tool_calls");
- auto content = message.at("content");
- std::string content_new = content.is_null() ? "" : content.get<std::string>();
- message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
- }
- }
}
-// TODO @ngxson : we may remove support for generic schema in the future
-static void use_generic_schema(json & messages) {
- GGML_ASSERT(messages.is_array());
- for (auto & message : messages) {
- if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
- auto & tool_calls = message.at("tool_calls");
- for (auto & tool_call : tool_calls) {
- if (tool_call.contains("type") && tool_call.at("type") == "function" &&
- tool_call.contains("function") && tool_call.at("function").is_object()) {
- // Copy values before erasing to avoid use-after-free
- json name_value;
- json arguments_value;
- json id_value;
- const auto & function = tool_call.at("function");
- if (function.contains("name")) {
- name_value = function.at("name");
- }
- if (function.contains("arguments")) {
- arguments_value = function.at("arguments");
- }
- if (tool_call.contains("id")) {
- id_value = tool_call.at("id");
- }
- // Now safely erase and assign in the correct order
- tool_call.erase("type");
- tool_call.erase("function");
- tool_call.erase("id");
- // Reassign in desired order: name, arguments, id
- if (!name_value.is_null()) {
- tool_call["name"] = name_value;
- }
- if (!arguments_value.is_null()) {
- tool_call["arguments"] = arguments_value;
- }
- if (!id_value.is_null()) {
- tool_call["id"] = id_value;
- }
- }
- }
- }
- }
+static json common_chat_extra_context() {
+ json ctx = json::object();
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+ std::string datetime_str = format_time(now, "%b %d %Y");
+ std::string date_str = format_time(now, "%d %b %Y");
+ ctx["datetime"] = datetime_str;
+ ctx["date_string"] = date_str;
+ return ctx;
}
-} // namespace workaround
-
-static common_chat_params common_chat_templates_apply_jinja(
- const struct common_chat_templates * tmpls,
- const struct common_chat_templates_inputs & inputs)
-{
- templates_params params;
+static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
+ const struct common_chat_templates_inputs & inputs) {
+ autoparser::templates_params params;
params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
? *tmpls->template_tool_use
workaround::system_message_not_supported(params.messages);
}
- params.extra_context = json::object();
+ if (tmpl.original_caps().supports_tool_calls) {
+ // some templates will require the content field in tool call messages
+ // to still be non-null, this puts an empty string everywhere where the
+ // content field is null
+ workaround::requires_non_null_content(params.messages);
+ }
+
+ params.extra_context = common_chat_extra_context();
for (auto el : inputs.chat_template_kwargs) {
params.extra_context[el.first] = json::parse(el.second);
}
params.json_schema = json::parse(inputs.json_schema);
}
- if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
- LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
- params.parallel_tool_calls = false;
- } else {
- params.parallel_tool_calls = inputs.parallel_tool_calls;
- }
+ // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
+ // LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
+ // params.parallel_tool_calls = false;
+ // } else {
+ params.parallel_tool_calls = inputs.parallel_tool_calls;
+ //}
if (params.tools.is_array()) {
if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
throw std::runtime_error("Cannot specify grammar with tools");
}
if (caps.supports_tool_calls && !caps.supports_tools) {
- LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
- }
- }
-
- // DeepSeek V3.1: detect based on specific patterns in the template
- if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
- params.json_schema.is_null()) {
- return common_chat_params_init_deepseek_v3_1(tmpl, params);
- }
-
- // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
- if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
- return common_chat_params_init_deepseek_r1(tmpl, params);
- }
-
- // Command R7B: : use handler in all cases except json schema (thinking / tools).
- if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_command_r7b(tmpl, params);
- }
-
- // Granite (IBM) - detects thinking / tools support
- if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
- workaround::func_args_not_string(params.messages);
- workaround::use_generic_schema(params.messages);
- workaround::move_tool_calls_to_content(params.messages);
- return common_chat_params_init_granite(tmpl, params);
- }
-
- // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
- if (src.find("[gMASK]<sop>") != std::string::npos &&
- src.find("<arg_key>") != std::string::npos &&
- src.find("<arg_value>") != std::string::npos &&
- params.json_schema.is_null()) {
- workaround::func_args_not_string(params.messages);
- if (!params.extra_context.contains("clear_thinking")) {
- // by default, do not clear reasoning_content (added since GLM-4.7)
- params.extra_context["clear_thinking"] = false;
+ LOG_WRN(
+ "Template supports tool calls but does not natively describe tools. The fallback behaviour used may "
+ "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
}
- return common_chat_params_init_glm_4_5(tmpl, params);
- }
-
- // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
- // Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
- // Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format.
- if (src.find("<tool_call>") != std::string::npos &&
- src.find("<function=") != std::string::npos &&
- src.find("<parameter=") != std::string::npos) {
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_qwen3_coder(tmpl, params);
- }
-
- // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
- if (src.find("<tools>") != std::string::npos &&
- src.find("# Tools") != std::string::npos &&
- src.find("</tools>") != std::string::npos &&
- src.find("<tool_calls>") != std::string::npos &&
- src.find("</tool_calls>") != std::string::npos &&
- src.find("<tool_response>") != std::string::npos) {
- return common_chat_params_init_xiaomi_mimo(tmpl, params);
}
- // EXAONE MoE format detection
- if (src.find("<tool_call>") != std::string::npos &&
- src.find("<tool_result>") != std::string::npos &&
- src.find("<|tool_declare|>") != std::string::npos) {
- return common_chat_params_init_exaone_moe(tmpl, params);
- }
-
- // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
- if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
- return common_chat_params_init_hermes_2_pro(tmpl, params);
+ // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
+ // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
+ if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
+ src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
+ LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
+ return common_chat_params_init_ministral_3(tmpl, params);
}
- // GPT-OSS
+ // GPT-OSS - has unique channel-based structure that needs dedicated handler
if (src.find("<|channel|>") != std::string::npos) {
+ LOG_DBG("Using specialized template: GPT-OSS\n");
return common_chat_params_init_gpt_oss(tmpl, params);
}
- // Seed-OSS
- if (src.find("<seed:think>") != std::string::npos) {
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_seed_oss(tmpl, params, inputs);
- }
-
- // Nemotron v2
- if (src.find("<SPECIAL_10>") != std::string::npos) {
- return common_chat_params_init_nemotron_v2(tmpl, params);
- }
-
- // Apertus format detection
- if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
- return common_chat_params_init_apertus(tmpl, params);
- }
-
- // LFM2 (w/ tools)
- if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
- src.find("]<|tool_list_end|>") != std::string::npos) {
- return common_chat_params_init_lfm2(tmpl, params);
- }
-
- // MiniMax-M2 format detection
- if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_minimax_m2(tmpl, params);
- }
-
- // Kimi K2 format detection
- if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
- src.find("<|tool_calls_section_begin|>") != std::string::npos &&
- src.find("## Return of") != std::string::npos) {
- return common_chat_params_init_kimi_k2(tmpl, params);
- }
-
- // Apriel 1.5 format detection
- if (src.find("<thinking>") != std::string::npos &&
- src.find("</thinking>") != std::string::npos &&
- src.find("<available_tools>") != std::string::npos &&
- src.find("<|assistant|>") != std::string::npos &&
- src.find("<|tool_result|>") != std::string::npos &&
- src.find("<tool_calls>[") != std::string::npos &&
- src.find("]</tool_calls>") != std::string::npos) {
- return common_chat_params_init_apriel_1_5(tmpl, params);
- }
-
- // Solar Open
- if (src.find("<|tool_response:begin|>") != std::string::npos &&
- src.find("<|tool_response:name|>") != std::string::npos &&
- src.find("<|tool_response:result|>") != std::string::npos) {
- return common_chat_params_init_solar_open(tmpl, params);
- }
-
- // Use generic handler when mixing tools + JSON schema.
- // TODO: support that mix in handlers below.
- if ((params.tools.is_array() && params.json_schema.is_object())) {
- return common_chat_params_init_generic(tmpl, params);
- }
-
- // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
- if (src.find(">>>all") != std::string::npos) {
+ // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
+ // Detection: template has ">>>all" for content and ">>>" prefix for tool calls
+ if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
+ LOG_DBG("Using specialized template: Functionary v3.2\n");
return common_chat_params_init_functionary_v3_2(tmpl, params);
}
- // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
- if (src.find(" functools[") != std::string::npos) {
- return common_chat_params_init_firefunction_v2(tmpl, params);
- }
-
- // Functionary v3.1 (w/ tools)
- if (src.find("<|start_header_id|>") != std::string::npos
- && src.find("<function=") != std::string::npos) {
- return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
- }
-
- // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
- if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
- auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
- }
-
- // Ministral/Mistral Large 3
- if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
- src.find("[TOOL_CALLS]") != std::string::npos &&
- src.find("[ARGS]") != std::string::npos) {
- return common_chat_params_init_ministral_3(tmpl, params);
- }
-
- if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
- return common_chat_params_init_magistral(tmpl, params);
- }
-
- // Solar Open
- if (src.find("<|tool_response:begin|>") != std::string::npos &&
- src.find("<|tool_response:name|>") != std::string::npos &&
- src.find("<|tool_response:result|>") != std::string::npos) {
- return common_chat_params_init_solar_open(tmpl, params);
- }
-
- // TranslateGemma
- if (src.find("[source_lang_code]") != std::string::npos &&
- src.find("[target_lang_code]") != std::string::npos) {
- return common_chat_params_init_translate_gemma(tmpl, params);
- }
-
- // Plain handler (no tools)
- if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
- return common_chat_params_init_without_tools(tmpl, params);
+ // Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+ // Detection: template has "<|tool_calls_section_begin|>" and "functions." prefix in tool call IDs
+ if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
+ src.find("<|tool_call_begin|>") != std::string::npos) {
+ LOG_DBG("Using specialized template: Kimi K2 Thinking\n");
+ return common_chat_params_init_kimi_k2(tmpl, params);
}
- // Mistral Nemo (w/ tools)
- if (src.find("[TOOL_CALLS]") != std::string::npos) {
- workaround::func_args_not_string(params.messages);
- return common_chat_params_init_mistral_nemo(tmpl, params);
+ try {
+ LOG_DBG("Using differential autoparser\n");
+ struct autoparser::autoparser autoparser;
+ autoparser.analyze_template(tmpl);
+ auto auto_params = autoparser::peg_generator::generate_parser(tmpl, params, autoparser);
+ auto_params.supports_thinking = autoparser.reasoning.mode != autoparser::reasoning_mode::NONE;
+ return auto_params;
+ } catch (const std::exception & e) {
+ throw std::invalid_argument(std::string("Unable to generate parser for this template. Automatic parser generation failed: ") + e.what());
}
-
- // Generic fallback
- workaround::func_args_not_string(params.messages);
- workaround::use_generic_schema(params.messages);
- workaround::move_tool_calls_to_content(params.messages);
- return common_chat_params_init_generic(tmpl, params);
}
// Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
-static common_chat_params common_chat_templates_apply_legacy(
- const struct common_chat_templates * tmpls,
- const struct common_chat_templates_inputs & inputs)
-{
- size_t alloc_size = 0;
+static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates * tmpls,
+ const struct common_chat_templates_inputs & inputs) {
+ size_t alloc_size = 0;
std::vector<llama_chat_message> chat;
- std::vector<std::string> contents;
+ std::vector<std::string> contents;
for (const auto & msg : inputs.messages) {
auto content = msg.content;
continue;
}
if (!content.empty()) {
- content += "\n";;
+ content += "\n";
+ ;
}
content += part.text;
}
contents.emplace_back(std::move(content));
}
for (size_t i = 0; i < contents.size(); ++i) {
- const auto & msg = inputs.messages[i];
+ const auto & msg = inputs.messages[i];
const auto & content = contents[i];
- chat.push_back({msg.role.c_str(), content.c_str()});
+ chat.push_back({ msg.role.c_str(), content.c_str() });
size_t msg_size = msg.role.size() + content.size();
- alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
+ alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
}
std::vector<char> buf(alloc_size);
// run the first time to get the total output length
const auto & src = tmpls->template_default->source();
- int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+ int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt,
+ buf.data(), buf.size());
// error: chat template is not supported
if (res < 0) {
// if it turns out that our buffer is too small, we resize it
if ((size_t) res > buf.size()) {
buf.resize(res);
- res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+ res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(),
+ buf.size());
}
// for safety, we check the result again
return params;
}
-common_chat_params common_chat_templates_apply(
- const struct common_chat_templates * tmpls,
- const struct common_chat_templates_inputs & inputs)
-{
+common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls,
+ const struct common_chat_templates_inputs & inputs) {
GGML_ASSERT(tmpls != nullptr);
- return inputs.use_jinja
- ? common_chat_templates_apply_jinja(tmpls, inputs)
- : common_chat_templates_apply_legacy(tmpls, inputs);
+ return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) :
+ common_chat_templates_apply_legacy(tmpls, inputs);
+}
+
+common_chat_msg common_chat_parse(const std::string & input,
+ bool is_partial,
+ const common_chat_parser_params & params) {
+ return common_chat_peg_parse(params.parser, input, is_partial, params);
+}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser,
+ const std::string & input,
+ bool is_partial,
+ const common_chat_parser_params & params) {
+ const common_peg_arena & parser = src_parser.empty() ?
+ build_chat_peg_parser([](common_chat_peg_builder & p) { return p.content(p.rest()) + p.end(); }) :
+ src_parser;
+
+ if (src_parser.empty()) {
+ LOG_WRN("No parser definition detected, assuming pure content parser.");
+ }
+
+ LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());
+
+ common_peg_parse_context ctx(input, is_partial);
+ ctx.debug = params.debug;
+ auto result = parser.parse(ctx);
+
+ if (result.fail()) {
+ // During partial parsing, return partial results if any AST nodes were captured
+ // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
+ if (is_partial && result.end > 0) {
+ // Try to extract any partial results from what was successfully parsed
+ common_chat_msg msg;
+ msg.role = "assistant";
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ if (ctx.debug) {
+ fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
+ fflush(stderr);
+ }
+ return msg;
+ }
+ throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
+ input.substr(result.end));
+ }
+
+ common_chat_msg msg;
+ msg.role = "assistant";
+
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ if (ctx.debug) {
+ fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
+ fflush(stderr);
+ }
+
+ if (!is_partial) {
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str());
+ }
+ return msg;
}
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
GGML_ASSERT(chat_templates->template_default != nullptr);
return chat_templates->template_default->caps.to_map();
}
+
#pragma once
#include "common.h"
+#include "jinja/parser.h"
+#include "nlohmann/json_fwd.hpp"
#include "peg-parser.h"
-#include <functional>
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
+#include "nlohmann/json.hpp"
+
#include <chrono>
+#include <functional>
+#include <map>
#include <string>
#include <vector>
-#include <map>
+
+using chat_template_caps = jinja::caps;
+using json = nlohmann::ordered_json;
#include <nlohmann/json_fwd.hpp>
struct common_chat_templates;
+namespace autoparser {
+struct templates_params;
+} // namespace autoparser
+
struct common_chat_tool_call {
std::string name;
std::string arguments;
}
};
+struct common_chat_template {
+ jinja::program prog;
+ std::string bos_tok;
+ std::string eos_tok;
+ std::string src;
+ chat_template_caps caps;
+
+ common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+ jinja::lexer lexer;
+ auto lexer_res = lexer.tokenize(src);
+ this->prog = jinja::parse_from_tokens(lexer_res);
+
+ this->src = lexer_res.source;
+ this->bos_tok = bos_token;
+ this->eos_tok = eos_token;
+
+ this->caps = jinja::caps_get(prog);
+ // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+ }
+
+ const std::string & source() const { return src; }
+ const std::string & bos_token() const { return bos_tok; }
+ const std::string & eos_token() const { return eos_tok; }
+
+ // TODO: this is ugly, refactor it somehow
+ json add_system(const json & messages, const std::string & system_prompt) const {
+ GGML_ASSERT(messages.is_array());
+ auto msgs_copy = messages;
+ if (!caps.supports_system_role) {
+ if (msgs_copy.empty()) {
+ msgs_copy.insert(msgs_copy.begin(), json{
+ {"role", "user"},
+ {"content", system_prompt}
+ });
+ } else {
+ auto & first_msg = msgs_copy[0];
+ if (!first_msg.contains("content")) {
+ first_msg["content"] = "";
+ }
+ first_msg["content"] = system_prompt + "\n\n"
+ + first_msg["content"].get<std::string>();
+ }
+ } else {
+ if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
+ msgs_copy.insert(msgs_copy.begin(), json{
+ {"role", "system"},
+ {"content", system_prompt}
+ });
+ } else if (msgs_copy[0].at("role") == "system") {
+ msgs_copy[0]["content"] = system_prompt;
+ }
+ }
+ return msgs_copy;
+ }
+
+ chat_template_caps original_caps() const {
+ return caps;
+ }
+
+};
+
struct common_chat_msg {
- std::string role;
- std::string content;
+ std::string role;
+ std::string content;
std::vector<common_chat_msg_content_part> content_parts;
- std::vector<common_chat_tool_call> tool_calls;
- std::string reasoning_content;
- std::string tool_name;
- std::string tool_call_id;
+ std::vector<common_chat_tool_call> tool_calls;
+ std::string reasoning_content;
+ std::string tool_name;
+ std::string tool_call_id;
nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
bool empty() const {
- return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
+ return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
+ tool_name.empty() && tool_call_id.empty();
}
- void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
+
+ void set_tool_call_ids(std::vector<std::string> & ids_cache,
+ const std::function<std::string()> & gen_tool_call_id) {
for (auto i = 0u; i < tool_calls.size(); i++) {
if (ids_cache.size() <= i) {
auto id = tool_calls[i].id;
tool_calls[i].id = ids_cache[i];
}
}
+
bool operator==(const common_chat_msg & other) const {
- return role == other.role
- && content == other.content
- && content_parts == other.content_parts
- && tool_calls == other.tool_calls
- && reasoning_content == other.reasoning_content
- && tool_name == other.tool_name
- && tool_call_id == other.tool_call_id;
- }
- bool operator!=(const common_chat_msg & other) const {
- return !(*this == other);
+ return role == other.role && content == other.content && content_parts == other.content_parts &&
+ tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
+ tool_name == other.tool_name && tool_call_id == other.tool_call_id;
}
+
+ bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
};
struct common_chat_msg_diff {
- std::string reasoning_content_delta;
- std::string content_delta;
- size_t tool_call_index = std::string::npos;
+ std::string reasoning_content_delta;
+ std::string content_delta;
+ size_t tool_call_index = std::string::npos;
common_chat_tool_call tool_call_delta;
- static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
+ const common_chat_msg & msg_new);
bool operator==(const common_chat_msg_diff & other) const {
- return content_delta == other.content_delta
- && tool_call_index == other.tool_call_index
- && tool_call_delta == other.tool_call_delta;
+ return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
+ tool_call_delta == other.tool_call_delta;
}
};
enum common_chat_format {
COMMON_CHAT_FORMAT_CONTENT_ONLY,
- COMMON_CHAT_FORMAT_GENERIC,
- COMMON_CHAT_FORMAT_MISTRAL_NEMO,
- COMMON_CHAT_FORMAT_MAGISTRAL,
- COMMON_CHAT_FORMAT_LLAMA_3_X,
- COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
- COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
- COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
- COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- COMMON_CHAT_FORMAT_HERMES_2_PRO,
- COMMON_CHAT_FORMAT_COMMAND_R7B,
- COMMON_CHAT_FORMAT_GRANITE,
- COMMON_CHAT_FORMAT_GPT_OSS,
- COMMON_CHAT_FORMAT_SEED_OSS,
- COMMON_CHAT_FORMAT_NEMOTRON_V2,
- COMMON_CHAT_FORMAT_APERTUS,
- COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
- COMMON_CHAT_FORMAT_GLM_4_5,
- COMMON_CHAT_FORMAT_MINIMAX_M2,
- COMMON_CHAT_FORMAT_KIMI_K2,
- COMMON_CHAT_FORMAT_APRIEL_1_5,
- COMMON_CHAT_FORMAT_XIAOMI_MIMO,
- COMMON_CHAT_FORMAT_SOLAR_OPEN,
- COMMON_CHAT_FORMAT_EXAONE_MOE,
// These are intended to be parsed by the PEG parser
COMMON_CHAT_FORMAT_PEG_SIMPLE,
COMMON_CHAT_FORMAT_PEG_NATIVE,
- COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
- COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+ COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
struct common_chat_templates_inputs {
- std::vector<common_chat_msg> messages;
- std::string grammar;
- std::string json_schema;
- bool add_generation_prompt = true;
- bool use_jinja = true;
+ std::vector<common_chat_msg> messages;
+ std::string grammar;
+ std::string json_schema;
+ bool add_generation_prompt = true;
+ bool use_jinja = true;
// Parameters below only supported when use_jinja is true
- std::vector<common_chat_tool> tools;
- common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
- bool parallel_tool_calls = false;
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
- bool enable_thinking = true;
- std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
- std::map<std::string, std::string> chat_template_kwargs;
- bool add_bos = false;
- bool add_eos = false;
+ std::vector<common_chat_tool> tools;
+ common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+ bool parallel_tool_calls = false;
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
+ bool enable_thinking = true;
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+ std::map<std::string, std::string> chat_template_kwargs;
+ bool add_bos = false;
+ bool add_eos = false;
};
struct common_chat_params {
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
std::string prompt;
std::string grammar;
- bool grammar_lazy = false;
+ bool grammar_lazy = false;
bool thinking_forced_open = false;
+ bool supports_thinking = false;
std::vector<common_grammar_trigger> grammar_triggers;
std::vector<std::string> preserved_tokens;
std::vector<std::string> additional_stops;
// per-message parsing syntax
// should be derived from common_chat_params
struct common_chat_parser_params {
- common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
+ common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
- bool reasoning_in_content = false;
- bool thinking_forced_open = false;
- bool parse_tool_calls = true;
- common_peg_arena parser = {};
+ bool reasoning_in_content = false;
+ bool thinking_forced_open = false;
+ bool parse_tool_calls = true;
+ bool debug = false; // Enable debug output for PEG parser
+ common_peg_arena parser = {};
common_chat_parser_params() = default;
common_chat_parser_params(const common_chat_params & chat_params) {
format = chat_params.format;
void common_chat_templates_free(struct common_chat_templates * tmpls);
-struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+struct common_chat_templates_deleter {
+ void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
+};
typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
-common_chat_templates_ptr common_chat_templates_init(
- const struct llama_model * model,
- const std::string & chat_template_override,
- const std::string & bos_token_override = "",
- const std::string & eos_token_override = "");
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+ const std::string & chat_template_override,
+ const std::string & bos_token_override = "",
+ const std::string & eos_token_override = "");
bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
-
-struct common_chat_params common_chat_templates_apply(
- const struct common_chat_templates * tmpls,
- const struct common_chat_templates_inputs & inputs);
+struct common_chat_params common_chat_templates_apply(const struct common_chat_templates * tmpls,
+ const struct common_chat_templates_inputs & inputs);
// Format single message, while taking into account the position of that message in chat history
-std::string common_chat_format_single(
- const struct common_chat_templates * tmpls,
- const std::vector<common_chat_msg> & past_msg,
- const common_chat_msg & new_msg,
- bool add_ass,
- bool use_jinja);
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+ const std::vector<common_chat_msg> & past_msg,
+ const common_chat_msg & new_msg,
+ bool add_ass,
+ bool use_jinja);
// Returns an example of formatted chat
-std::string common_chat_format_example(
- const struct common_chat_templates * tmpls,
- bool use_jinja,
- const std::map<std::string, std::string> & chat_template_kwargs);
+std::string common_chat_format_example(const struct common_chat_templates * tmpls,
+ bool use_jinja,
+ const std::map<std::string, std::string> & chat_template_kwargs);
-const char* common_chat_format_name(common_chat_format format);
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
+const char * common_chat_format_name(common_chat_format format);
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
+common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);
// used by arg and server
-const char * common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format common_reasoning_format_from_name(const std::string & format);
+const char * common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
// get template caps, useful for reporting to server /props endpoint
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
+
+std::string common_chat_template_direct_apply(
+ const common_chat_template & tmpl,
+ const autoparser::templates_params & inputs,
+ const std::optional<json> & messages_override = std::nullopt,
+ const std::optional<json> & tools_override = std::nullopt,
+ const std::optional<json> & additional_context = std::nullopt);
size_t offset = 0;
while (offset < filename.size()) {
- utf8_parse_result result = parse_utf8_codepoint(filename, offset);
+ utf8_parse_result result = common_parse_utf8_codepoint(filename, offset);
if (result.status != utf8_parse_result::SUCCESS) {
return false;
+#include "log.h"
#include "value.h"
#include "runtime.h"
#include "caps.h"
auto tools = ctx.get_val("tools");
bool success = false;
+ std::string result;
try {
jinja::runtime runtime(ctx);
- runtime.execute(prog);
+ auto results = runtime.execute(prog);
+ auto parts = jinja::runtime::gather_string_parts(results);
+ result = parts->as_string().str();
success = true;
} catch (const std::exception & e) {
JJ_DEBUG("Exception during execution: %s", e.what());
+ result = "";
// ignore exceptions during capability analysis
}
return v->stats.ops.find(op_name) != v->stats.ops.end();
};
+ JJ_DEBUG("%s\n", ">>> Running capability check: typed content");
+
// case: typed content support
caps_try_execute(
prog,
}
);
+ JJ_DEBUG("%s\n", ">>> Running capability check: system prompt");
// case: system prompt support
caps_try_execute(
}
);
- // case: tools support
+ JJ_DEBUG("%s\n", ">>> Running capability check: single tool support");
+
+ // case: tools support: single call
caps_try_execute(
prog,
[&]() {
},
{
{"role", "assistant"},
- {"content", "Assistant message"},
+ {"content", ""}, // Some templates expect content to be empty with tool calls
{"tool_calls", json::array({
{
- {"id", "call1"},
+ {"id", "call00001"},
{"type", "function"},
{"function", {
{"name", "tool1"},
{"arg", "value"}
}}
}}
- },
- {
- {"id", "call2"},
- {"type", "function"},
- {"function", {
- {"name", "tool2"},
- {"arguments", {
- {"arg", "value"}
- }}
- }}
}
})}
},
+ {
+ {"role", "tool"},
+ {"content", "Tool response"},
+ {"tool_call_id", "call00001"}
+ },
+ {
+ {"role", "assistant"},
+ {"content", "The tool response was 'tool response'"}
+ },
{
{"role", "user"},
{"content", "User message"},
{"name", "tool"},
{"type", "function"},
{"function", {
- {"name", "tool"},
+ {"name", "tool1"},
{"description", "Tool description"},
{"parameters", {
{"type", "object"},
auto & tool_name = tools->at(0)->at("function")->at("name");
caps_print_stats(tool_name, "tools[0].function.name");
+ caps_print_stats(tools, "tools");
if (!tool_name->stats.used) {
result.supports_tools = false;
}
if (!tool_calls->stats.used) {
result.supports_tool_calls = false;
}
+ }
+ );
+
+ JJ_DEBUG("%s\n", ">>> Running capability check: parallel tool support");
+
+ // case: tools support: parallel calls
+ caps_try_execute(
+ prog,
+ [&]() {
+ // messages
+ return json::array({
+ {
+ {"role", "user"},
+ {"content", "User message"},
+ },
+ {
+ {"role", "assistant"},
+ {"content", ""}, // Some templates expect content to be empty with tool calls
+ {"tool_calls", json::array({
+ {
+ {"id", "call00001"},
+ {"type", "function"},
+ {"function", {
+ {"name", "tool1"},
+ {"arguments", {
+ {"arg", "value"}
+ }}
+ }}
+ },
+ {
+ {"id", "call00002"},
+ {"type", "function"},
+ {"function", {
+ {"name", "tool1"},
+ {"arguments", {
+ {"arg", "value"}
+ }}
+ }}
+ }
+ })}
+ },
+ {
+ {"role", "tool"},
+ {"content", "Tool response"},
+ {"tool_call_id", "call00001"}
+ },
+ {
+ {"role", "assistant"},
+ {"content", "The tool response was 'tool response'"}
+ },
+ {
+ {"role", "user"},
+ {"content", "User message"},
+ },
+ });
+ },
+ [&]() {
+ // tools
+ return json::array({
+ {
+ {"name", "tool"},
+ {"type", "function"},
+ {"function", {
+ {"name", "tool1"},
+ {"description", "Tool description"},
+ {"parameters", {
+ {"type", "object"},
+ {"properties", {
+ {"arg", {
+ {"type", "string"},
+ {"description", "Arg description"},
+ }},
+ }},
+ {"required", json::array({ "arg" })},
+ }},
+ }},
+ },
+ });
+ },
+ [&](bool success, value & messages, value & /*tools*/) {
+ if (!success) {
+ result.supports_parallel_tool_calls = false;
+ return;
+ }
+
+ auto & tool_calls = messages->at(1)->at("tool_calls");;
+ caps_print_stats(tool_calls, "messages[1].tool_calls");
// check for second tool call usage
auto & tool_call_1 = tool_calls->at(1)->at("function");
}
);
+ JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning");
+
// case: preserve reasoning content in chat history
caps_try_execute(
prog,
// Logical operators
if (op.value == "and") {
+ JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str());
return left_val->as_bool() ? right->execute(ctx) : std::move(left_val);
} else if (op.value == "or") {
+ JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str());
return left_val->as_bool() ? std::move(left_val) : right->execute(ctx);
}
for (auto & arg_stmt : this->args) {
auto arg_val = arg_stmt->execute(ctx);
JJ_DEBUG(" Argument type: %s", arg_val->type().c_str());
- args.push_back(std::move(arg_val));
+ args.push_back(arg_val);
}
// execute callee
value callee_val = callee->execute(ctx);
#include <set>
#include <sstream>
#include <string>
-#include <unordered_map>
#include <vector>
+#include <unordered_map>
namespace jinja {
if (separator_rule.empty()) {
if (min_items == 1 && !has_max) {
return item_rule + "+";
- } else if (min_items == 0 && !has_max) {
+ }
+ if (min_items == 0 && !has_max) {
return item_rule + "*";
- } else {
- return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
}
+ return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
}
auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
return result;
}
-static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
auto has_min = min_value != std::numeric_limits<int64_t>::min();
auto has_max = max_value != std::numeric_limits<int64_t>::max();
if (has_min && has_max) {
if (min_value < 0 && max_value < 0) {
out << "\"-\" (";
- _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+ build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
out << ")";
return;
}
if (min_value < 0) {
out << "\"-\" (";
- _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+ build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
out << ") | ";
min_value = 0;
}
if (has_min) {
if (min_value < 0) {
out << "\"-\" (";
- _build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+ build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
out << ") | [0] | [1-9] ";
more_digits(0, decimals_left - 1);
} else if (min_value == 0) {
}
digit_range(c, c);
out << " (";
- _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
+ build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
out << ")";
if (c < '9') {
out << " | ";
more_digits(0, less_decimals);
out << " | ";
}
- _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+ build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
} else {
out << "\"-\" (";
- _build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
+ build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
out << ")";
}
return;
std::vector<std::string> deps;
};
-std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
{"boolean", {"(\"true\" | \"false\") space", {}}},
{"decimal-part", {"[0-9]{1,16}", {}}},
{"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
{"null", {"\"null\" space", {}}},
};
-std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
{"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
{"date-time", {"date \"T\" time", {"date", "time"}}},
static const std::unordered_set<std::string> RESERVED_NAMES = [] {
std::unordered_set<std::string> s;
s.insert("root");
- for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
- for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
+ for (const auto & p : PRIMITIVE_RULES) {
+ s.insert(p.first);
+ }
+ for (const auto & p : STRING_FORMAT_RULES) {
+ s.insert(p.first);
+ }
return s;
}();
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
}
-std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
-std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
-std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
-std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
+static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
+static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
+static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
+static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
{'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
};
-std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
-std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
std::smatch match;
if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
_rules[esc_name] = rule;
return esc_name;
- } else {
- int i = 0;
- while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
- i++;
- }
- std::string key = esc_name + std::to_string(i);
- _rules[key] = rule;
- return key;
}
+ int i = 0;
+ while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
+ i++;
+ }
+ std::string key = esc_name + std::to_string(i);
+ _rules[key] = rule;
+ return key;
}
std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
std::vector<std::string> rules;
+ rules.reserve(alt_schemas.size());
for (size_t i = 0; i < alt_schemas.size(); i++) {
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
}
flush_literal();
std::vector<std::string> results;
+ results.reserve(ret.size());
for (const auto & item : ret) {
results.push_back(to_rule(item));
}
TrieNode() : is_end_of_string(false) {}
void insert(const std::string & string) {
- auto node = this;
+ auto *node = this;
for (char c : string) {
node = &node->children[c];
}
if (ks.empty()) {
return res;
}
- std::string k = ks[0];
+ const std::string& k = ks[0];
std::string kv_rule_name = prop_kv_rule_names[k];
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
if (first_is_optional) {
std::string pointer = ref.substr(ref.find('#') + 1);
std::vector<std::string> tokens = string_split(pointer, "/");
for (size_t i = 1; i < tokens.size(); ++i) {
- std::string sel = tokens[i];
+ const std::string& sel = tokens[i];
if (target.is_object() && target.contains(sel)) {
target = target[sel];
} else if (target.is_array()) {
_refs[ref] = target;
}
} else {
- for (auto & kv : n.items()) {
+ for (const auto & kv : n.items()) {
visit_refs(kv.value());
}
}
visit_refs(schema);
}
- std::string _generate_constant_rule(const json & value) {
+ static std::string _generate_constant_rule(const json & value) {
return format_literal(value.dump());
}
if (schema.contains("$ref")) {
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
- } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
+ }
+ if (schema.contains("oneOf") || schema.contains("anyOf")) {
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
- } else if (schema_type.is_array()) {
+ }
+ if (schema_type.is_array()) {
std::vector<json> schema_types;
for (const auto & t : schema_type) {
json schema_copy(schema);
schema_types.push_back(schema_copy);
}
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
- } else if (schema.contains("const")) {
+ }
+ if (schema.contains("const")) {
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
- } else if (schema.contains("enum")) {
+ }
+ if (schema.contains("enum")) {
std::vector<std::string> enum_values;
for (const auto & v : schema["enum"]) {
enum_values.push_back(_generate_constant_rule(v));
}
return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
- } else if ((schema_type.is_null() || schema_type == "object")
+ }
+ if ((schema_type.is_null() || schema_type == "object")
&& (schema.contains("properties") ||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
std::unordered_set<std::string> required;
_build_object_rule(
properties, required, name,
schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
- } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
+ }
+ if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
std::unordered_set<std::string> required;
std::vector<std::pair<std::string, json>> properties;
std::map<std::string, size_t> enum_values;
- std::string hybrid_name = name;
+ const std::string& hybrid_name = name;
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
if (comp_schema.contains("$ref")) {
add_component(_refs[comp_schema["$ref"]], is_required);
// todo warning
}
};
- for (auto & t : schema["allOf"]) {
+ for (const auto & t : schema["allOf"]) {
if (t.contains("anyOf")) {
- for (auto & tt : t["anyOf"]) {
+ for (const auto & tt : t["anyOf"]) {
add_component(tt, false);
}
} else {
}
}
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
- } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
+ }
+ if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
if (items.is_array()) {
std::string rule = "\"[\" space ";
}
rule += " \"]\" space";
return _add_rule(rule_name, rule);
- } else {
- std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
- int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
- json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
- int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
-
- return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
}
- } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
+ std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
+ int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
+ json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
+ int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
+
+ return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
+ }
+ if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
return _visit_pattern(schema["pattern"], rule_name);
- } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
+ }
+ if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
- } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
+ }
+ if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
auto prim_name = schema_format + "-string";
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
- } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
+ }
+ if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
- } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
+ }
+ if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
int64_t min_value = std::numeric_limits<int64_t>::min();
int64_t max_value = std::numeric_limits<int64_t>::max();
if (schema.contains("minimum")) {
}
std::stringstream out;
out << "(";
- _build_min_max_int(min_value, max_value, out);
+ build_min_max_int(min_value, max_value, out);
out << ") space";
return _add_rule(rule_name, out.str());
- } else if (schema.empty() || schema_type == "object") {
+ }
+ if (schema.empty() || schema_type == "object") {
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
- } else {
- if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
- _errors.push_back("Unrecognized schema: " + schema.dump());
- return "";
- }
- // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
- return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
}
+ if (schema_type.is_null() && schema.is_object()) {
+ // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+ // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+ return _add_rule(rule_name, _add_primitive("value", PRIMITIVE_RULES.at("value")));
+ }
+ if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
+ _errors.push_back("Unrecognized schema: " + schema.dump());
+ return "";
+ }
+ // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
+ return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
}
void check_errors() {
std::string format_grammar() {
std::stringstream ss;
for (const auto & kv : _rules) {
- ss << kv.first << " ::= " << kv.second << std::endl;
+ ss << kv.first << " ::= " << kv.second << '\n';
}
return ss.str();
}
-#include "common.h"
#include "peg-parser.h"
+
+#include "common.h"
#include "json-schema-to-grammar.h"
+#include "log.h"
#include "unicode.h"
-#include <nlohmann/json.hpp>
-
#include <algorithm>
#include <initializer_list>
#include <map>
#include <memory>
+#include <nlohmann/json.hpp>
#include <regex>
#include <stdexcept>
#include <unordered_set>
// This is used in common_peg_until_parser and to build a GBNF exclusion grammar
struct trie {
struct node {
- size_t depth = 0;
- std::map<unsigned char, size_t> children;
+ std::map<uint32_t, size_t> children; // Use uint32_t to store Unicode codepoints
bool is_word;
};
size_t current = 0; // Start at root
size_t pos = start_pos;
+ // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str());
+
while (pos < sv.size()) {
- auto it = nodes[current].children.find(sv[pos]);
+ auto result = common_parse_utf8_codepoint(sv, pos);
+ if (result.status != utf8_parse_result::SUCCESS) {
+ break;
+ }
+
+ auto it = nodes[current].children.find(result.codepoint);
if (it == nodes[current].children.end()) {
// Can't continue matching
return match_result{match_result::NO_MATCH};
}
current = it->second;
- pos++;
+ pos += result.bytes_consumed;
// Check if we've matched a complete word
if (nodes[current].is_word) {
}
struct prefix_and_next {
- std::string prefix;
- std::string next_chars;
+ std::vector<uint32_t> prefix;
+ std::vector<uint32_t> next_chars;
};
std::vector<prefix_and_next> collect_prefix_and_next() {
- std::string prefix;
+ std::vector<uint32_t> prefix;
std::vector<prefix_and_next> result;
collect_prefix_and_next(0, prefix, result);
return result;
}
private:
- void collect_prefix_and_next(size_t index, std::string & prefix, std::vector<prefix_and_next> & out) {
+ void collect_prefix_and_next(size_t index, std::vector<uint32_t> & prefix, std::vector<prefix_and_next> & out) {
if (!nodes[index].is_word) {
if (!nodes[index].children.empty()) {
- std::string chars;
+ std::vector<uint32_t> chars;
chars.reserve(nodes[index].children.size());
for (const auto & p : nodes[index].children) {
chars.push_back(p.first);
}
for (const auto & p : nodes[index].children) {
- unsigned char ch = p.first;
+ uint32_t ch = p.first;
auto child = p.second;
prefix.push_back(ch);
collect_prefix_and_next(child, prefix, out);
void insert(const std::string & word) {
size_t current = 0;
- for (unsigned char ch : word) {
+ size_t pos = 0;
+ while (pos < word.length()) {
+ auto result = common_parse_utf8_codepoint(word, pos);
+ if (result.status != utf8_parse_result::SUCCESS) {
+ break;
+ }
+
+ uint32_t ch = result.codepoint;
+ pos += result.bytes_consumed;
+
auto it = nodes[current].children.find(ch);
if (it == nodes[current].children.end()) {
size_t child = create_node();
- nodes[child].depth = nodes[current].depth + 1;
nodes[current].children[ch] = child;
current = child;
} else {
parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start)
: arena(arena), ctx(ctx), start_pos(start) {}
+ std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); }
+
+ std::string debug_input_snippet(size_t pos, size_t len = 60) const {
+ if (pos >= ctx.input.size()) {
+ return "<EOF>";
+ }
+ auto snippet = ctx.input.substr(pos, len);
+ // Escape newlines for display
+ std::string result;
+ for (char c : snippet) {
+ if (c == '\n') {
+ result += "\\n";
+ } else if (c == '\r') {
+ result += "\\r";
+ } else if (c == '\t') {
+ result += "\\t";
+ } else {
+ result += c;
+ }
+ }
+ if (pos + len < ctx.input.size()) {
+ result += "...";
+ }
+ return result;
+ }
+
common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const {
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos);
}
}
common_peg_parse_result operator()(const common_peg_sequence_parser & p) {
+ if (ctx.debug) {
+ LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos,
+ debug_input_snippet(start_pos).c_str(), p.children.size());
+ }
+ ctx.parse_depth++;
+
auto pos = start_pos;
std::vector<common_peg_ast_id> nodes;
- for (const auto & child_id : p.children) {
+ for (size_t i = 0; i < p.children.size(); i++) {
+ const auto & child_id = p.children[i];
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+ }
auto result = arena.parse(child_id, ctx, pos);
+
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i,
+ common_peg_parse_result_type_name(result.type), result.start, result.end);
+ }
+
if (result.fail()) {
+ ctx.parse_depth--;
+ if (ctx.is_partial && result.end >= ctx.input.size()) {
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str());
+ }
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+ std::move(nodes));
+ }
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str());
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end);
}
}
if (result.need_more_input()) {
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str());
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
}
pos = result.end;
}
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos);
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
}
common_peg_parse_result operator()(const common_peg_choice_parser & p) {
+ if (ctx.debug) {
+ fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos,
+ debug_input_snippet(start_pos).c_str(), p.children.size());
+ }
+ ctx.parse_depth++;
+
auto pos = start_pos;
- for (const auto & child_id : p.children) {
+ for (size_t i = 0; i < p.children.size(); i++) {
+ const auto & child_id = p.children[i];
+ if (ctx.debug) {
+ fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+ }
auto result = arena.parse(child_id, ctx, pos);
+ if (ctx.debug) {
+ fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i,
+ common_peg_parse_result_type_name(result.type));
+ }
if (!result.fail()) {
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(),
+ common_peg_parse_result_type_name(result.type), i);
+ }
return result;
}
}
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str());
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
}
common_peg_parse_result operator()(const common_peg_repetition_parser & p) {
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos,
+ debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count);
+ }
+ ctx.parse_depth++;
+
auto pos = start_pos;
int match_count = 0;
std::vector<common_peg_ast_id> nodes;
// Try to match up to max_count times (or unlimited if max_count is -1)
while (p.max_count == -1 || match_count < p.max_count) {
if (pos >= ctx.input.size()) {
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count);
+ }
break;
}
auto result = arena.parse(p.child, ctx, pos);
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count,
+ common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size());
+ fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str());
+ }
+
if (result.success()) {
// Prevent infinite loop on empty matches
if (result.end == pos) {
+ if (ctx.debug) {
+ fprintf(stderr, "%s REPEAT: empty match, stopping\n", debug_indent().c_str());
+ }
break;
}
nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end());
}
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(),
+ match_count, nodes.size());
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
}
// Child failed - stop trying
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str());
+ }
break;
}
// Check if we got enough matches
if (p.min_count > 0 && match_count < p.min_count) {
+ ctx.parse_depth--;
if (pos >= ctx.input.size() && ctx.is_partial) {
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(),
+ match_count, p.min_count);
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes));
}
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count,
+ p.min_count);
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
}
+ ctx.parse_depth--;
+ if (ctx.debug) {
+ fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count,
+ nodes.size());
+ }
return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
}
common_peg_parse_result operator()(const common_peg_any_parser & /* p */) const {
// Parse a single UTF-8 codepoint (not just a single byte)
- auto result = parse_utf8_codepoint(ctx.input, start_pos);
+ auto result = common_parse_utf8_codepoint(ctx.input, start_pos);
if (result.status == utf8_parse_result::INCOMPLETE) {
if (!ctx.is_partial) {
// Try to match up to max_count times (or unlimited if max_count is -1)
while (p.max_count == -1 || match_count < p.max_count) {
- auto result = parse_utf8_codepoint(ctx.input, pos);
+ auto result = common_parse_utf8_codepoint(ctx.input, pos);
if (result.status == utf8_parse_result::INCOMPLETE) {
if (match_count >= p.min_count) {
switch (ctx.input[pos]) {
case '"':
+ case '\'':
case '\\':
case '/':
case 'b':
return result;
}
} else {
- auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+ auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
+
+ if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
+ if (!ctx.is_partial) {
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+ }
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+ }
+
+ if (utf8_result.status == utf8_parse_result::INVALID) {
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+ }
+
+ pos += utf8_result.bytes_consumed;
+ }
+ }
+
+ // Reached end without finding closing quote
+ if (!ctx.is_partial) {
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
+ }
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+ }
+
+ common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) {
+ auto pos = start_pos;
+
+ // Parse string content (without quotes)
+ while (pos < ctx.input.size()) {
+ char c = ctx.input[pos];
+
+ if (c == '\'') {
+ // Found closing quote - success (don't consume it)
+ return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
+ }
+
+ if (c == '\\') {
+ auto result = handle_escape_sequence(ctx, start_pos, pos);
+ if (!result.success()) {
+ return result;
+ }
+ } else {
+ auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
if (!ctx.is_partial) {
size_t last_valid_pos = start_pos;
while (pos < ctx.input.size()) {
- auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+ auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
// Incomplete UTF-8 sequence
common_peg_parse_result operator()(const common_peg_tag_parser & p) {
// Parse the child
+ if (ctx.debug) {
+ fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str());
+ }
auto result = arena.parse(p.child, ctx, start_pos);
if (!result.fail()) {
return id;
}
+static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) {
+ for (int i = 0; i < indent; i++) {
+ oss << " ";
+ }
+ oss << "NODE " << node.id;
+ if (!node.rule.empty()) {
+ oss << " (rule " << node.rule << ")";
+ }
+ if (!node.tag.empty()) {
+ oss << " (tag " << node.tag << ")";
+ }
+ oss << " ['" << node.text << "']\n";
+ for (const auto child : node.children) {
+ bfs_node(arena, oss, arena.get(child), indent + 1);
+ }
+}
+
+std::string common_peg_ast_arena::dump() {
+ std::ostringstream oss;
+ for (auto & node : nodes_) {
+ bfs_node(*this, oss, node, 0);
+ }
+ return oss.str();
+}
+
void common_peg_arena::resolve_refs() {
// Walk through all parsers and replace refs with their corresponding rule IDs
for (auto & parser : parsers_) {
std::is_same_v<T, common_peg_until_parser> ||
std::is_same_v<T, common_peg_literal_parser> ||
std::is_same_v<T, common_peg_json_string_parser> ||
+ std::is_same_v<T, common_peg_python_dict_string_parser> ||
std::is_same_v<T, common_peg_chars_parser> ||
std::is_same_v<T, common_peg_any_parser> ||
std::is_same_v<T, common_peg_space_parser>) {
}
std::string common_peg_arena::dump(common_peg_parser_id id) const {
+ std::unordered_set<common_peg_parser_id> visited;
+ return dump_impl(id, visited);
+}
+
+std::string common_peg_arena::dump_impl(common_peg_parser_id id,
+ std::unordered_set<common_peg_parser_id> & visited) const {
+ // Check for cycles
+ if (visited.count(id)) {
+ return "[cycle]";
+ }
+ visited.insert(id);
+
const auto & parser = parsers_.at(id);
- return std::visit([this](const auto & p) -> std::string {
+ return std::visit([this, &visited](const auto & p) -> std::string {
using T = std::decay_t<decltype(p)>;
if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
} else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
std::vector<std::string> parts;
for (const auto & child : p.children) {
- parts.push_back(dump(child));
+ parts.push_back(dump_impl(child, visited));
}
return "Sequence(" + string_join(parts, ", ") + ")";
} else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
std::vector<std::string> parts;
for (const auto & child : p.children) {
- parts.push_back(dump(child));
+ parts.push_back(dump_impl(child, visited));
}
return "Choice(" + string_join(parts, ", ") + ")";
} else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
if (p.max_count == -1) {
- return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)";
+ return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) +
+ ", unbounded)";
}
- return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
+ return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
} else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
- return "And(" + dump(p.child) + ")";
+ return "And(" + dump_impl(p.child, visited) + ")";
} else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
- return "Not(" + dump(p.child) + ")";
+ return "Not(" + dump_impl(p.child, visited) + ")";
+ } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+ return "Atomic(" + dump_impl(p.child, visited) + ")";
} else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
return "Any";
} else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
} else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
return "JsonString()";
+ } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+ return "PythonDictString()";
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
return "Until(" + string_join(p.delimiters, " | ") + ")";
} else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
- return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
+ return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
} else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
- return "Rule(" + p.name + ", " + dump(p.child) + ")";
+ return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")";
} else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
return "Ref(" + p.name + ")";
+ } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+ return "Tag(" + p.tag + ", " + dump(p.child) + ")";
+ } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+ return "Atomic(" + dump(p.child) + ")";
} else {
return "Unknown";
}
return std::move(arena_);
}
+// String primitives
+
+common_peg_parser common_peg_parser_builder::json_string_content() {
+ return wrap(arena_.add_parser(common_peg_json_string_parser{}));
+}
+
+common_peg_parser common_peg_parser_builder::single_quoted_string_content() {
+ return wrap(arena_.add_parser(common_peg_python_dict_string_parser{}));
+}
+
+common_peg_parser common_peg_parser_builder::double_quoted_string() {
+ return rule("dq-string",
+ [this]() { return sequence({ literal("\""), json_string_content(), literal("\""), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::single_quoted_string() {
+ return rule("sq-string",
+ [this]() { return sequence({ literal("'"), single_quoted_string_content(), literal("'"), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::flexible_string() {
+ return rule("flexible-string", [this]() { return choice({ double_quoted_string(), single_quoted_string() }); });
+}
+
+// Generic helpers for object/array structure
+
+common_peg_parser common_peg_parser_builder::generic_object(const std::string & name,
+ const common_peg_parser & string_parser,
+ const common_peg_parser & value_parser) {
+ return rule(name, [this, string_parser, value_parser]() {
+ auto ws = space();
+ auto member = sequence({ string_parser, ws, literal(":"), ws, value_parser });
+ auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+ return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) });
+ });
+}
+
+common_peg_parser common_peg_parser_builder::generic_array(const std::string & name,
+ const common_peg_parser & value_parser) {
+ return rule(name, [this, value_parser]() {
+ auto ws = space();
+ auto elements = sequence({ value_parser, zero_or_more(sequence({ literal(","), ws, value_parser })) });
+ return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) });
+ });
+}
+
// JSON parsers
+
common_peg_parser common_peg_parser_builder::json_number() {
return rule("json-number", [this]() {
auto digit1_9 = chars("[1-9]", 1, 1);
auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})});
auto frac = sequence({literal("."), digits});
auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits});
- return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()});
+ // Negative lookahead: only commit the number when the next character can't extend it.
+ // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
+ // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
+ auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
+ return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
});
}
}
common_peg_parser common_peg_parser_builder::json_object() {
- return rule("json-object", [this]() {
- auto ws = space();
- auto member = sequence({json_string(), ws, literal(":"), ws, json()});
- auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))});
- return sequence({
- literal("{"),
- ws,
- choice({
- literal("}"),
- sequence({members, ws, literal("}")})
- }),
- ws
- });
- });
+ return generic_object("json-object", json_string(), json());
}
common_peg_parser common_peg_parser_builder::json_array() {
- return rule("json-array", [this]() {
- auto ws = space();
- auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))});
- return sequence({
- literal("["),
- ws,
- choice({
- literal("]"),
- sequence({elements, ws, literal("]")})
- }),
- ws
- });
- });
+ return generic_array("json-array", json());
}
common_peg_parser common_peg_parser_builder::json() {
});
}
-common_peg_parser common_peg_parser_builder::json_string_content() {
- return wrap(arena_.add_parser(common_peg_json_string_parser{}));
+common_peg_parser common_peg_parser_builder::python_string() {
+ return rule("python-string", [this]() { return choice({ double_quoted_string(), single_quoted_string() }); });
+}
+
+common_peg_parser common_peg_parser_builder::python_number() {
+ return json_number();
+}
+
+common_peg_parser common_peg_parser_builder::python_bool() {
+ return rule("python-bool", [this]() { return sequence({ choice({ literal("True"), literal("False") }), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::python_null() {
+ return rule("python-none", [this]() { return sequence({ literal("None"), space() }); });
+}
+
+common_peg_parser common_peg_parser_builder::python_dict() {
+ return generic_object("python-dict", python_string(), python_value());
+}
+
+common_peg_parser common_peg_parser_builder::python_array() {
+ return generic_array("python-array", python_value());
+}
+
+common_peg_parser common_peg_parser_builder::python_value() {
+ return rule("python-value", [this]() {
+ return choice({ python_dict(), python_array(), python_string(), python_number(), python_bool(), python_null() });
+ });
+}
+
+common_peg_parser common_peg_parser_builder::marker() {
+ auto sharp_bracket_parser = literal("<") + until(">") + literal(">");
+ auto square_bracket_parser = literal("[") + until("]") + literal("]");
+ return choice({ sharp_bracket_parser, square_bracket_parser });
}
common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) {
});
}
+static std::string gbnf_escape_char_class(uint32_t c) {
+ if (c == '-' || c == ']' || c == '[' || c == '\\') {
+ return "\\" + std::string(1, (char) c);
+ }
+ // Escape whitespace control characters
+ if (c == '\n') {
+ return "\\n";
+ }
+ if (c == '\t') {
+ return "\\t";
+ }
+ if (c == '\r') {
+ return "\\r";
+ }
+
+ // Printable ASCII
+ if (c >= 0x20 && c <= 0x7E) {
+ return std::string(1, (char) c);
+ }
+
+ // Hex escape
+ char buf[16];
+ const char * hex = "0123456789ABCDEF";
-static std::string gbnf_escape_char_class(char c) {
- switch (c) {
- case '\n': return "\\n";
- case '\t': return "\\t";
- case '\r': return "\\r";
- case '\\': return "\\\\";
- case ']': return "\\]";
- case '[': return "\\[";
- default: return std::string(1, c);
+ if (c <= 0xFF) {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[(c >> 4) & 0xF];
+ buf[3] = hex[c & 0xF];
+ buf[4] = '\0';
+ } else if (c <= 0xFFFF) {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(c >> 12) & 0xF];
+ buf[3] = hex[(c >> 8) & 0xF];
+ buf[4] = hex[(c >> 4) & 0xF];
+ buf[5] = hex[c & 0xF];
+ buf[6] = '\0';
+ } else {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ for (int i = 0; i < 8; i++) {
+ buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF];
+ }
+ buf[10] = '\0';
}
+
+ return std::string(buf);
}
static std::string gbnf_excluding_pattern(const std::vector<std::string> & strings) {
std::string cls;
cls.reserve(chars.size());
- for (const auto & ch : chars) {
+ for (uint32_t ch : chars) {
cls += gbnf_escape_char_class(ch);
}
if (!pre.empty()) {
- pattern += gbnf_format_literal(pre) + " [^" + cls + "]";
+ pattern += gbnf_format_literal(common_unicode_cpts_to_utf8(pre)) + " [^" + cls + "]";
} else {
pattern += "[^" + cls + "]";
}
std::is_same_v<T, common_peg_chars_parser> ||
std::is_same_v<T, common_peg_space_parser> ||
std::is_same_v<T, common_peg_any_parser> ||
- std::is_same_v<T, common_peg_json_string_parser>) {
+ std::is_same_v<T, common_peg_json_string_parser> ||
+ std::is_same_v<T, common_peg_python_dict_string_parser>) {
// These parsers do not have any children
} else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
for (auto child : p.children) {
return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
} else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+ } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+ return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
if (p.delimiters.empty()) {
return ".*";
};
} else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
return json{{"type", "json_string"}};
+ } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+ return json{{ "type", "python_dict_string" }};
} else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
return json{{"type", "until"}, {"delimiters", p.delimiters}};
} else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
if (type == "json_string") {
return common_peg_json_string_parser{};
}
+ if (type == "python_dict_string") {
+ return common_peg_python_dict_string_parser{};
+ }
if (type == "until") {
if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
throw std::runtime_error("until parser missing or invalid 'delimiters' field");
#include <memory>
#include <unordered_map>
+#include <unordered_set>
#include <string>
#include <string_view>
#include <functional>
void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
+
+ std::string dump();
};
struct common_peg_parse_result {
struct common_peg_parse_context {
std::string input;
bool is_partial;
+ bool debug = false; // Enable debug output for parser tracing
common_peg_ast_arena ast;
int parse_depth;
};
struct common_peg_json_string_parser {};
+struct common_peg_python_dict_string_parser {};
struct common_peg_until_parser {
std::vector<std::string> delimiters;
common_peg_space_parser,
common_peg_chars_parser,
common_peg_json_string_parser,
+ common_peg_python_dict_string_parser,
common_peg_until_parser,
common_peg_schema_parser,
common_peg_rule_parser,
friend class common_peg_parser_builder;
private:
+ std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
+
common_peg_parser_id add_parser(common_peg_parser_variant parser);
void add_rule(const std::string & name, common_peg_parser_id id);
common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); }
common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); }
+ // Generic helpers for building object/array structures with configurable string/value parsers.
+ common_peg_parser generic_object(const std::string & name, const common_peg_parser & string_parser, const common_peg_parser & value_parser);
+ common_peg_parser generic_array(const std::string & name, const common_peg_parser & value_parser);
+
public:
common_peg_parser_builder();
// S -> A{n}
common_peg_parser repeat(const common_peg_parser & p, int n) { return repeat(p, n, n); }
+ // Matches a double-quoted string: '"' content '"' space
+ common_peg_parser double_quoted_string();
+
+ // Matches a single-quoted string: "'" content "'" space
+ common_peg_parser single_quoted_string();
+
+ // Matches a string that accepts both double-quoted and single-quoted styles.
+ common_peg_parser flexible_string();
+
+ // Matches double-quoted string content without the surrounding quotes.
+ common_peg_parser json_string_content();
+
+ // Matches single-quoted string content without the surrounding quotes.
+ common_peg_parser single_quoted_string_content();
+
// Creates a complete JSON parser supporting objects, arrays, strings, numbers, booleans, and null.
// value -> object | array | string | number | true | false | null
common_peg_parser json();
common_peg_parser json_bool();
common_peg_parser json_null();
- // Matches JSON string content without the surrounding quotes.
- // Useful for extracting content within a JSON string.
- common_peg_parser json_string_content();
-
// Matches a JSON object member with a key and associated parser as the
// value.
common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
+ // Creates a complete Python format parser supporting dicts, arrays, strings, numbers, booleans, and None.
+ // Differs from JSON: uses True/False/None, accepts both single and double-quoted strings.
+ // value -> dict | array | string | number | True | False | None
+ common_peg_parser python_value();
+ common_peg_parser python_dict();
+ common_peg_parser python_string();
+ common_peg_parser python_array();
+ common_peg_parser python_number();
+ common_peg_parser python_bool();
+ common_peg_parser python_null();
+
+ // A marker, i.e. text delimited by a pair of <> or []
+ common_peg_parser marker();
+
// Wraps a parser with JSON schema metadata for grammar generation.
// Used internally to convert JSON schemas to GBNF grammar rules.
common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);
#include "unicode.h"
+#include <cassert>
+#include <stdexcept>
+#include <vector>
+#include <string>
// implementation adopted from src/unicode.cpp
-size_t utf8_sequence_length(unsigned char first_byte) {
+size_t common_utf8_sequence_length(unsigned char first_byte) {
const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
uint8_t highbits = static_cast<uint8_t>(first_byte) >> 4;
return lookup[highbits];
}
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset) {
if (offset >= input.size()) {
return utf8_parse_result(utf8_parse_result::INCOMPLETE);
}
// Invalid first byte
return utf8_parse_result(utf8_parse_result::INVALID);
}
+
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
+ std::string result;
+ for (size_t i = 0; i < cps.size(); ++i) {
+ result.append(common_unicode_cpt_to_utf8(cps[i]));
+ }
+ return result;
+}
+
+std::string common_unicode_cpt_to_utf8(uint32_t cpt) {
+ std::string result;
+
+ if (/* 0x00 <= cpt && */ cpt <= 0x7f) {
+ result.push_back(cpt);
+ return result;
+ }
+ if (0x80 <= cpt && cpt <= 0x7ff) {
+ result.push_back(0xc0 | ((cpt >> 6) & 0x1f));
+ result.push_back(0x80 | (cpt & 0x3f));
+ return result;
+ }
+ if (0x800 <= cpt && cpt <= 0xffff) {
+ result.push_back(0xe0 | ((cpt >> 12) & 0x0f));
+ result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+ result.push_back(0x80 | (cpt & 0x3f));
+ return result;
+ }
+ if (0x10000 <= cpt && cpt <= 0x10ffff) {
+ result.push_back(0xf0 | ((cpt >> 18) & 0x07));
+ result.push_back(0x80 | ((cpt >> 12) & 0x3f));
+ result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+ result.push_back(0x80 | (cpt & 0x3f));
+ return result;
+ }
+
+ throw std::invalid_argument("invalid codepoint");
+}
+
+
+
#include <cstdint>
#include <string_view>
+#include <vector>
+#include <string>
// UTF-8 parsing utilities for streaming-aware unicode support
// Determine the expected length of a UTF-8 sequence from its first byte
// Returns 0 for invalid first bytes
-size_t utf8_sequence_length(unsigned char first_byte);
+size_t common_utf8_sequence_length(unsigned char first_byte);
// Parse a single UTF-8 codepoint from input
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset);
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset);
+
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps);
+std::string common_unicode_cpt_to_utf8(uint32_t cpt);
--- /dev/null
+# Auto-Parser Architecture
+
+The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
+
+## Overview
+
+The unified auto-parser uses a pure differential, compositional approach (inspired by the `git diff` algorithm) to analyze chat templates:
+
+**Core Philosophy**:
+
+- **Minimize Hardcoded Patterns**: All markers extracted through template comparison (the only heuristic is JSON detection to distinguish `JSON_NATIVE` from tag-based formats)
+- **Compositional Architecture**: Separate analyzer structs for reasoning, content, and tools — each responsible for its own analysis and parser construction
+
+**Analysis + Parser Building in Two Steps**:
+
+1. `autoparser::autoparser tmpl_analysis(tmpl)` — runs all differential comparisons and populates the analysis structs
+2. `autoparser::peg_generator::generate_parser(tmpl, params, tmpl_analysis)` — uses the analysis to build a PEG parser and optional GBNF grammar
+
+## Data Structures
+
+All structs are defined in [common/chat-auto-parser.h](common/chat-auto-parser.h).
+
+### Top-Level: `autoparser` (main analyzer and generator)
+
+[common/chat-auto-parser.h:367-388](common/chat-auto-parser.h#L367-L388) — top-level analysis result aggregating `jinja_caps`, `reasoning`, `content`, and `tools` sub-analyses, plus `preserved_tokens` (union of all non-empty markers).
+
+### `analyze_reasoning`
+
+[common/chat-auto-parser.h:254-274](common/chat-auto-parser.h#L254-L274) — reasoning analysis result: `mode` enum, `start` marker (e.g. `<think>`), and `end` marker (e.g. `</think>`).
+
+### `analyze_content`
+
+[common/chat-auto-parser.h:280-295](common/chat-auto-parser.h#L280-L295) — content analysis result: `mode` enum, `start`/`end` markers, and `requires_nonnull_content` flag.
+
+### `analyze_tools` and its sub-structs
+
+- [common/chat-auto-parser.h:176-194](common/chat-auto-parser.h#L176-L194) — `tool_format_analysis`: `mode` enum, `section_start/end`, `per_call_start/end`, JSON field names (`function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field`), and format flags (`fun_name_is_key`, `tools_array_wrapped`, `uses_python_dicts`)
+- [common/chat-auto-parser.h:196-200](common/chat-auto-parser.h#L196-L200) — `tool_function_analysis`: `name_prefix`, `name_suffix`, `close` markers around function names
+- [common/chat-auto-parser.h:202-210](common/chat-auto-parser.h#L202-L210) — `tool_arguments_analysis`: `start/end` container markers, `name_prefix/suffix`, `value_prefix/suffix`, `separator`
+- [common/chat-auto-parser.h:212-217](common/chat-auto-parser.h#L212-L217) — `tool_id_analysis`: `pos` enum, `prefix`/`suffix` markers around call ID values
+- [common/chat-auto-parser.h:301-361](common/chat-auto-parser.h#L301-L361) — `analyze_tools`: aggregates the four sub-structs above
+
+### Enums
+
+**`reasoning_mode`**: How the template handles reasoning/thinking blocks.
+
+| Value | Description |
+|-----------------|-----------------------------------------------------------------------------------|
+| `NONE` | No reasoning markers detected |
+| `TAG_BASED` | Standard tag-based: `<think>...</think>` |
+| `DELIMITER` | Delimiter-based: reasoning ends at a delimiter (e.g., `[BEGIN FINAL RESPONSE]`) |
+| `FORCED_OPEN` | Template ends with open reasoning tag when `enable_thinking=true` |
+| `FORCED_CLOSED` | `enable_thinking=false` emits both tags; `enable_thinking=true` emits only start |
+| `TOOLS_ONLY` | Reasoning only appears in tool call responses, not plain content |
+
+**`content_mode`**: How the template wraps assistant content.
+
+| Value | Description |
+|--------------------------|----------------------------------------------------------------|
+| `PLAIN` | No content markers |
+| `ALWAYS_WRAPPED` | Content always wrapped: `<response>...</response>` |
+| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present |
+
+**`tool_format`**: Classification of tool call structure.
+
+| Value | Description |
+|------------------|------------------------------------------------------------------|
+| `NONE` | No tool support detected |
+| `JSON_NATIVE` | Pure JSON: `{"name": "X", "arguments": {...}}` |
+| `TAG_WITH_JSON` | Tag-based with JSON args: `<function=X>{...}</function>` |
+| `TAG_WITH_TAGGED`| Tag-based with tagged args: `<param=key>value</param>` |
+
+**`call_id_position`**: Where call IDs appear in tag-based formats.
+
+| Value | Description |
+|--------------------------|----------------------------------------------|
+| `NONE` | No call ID support detected |
+| `PRE_FUNC_NAME` | Before function name |
+| `BETWEEN_FUNC_AND_ARGS` | Between function name and arguments |
+| `POST_ARGS` | After arguments |
+
+## Tool Calling Formats
+
+### JSON_NATIVE
+
+**Structure**: The entire tool call (function name, arguments, values) is in JSON format. Optional enclosing tags around the section.
+
+**Detection**: Function name appears inside a JSON structure (quotes preceded by `{` or `:`).
+
+**Examples**:
+
+Standard OpenAI-style:
+
+```json
+<tool_call>
+{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
+</tool_call>
+```
+
+Mistral Nemo with array wrapper:
+
+```json
+[TOOL_CALLS]
+[{"name": "calculate", "arguments": {"expr": "2+2"}}]
+```
+
+Function name as JSON key (Apertus style):
+
+```json
+{"get_weather": {"location": "Paris"}}
+```
+
+---
+
+### TAG_WITH_JSON
+
+**Structure**: Function name is outside JSON, in tag attributes or XML-style tags. Arguments are a JSON object.
+
+**Detection**: Function name not in JSON, but argument names appear in JSON context.
+
+**Examples**:
+
+Functionary v3.1:
+
+```xml
+<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
+```
+
+MiniMax:
+
+```xml
+<minimax:tool_call>
+<tool_name>calculate</tool_name>
+<arguments>{"expr": "2+2"}</arguments>
+</minimax:tool_call>
+```
+
+---
+
+### TAG_WITH_TAGGED
+
+**Structure**: Both function name and argument names are in XML-style tags. String values are unquoted; non-string values are JSON-formatted.
+
+**Detection**: Neither function name nor argument names appear in a JSON context.
+
+**Examples**:
+
+Qwen/Hermes XML format:
+
+```xml
+<function=get_weather>
+<param=location>Paris</param>
+<param=unit>celsius</param>
+</function>
+```
+
+Mixed types:
+
+```xml
+<function=calculate>
+<param=expr>2+2</param>
+<param=precision>2</param>
+<param=options>{"round": true}</param>
+</function>
+```
+
+String values (`Paris`, `celsius`, `2+2`) are unquoted; `options` (object type) is JSON-formatted.
+
+---
+
+## Analysis Flow
+
+```text
+autoparser::autoparser(tmpl)
+ |
+ |-- Phase 1: analyze_reasoning(tmpl, jinja_caps.supports_tool_calls)
+ | |-- R1: compare_reasoning_presence() — with/without reasoning_content field
+ | |-- R2: compare_thinking_enabled() — enable_thinking=false vs true
+ | '-- R3: compare_reasoning_scope() — reasoning+content vs reasoning+tools
+ | (only if supports_tool_calls)
+ |
+ |-- Phase 2: analyze_content(tmpl, reasoning)
+ | '-- C1: compares content-only vs tools output and content-only vs reasoning output
+ |
+ |-- Phase 3: analyze_tools(tmpl, jinja_caps, reasoning)
+ | (skipped entirely if !jinja_caps.supports_tool_calls)
+ | |
+ | |-- T1: analyze_tool_calls() — no tools vs with tools; classifies format
+ | | |-- JSON path → analyze_tool_call_format_json_native()
+ | | '-- tag path → analyze_tool_call_format_non_json()
+ | |
+ | (if format != NONE and format != JSON_NATIVE:)
+ | |
+ | |-- T2: check_per_call_markers() — 1 call vs 2 calls; moves section→per-call if needed
+ | | (only if supports_parallel_tool_calls)
+ | |
+ | |-- T3: extract_function_markers() — func_alpha vs func_beta; extracts name prefix/suffix/close
+ | |
+ | |-- T4: analyze_arguments() — (TAG_WITH_TAGGED only)
+ | | |-- A1: extract_argument_name_markers() — arg_name_A vs arg_name_B
+ | | '-- A2: extract_argument_value_markers() — value "XXXX" vs "YYYY"
+ | |
+ | |-- T5: extract_argument_separator() — 1 arg vs 2 args; finds separator between args
+ | |
+ | |-- T6: extract_args_markers() — 0 args vs 1 arg; finds args container markers
+ | |
+ | '-- T7: extract_call_id_markers() — call_id "call00001" vs "call99999"
+ |
+ '-- collect_preserved_tokens() — union of all non-empty markers
+ |
+ '-- apply workarounds() — post-hoc patches for edge-case templates
+ |
+ v
+autoparser (analysis result)
+ |
+ v
+autoparser::peg_generator::generate_parser(tmpl, inputs, analysis)
+ |-- analysis.build_parser(inputs) — builds PEG parser arena
+ | |-- reasoning.build_parser(ctx) — reasoning parser (mode-dependent)
+ | |-- content.build_parser(ctx) — content parser (mode-dependent)
+ | '-- tools.build_parser(ctx) — tool parser (dispatches by tool_format)
+ | |-- build_tool_parser_json_native()
+ | |-- build_tool_parser_tag_json()
+ | '-- build_tool_parser_tag_tagged()
+ |
+ |-- Build GBNF grammar (if tools present and trigger_marker non-empty)
+ '-- Set grammar_triggers from section_start or per_call_start
+ |
+ v
+common_chat_params (prompt, parser, grammar, triggers, preserved_tokens)
+```
+
+## Entry Point
+
+The auto-parser is invoked in [common/chat.cpp:1280-1310](common/chat.cpp#L1280-L1310) in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS with `<|channel|>`, Functionary v3.2 with `>>>all`), then the auto-parser handles everything else via `autoparser::autoparser` + `peg_generator::generate_parser`.
+
+## Algorithm Details
+
+### Core Mechanism: Differential Comparison
+
+All analysis phases use the same factorized comparison function declared in [common/chat-auto-parser-helpers.h:68](common/chat-auto-parser-helpers.h#L68):
+
+```cpp
+compare_variants(tmpl, params_A, params_modifier)
+```
+
+This creates variant B by applying a modifier lambda to a copy of `params_A`, renders both through the template, and computes a `diff_split` ([common/chat-auto-parser.h:28-37](common/chat-auto-parser.h#L28-L37)):
+
+- `prefix` — common prefix between A and B
+- `suffix` — common suffix between A and B
+- `left` — unique to variant A
+- `right` — unique to variant B
+
+The diff is computed via `calculate_diff_split()`, which finds the longest-common-prefix and longest-common-suffix, then iteratively moves incomplete `<...>` or `[...]` markers from the prefix/suffix into left/right until stable (tag boundary fixing).
+
+Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries.
+
+### Phase 1: Reasoning Analysis
+
+**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field.
+
+- Searches `diff.right` (output with reasoning) for the reasoning content needle
+- Uses PEG parsers to find surrounding markers:
+ - If both pre/post markers found in `diff.right` → `TAG_BASED` (both tags visible in diff = no forced close)
+ - If both found but post marker only in the full output B → `FORCED_CLOSED`
+ - If only post marker found → `DELIMITER`
+- Sets `reasoning.start` and `reasoning.end`
+
+**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true` with a generation prompt.
+
+- Detects `FORCED_OPEN`: `enable_thinking=true` adds a non-empty marker at the end of the prompt (where model will start generating) — sets `reasoning.start`, mode = `FORCED_OPEN`
+- Detects `FORCED_CLOSED`: `enable_thinking=false` produces both start+end markers; `enable_thinking=true` produces only start marker
+- Handles the reverse case: if both start and end are still empty, looks for a single-segment diff on each side to extract both markers
+
+**R3 — `compare_reasoning_scope()`**: Compares assistant message with reasoning+text-content vs reasoning+tool-calls.
+
+- Only runs if `jinja_caps.supports_tool_calls`
+- Detects `TOOLS_ONLY`: reasoning content present in B (with tools) but not in A (with text content)
+- Extracts reasoning markers from the tool call output using PEG parsers
+
+### Phase 2: Content Analysis
+
+**C1**: Two comparisons in the `analyze_content` constructor:
+
+- Comparison 1: content-only output vs tool-call output → `diff_tools`
+- Comparison 2: content-only output vs reasoning+empty-content output → `diff_reasoning`
+
+Classification logic:
+
+- `PLAIN`: `diff_tools.left` equals the response string (content is the entire diff, no wrapper)
+- `ALWAYS_WRAPPED`: markers found surrounding the content text in `pure_content` → extracts `start`/`end`
+
+### Phase 3: Tool Call Analysis
+
+**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output.
+
+- Extracts the tool call section as `diff.right`
+- Calls `analyze_tool_call_format()` which first strips reasoning markers from the haystack, then:
+ - Calls `in_json_haystack()` for both function name and argument name needles
+ - `in_json_haystack()` uses a PEG parser to check whether the needle appears in a JSON context (preceded by `{` or `:` with surrounding quotes)
+ - If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`
+ - If function name not in JSON, arg name is in JSON → `TAG_WITH_JSON`
+ - If neither in JSON → `TAG_WITH_TAGGED`
+ - `analyze_tool_call_format_json_native()`: parses the JSON object, matches field values to needles to populate `name_field`, `args_field`, `id_field`, `gen_id_field`; detects `tools_array_wrapped`; extracts `section_start`/`section_end`
+ - `analyze_tool_call_format_non_json()`: uses PEG parsers on the haystack to find up to two opening markers (section + per-call) then up to two closing markers
+
+**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls.
+
+- Computes a secondary diff of the second call portion vs the common suffix
+- If the second call content starts with `section_start` → the section marker is actually per-call → moves `section_start/end` to `per_call_start/end` and clears the section markers
+
+**T3 — `extract_function_markers()`**: Compares function name `FUN_FIRST` vs `FUN_SECOND` (two different named functions).
+
+- Finds where the function name appears in `diff.left`
+- Extracts `function.name_prefix` from the common prefix up to the function marker, and `function.name_suffix` from after the name up to the next marker
+- Extends `name_suffix` into `diff.suffix` (to the first marker for TAG_WITH_TAGGED; to the first `{` or `[` for TAG_WITH_JSON)
+- Extracts `function.close` from after the last argument value up to the per-call/section end marker
+
+**T4 — `analyze_arguments()`** (TAG_WITH_TAGGED only):
+
+- **A1 `extract_argument_name_markers()`**: Compares `arg_name_A` vs `arg_name_B` (two different argument names).
+ - Finds shared surrounding structure → `arguments.name_prefix`, `arguments.name_suffix`
+- **A2 `extract_argument_value_markers()`**: Compares argument value `"XXXX"` vs `"YYYY"` (same arg, different value).
+ - Finds markers surrounding the value → `arguments.value_prefix`, `arguments.value_suffix`
+
+**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments (same function).
+
+- Uses `until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND)` to find what separates the two argument blocks
+
+**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument.
+
+- Uses `until_common_prefix()` and `after_common_suffix()` with the empty and single-arg JSON strings as anchors to find container markers (`arguments.start`, `arguments.end`)
+
+**T7 — `extract_call_id_markers()`**: Compares call IDs `"call00001"` vs `"call99999"`.
+
+- Determines whether function name appears in `diff.prefix` or `diff.suffix` to classify position:
+ - Function name in prefix only → `BETWEEN_FUNC_AND_ARGS` or `POST_ARGS` (further distinguished by where `{` appears)
+ - Function name in suffix only → `PRE_FUNC_NAME`
+- Extracts `call_id.prefix` and `call_id.suffix` markers around the call ID value
+- Clears `per_call_end` if it incorrectly incorporated the call ID suffix
+
+### Workarounds
+
+A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches after analysis. Each workaround is a lambda that inspects the template source and overrides analysis results. Current workarounds:
+
+1. **Old Qwen/DeepSeek thinking templates** — source contains `content.split('</think>')`: sets `reasoning.mode = FORCED_OPEN` with `<think>`/`</think>` markers if no reasoning was detected
+2. **Granite 3.3** — source contains specific "Write your thoughts" text: forces `TAG_BASED` reasoning with `<think>`/`</think>` and `WRAPPED_WITH_REASONING` content with `<response>`/`</response>`
+3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
+4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
+5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
+
+### Parser Building
+
+Each analyzer struct (`analyze_reasoning`, `analyze_content`, `analyze_tools`) implements `build_parser(parser_build_context&)`. They share a `parser_build_context` that carries the PEG builder, inference inputs, the pre-built reasoning parser, and a pointer to the content analyzer.
+
+#### Reasoning Parser (`analyze_reasoning::build_parser`)
+
+| Mode | Parser |
+|-----------------------------------|---------------------------------------------------------------------|
+| Not extracting reasoning | `eps()` |
+| `FORCED_OPEN` or `FORCED_CLOSED` | `reasoning(until(end)) + end` — opening tag was in the prompt |
+| `TAG_BASED` or `TOOLS_ONLY` | `optional(start + reasoning(until(end)) + end)` |
+| `DELIMITER` | `optional(reasoning(until(end)) + end)` — no start marker |
+
+#### Content Parser (`analyze_content::build_parser`)
+
+| Condition | Parser |
+|----------------------------------------|---------------------------------------------------------------------------------|
+| `json_schema` present | `reasoning + space() + content(schema(json(), "response-format", ...)) + end()` |
+| Tools present | Dispatches to `analyze_tools::build_parser()` |
+| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` |
+| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` |
+| Default (PLAIN) | `reasoning + content(rest()) + end()` |
+
+#### Tool Parsers (`analyze_tools::build_parser`)
+
+Dispatches by `format.mode`:
+
+**`build_tool_parser_json_native()`**: Calls `p.standard_json_tools()` which internally dispatches to:
+
+- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {...}}`
+- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
+- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
+
+Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
+
+**`build_tool_parser_tag_json()`**: For each tool function:
+
+```text
+tool_open(name_prefix + tool_name(literal(name)) + name_suffix) +
+ call_id_section +
+ tool_args(schema(json(), tool_schema))
+ [+ function.close if non-empty]
+```
+
+Wrapped in per-call markers (with optional parallel call repetition) then optionally in section markers.
+
+**`build_tool_parser_tag_tagged()`**: For each tool function, builds one parser per argument:
+
+- String types: `tool_arg_string_value(schema(until(value_suffix), ...))`
+- JSON types: `tool_arg_json_value(schema(json(), ...))`
+- Required args are plain; optional args wrapped in `optional()`
+- Arguments joined with `space()` between consecutive parsers
+
+For closing: uses `function.close` if present; otherwise uses `peek(per_call_end)` to avoid premature close during partial streaming; falls back to `tool_close(space())` to trigger mapper callbacks.
+
+All three tool parsers return:
+
+```text
+reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
+```
+
+### Python Dict Format
+
+When `format.uses_python_dicts` is true (detected when single-quoted strings appear in JSON argument context), `build_parser()` pre-registers a `json-string` rule that accepts both single-quoted and double-quoted strings. This is done before any `p.json()` call so all JSON parsing inherits the flexible rule.
+
+## Mapper
+
+`common_chat_peg_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. Key design:
+
+- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once the name is set, the buffer is flushed to `current_tool->arguments`
+- **`args_target()`**: Returns a reference to whichever destination is currently active (buffer or tool args), eliminating branching
+- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized (for schema-declared string types in tagged format)
+- **Quote normalization**: Python-style quotes (`'key': 'value'`) are converted to JSON (`"key": "value"`)
+- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically
+
+## Files
+
+| File | Purpose |
+|-------------------------------------------|----------------------------------------------------------------------|
+| `common/chat-auto-parser.h` | All analysis structs, enums, `autoparser`, `peg_generator`, `templates_params` |
+| `common/chat-auto-parser-generator.cpp` | Parser generator: `generate_parser()` and `build_parser()` methods |
+| `common/chat-diff-analyzer.cpp` | Differential analysis implementation and workarounds |
+| `common/chat-auto-parser-helpers.h/cpp` | `calculate_diff_split()`, `segmentize_markers()`, |
+| | `compare_variants()`, string helpers |
+| `common/chat-peg-parser.h/cpp` | `common_chat_peg_builder`, `common_chat_peg_mapper`, and helpers |
+| `common/chat.cpp` | Entry point: `common_chat_templates_apply_jinja()` |
+| `tools/parser/debug-template-parser.cpp` | Debug tool for template analysis |
+| `tools/parser/template-analysis.cpp` | Template analysis tool |
+
+## Testing & Debugging
+
+### Debug Tools
+
+**Template Debugger**: `tools/parser/debug-template-parser.cpp`
+
+- Usage: `./bin/llama-debug-template-parser path/to/template.jinja`
+- Shows detected format, markers, generated parser, and GBNF grammar
+
+**Template Analysis**: `tools/parser/template-analysis.cpp`
+
+- Usage: `./bin/llama-template-analysis path/to/template.jinja`
+
+**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
+
+- Shows detailed analysis steps, pattern extraction results, and generated parser structure
+
+**PEG Test Builder**: Fluent API for creating test cases — see [tests/test-chat.cpp:947-1043](tests/test-chat.cpp#L947-L1043). Example usage:
+
+```cpp
+auto tst = peg_tester("models/templates/Template.jinja");
+tst.test("input text")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({tool_json})
+ .parallel_tool_calls(true)
+ .enable_thinking(true)
+ .expect(expected_message)
+ .run();
+```
+
+### Tested Templates
+
+The following templates have active tests in `tests/test-chat.cpp`:
+
+| Template | Format | Notes |
+| -------- | ------ | ----- |
+| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags (specialized handler) |
+| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools |
+| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers |
+| Google Gemma 2 2B | Content only | No tool support |
+| Qwen-QwQ-32B | Reasoning | Forced-open thinking |
+| NousResearch Hermes 2 Pro | JSON_NATIVE | `<tool_call>` wrapper |
+| IBM Granite 3.3 | JSON_NATIVE | `<think></think>` + `<response></response>` |
+| ByteDance Seed-OSS | TAG_WITH_TAGGED | Custom `<seed:think>` and `<seed:tool_call>` tags |
+| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format |
+| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode |
+| GLM-4.6 | TAG_WITH_TAGGED | `<tool_call>name\n<arg_key>...<arg_value>...` format |
+| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format |
+| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools |
+| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key |
+| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args |
+| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `<TOOLCALL>` wrapper (nested) |
+| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format |
+| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field |
+| Functionary v3.1 | TAG_WITH_JSON | `<function=X>` format |
+| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) |
+| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format |
+| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking |
+| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking |
+| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers |
+| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format |
+| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) |
+| Apriel 1.5 | JSON_NATIVE | `<tool_calls>` wrapper with JSON array |
+| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start |
+| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID |
+| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID |
+| StepFun 3.5 Flash | TAG_WITH_TAGGED | `<function=X><parameter=Y>` format |
+
+## Adding Support for New Templates
+
+To support a new template format:
+
+1. **If it follows standard patterns** — The auto-parser should detect it automatically. Run `llama-debug-template-parser` to verify markers are correctly extracted.
+2. **If differential analysis extracts incorrect markers** — Add a workaround lambda to the `workarounds` vector in `common/chat-diff-analyzer.cpp`. Inspect the template source for a unique identifying substring.
+3. **If it needs fundamentally different handling** — Add a dedicated handler function in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral).
+
+## Edge Cases and Quirks
+
+1. **Forced Thinking**: When `enable_thinking=true` and the model prompt ends with an open reasoning tag (e.g., `<think>`), the parser enters forced thinking mode and immediately expects reasoning content without waiting for a start marker.
+2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start/end`); others wrap the entire section (`section_start/end`). T2 (`check_per_call_markers()`) disambiguates by checking if the second call in a two-call output starts with the section marker.
+3. **Python Dict Format**: The Seed template family uses single-quoted JSON (`'key': 'value'`). The `uses_python_dicts` flag causes the PEG builder to register a flexible `json-string` rule accepting both quote styles before any JSON rules are built.
+4. **Tag Boundary Fixing**: `calculate_diff_split()` iteratively adjusts prefix/suffix boundaries to avoid splitting `<tag>` or `[marker]` tokens, ensuring clean extraction.
+5. **Call ID Side Effects**: When a call ID is detected, `per_call_end` may have been incorrectly set to include the call ID suffix. T7 clears `per_call_end` in this case.
+6. **Tool Analysis Gating**: `analyze_tools` is only constructed (and all tool analysis phases run) when `jinja_caps.supports_tool_calls` is true. Within tool analysis, `check_per_call_markers()` (T2) only runs if `jinja_caps.supports_parallel_tool_calls`.
+7. **`analyze_arguments()` Gating**: Within tool analysis, A1 and A2 (argument name/value marker extraction) only run for `TAG_WITH_TAGGED` format. `extract_argument_separator()` and `extract_args_markers()` run for all non-`JSON_NATIVE` formats.
output from a model that emits arguments as JSON.
```cpp
-auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
// Build a choice of all available tools
auto tool_choice = p.choice();
for (const auto & tool : tools) {
### Native
-The `common_chat_peg_native_builder` builds a `native` parser suitable for
+The `common_chat_peg_builder` builds a `native` parser suitable for
models that emit tool arguments as a direct JSON object.
- **`reasoning(p)`** - Tag node for `reasoning_content`
- **`tool_args(p)`** - Tag the tool arguments
```cpp
-build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto get_weather_tool = p.tool(p.sequence({
p.tool_open(p.literal("{")),
p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
### Constructed
-The `common_chat_peg_constructed_builder` builds a `constructed` parser
+The `common_chat_peg_builder` builds a `constructed` parser
suitable for models that emit tool arguments as separate entities, such as XML
tags.
- **`tool_arg_json_value(p)`** - Tag JSON value for the argument
```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto location_arg = p.tool_arg(
p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
p.tool_arg_string_value(p.until("</parameter>")),
elif (schema_type == 'object') or (len(schema) == 0):
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
+ elif schema_type is None and isinstance(schema, dict):
+ # No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+ # Per JSON Schema semantics this is equivalent to {} and accepts any value.
+ return self._add_rule(rule_name, self._add_primitive('value', PRIMITIVE_RULES['value']))
+
else:
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
{%- macro render_tools(tools) -%}
{%- for tool in tools %}
- {{- "// " + tool.description + "\n" }}
- {{- "type "+ tool.name + " = " }}
- {%- if tool.parameters and tool.parameters.properties %}
+ {{- "// " + tool.function.description + "\n" }}
+ {{- "type "+ tool.function.name + " = " }}
+ {%- if tool.function.parameters and tool.function.parameters.properties %}
{{- "(_: {\n" }}
- {%- for param_name, param_spec in tool.parameters.properties.items() %}
+ {%- for param_name, param_spec in tool.function.parameters.properties.items() %}
{%- if param_spec.description %}
{{- "// " + param_spec.description + "\n" }}
{%- endif %}
{{- param_name }}
- {%- if param_name not in (tool.parameters.required or []) -%}
+ {%- if param_name not in (tool.function.parameters.required or []) -%}
{{- "?" }}
{%- endif -%}
{{- ": " }}
- {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
+ {{- render_typescript_type(param_spec, tool.function.parameters.required or []) }}
{%- if param_spec.default is defined -%}
{%- if param_spec.enum %}
{{- ", // default: " + param_spec.default }}
{%- for tool_call in message.tool_calls -%}
{%- if tool_call.type == 'function' -%}
{%- set function = tool_call.function -%}
- {{- '{"' + function.name + '": ' + function.arguments + '}' }}
+ {{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }}
{%- if not loop.last -%}
{{- ", " }}
{%- endif -%}
--- /dev/null
+{# ---------------------------------------------------------------------- #}
+{# ƛƬ Default setup and flags #}
+{# ---------------------------------------------------------------------- #}
+{%- set messages = messages or [] -%}
+{%- set tools = tools or [] -%}
+{%- set add_generation_prompt = add_generation_prompt or false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%} {# whether to include <thinking> reasoning blocks #}
+{%- set add_generation_prompt = true -%} {# whether to emit reasoning starter before assistant response #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token or '' -%}
+{%- set eos_token = eos_token or '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+ You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+ Analyze each question carefully, present your reasoning step-by-step, then provide the final
+ response after the marker [BEGIN FINAL RESPONSE].
+{%- endset -%}
+{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
+{# ---------------------------------------------------------------------- #}
+{# Tool list and tool call output format #}
+{# ---------------------------------------------------------------------- #}
+{%- if tools|length > 0 -%}
+ {%- set available_tool_string -%}
+ You are provided with function signatures within <available_tools></available_tools> XML tags.
+ You may call one or more functions to assist with the user query.
+ Don't make assumptions about the arguments. You should infer the argument values from previous
+ user responses and the system message.
+ Here are the available tools:
+ <available_tools>
+ {% for tool in tools %}{{ tool|string }}{% endfor %}
+
+ </available_tools>.
+
+ Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
+ Each JSON object should contain a function name and arguments as follows:
+ <tool_calls>[
+ {"name": <function-name-1>, "arguments": <args-dict-1>},
+ {"name": <function-name-2>, "arguments": <args-dict-2>},
+ ...
+ ]</tool_calls>
+ {%- endset -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Start system block if first message is not system #}
+{# ---------------------------------------------------------------------- #}
+{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
+ {%- if tools|length > 0 -%}
+ {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
+ {%- else -%}
+ {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
+ {%- endif -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Iterate through messages #}
+{# ---------------------------------------------------------------------- #}
+{%- for message in messages -%}
+
+ {# ---------------- USER MESSAGE ---------------- #}
+ {%- if message['role'] == 'user' -%}
+ {{ '<|begin_user|>\n' }}
+ {%- if message['content'] is not string -%}
+ {%- for chunk in message['content'] -%}
+ {%- if chunk['type'] == 'text' -%}
+ {{ chunk['text'] }}
+ {%- elif chunk['type'] in ['image', 'image_url'] -%}
+ {{ '[IMG]' }}
+ {%- else -%}
+ {{ raise_exception('Unrecognized content type!') }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- else -%}
+ {{ message['content'] }}
+ {%- endif -%}
+
+ {# ---------------- SYSTEM MESSAGE ---------------- #}
+ {%- elif message['role'] == 'system' -%}
+ {%- set sys_content = message.get('content', '') -%}
+ {%- if sys_content and sys_content|length > 0 -%}
+ {%- if sys_content is string -%}
+ {%- set system_message = sys_content -%}
+ {%- else -%}
+ {%- set system_message = sys_content[0]['text'] -%}
+ {%- endif -%}
+ {%- else -%}
+ {%- set system_message = '' -%}
+ {%- endif -%}
+
+ {%- if tools|length > 0 -%}
+ {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
+ {%- else -%}
+ {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
+ {%- endif -%}
+
+ {# ---------------- ASSISTANT MESSAGE ---------------- #}
+ {%- elif message['role'] == 'assistant' -%}
+ {%- if loop.last -%}
+ {%- set add_tool_id = false -%}
+ {%- endif -%}
+
+ {{ '\n<|begin_assistant|>\n' }}
+
+ {%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
+ {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
+ {%- endif -%}
+
+ {%- set asst_content = message.get('content', '') -%}
+ {%- if asst_content and asst_content|length > 0 -%}
+ {%- if asst_content is not string -%}
+ {%- set asst_text = asst_content[0]['text'] -%}
+ {%- else -%}
+ {%- set asst_text = asst_content -%}
+ {%- endif -%}
+ {# For historical turns (not the last), strip reasoning and keep only final response #}
+ {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
+ {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
+ {%- else -%}
+ {{- asst_text -}}
+ {%- endif -%}
+ {%- elif message.get('chosen') and message['chosen']|length > 0 -%}
+ {{ message['chosen'][0] }}
+ {%- endif -%}
+
+ {# Tool call output #}
+ {%- set tool_calls = message.get('tool_calls', []) -%}
+ {%- if tool_calls and tool_calls|length > 0 -%}
+ {{ '\n<tool_calls>[' }}
+ {%- for tool_call in tool_calls -%}
+ {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }}
+ {%- if add_tool_id == true and 'id' in tool_call -%}
+ {{ ', "id": "' + tool_call['id'] + '"' }}
+ {%- endif -%}
+ {{ '}' }}
+ {%- if not loop.last -%}{{ ', ' }}{%- endif -%}
+ {%- endfor -%}
+ {{ ']</tool_calls>' }}
+ {%- endif -%}
+
+ {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
+ {%- if not loop.last or training_prompt -%}
+ {{ '\n<|end|>\n' }}
+ {%- endif -%}
+
+ {# ---------------- TOOL RESULT MESSAGE ---------------- #}
+ {%- elif message['role'] == 'tool' -%}
+ {%- set tool_content = message.get('content', '') -%}
+ {%- if tool_content is string -%}
+ {%- set tool_message = tool_content -%}
+ {%- else -%}
+ {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
+ {%- endif -%}
+ {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
+
+ {# ---------------- CONTENT MESSAGE ---------------- #}
+ {%- elif message['role'] == 'content' -%}
+ {%- set msg_content = message.get('content', '') -%}
+ {%- if msg_content is not string -%}
+ {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
+ {%- else -%}
+ {{ '<|begin_content|>\n' + msg_content + '\n' }}
+ {%- endif -%}
+ {%- endif -%}
+
+ {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
+ {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+ {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
+ {%- endif -%}
+
+{%- endfor -%}
--- /dev/null
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- set loop_start_index = 1 %}
+{%- else %}
+ {%- set system_message = "" %}
+ {%- set loop_start_index = 0 %}
+{%- endif %}
+
+{%- if system_message or tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if system_message %}
+ {{- system_message }}
+ {%- endif %}
+ {%- if tools %}
+ {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
+ {{- '[' }}
+ {%- for tool in tools %}
+ {{- tool | tojson }}
+ {%- if not loop.last %}
+ {{- ',\n' }}
+ {%- endif %}
+ {%- endfor %}
+ {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
+ {%- endif %}
+ {%- if enable_thinking %}
+ {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
+ {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
+ {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
+ {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
+ {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- endif %}
+
+{%- for message in messages[loop_start_index:] %}
+ {%- if message['role'] == 'user' %}
+ {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
+ {%- elif message['role'] == 'assistant' %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- set content = message.content | default('') %}
+ {%- set reasoning_content = message.reasoning_content | default('') %}
+ {%- if not reasoning_content and '<think>' in content and '</think>' in content %}
+ {%- set reasoning_parts = content.split('</think>') %}
+ {%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
+ {%- set content = reasoning_parts[1:] | join('</think>') %}
+ {%- endif %}
+ {%- if reasoning_content %}
+ {{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
+ {%- endif %}
+ {{- content.lstrip() }}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message['role'] == 'tool' %}
+ {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %}
+ {{- '<|im_start|>user\n' }}
+ {%- endif %}
+ {{- '<|function_output|>' + message['content'] }}
+ {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking %}
+ {{- '<think>\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
{%- elif message.role|lower == 'user' %}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
{% for tc in message.tool_calls %}
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
{%- endif %}
-{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
\ No newline at end of file
+{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{%- if not enable_thinking -%}<|START_THINKING|><|END_THINKING|>{%- endif %}
\ No newline at end of file
--- /dev/null
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+ {%- if content is string -%}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping -%}
+ {%- for item in content -%}
+ {%- if item is mapping and item.type == 'text' -%}
+ {{- item.text }}
+ {%- elif item is string -%}
+ {{- item }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- else -%}
+ {{- content }}
+ {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+ {%- if m.role == 'user' %}
+ {% set ns.last_user_index = loop.index0 -%}
+ {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+ {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+{%- endif %}
+{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
+{{ '<think>' + reasoning_content.strip() + '</think>'}}
+{%- else -%}
+{{ '</think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+ {%- set tc = tc.function %}
+{%- endif %}
+{{- '<tool_call>' + tc.name -}}
+{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|observation|>' }}
+{%- endif %}
+{{- '<tool_response>' }}
+{{- m.content }}
+{{- '</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+ <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
+{%- endif -%}
\ No newline at end of file
--- /dev/null
+{{- bos_token -}}
+{%- set system_prompt = "" -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+ {%- set ns.system_prompt = messages[0]["content"] -%}
+ {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%}
+ {%- for tool in tools -%}
+ {%- if tool is not string -%}
+ {%- set tool = tool | tojson -%}
+ {%- endif -%}
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
+ {%- if not loop.last -%}
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
+ {{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
+ {%- set content = message["content"] -%}
+ {%- if content is not string -%}
+ {%- set content = content | tojson -%}
+ {%- endif -%}
+ {%- if message["role"] == "tool" -%}
+ {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
+ {%- elif message["role"] == "assistant" -%}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {%- endif -%}
+ {{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+ {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n<think>\n' }}
+ {%- if not enable_thinking -%}{{- '</think>' -}}{%- endif -%}
{%- endif %}
{%- endif %}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
- {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+ {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
{{- "<tools>" }}
{%- for tool in tools %}
{%- if tool.function is defined %}
{{- '\n</function>' }}
{%- endfor %}
{{- "\n</tools>" }}
- {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+ {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
{%- endif %}
{%- if system_message is defined %}
{{- '<|im_end|>\n' }}
--- /dev/null
+{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
+{{bos_token}}{%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- render_content(messages[0].content) + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson(ensure_ascii=False) }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content) %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
+ {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = render_content(message.reasoning_content) %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- else %}
+ {%- set reasoning_content = '' %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- if tool_call.arguments is defined %}
+ {%- set arguments = tool_call.arguments %}
+ {%- for args_name, args_value in arguments|items %}
+ {{- '<parameter=' + args_name + '>\n' }}
+ {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n</parameter>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '</function>\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>tool_response\n' }}
+ {%- endif %}
+ {{- '<tool_response>' }}
+ {{- content }}
+ {{- '</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n<think>\n' }}
+{%- endif %}
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+ {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+ {%- if message['role'] == 'system' -%}
+ {%- set ns.system_prompt = message['content'] -%}
+ {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+ {%- if message['role'] == 'user' -%}
+ {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is none -%}
+ {%- set ns.is_tool = false -%}
+ {%- for tool in message['tool_calls']-%}
+ {%- if not ns.is_first -%}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {%- set ns.is_first = true -%}
+ {%- else -%}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+ {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
+ {%- set ns.is_tool = false -%}
+ {%- else -%}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>')[-1] -%}
+ {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- endif -%}
+ {%- if message['role'] == 'tool' -%}
+ {%- set ns.is_tool = true -%}
+ {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- set ns.is_output_first = false -%}
+ {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- endif -%}
+ {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|><think>\n'}}
+{%- endif %}
\ No newline at end of file
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+ {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+ {%- if message['role'] == 'system' -%}
+ {%- set ns.system_prompt = message['content'] -%}
+ {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+ {%- if message['role'] == 'user' -%}
+ {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+ {%- set ns.is_tool = false -%}
+ {%- for tool in message['tool_calls']-%}
+ {%- if not ns.is_first -%}
+ {{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {%- set ns.is_first = true -%}
+ {%- else -%}
+ {{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}
+ {%- endif -%}
+ {%- endfor -%}
+ {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+ {%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}
+ {%- set ns.is_tool = false -%}
+ {%- else -%}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>')[-1] -%}
+ {%- endif -%}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- endif -%}
+ {%- if message['role'] == 'tool' -%}
+ {%- set ns.is_tool = true -%}
+ {%- if ns.is_output_first -%}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- set ns.is_output_first = false -%}
+ {%- else -%}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- endif -%}
+ {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<|tool▁outputs▁end|>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<|Assistant|><think>\n'}}{% if not enable_thinking %}{{- '</think>' -}}{% endif %}
+{%- endif %}
\ No newline at end of file
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+{% if not add_generation_prompt is defined -%}
+ {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+ {%- if enable_thinking is defined -%}
+ {%- set thinking = enable_thinking -%}
+ {%- else -%}
+ {%- set thinking = false -%}
+ {%- endif -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
+{%- for message in messages -%}
+ {%- if message['role'] == 'system' -%}
+ {%- if ns.is_first_sp -%}
+ {%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
+ {%- set ns.is_first_sp = false -%}
+ {%- else -%}
+ {%- set ns.system_prompt = ns.system_prompt + '
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
\ No newline at end of file
+' + message['content'] -%}
+ {%- endif -%}
+ {%- endif -%}
+{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages -%}
+ {%- if message['role'] == 'user' -%}
+ {%- set ns.is_tool = false -%}
+ {%- set ns.is_first = false -%}
+ {%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+ {%- if ns.is_last_user -%}{{'<|Assistant|></think>'}}
+ {%- endif -%}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_first = false -%}
+ {%- set ns.is_tool = false -%}
+ {%- for tool in message['tool_calls'] -%}
+ {%- if not ns.is_first -%}
+ {%- if not message['content'] -%}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
+ {%- else -%}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
+ {%- endif -%}
+ {%- set ns.is_first = true -%}
+ {%- else -%}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}
+ {%- endif -%}
+ {%- endfor -%}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
+ {%- if ns.is_last_user -%}{{'<|Assistant|>'}}
+ {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
+ {%- else -%}{{'</think>'}}
+ {%- endif -%}
+ {%- endif -%}
+ {%- set ns.is_last_user = false -%}
+ {%- if ns.is_tool -%}{{message['content'] + '<|end▁of▁sentence|>'}}
+ {%- set ns.is_tool = false -%}
+ {%- else -%}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>', 1)[1] -%}
+ {%- endif -%}{{content + '<|end▁of▁sentence|>'}}
+ {%- endif -%}
+ {%- endif -%}
+ {%- if message['role'] == 'tool' -%}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<|Assistant|>'}}
+ {%- if not thinking -%}{{'</think>'}}
+ {%- else -%}{{'<think>'}}
+ {%- endif -%}
+{%- endif %}
\ No newline at end of file
{%- if 'tool_calls' in message and message['tool_calls'] -%}
{%- set tool = namespace(calls=[]) -%}
{%- for call in message['tool_calls'] -%}
- {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%}
+ {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%}
{%- endfor -%}
{%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%}
{%- endif -%}
-{%- if tools -%}\r
- <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>\r
-{%- endif -%}\r
-{%- for message in messages -%}\r
- {%- if loop.first and messages[0]['role'] != 'system' -%}\r
- <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>\r
- {%- endif -%}\r
- {%- if message['role'] == 'system' -%}\r
- <|im_system|>system<|im_middle|>\r
- {%- elif message['role'] == 'user' -%}\r
- <|im_user|>user<|im_middle|>\r
- {%- elif message['role'] == 'assistant' -%}\r
- <|im_assistant|>assistant<|im_middle|>\r
- {%- elif message['role'] == 'tool' -%}\r
- <|im_system|>tool<|im_middle|>\r
- {%- endif -%}\r
- {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}\r
- {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}\r
- <|tool_calls_section_begin|>\r
- {%- for tool_call in message['tool_calls'] -%}\r
- {%- set func_name = tool_call['function']['name'] -%}\r
- {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}\r
- <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>\r
- {%- endfor -%}\r
- <|tool_calls_section_end|>\r
- {%- elif message['role'] == 'tool' -%}\r
- ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}\r
- {%- elif message['content'] is string -%}\r
- {{ message['content'] }}\r
- {%- elif message['content'] is not none -%}\r
- {% for content in message['content'] -%}\r
- {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\r
- <|media_start|>image<|media_content|><|media_pad|><|media_end|>\r
- {% else -%}\r
- {{ content['text'] }}\r
- {%- endif -%}\r
- {%- endfor -%}\r
- {%- endif -%}\r
- <|im_end|>\r
-{%- endfor -%}\r
-{%- if add_generation_prompt -%}\r
- <|im_assistant|>assistant<|im_middle|>\r
-{%- endif -%}\r
+{%- if tools -%}
+ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+ {%- if loop.first and messages[0]['role'] != 'system' -%}
+ <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+ {%- endif -%}
+ {%- if message['role'] == 'system' -%}
+ <|im_system|>system<|im_middle|>
+ {%- elif message['role'] == 'user' -%}
+ <|im_user|>user<|im_middle|>
+ {%- elif message['role'] == 'assistant' -%}
+ <|im_assistant|>assistant<|im_middle|>
+ {%- elif message['role'] == 'tool' -%}
+ <|im_system|>tool<|im_middle|>
+ {%- endif -%}
+ {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+ {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+ <|tool_calls_section_begin|>
+ {%- for tool_call in message['tool_calls'] -%}
+ {%- set func_name = tool_call['function']['name'] -%}
+ {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+ <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+ {%- endfor -%}
+ <|tool_calls_section_end|>
+ {%- elif message['role'] == 'tool' -%}
+ ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+ {%- elif message['content'] is string -%}
+ {{ message['content'] }}
+ {%- elif message['content'] is not none -%}
+ {% for content in message['content'] -%}
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+ {% else -%}
+ {{ content['text'] }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+ <|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+ <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
{%- set add_tool_id = false -%}
{%- endif -%}
{{- '<|assistant|>\n' -}}
- {%- if message['content'] is not none and message['content']|length > 0 -%}
+ {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
{%- if message['content'] is not string and message['content'][0]['text'] is not none %}
{{- message['content'][0]['text'] }}
{%- else %}
{{- message['content'] -}}
{%- endif -%}
- {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
+ {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
{{- message['chosen'][0] -}}
{%- endif -%}
{%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
{{- '<thinking>' + message['thought'] + '</thinking>' -}}
{%- endif -%}
- {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
+ {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
{{- '\n<tool_calls>[' -}}
{%- for tool_call in message["tool_calls"] -%}
- {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
+ {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}}
{%- if add_tool_id == true -%}
{{- ', "id": "' + tool_call['id'] + '"' -}}
{%- endif -%}
--- /dev/null
+import argparse
+import json
+import requests
+import logging
+import sys
+
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = "" # ← no newline
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+ payload = {
+ "messages": messages,
+ "stream": stream,
+ "max_tokens": 5000,
+ }
+ if tools:
+ payload["tools"] = tools
+ if tool_choice:
+ payload["tool_choice"] = tool_choice
+
+ try:
+ response = requests.post(url, json=payload, stream=stream)
+ response.raise_for_status()
+ except requests.exceptions.RequestException as e:
+ if e.response is not None:
+ logger.info(f"Response error: {e} for {e.response.content}\n")
+ else:
+ logger.info(f"Error connecting to server: {e}\n")
+ return None
+
+ full_content = ""
+ reasoning_content = ""
+ tool_calls = []
+
+ if stream:
+ logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+ for line in response.iter_lines():
+ if line:
+ decoded_line = line.decode("utf-8")
+ if decoded_line.startswith("data: "):
+ data_str = decoded_line[6:]
+ if data_str == "[DONE]":
+ break
+ try:
+ data = json.loads(data_str)
+ if "choices" in data and len(data["choices"]) > 0:
+ delta = data["choices"][0].get("delta", {})
+
+ # Content
+ content_chunk = delta.get("content", "")
+ if content_chunk:
+ full_content += content_chunk
+ logger.info(content_chunk)
+
+ # Reasoning
+ reasoning_chunk = delta.get("reasoning_content", "")
+ if reasoning_chunk:
+ reasoning_content += reasoning_chunk
+ logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+ # Tool calls
+ if "tool_calls" in delta:
+ for tc in delta["tool_calls"]:
+ index = tc.get("index")
+ if index is not None:
+ while len(tool_calls) <= index:
+ # Using "function" as type default but could be flexible
+ tool_calls.append(
+ {
+ "id": "",
+ "type": "function",
+ "function": {
+ "name": "",
+ "arguments": "",
+ },
+ }
+ )
+
+ if "id" in tc:
+ tool_calls[index]["id"] += tc["id"]
+ if "function" in tc:
+ if "name" in tc["function"]:
+ tool_calls[index]["function"][
+ "name"
+ ] += tc["function"]["name"]
+ if "arguments" in tc["function"]:
+ tool_calls[index]["function"][
+ "arguments"
+ ] += tc["function"]["arguments"]
+
+ except json.JSONDecodeError:
+ logger.info(f"Failed to decode JSON: {data_str}\n")
+ logger.info("\n--- End of Stream ---\n")
+ else:
+ logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+ data = response.json()
+ if "choices" in data and len(data["choices"]) > 0:
+ message = data["choices"][0].get("message", {})
+ full_content = message.get("content", "")
+ reasoning_content = message.get("reasoning_content", "")
+ tool_calls = message.get("tool_calls", [])
+ logger.info(full_content)
+ logger.info("--- End of Response ---\n")
+
+ return {
+ "content": full_content,
+ "reasoning_content": reasoning_content,
+ "tool_calls": tool_calls,
+ }
+
+
+def test_chat(url, stream):
+ logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+ messages = [{"role": "user", "content": "What is the capital of France?"}]
+ result = run_query(url, messages, stream=stream)
+
+ if result:
+ if result["content"]:
+ logger.info("PASS: Output received.\n")
+ else:
+ logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
+
+ if result.get("reasoning_content"):
+ logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
+ else:
+ logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
+ else:
+ logger.info("FAIL: No result.\n")
+
+
+def test_tool_call(url, stream):
+ logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+ messages = [
+ {
+ "role": "user",
+ "content": "What is the weather in London? Please use the get_weather tool.",
+ }
+ ]
+ tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ },
+ }
+ ]
+
+ result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+ if result:
+ tcs = result.get("tool_calls")
+ if tcs and len(tcs) > 0:
+ logger.info("PASS: Tool calls detected.")
+ for tc in tcs:
+ func = tc.get("function", {})
+ logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+ else:
+ logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+ if result.get("reasoning_content"):
+ logger.info(
+ f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+ )
+ else:
+ logger.info("FAIL: Query failed.\n")
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Test llama-server functionality.")
+ parser.add_argument("--host", default="localhost", help="Server host")
+ parser.add_argument("--port", default=8080, type=int, help="Server port")
+ args = parser.parse_args()
+
+ base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
+ logger.info(f"Testing server at {base_url}\n")
+
+ # Non-streaming tests
+ test_chat(base_url, stream=False)
+ test_tool_call(base_url, stream=False)
+
+ # Streaming tests
+ test_chat(base_url, stream=True)
+ test_tool_call(base_url, stream=True)
+
+
+if __name__ == "__main__":
+ main()
# llama_build_and_test(test-double-float.cpp) # SLOW
endif()
-llama_build_and_test(test-chat-parser.cpp)
llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
-llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-jinja.cpp)
llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(
peg-parser/test-gbnf-generation.cpp
peg-parser/test-json-parser.cpp
peg-parser/test-json-serialization.cpp
+ peg-parser/test-python-dict-parser.cpp
peg-parser/test-unicode.cpp
peg-parser/tests.h
)
llama_build_and_test(test-alloc.cpp)
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
+
+
+#include "peg-parser.h"
#include "tests.h"
void test_basic(testing & t) {
t.assert_equal("result_is_fail", true, result.fail());
});
+
+ // Test markers
+ t.test("marker", [](testing &t) {
+ auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.marker();
+ });
+
+ common_peg_parse_context ctx_square("[marker]", false);
+ common_peg_parse_context ctx_sharp("<marker>", false);
+
+ auto result_square = bracket_parser.parse(ctx_square);
+ auto result_sharp = bracket_parser.parse(ctx_sharp);
+
+ t.assert_true("result_square_is_success", result_square.success());
+ t.assert_true("result_sharp_is_success", result_sharp.success());
+ });
});
}
--- /dev/null
+#include "tests.h"
+
+void test_python_dict_parser(testing &t) {
+ // Test parsing a simple Python dict object with single quotes
+ t.test("simple Python dict object parsing", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'name': 'test', 'value': 42, 'flag': True}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test parsing a Python array with mixed types
+ t.test("Python array with mixed types", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "[1, 'hello', True, None, 3.14]";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test parsing nested Python dict with objects and arrays
+ t.test("nested Python dict with objects and arrays", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input =
+ "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test parsing Python dict with escaped single quotes
+ t.test("Python dict with escaped single quotes", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'message': 'It\\'s working!'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test parsing Python dict with double quotes inside single quotes
+ t.test("Python dict with double quotes inside single quotes", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'quote': 'He said \"Hello\"'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test the example from the requirements
+ t.test("complex Python dict example from requirements", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test need_more_input() parsing - incomplete object
+ t.test("need_more_input() parsing - incomplete object", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'name': 'test', 'value': ";
+ common_peg_parse_context ctx(input, true);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+ });
+
+ // Test need_more_input() parsing - incomplete single-quoted string
+ t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'name': 'test";
+ common_peg_parse_context ctx(input, true);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+ });
+
+ // Test unicode in Python dict strings
+ t.test("unicode in Python dict strings", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'message': 'Hello, 世界!'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test Python dict with unicode escapes
+ t.test("Python dict with unicode escapes", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{'unicode': 'Hello\\u0041'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test that Python parser accepts double-quoted strings too
+ t.test("Python parser accepts double-quoted strings", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{\"name\": \"test\"}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test Python parser with mixed quote styles
+ t.test("Python parser with mixed quote styles", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ std::string input = "{\"name\": 'test', 'value': \"hello\"}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+
+ t.assert_equal("result_is_success", true, result.success());
+ t.assert_equal("result_end", input.size(), result.end);
+ });
+
+ // Test Python True/False/None
+ t.test("Python True/False/None", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+ t.test("True", [&](testing &t) {
+ std::string input = "True";
+ common_peg_parse_context ctx(input);
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("False", [&](testing &t) {
+ std::string input = "False";
+ common_peg_parse_context ctx(input);
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("None", [&](testing &t) {
+ std::string input = "None";
+ common_peg_parse_context ctx(input);
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("rejects JSON-style true/false/null", [&](testing &t) {
+ for (const auto & kw : {"true", "false", "null"}) {
+ std::string input = kw;
+ common_peg_parse_context ctx(input);
+ auto result = parser.parse(ctx);
+ t.assert_true(std::string("rejects ") + kw, result.fail());
+ }
+ });
+ });
+
+ // Test single-quoted string content parser directly
+ t.test("single-quoted string content parser", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.sequence({ p.literal("'"), p.single_quoted_string_content(), p.literal("'"), p.space() });
+ });
+
+ t.test("simple string", [&](testing &t) {
+ std::string input = "'hello'";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("string with escaped single quote", [&](testing &t) {
+ std::string input = "'it\\'s'";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("string with double quotes", [&](testing &t) {
+ std::string input = "'say \"hello\"'";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("incomplete string", [&](testing &t) {
+ std::string input = "'hello";
+ common_peg_parse_context ctx(input, true);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("need_more_input", result.need_more_input());
+ });
+ });
+
+ // Test json() with pre-registered flexible json-string rule (python dict support)
+ t.test("json() parser with flexible json-string rule", [](testing &t) {
+ t.test("json() rejects single quotes by default", [&](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.json();
+ });
+
+ std::string input = "{'name': 'test'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("fail", result.fail());
+ });
+
+ t.test("json() accepts single quotes with pre-registered flexible json-string rule", [&](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ // Pre-register json-string rule with both quote styles
+ p.rule("json-string", [&]() {
+ return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+ });
+ return p.json();
+ });
+
+ std::string input = "{'name': 'test'}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("json() still accepts double quotes with flexible json-string rule", [&](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ p.rule("json-string", [&]() {
+ return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+ });
+ return p.json();
+ });
+
+ std::string input = "{\"name\": \"test\"}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("json() accepts mixed quote styles with flexible json-string rule", [&](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ p.rule("json-string", [&]() {
+ return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+ });
+ return p.json();
+ });
+
+ std::string input = "{\"name\": 'test', 'value': \"hello\"}";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+
+ t.test("complex nested structure with flexible json-string rule", [&](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ p.rule("json-string", [&]() {
+ return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+ });
+ return p.json();
+ });
+
+ std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+ common_peg_parse_context ctx(input);
+
+ auto result = parser.parse(ctx);
+ t.assert_true("success", result.success());
+ t.assert_equal("end", input.size(), result.end);
+ });
+ });
+}
void test_gbnf_generation(testing &t);
void test_unicode(testing &t);
void test_json_serialization(testing &t);
+void test_python_dict_parser(testing &t);
test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1, 1}, {1, 1}));
}
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1}));
+
#if 0
// test the mat-mat path for Metal
for (int k = 1; k < 512; ++k) {
--- /dev/null
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "peg-parser.h"
+#include "testing.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+using namespace autoparser;
+
+static void test_calculate_diff_split_basic(testing & t);
+static void test_calculate_diff_split_identical(testing & t);
+static void test_calculate_diff_split_common_prefix(testing & t);
+static void test_calculate_diff_split_common_suffix(testing & t);
+static void test_calculate_diff_split_common_both(testing & t);
+static void test_calculate_diff_split_empty_cases(testing & t);
+static void test_calculate_diff_split_no_common(testing & t);
+static void test_calculate_diff_split_single_char(testing & t);
+static void test_calculate_diff_split_overlaps(testing & t);
+static void test_calculate_diff_split_tag_boundaries(testing & t);
+static void test_calculate_diff_split(testing & t);
+
+static void test_until_common_prefix_basic(testing & t);
+static void test_until_common_prefix(testing & t);
+
+static void test_after_common_suffix_basic(testing & t);
+static void test_after_common_suffix(testing & t);
+
+static void test_analyze_tool_call_pure_json(testing & t);
+static void test_analyze_tool_call_function_name_markers(testing & t);
+static void test_analyze_tool_call_full_markers(testing & t);
+static void test_analyze_tool_call_edge_cases(testing & t);
+
+static void test_compare_variants_basic(testing & t);
+static void test_compare_variants_messages_modifier(testing & t);
+static void test_compare_variants_tools_modifier(testing & t);
+static void test_compare_variants_both_modifiers(testing & t);
+static void test_compare_variants_template_failure(testing & t);
+static void test_compare_variants_identity(testing & t);
+static void test_compare_variants(testing & t);
+
+// Seed-OSS template tool calling analysis tests
+static void test_seed_oss_tool_analysis(testing & t);
+static void test_seed_oss_tool_presence(testing & t);
+static void test_seed_oss_call_count(testing & t);
+static void test_seed_oss_function_names(testing & t);
+static void test_seed_oss_argument_count(testing & t);
+static void test_seed_oss_args_presence(testing & t);
+static void test_seed_oss_tool_with_reasoning(testing & t);
+
+// Nemotron template analysis tests
+static void test_nemotron_analysis(testing & t);
+static void test_nemotron_reasoning_detection(testing & t);
+static void test_nemotron_tool_format(testing & t);
+
+// CohereForAI template analysis tests
+static void test_cohere_reasoning_detection(testing & t);
+static void test_cohere_analysis(testing & t);
+
+// Marker separation
+static void test_marker_separation(testing & t);
+
+// standard_json_tools format tests
+static void test_standard_json_tools_formats(testing & t);
+static void test_standard_json_tools_openai(testing & t);
+static void test_standard_json_tools_cohere(testing & t);
+static void test_standard_json_tools_function_key(testing & t);
+
+// normalize_quotes_to_json tests
+static void test_normalize_quotes_to_json(testing & t);
+static void test_normalize_quotes_with_embedded_quotes(testing & t);
+
+// TAG_WITH_TAGGED argument parsing tests
+static void test_tagged_args_with_embedded_quotes(testing & t);
+
+int main(int argc, char * argv[]) {
+ testing t(std::cout);
+ t.verbose = true;
+
+ // usage: test-chat-auto-parser-helpers [filter_regex]
+
+ if (argc > 1) {
+ t.set_filter(argv[1]);
+ }
+
+ t.test("diff_split", test_calculate_diff_split);
+ t.test("common_prefix", test_until_common_prefix);
+ t.test("common_suffix", test_after_common_suffix);
+ t.test("compare_variants", test_compare_variants);
+ t.test("segments", test_marker_separation);
+ t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
+ t.test("cohere", test_cohere_analysis);
+ t.test("nemotron", test_nemotron_analysis);
+ t.test("standard_json_tools", test_standard_json_tools_formats);
+ t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
+ t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
+
+ return t.summary();
+}
+
+static void test_marker_separation(testing & t) {
+ auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker");
+ auto single_diag_marker = segmentize_markers("pre_marker<marker>post_marker");
+ auto paired_markers = segmentize_markers("<hello>world</hello>");
+ auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
+ auto in_between = segmentize_markers("im<blue>daba<dee>da[hey]");
+
+ t.test("single_square_marker", [&] (testing & t) {
+ t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
+ t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
+ t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);
+
+ t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
+ t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
+ t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
+ });
+
+ t.test("single_diagonal_marker", [&] (testing & t) {
+ t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
+ t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
+ t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);
+
+ t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
+ t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
+ t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
+ });
+
+ t.test("paired_markers", [&] (testing & t) {
+ t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
+ t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
+ t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);
+
+ t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
+ t.assert_equal("second is 'world'", "world", paired_markers[1].value);
+ t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
+ });
+
+ t.test("double_different_markers", [&] (testing & t) {
+ t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
+ t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
+ t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
+ t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);
+
+ t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
+ t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
+ t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
+ t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
+ });
+
+ t.test("in_between", [&] (testing & t) {
+ t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
+ t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
+ t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
+ t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
+ t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
+ t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);
+
+ t.assert_equal("first is 'im'", "im", in_between[0].value);
+ t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
+ t.assert_equal("third is 'daba'", "daba", in_between[2].value);
+ t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
+ t.assert_equal("fifth is 'da'", "da", in_between[4].value);
+ t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
+ });
+}
+
+static void test_calculate_diff_split(testing & t) {
+ t.test("calculate_diff_split basic", test_calculate_diff_split_basic);
+ t.test("calculate_diff_split identical", test_calculate_diff_split_identical);
+ t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix);
+ t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix);
+ t.test("calculate_diff_split common both", test_calculate_diff_split_common_both);
+ t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases);
+ t.test("calculate_diff_split no common", test_calculate_diff_split_no_common);
+ t.test("calculate_diff_split single char", test_calculate_diff_split_single_char);
+ t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps);
+ t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries);
+}
+
+static void test_calculate_diff_split_basic(testing & t) {
+ diff_split result = calculate_diff_split("hello world", "hello test");
+ t.assert_equal("prefix should be 'hello '", "hello ", result.prefix);
+ t.assert_equal("left should be 'world'", "world", result.left);
+ t.assert_equal("right should be 'test'", "test", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("abc", "xyz");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'abc'", "abc", result.left);
+ t.assert_equal("right should be 'xyz'", "xyz", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("prefixA suffix", "prefixB suffix");
+ t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+ t.assert_equal("left should be 'A'", "A", result.left);
+ t.assert_equal("right should be 'B'", "B", result.right);
+ t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_identical(testing & t) {
+ diff_split result = calculate_diff_split("hello", "hello");
+ t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("", "");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("a", "a");
+ t.assert_equal("prefix should be 'a'", "a", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("<row><row><row><your><boat><gently>", "<row><row><row><your><boat><gently>");
+ t.assert_equal("prefix should be '<row><row><row><your><boat><gently>'", "<row><row><row><your><boat><gently>", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_prefix(testing & t) {
+ diff_split result = calculate_diff_split("abcdef", "abcxyz");
+ t.assert_equal("prefix should be 'abc'", "abc", result.prefix);
+ t.assert_equal("left should be 'def'", "def", result.left);
+ t.assert_equal("right should be 'xyz'", "xyz", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("same", "sameagain");
+ t.assert_equal("prefix should be 'same'", "same", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'again'", "again", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("test", "testing");
+ t.assert_equal("prefix should be 'test'", "test", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'ing'", "ing", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_suffix(testing & t) {
+ diff_split result = calculate_diff_split("123end", "456end");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be '123'", "123", result.left);
+ t.assert_equal("right should be '456'", "456", result.right);
+ t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+ result = calculate_diff_split("start", "end");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'start'", "start", result.left);
+ t.assert_equal("right should be 'end'", "end", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("abcsuffix", "xyzsuffix");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'abc'", "abc", result.left);
+ t.assert_equal("right should be 'xyz'", "xyz", result.right);
+ t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_common_both(testing & t) {
+ diff_split result = calculate_diff_split("helloXworld", "helloYworld");
+ t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+ t.assert_equal("left should be 'X'", "X", result.left);
+ t.assert_equal("right should be 'Y'", "Y", result.right);
+ t.assert_equal("suffix should be 'world'", "world", result.suffix);
+
+ result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
+ t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix);
+ t.assert_equal("left should be 'middle'", "middle", result.left);
+ t.assert_equal("right should be 'different'", "different", result.right);
+ t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix);
+
+ result = calculate_diff_split("startAend", "startBend");
+ t.assert_equal("prefix should be 'start'", "start", result.prefix);
+ t.assert_equal("left should be 'A'", "A", result.left);
+ t.assert_equal("right should be 'B'", "B", result.right);
+ t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+ // Edge case: common prefix and suffix overlap
+ result = calculate_diff_split("aa", "ab");
+ t.assert_equal("prefix should be 'a'", "a", result.prefix);
+ t.assert_equal("left should be 'a'", "a", result.left);
+ t.assert_equal("right should be 'b'", "b", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_empty_cases(testing & t) {
+ // Empty left, non-empty right
+ diff_split result = calculate_diff_split("", "hello");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'hello'", "hello", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Non-empty left, empty right
+ result = calculate_diff_split("hello", "");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'hello'", "hello", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Both empty
+ result = calculate_diff_split("", "");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Left single char, empty right
+ result = calculate_diff_split("a", "");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'a'", "a", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Empty left, right single char
+ result = calculate_diff_split("", "a");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'a'", "a", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_no_common(testing & t) {
+ diff_split result = calculate_diff_split("abc", "xyz");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'abc'", "abc", result.left);
+ t.assert_equal("right should be 'xyz'", "xyz", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("left", "right");
+ // The algorithm finds "t" as a common suffix since both strings end with 't'
+ // This is the algorithm's actual behavior - it finds maximal common suffix
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'lef'", "lef", result.left);
+ t.assert_equal("right should be 'righ'", "righ", result.right);
+ t.assert_equal("suffix should be 't'", "t", result.suffix);
+
+ result = calculate_diff_split("123", "456");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be '123'", "123", result.left);
+ t.assert_equal("right should be '456'", "456", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_single_char(testing & t) {
+ diff_split result = calculate_diff_split("a", "b");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'a'", "a", result.left);
+ t.assert_equal("right should be 'b'", "b", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("a", "a");
+ t.assert_equal("prefix should be 'a'", "a", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("a", "ab");
+ t.assert_equal("prefix should be 'a'", "a", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'b'", "b", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("ab", "a");
+ t.assert_equal("prefix should be 'a'", "a", result.prefix);
+ t.assert_equal("left should be 'b'", "b", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_overlaps(testing & t) {
+ // One string is substring of another
+ diff_split result = calculate_diff_split("test", "testing");
+ t.assert_equal("prefix should be 'test'", "test", result.prefix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be 'ing'", "ing", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ result = calculate_diff_split("testing", "test");
+ t.assert_equal("prefix should be 'test'", "test", result.prefix);
+ t.assert_equal("left should be 'ing'", "ing", result.left);
+ t.assert_equal("right should be empty", "", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Similar strings with one extra char at start
+ result = calculate_diff_split("Xtest", "Ytest");
+ // The algorithm finds "test" as a common suffix since both strings end with "test"
+ // This is the algorithm's actual behavior - it finds maximal common suffix
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'X'", "X", result.left);
+ t.assert_equal("right should be 'Y'", "Y", result.right);
+ t.assert_equal("suffix should be 'test'", "test", result.suffix);
+
+ // Similar strings with one extra char at end
+ result = calculate_diff_split("testX", "testY");
+ t.assert_equal("prefix should be 'test'", "test", result.prefix);
+ t.assert_equal("left should be 'X'", "X", result.left);
+ t.assert_equal("right should be 'Y'", "Y", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Strings that are reverses
+ result = calculate_diff_split("abc", "cba");
+ t.assert_equal("prefix should be empty", "", result.prefix);
+ t.assert_equal("left should be 'abc'", "abc", result.left);
+ t.assert_equal("right should be 'cba'", "cba", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_tag_boundaries(testing & t) {
+ // Test with unclosed XML tags
+ diff_split result = calculate_diff_split("test<tag", "test>content");
+ // The fix_tag_boundaries should move incomplete tags appropriately
+ t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0);
+ t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+ // Test with unclosed brackets
+ result = calculate_diff_split("test[", "test]value");
+ t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+ // Test with partial tags on both sides
+ result = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
+ // fix_tag_boundaries moves the incomplete '<' from prefix to left/right
+ t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+ t.assert_equal("left should be '<tag>'", "<tag>", result.left);
+ t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Test with complex nested tags
+ result = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
+ // Algorithm finds "ent</div>" as a common suffix because both strings end with it
+ // This is the actual algorithm behavior, though not semantically ideal
+ t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", result.prefix);
+ t.assert_equal("left should be 'cont'", "cont", result.left);
+ t.assert_equal("right should be 'differ'", "differ", result.right);
+ t.assert_equal("suffix should be 'ent</div>'", "ent</div>", result.suffix);
+
+ // Test with unclosed angle bracket
+ result = calculate_diff_split("Hello <world>", "Hello test");
+ t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix);
+ t.assert_true("left should contain '<world>'", result.left.find("<world>") != std::string::npos);
+ t.assert_equal("right should be 'test'", "test", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Test with unclosed square bracket
+ result = calculate_diff_split("test [array]", "test other");
+ t.assert_equal("prefix should be 'test '", "test ", result.prefix);
+ t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos);
+ t.assert_equal("right should be 'other'", "other", result.right);
+ t.assert_equal("suffix should be empty", "", result.suffix);
+
+ // Test empty prefix and suffix with tags
+ result = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
+ t.assert_equal("prefix should be '<tag>'", "<tag>", result.prefix);
+ t.assert_equal("left should be 'left'", "left", result.left);
+ t.assert_equal("right should be 'righ'", "righ", result.right);
+ t.assert_equal("suffix should be '</tag>'", "</tag>", result.suffix);
+
+ {
+ // real case from template tests, simplified
+ std::string left = "PREFIX</think>Sure";
+ std::string right = "PREFIX<think>Lemme think</think>Sure";
+ result = calculate_diff_split(left, right);
+ t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix);
+ t.assert_equal("suffix should be </think>Sure", "</think>Sure", result.suffix);
+ t.assert_equal("left should be empty", "", result.left);
+ t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", result.right);
+ }
+
+ {
+ // Real case: special tokens with |> boundary issue
+ // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION
+ std::string prefix = "SOME_PREFIX";
+ std::string suffix = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
+ std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
+ std::string right_diff =
+ "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
+ "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
+ "]<|END_ACTION";
+
+ std::string left = prefix + left_diff + suffix;
+ std::string right = prefix + right_diff + suffix;
+ result = calculate_diff_split(left, right);
+
+ t.assert_equal("special token prefix", prefix, result.prefix);
+ // The |> should be moved from suffix to complete the tokens
+ t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left);
+ t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos);
+ t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+ result.suffix);
+ }
+}
+
+static void test_until_common_prefix(testing & t) {
+ t.test("until_common_prefix basic", test_until_common_prefix_basic);
+}
+
+static void test_until_common_prefix_basic(testing & t) {
+ // Test case from the user request
+ std::string result = until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
+ t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", result);
+
+ // Additional test cases to ensure robustness
+ // Test with different common prefix lengths
+ result = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
+ t.assert_equal("should return 'prefix'", "prefix", result);
+
+ // Test when common prefix is at the start
+ result = until_common_prefix("<common>rest", "<common>left", "<common>right");
+ t.assert_equal("should return empty string when common prefix at start", "", result);
+
+ // Test when there's no common prefix
+ result = until_common_prefix("something", "left", "right");
+ t.assert_equal("should return empty string when no common prefix", "", result);
+
+ // Test with empty strings
+ result = until_common_prefix("test", "", "right");
+ t.assert_equal("should return empty string when left is empty", "", result);
+
+ // Test with longer common prefix
+ result = until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
+ t.assert_equal("should return 'abcXYZ'", "abcXYZ", result);
+}
+
+static void test_after_common_suffix(testing & t) {
+ t.test("after_common_suffix basic", test_after_common_suffix_basic);
+}
+
+static void test_after_common_suffix_basic(testing & t) {
+ // Test case from the user request
+ std::string result = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
+ "<arg name=bar>100</arg>",
+ "<arg name=baz>535</arg>");
+ t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", result);
+
+ // Test when common suffix is at the end
+ result = after_common_suffix("rest<common>", "left<common>", "right<common>");
+ t.assert_equal("should return empty string when common suffix at end", "", result);
+
+ // Test with empty strings
+ result = after_common_suffix("test", "left", "");
+ t.assert_equal("should return empty string when right is empty", "", result);
+
+ // Test case with XML-like structure similar to the main example
+ result = after_common_suffix("<outer><inner>value</inner></outer>",
+ "<inner>value</inner>",
+ "<inner>different</inner>");
+ t.assert_equal("should return '</outer>'", "</outer>", result);
+
+ // Test with longer common suffix appearing at the end of full
+ result = after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
+ t.assert_equal("should return '' when common suffix is at end of full", "", result);
+
+ // Test with common suffix appearing in middle but not at end
+ result = after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
+ t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", result);
+
+ // Test with multi-character common suffix at the very end of full
+ result = after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
+ t.assert_equal("should return '' when common suffix </middle> is at end of full", "", result);
+}
+
+static void test_compare_variants(testing & t) {
+ t.test("compare_variants basic", test_compare_variants_basic);
+ t.test("compare_variants messages modifier", test_compare_variants_messages_modifier);
+ t.test("compare_variants tools modifier", test_compare_variants_tools_modifier);
+ t.test("compare_variants both modifiers", test_compare_variants_both_modifiers);
+ t.test("compare_variants template failure", test_compare_variants_template_failure);
+ t.test("compare_variants identity", test_compare_variants_identity);
+}
+
+static void test_compare_variants_basic(testing & t) {
+ // Create a simple template that just echoes messages
+ common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+ template_params params;
+ params.messages = json::array({
+ json {{"role", "user"}, {"content", "Hello"}}
+ });
+
+ auto modifier = [](template_params & p) {
+ p.messages[0]["content"] = "World";
+ };
+
+ auto result = ::compare_variants(tmpl, params, modifier);
+
+ if (!t.assert_true("result should have value", result.has_value())) {
+ return;
+ }
+ // The template might not output anything if messages is empty or format is different
+ // Check that we get a valid result
+ t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
+}
+
+static void test_compare_variants_messages_modifier(testing & t) {
+ // Test with messages modifier only
+ common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+ template_params params;
+ params.messages = json::array({
+ json {{"role", "user"}, {"content", "A"}}
+ });
+
+ auto modifier = [](template_params & p) {
+ p.messages[0]["content"] = "B";
+ };
+
+ std::optional<compare_variants_result> result = ::compare_variants(tmpl, params, modifier);
+
+ if (!t.assert_true("result should have value", result.has_value())) {
+ return;
+ }
+ t.assert_equal("left should be 'A'", "A", result->diff.left);
+ t.assert_equal("right should be 'B'", "B", result->diff.right);
+}
+
+static void test_compare_variants_tools_modifier(testing & t) {
+ // Test with tools modifier only
+ common_chat_template tmpl(
+ "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");
+
+ template_params params;
+ params.tools = json::array({
+ json {{"name", "foo"}}
+ });
+
+ auto modifier = [](template_params & p) {
+ p.tools[0]["name"] = "bar";
+ };
+
+ auto result = ::compare_variants(tmpl, params, modifier);
+
+ if (!t.assert_true("result should have value", result.has_value())) {
+ return;
+ }
+ t.assert_equal("left should be 'foo'", "foo", result->diff.left);
+ t.assert_equal("right should be 'bar'", "bar", result->diff.right);
+}
+
+static void test_compare_variants_both_modifiers(testing & t) {
+ // Test with both messages and tools modifiers using the for loop approach
+ common_chat_template tmpl(
+ "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+ template_params params;
+ params.messages = json::array({
+ json {{"role", "user"}, {"content", "A"}}
+ });
+
+ auto modifier = [](template_params & p) {
+ p.messages[0]["content"] = "B";
+ p.messages[0]["role"] = "newuser";
+ };
+
+ auto result = ::compare_variants(tmpl, params, modifier);
+
+ if (!t.assert_true("result should have value", result.has_value())) {
+ return;
+ }
+ t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);
+ t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
+}
+
+static void test_compare_variants_template_failure(testing & t) {
+ // Test with template that causes failure during application (not construction)
+ // We use a valid template syntax but one that will fail during application
+ common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");
+
+ template_params params;
+ params.messages = json::array({
+ json {{"role", "user"}, {"content", "Hello"}}
+ });
+
+ auto modifier = [](template_params & p) {
+ p.messages[0]["content"] = "World";
+ };
+
+ auto result = ::compare_variants(tmpl, params, modifier);
+
+ t.assert_true("result should be nullopt on template failure", !result.has_value());
+}
+
+static void test_compare_variants_identity(testing & t) {
+ // Test with identity modifier (no change)
+ common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+ template_params params;
+ params.messages = json::array({
+ json {{"role", "user"}, {"content", "Hello"}}
+ });
+
+ // No modifier - should use identity
+ auto result = ::compare_variants(tmpl, params, nullptr);
+
+ if (!t.assert_true("result should have value", result.has_value())) {
+ return;
+ }
+ t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
+ t.assert_equal("left should be empty", "", result->diff.left);
+ t.assert_equal("right should be empty", "", result->diff.right);
+ t.assert_equal("suffix should be empty", "", result->diff.suffix);
+}
+
+// ============================================================================
+// Seed-OSS Template Tool Calling Analysis Tests
+// ============================================================================
+
+static void test_seed_oss_tool_analysis(testing & t) {
+ t.test("Seed-OSS tool presence", test_seed_oss_tool_presence);
+ t.test("Seed-OSS call count", test_seed_oss_call_count);
+ t.test("Seed-OSS function names", test_seed_oss_function_names);
+ t.test("Seed-OSS argument count", test_seed_oss_argument_count);
+ t.test("Seed-OSS args presence", test_seed_oss_args_presence);
+ t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning);
+}
+
+// Helper to load Seed-OSS template
+static common_chat_template load_seed_oss_template(testing & t) {
+ std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja";
+ std::ifstream fin(template_path, std::ios::binary);
+ std::ostringstream buf;
+ if (fin.is_open()) {
+ buf << fin.rdbuf();
+ }
+ std::string template_source = buf.str();
+ common_chat_template tmpl(template_source, "", "");
+ t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0);
+ return tmpl;
+}
+
+// Helper to build tool call JSON
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
+ return json{
+ {"id", id},
+ {"type", "function"},
+ {"function", json{
+ {"name", name},
+ {"arguments", args}
+ }}
+ };
+}
+
+// Helper to build tools definition
+static json build_tools_definition() {
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["param1"] = json::object({
+ {"type", "string"},
+ {"description", "First parameter"}
+ });
+ parameters_schema["properties"]["param2"] = json::object({
+ {"type", "string"},
+ {"description", "Second parameter"}
+ });
+ parameters_schema["required"] = json::array({"param1", "param2"});
+
+ return json::array({
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "test_function_name"},
+ {"description", "A test function for debugging"},
+ {"parameters", parameters_schema}
+ }}
+ }
+ });
+}
+
+// T1: Compare with/without tool call (user, assistant)
+static void test_seed_oss_tool_presence(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ json assistant_no_tools = json{
+ {"role", "assistant"},
+ {"content", "Let me help you."}
+ };
+
+ json assistant_with_tools = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello, please help me."}
+ };
+
+ template_params params_no_tools;
+ params_no_tools.messages = json::array({user_msg, assistant_no_tools});
+ params_no_tools.tools = build_tools_definition();
+ params_no_tools.add_generation_prompt = false;
+ params_no_tools.enable_thinking = true;
+
+ template_params params_with_tools;
+ params_with_tools.messages = json::array({user_msg, assistant_with_tools});
+ params_with_tools.tools = build_tools_definition();
+ params_with_tools.add_generation_prompt = false;
+ params_with_tools.enable_thinking = true;
+
+ auto result = ::compare_variants(tmpl, params_no_tools,
+ [&](template_params & p) {
+ p.messages = params_with_tools.messages;
+ });
+
+ if (!t.assert_true("T1 result should have value", result.has_value())) {
+ return;
+ }
+
+ const auto & diff = result->diff;
+ t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos);
+ t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos);
+ t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+ // Left should be the assistant content without tool
+ t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);
+
+ // Right should contain the tool call markers
+ t.assert_true("T1 right should contain tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+ t.assert_true("T1 right should contain function tag", diff.right.find("<function=test_function_name>") != std::string::npos);
+ t.assert_true("T1 right should contain parameter=param1", diff.right.find("<parameter=param1>") != std::string::npos);
+ t.assert_true("T1 right should contain parameter=param2", diff.right.find("<parameter=param2>") != std::string::npos);
+ t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos);
+ t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos);
+ t.assert_true("T1 right should contain tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+ // Suffix should be the eos token
+ t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
+}
+
+// T2: Compare one vs two tool calls
+static void test_seed_oss_call_count(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ json assistant_one_call = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })}
+ };
+
+ json assistant_two_calls = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+ build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+ })}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello, please help me."}
+ };
+
+ template_params params_one;
+ params_one.messages = json::array({user_msg, assistant_one_call});
+ params_one.tools = build_tools_definition();
+ params_one.add_generation_prompt = false;
+ params_one.enable_thinking = true;
+
+ auto result = ::compare_variants(tmpl, params_one,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_two_calls});
+ });
+
+ if (!t.assert_true("T2 result should have value", result.has_value())) {
+ return;
+ }
+
+ const auto & diff = result->diff;
+
+ // Prefix should include the first tool call
+ t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("<seed:tool_call>") != std::string::npos);
+ t.assert_true("T2 prefix should contain first function", diff.prefix.find("<function=test_function_name>") != std::string::npos);
+ t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos);
+ t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos);
+ t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("</seed:tool_call>") != std::string::npos);
+
+ // Left should be empty (no second tool call in variant A)
+ t.assert_equal("T2 left should be empty", "", diff.left);
+
+ // Right should contain the second tool call
+ t.assert_true("T2 right should contain second tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+ t.assert_true("T2 right should contain second function", diff.right.find("<function=test_function_name>") != std::string::npos);
+ t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos);
+ t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos);
+ t.assert_true("T2 right should contain second tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+ // Suffix should end with the eos token
+ t.assert_equal("T2 suffix should end with '<seed:eos>'", "<seed:eos>", diff.suffix.substr(diff.suffix.length() - 10, 10));
+}
+
+// T3: Compare different function names
+static void test_seed_oss_function_names(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ // Build tools with two different function names
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["arg1"] = json::object({
+ {"type", "string"},
+ {"description", "Argument 1"}
+ });
+ parameters_schema["required"] = json::array({"arg1"});
+
+ json tools = json::array({
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "func_alpha"},
+ {"description", "First function"},
+ {"parameters", parameters_schema}
+ }}
+ },
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "func_beta"},
+ {"description", "Second function"},
+ {"parameters", parameters_schema}
+ }}
+ }
+ });
+
+ json assistant_func_alpha = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
+ })}
+ };
+
+ json assistant_func_beta = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
+ })}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello"}
+ };
+
+ template_params params_alpha;
+ params_alpha.messages = json::array({user_msg, assistant_func_alpha});
+ params_alpha.tools = tools;
+ params_alpha.add_generation_prompt = false;
+ params_alpha.enable_thinking = true;
+
+ auto result = ::compare_variants(tmpl, params_alpha,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_func_beta});
+ });
+
+ if (!t.assert_true("T3 result should have value", result.has_value())) {
+ return;
+ }
+
+ const auto & diff = result->diff;
+
+ bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos;
+ bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
+ bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos;
+ bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos;
+ bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos;
+
+ // Left should contain func_alpha (or be in prefix)
+ t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);
+
+ // Right should contain func_beta
+ t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);
+
+ // Both should have the same parameter value (in common parts, not in diffs)
+ // Since both have same args, test_value will be in prefix/suffix
+ t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
+ diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
+}
+
+// T4: Compare different argument counts (zero, one, two parameters)
+static void test_seed_oss_argument_count(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ // Build tools with 0, 1, or 2 required parameters
+ json params_2_required = json::object();
+ params_2_required["type"] = "object";
+ params_2_required["properties"] = json::object();
+ params_2_required["properties"]["arg1"] = json::object({
+ {"type", "string"},
+ {"description", "Argument 1"}
+ });
+ params_2_required["properties"]["arg2"] = json::object({
+ {"type", "string"},
+ {"description", "Argument 2"}
+ });
+ params_2_required["required"] = json::array({"arg1", "arg2"});
+
+ json params_1_required = json::object();
+ params_1_required["type"] = "object";
+ params_1_required["properties"] = json::object();
+ params_1_required["properties"]["arg1"] = json::object({
+ {"type", "string"},
+ {"description", "Argument 1"}
+ });
+ params_1_required["required"] = json::array({"arg1"});
+
+ json tools = json::array({
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "test_func"},
+ {"description", "Test function"},
+ {"parameters", params_2_required}
+ }}
+ }
+ });
+
+ // Test: zero args vs one arg
+ json assistant_zero_args = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_func", json::object())
+ })}
+ };
+
+ json assistant_one_arg = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_func", json::object({{"arg1", "value1"}}))
+ })}
+ };
+
+ json assistant_two_args = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
+ })}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello"}
+ };
+
+ // Test zero vs one
+ template_params params_zero;
+ params_zero.messages = json::array({user_msg, assistant_zero_args});
+ params_zero.tools = tools;
+ params_zero.add_generation_prompt = false;
+ params_zero.enable_thinking = true;
+
+ auto result_zero_one = ::compare_variants(tmpl, params_zero,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_one_arg});
+ });
+
+ if (!t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value())) {
+ return;
+ }
+ t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == "");
+ t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos);
+
+ // Test one vs two
+ template_params params_one;
+ params_one.messages = json::array({user_msg, assistant_one_arg});
+ params_one.tools = tools;
+ params_one.add_generation_prompt = false;
+ params_one.enable_thinking = true;
+
+ auto result_one_two = ::compare_variants(tmpl, params_one,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_two_args});
+ });
+
+ if (!t.assert_true("T4 one vs two result should have value", result_one_two.has_value())) {
+ return;
+ }
+
+ const auto & diff4 = result_one_two->diff;
+ t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
+ diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+ t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
+ diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+ t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
+ diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
+}
+
+// T5: Compare different argument values
+static void test_seed_oss_args_presence(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ json assistant_same_arg = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
+ })}
+ };
+
+ json assistant_other_arg = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
+ })}
+ };
+
+ json assistant_both_args = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello"}
+ };
+
+ template_params params_same;
+ params_same.messages = json::array({user_msg, assistant_same_arg});
+ params_same.tools = build_tools_definition();
+ params_same.add_generation_prompt = false;
+ params_same.enable_thinking = true;
+
+ // Test same arg vs other arg
+ auto result_same_other = ::compare_variants(tmpl, params_same,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_other_arg});
+ });
+
+ if (!t.assert_true("T5 same vs other result should have value", result_same_other.has_value())) {
+ return;
+ }
+ const auto & diff5a = result_same_other->diff;
+ t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
+ diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos);
+ t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)",
+ diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos);
+ t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
+ diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos);
+ t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
+ diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
+
+ // Test same arg vs both args
+ auto result_same_both = ::compare_variants(tmpl, params_same,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_both_args});
+ });
+
+ if (!t.assert_true("T5 same vs both result should have value", result_same_both.has_value())) {
+ return;
+ }
+ const auto & diff5b = result_same_both->diff;
+ t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
+ diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+ t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
+ diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+ t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
+ diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos);
+}
+
+// T6: Tool call with vs without reasoning_content
+static void test_seed_oss_tool_with_reasoning(testing & t) {
+ common_chat_template tmpl = load_seed_oss_template(t);
+
+ json assistant_tool_only = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })}
+ };
+
+ json assistant_tool_with_reasoning = json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })},
+ {"reasoning_content", "I need to call the tool first."}
+ };
+
+ json user_msg = json{
+ {"role", "user"},
+ {"content", "Hello, please help me."}
+ };
+
+ template_params params_tool_only;
+ params_tool_only.messages = json::array({user_msg, assistant_tool_only});
+ params_tool_only.tools = build_tools_definition();
+ params_tool_only.add_generation_prompt = false;
+ params_tool_only.enable_thinking = true;
+
+ auto result = ::compare_variants(tmpl, params_tool_only,
+ [&](template_params & p) {
+ p.messages = json::array({user_msg, assistant_tool_with_reasoning});
+ });
+
+ if (!t.assert_true("T6 result should have value", result.has_value())) {
+ return;
+ }
+
+ const auto & diff = result->diff;
+
+ // Left should be empty (no reasoning in variant A)
+ t.assert_equal("T6 left should be empty", "", diff.left);
+
+ // Right should contain the thinking token with reasoning content
+ t.assert_true("T6 right should contain think begin", diff.right.find("<seed:think>") != std::string::npos);
+ t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos);
+ t.assert_true("T6 right should contain think end", diff.right.find("</seed:think>") != std::string::npos);
+
+ // Prefix should contain the assistant role
+ t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+ // Suffix should contain the tool call
+ t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("<seed:tool_call>") != std::string::npos);
+ t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos);
+ t.assert_true("T6 suffix should contain eos", diff.suffix.find("<seed:eos>") != std::string::npos);
+}
+
+static common_chat_template load_template(testing & t, const std::string & template_path) {
+ std::ifstream fin(template_path, std::ios::binary);
+ std::ostringstream buf;
+ if (fin.is_open()) {
+ buf << fin.rdbuf();
+ }
+ std::string template_source = buf.str();
+ common_chat_template tmpl(template_source, "", "");
+ t.assert_true("Nemotron template loaded successfully", template_source.length() > 0);
+ return tmpl;
+}
+
+// ============================================================================
+// Nemotron Template Analysis Tests
+// ============================================================================
+static common_chat_template load_nemotron_template(testing & t) {
+ return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
+}
+
+static void test_nemotron_analysis(testing & t) {
+ t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection);
+ t.test("Nemotron tool format", test_nemotron_tool_format);
+}
+
+static void test_nemotron_reasoning_detection(testing & t) {
+ common_chat_template tmpl = load_nemotron_template(t);
+
+ // Test the comparison manually to see what's happening
+ json user_msg = json{ { "role", "user" }, { "content", "Hello" } };
+ json assistant_no_reasoning = json{
+ { "role", "assistant" },
+ { "content", "I can help." }
+ };
+ json assistant_with_reasoning = json{
+ { "role", "assistant" },
+ { "content", "I can help." },
+ { "reasoning_content", "Let me think about this." }
+ };
+
+ template_params params;
+ params.messages = json::array({ user_msg, assistant_no_reasoning });
+ params.add_generation_prompt = false;
+ params.enable_thinking = true;
+
+ // Run differential analysis
+ struct autoparser analysis;
+ analysis.analyze_template(tmpl);
+
+ // Check reasoning markers
+ t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
+ t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
+
+ // Check reasoning mode detection
+ // Nemotron uses forced closed reasoning with add_generation_prompt
+ t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning.mode);
+
+ // Make sure reasoning markers don't spill over to content markers
+ t.assert_equal("content start should be empty", "", analysis.content.start);
+ t.assert_equal("content end should be empty", "", analysis.content.end);
+
+ t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode);
+}
+
+static void test_nemotron_tool_format(testing & t) {
+ common_chat_template tmpl = load_nemotron_template(t);
+
+ // Run differential analysis
+ struct autoparser analysis;
+ analysis.analyze_template(tmpl);
+
+ // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
+ t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start);
+ t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.tools.format.section_end);
+ t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", analysis.tools.format.per_call_start);
+ t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", analysis.tools.format.per_call_end);
+ t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+
+ // Check function markers
+ t.assert_equal("func_name_prefix should be '<function='", "<function=", analysis.tools.function.name_prefix);
+ t.assert_equal("func_name_suffix should be '>\\n'", ">\n", analysis.tools.function.name_suffix);
+ t.assert_equal("func_close should be '</function>\\n'", "</function>\n", analysis.tools.function.close);
+
+ // Check argument markers (note: markers retain trailing newlines for proper parsing)
+ t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
+ t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
+ t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
+
+ // Check format classification
+ t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
+
+ // Verify tool support
+ t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+}
+
+static common_chat_template load_cohere_template(testing & t) {
+ return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
+}
+
+static void test_cohere_analysis(testing & t) {
+ t.test("Cohere reasoning detection", test_cohere_reasoning_detection);
+}
+
+static void test_cohere_reasoning_detection(testing & t) {
+ common_chat_template tmpl = load_cohere_template(t);
+
+ // Run differential analysis
+ struct autoparser analysis;
+ analysis.analyze_template(tmpl);
+
+ // Check reasoning markers - Cohere uses special token format
+ t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start);
+ t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.reasoning.end);
+
+ // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY)
+ t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning.mode);
+
+ // Check content markers - Cohere wraps all content with START/END_RESPONSE
+ t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.content.start);
+ t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.content.end);
+
+ // Content is always wrapped (both with and without tools)
+ t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content.mode);
+}
+
+static void test_tool_format_cohere(testing & t) {
+ common_chat_template tmpl = load_cohere_template(t);
+
+ // Run differential analysis
+ struct autoparser analysis;
+ analysis.analyze_template(tmpl);
+
+ // Check tool section markers - Cohere uses ACTION markers
+ t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start);
+ t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.tools.format.section_end);
+
+ // JSON_NATIVE format has no per-call markers
+ t.assert_equal("per_call_start should be empty", "", analysis.tools.format.per_call_start);
+ t.assert_equal("per_call_end should be empty", "", analysis.tools.format.per_call_end);
+
+ // JSON_NATIVE format has empty function markers (no XML-style markers)
+ t.assert_equal("func_name_prefix should be empty", "", analysis.tools.function.name_prefix);
+ t.assert_equal("func_name_suffix should be empty", "", analysis.tools.function.name_suffix);
+ t.assert_equal("func_close should be empty", "", analysis.tools.function.close);
+
+ // JSON_NATIVE format has empty args markers
+ t.assert_equal("args_start should be empty", "", analysis.tools.arguments.start);
+ t.assert_equal("args_end should be empty", "", analysis.tools.arguments.end);
+
+ // JSON_NATIVE format has empty argument markers
+ t.assert_equal("arg_name_prefix should be empty", "", analysis.tools.arguments.name_prefix);
+ t.assert_equal("arg_name_suffix should be empty", "", analysis.tools.arguments.name_suffix);
+ t.assert_equal("arg_value_prefix should be empty", "", analysis.tools.arguments.value_prefix);
+ t.assert_equal("arg_value_suffix should be empty", "", analysis.tools.arguments.value_suffix);
+ t.assert_equal("arg_separator should be empty", "", analysis.tools.arguments.separator);
+
+ // Check JSON field names - Cohere uses non-standard names
+ t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.tools.format.name_field);
+ t.assert_equal("args_field should be 'parameters'", "parameters", analysis.tools.format.args_field);
+ // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format
+ t.assert_equal("id_field should be 'tool_call_id'", "", analysis.tools.format.id_field);
+
+ // Check format classification
+ t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools.format.mode);
+
+ // Check flags
+ t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+ t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+ t.assert_true("should not require nonnull content", !analysis.content.requires_nonnull_content);
+ t.assert_true("tools_array_wrapped should be true", analysis.tools.format.tools_array_wrapped);
+}
+
+// ============================================================================
+// standard_json_tools Format Tests
+// ============================================================================
+
+// Helper to build tools definition for tests
+static json build_test_tools() {
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["location"] = json::object({
+ {"type", "string"},
+ {"description", "The city and state"}
+ });
+ parameters_schema["properties"]["unit"] = json::object({
+ {"type", "string"},
+ {"description", "Temperature unit"},
+ {"enum", json::array({"celsius", "fahrenheit"})}
+ });
+ parameters_schema["required"] = json::array({"location"});
+
+ return json::array({
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "get_current_weather"},
+ {"description", "Get the current weather in a given location"},
+ {"parameters", parameters_schema}
+ }}
+ }
+ });
+}
+
+static void test_standard_json_tools_formats(testing & t) {
+ t.test("OpenAI format", test_standard_json_tools_openai);
+ t.test("Cohere format", test_standard_json_tools_cohere);
+ t.test("function-as-key format", test_standard_json_tools_function_key);
+}
+
+// Test 1: OpenAI Standard Format
+// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
+static void test_standard_json_tools_openai(testing & t) {
+ json tools = build_test_tools();
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ auto tool_call = p.standard_json_tools(
+ "<tool_call>", "</tool_call>", tools,
+ /* parallel */ true,
+ /* force */ false,
+ /* name_key */ "function.name",
+ /* args_key */ "function.arguments",
+ /* array_wrapped */ false,
+ /* function_is_key */ false,
+ /* call_id_key */ "id",
+ /* gen_call_id_key */ "",
+ /* parameters_order */ {}
+ );
+ return p.content(p.until("<tool_call>")) + p.optional(tool_call) + p.end();
+ });
+
+ std::string input =
+ "Let me check the weather."
+ "<tool_call>"
+ R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})"
+ "</tool_call>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ if (!t.assert_true("parse success", result.success())) {
+ return;
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+ t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id);
+ }
+ t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos);
+}
+
+// Test 2: Cohere Format
+// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
+static void test_standard_json_tools_cohere(testing & t) {
+ json tools = build_test_tools();
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ auto tool_call = p.standard_json_tools(
+ "<|START_ACTION|>[", "]<|END_ACTION|>", tools,
+ /* parallel */ true,
+ /* force */ false,
+ /* name_key */ "tool_name",
+ /* args_key */ "parameters",
+ /* array_wrapped */ false, // Brackets are part of section markers
+ /* function_is_key */ false,
+ /* call_id_key */ "",
+ /* gen_call_id_key */ "tool_call_id",
+ /* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
+ );
+ return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end();
+ });
+
+ std::string input =
+ "Let me search for that."
+ "<|START_ACTION|>["
+ R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})"
+ "]<|END_ACTION|>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ if (!t.assert_true("parse success", result.success())) {
+ return;
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+ t.assert_equal("tool id", "0", msg.tool_calls[0].id);
+ }
+ t.assert_true("content present", msg.content.find("Let me search") != std::string::npos);
+}
+
+// Test 3: Function-as-Key Format
+// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
+static void test_standard_json_tools_function_key(testing & t) {
+ json tools = build_test_tools();
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ auto tool_call = p.standard_json_tools(
+ "<tool_calls>[", "]</tool_calls>", tools,
+ /* parallel */ true,
+ /* force */ false,
+ /* name_key */ "", // Name is the key itself
+ /* args_key */ "args",
+ /* array_wrapped */ false,
+ /* function_is_key */ true,
+ /* call_id_key */ "id",
+ /* gen_call_id_key */ "",
+ /* parameters_order */ {}
+ );
+ return p.content(p.until("<tool_calls>")) + p.optional(tool_call) + p.end();
+ });
+
+ std::string input =
+ "I'll call the weather function."
+ "<tool_calls>["
+ R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
+ "]</tool_calls>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ if (!t.assert_true("parse success", result.success())) {
+ return;
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+ t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id);
+ }
+ t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos);
+}
+
+// ============================================================================
+// normalize_quotes_to_json Tests
+// ============================================================================
+
+// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
+static std::string normalize_quotes_to_json(const std::string & input) {
+ std::string result;
+ result.reserve(input.size() + 16);
+
+ bool in_single_quoted = false;
+ bool in_double_quoted = false;
+
+ for (size_t i = 0; i < input.size(); ++i) {
+ char c = input[i];
+
+ if (c == '\\' && i + 1 < input.size()) {
+ char next = input[i + 1];
+
+ if (in_single_quoted) {
+ if (next == '\'') {
+ result += '\'';
+ ++i;
+ continue;
+ }
+ if (next == '"') {
+ result += "\\\"";
+ ++i;
+ continue;
+ }
+ result += c;
+ result += next;
+ ++i;
+ continue;
+ }
+
+ if (in_double_quoted) {
+ result += c;
+ result += next;
+ ++i;
+ continue;
+ }
+
+ result += c;
+ continue;
+ }
+
+ if (c == '"') {
+ if (in_single_quoted) {
+ result += "\\\"";
+ } else {
+ in_double_quoted = !in_double_quoted;
+ result += c;
+ }
+ } else if (c == '\'') {
+ if (in_double_quoted) {
+ result += c;
+ } else if (in_single_quoted) {
+ in_single_quoted = false;
+ result += '"';
+ } else {
+ in_single_quoted = true;
+ result += '"';
+ }
+ } else {
+ result += c;
+ }
+ }
+
+ return result;
+}
+
+static void test_normalize_quotes_to_json(testing & t) {
+ t.test("basic single to double quotes", [](testing & t) {
+ std::string input = "{'key': 'value'}";
+ std::string expected = "{\"key\": \"value\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("basic conversion", expected, result);
+ });
+
+ t.test("escaped single quote inside single-quoted string", [](testing & t) {
+ std::string input = "{'code': 'print(\\'hello\\')'}";
+ std::string expected = "{\"code\": \"print('hello')\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("escaped single quote", expected, result);
+ });
+
+ t.test("double quote inside single-quoted string", [](testing & t) {
+ std::string input = "{'msg': 'He said \"hi\"'}";
+ std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("double quote escaping", expected, result);
+ });
+
+ t.test("nested backslash escapes", [](testing & t) {
+ std::string input = "{'path': 'C:\\\\Users\\\\test'}";
+ std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("backslash escaping", expected, result);
+ });
+
+ t.test("newline escapes", [](testing & t) {
+ std::string input = "{'text': 'line1\\nline2'}";
+ std::string expected = "{\"text\": \"line1\\nline2\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("newline escaping", expected, result);
+ });
+
+ t.test("mixed quotes", [](testing & t) {
+ std::string input = "{\"already_double\": 'single_value'}";
+ std::string expected = "{\"already_double\": \"single_value\"}";
+ std::string result = normalize_quotes_to_json(input);
+ t.assert_equal("mixed quotes", expected, result);
+ });
+
+ t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
+}
+
+// Test case that mirrors the Seed-OSS failing test scenario
+static void test_normalize_quotes_with_embedded_quotes(testing & t) {
+ // This is similar to the Seed-OSS template test case
+ // The input has embedded double quotes like "14" and "bar" inside string values
+ std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n pass\\n'}";
+
+ // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped
+ std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}";
+
+ std::string result = normalize_quotes_to_json(input);
+
+ t.assert_equal("normalize quotes with embedded double quotes", expected, result);
+
+ // Also verify the result is valid JSON
+ try {
+ json parsed = json::parse(result);
+ t.assert_true("result is valid JSON", true);
+ t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
+ t.assert_true("oldString contains embedded quotes",
+ parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
+ t.assert_true("newString contains embedded quotes",
+ parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
+ } catch (const std::exception & e) {
+ t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
+ }
+}
+
+// ============================================================================
+// TAG_WITH_TAGGED Argument Parsing Tests
+// ============================================================================
+
+// Build tools definition for edit function
+static json build_edit_tool() {
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["filename"] = json::object({
+ {"type", "string"},
+ {"description", "Path of file to edit"}
+ });
+ parameters_schema["properties"]["oldString"] = json::object({
+ {"type", "string"},
+ {"description", "String to replace"}
+ });
+ parameters_schema["properties"]["newString"] = json::object({
+ {"type", "string"},
+ {"description", "New (replacement) value"}
+ });
+ parameters_schema["required"] = json::array({"filename", "oldString", "newString"});
+
+ return json::array({
+ json{
+ {"type", "function"},
+ {"function", json{
+ {"name", "edit"},
+ {"description", "Edit a file"},
+ {"parameters", parameters_schema}
+ }}
+ }
+ });
+}
+
+// Test that reproduces the Seed-OSS template issue with embedded quotes
+static void test_tagged_args_with_embedded_quotes(testing & t) {
+ json tools = build_edit_tool();
+
+ // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ // Build tool choice for the edit function
+ auto tool_choice = p.choice();
+
+ for (const auto & tool_def : tools) {
+ if (!tool_def.contains("function")) { continue; }
+ const auto & function = tool_def.at("function");
+ std::string name = function.at("name");
+ const auto & params = function.at("parameters");
+
+ if (!params.contains("properties") || !params.at("properties").is_object()) { continue; }
+
+ const auto & properties = params.at("properties");
+
+ // Build argument parsers
+ std::vector<common_peg_parser> arg_parsers;
+ for (const auto & [param_name, param_schema] : properties.items()) {
+ auto arg = p.tool_arg(
+ p.tool_arg_open(p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">")) +
+ p.space() +
+ p.tool_arg_string_value(p.until("</parameter>")) +
+ p.space() +
+ p.tool_arg_close(p.literal("</parameter>"))
+ );
+ arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
+ }
+
+ // Build arg sequence with space() between
+ common_peg_parser args_seq = p.eps();
+ for (size_t i = 0; i < arg_parsers.size(); i++) {
+ if (i > 0) {
+ args_seq = args_seq + p.space();
+ }
+ args_seq = args_seq + arg_parsers[i];
+ }
+
+ auto func_parser =
+ p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
+ p.space() + args_seq + p.space() +
+ p.tool_close(p.literal("</function>"));
+
+ tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
+ }
+
+ auto tool_section =
+ p.literal("<seed:tool_call>") + p.space() +
+ tool_choice +
+ p.space() + p.literal("</seed:tool_call>");
+
+ return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
+ });
+
+ // The exact input from the failing test
+ std::string input =
+ "<seed:tool_call>\n"
+ "<function=edit>\n"
+ "<parameter=filename>\n"
+ "foo.cpp\n"
+ "</parameter>\n"
+ "<parameter=oldString>"
+ "def foo(arg = \"14\"):\n"
+ " return arg + \"bar\"\n"
+ "\n"
+ "</parameter>\n"
+ "<parameter=newString>"
+ "def foo(arg = \"15\"):\n"
+ " pass\n"
+ "\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ if (!t.assert_true("parse success", result.success())) {
+ return;
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "edit", msg.tool_calls[0].name);
+
+ // Parse the arguments as JSON to verify they're valid
+ std::string args = msg.tool_calls[0].arguments;
+
+ try {
+ json parsed = json::parse(args);
+ t.assert_true("arguments is valid JSON", true);
+
+ // Verify each field has proper value
+ t.assert_equal("filename", "foo.cpp", parsed.value("filename", ""));
+
+ std::string oldString = parsed.value("oldString", "");
+ t.assert_true("oldString contains embedded quotes",
+ oldString.find("\"14\"") != std::string::npos);
+ t.assert_true("oldString contains bar with quotes",
+ oldString.find("\"bar\"") != std::string::npos);
+
+ std::string newString = parsed.value("newString", "");
+ t.assert_true("newString contains embedded quotes",
+ newString.find("\"15\"") != std::string::npos);
+
+ } catch (const std::exception & e) {
+ t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
+ }
+ }
+}
+
+++ /dev/null
-// Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
-//
-// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
-// e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
-//
-// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
-//
-#include <exception>
-#include <iostream>
-#include <string>
-
-#include "chat-parser.h"
-#include "common.h"
-#include "log.h"
-#include "regex-partial.h"
-
-template <class T>
-static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
- if (expected != actual) {
- std::cerr << label << std::endl;
- std::cerr << "Expected: " << expected << std::endl;
- std::cerr << "Actual: " << actual << std::endl;
- std::cerr << std::flush;
- throw std::runtime_error("Test failed");
- }
-}
-
-template <class T>
-static void assert_equals(const T & expected, const T & actual) {
- assert_equals("", expected, actual);
-}
-static void assert_equals(const char * expected, const std::string & actual) {
- return assert_equals<std::string>(expected, actual);
-}
-
-static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
- try {
- fn();
- } catch (const std::exception & e) {
- if (expected_exception_pattern.empty()) {
- return;
- }
- std::regex expected_exception_regex(expected_exception_pattern);
- std::string actual_message = e.what();
- if (std::regex_search(actual_message, expected_exception_regex)) {
- return;
- }
- throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
- throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
- }
- throw std::runtime_error("Exception was expected but not thrown");
-}
-
-static void test_reasoning() {
- //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
- assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
- assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
- }
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
- assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
- assert_equals(std::string("Cogito"), builder.result().reasoning_content);
- assert_equals("Ergo sum", builder.consume_rest());
- }
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
- assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
- assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
- }
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
- assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
- assert_equals(std::string("Cogito"), builder.result().reasoning_content);
- assert_equals("Ergo sum", builder.consume_rest());
- }
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = true;
- params.thinking_forced_open = true;
- common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
- assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
- assert_equals("<think>Cogito</think>", builder.result().content);
- assert_equals("Ergo sum", builder.consume_rest());
- }
- {
- const std::string variant("content_only_inline_think");
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- params.parse_tool_calls = false;
- const std::string input = "<think>Pense</think>Bonjour";
- auto msg = common_chat_parse(input, false, params);
- assert_equals(variant, std::string("Pense"), msg.reasoning_content);
- assert_equals(variant, std::string("Bonjour"), msg.content);
- }
- {
- const std::string variant("llama_3_inline_think");
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- params.parse_tool_calls = false;
- const std::string input = "<think>Plan</think>Réponse";
- auto msg = common_chat_parse(input, false, params);
- assert_equals(variant, std::string("Plan"), msg.reasoning_content);
- assert_equals(variant, std::string("Réponse"), msg.content);
- }
- // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
- common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
- assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
- assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
- assert_equals(variant, std::string("ok"), builder.consume_rest());
- }
- // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("deepseek_v3_1_reasoning_format_none");
- const std::string input = "REASONING</think>ok";
- auto msg = common_chat_parse(input, false, params);
- assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
- assert_equals(variant, std::string(""), msg.reasoning_content);
- }
-}
-
-static void test_regex() {
- auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
- common_chat_msg_parser builder(input, /* is_partial= */ false, {});
- assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
- };
-
- test_throws("Hello, world!", "abc", "^abc$");
- test_throws("Hello, world!", "e", "^e$");
-
- {
- common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
- builder.consume_regex(common_regex("Hello"));
- assert_equals(", world!", builder.consume_rest());
- }
-
- {
- // When in non partial mode, we can say whether the regex was consumed or not.
- common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
- assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
- }
- {
- common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
- auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
- assert_equals(true, res.has_value());
- // Verify captures
- assert_equals<size_t>(2, res->groups.size());
- assert_equals("Hell", builder.str(res->groups[0]));
- assert_equals("el", builder.str(res->groups[1]));
- // Verify position is after the match
- assert_equals<size_t>(4, builder.pos());
- assert_equals("o,", builder.consume_rest());
- }
- {
- // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
- common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
- assert_throws([&]() {
- builder.try_consume_regex(common_regex("Hello, world!"));
- }, "^Hello, world!$");
- }
-
- // Now regardless of the mode, we can tell these aren't a match.
- for (const auto is_partial : {false, true}) {
- common_chat_msg_parser builder("Hello,", is_partial, {});
- assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
- }
- for (const auto is_partial : {false, true}) {
- common_chat_msg_parser builder("Hello,", is_partial, {});
- assert_equals(false, builder.try_consume_literal("Oh"));
- }
-}
-
-const std::vector<std::string> barely_healable_jsons = {
- "{",
- "{\"",
- "{\"\\",
- "{\"n",
- "{\"name\"",
- "{\"name\":",
- "{\"name\":\"",
- "{\"name\":\"\\",
- "{\"name\":\"python",
- "{\"name\":\"python\\",
- "{\",",
- "{\":",
- "{\"[",
- "{\"]",
- "{\"{",
- "{\"}",
- "{\"1",
- "{\"name\":\",",
- "{\"name\":\":",
- "{\"name\":\"[",
- "{\"name\":\"]",
- "{\"name\":\"{",
- "{\"name\":\"}",
- "{\"name\":\"1",
-};
-
-static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
- common_chat_msg_parser builder(input, is_partial, {});
- auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
- assert_equals(true, js.has_value());
- assert_equals(is_partial, js->is_partial);
- assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
-}
-
-static void test_deepseek_v3_1_tool_calls() {
- //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
- // variant: happy path for when it works as the model card says it should
- const std::string variant("simple");
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- params.parse_tool_calls = true;
- const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto msg = common_chat_parse(input, false, params);
- assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
- assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
- // JSON arguments are dumped without spaces
- assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
- assert_equals(variant, std::string(""), msg.content);
- assert_equals(variant, std::string(""), msg.reasoning_content);
-
- // variant: simple + thinking open
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("simple_thinking");
- const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto m = common_chat_parse(in, false, params);
- assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
- assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
- assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
- assert_equals(variant, std::string(""), m.content);
- assert_equals(variant, std::string("REASONING"), m.reasoning_content);
- }
- // variant: simple + multiple tool calls
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- params.parse_tool_calls = true;
- const std::string variant("simple_multiple_tool_calls");
- const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto m = common_chat_parse(in, false, params);
- assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
- assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
- assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
- assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
- assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
- assert_equals(variant, std::string("CONTENT"), m.content);
- assert_equals(variant, std::string(""), m.reasoning_content);
- }
-
-
- // variant: thinking forced open + tool call in reasoning content
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("thinking_forced_open_tool_call_in_reasoning");
- const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto m = common_chat_parse(in, false, params);
- assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
- assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
- assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
- assert_equals(variant, std::string(""), m.content);
- assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
- }
-
- // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
- // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
- // to make tool calls in reasoning content according to the model card, but it does sometimes, so
- // add the reasoning content as regular content and parse the tool calls.
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
- const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto m = common_chat_parse(in, false, params);
- assert_equals(variant, std::string("REASONING"), m.content);
- assert_equals(variant, std::string(""), m.reasoning_content);
- assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
- assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
- assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
- }
-
- // variant: thinking forced open + tool call in reasoning content + no closing think + partial
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
- const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
- auto m = common_chat_parse(in, /* is_partial= */ true, params);
- assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
- assert_equals(variant, std::string(""), m.content);
- assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
- }
-
- // variant: thinking not forced open + reasoning + regular content + no tool calls
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = true;
- params.parse_tool_calls = true;
- const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
- const std::string in = "REASONING</think>CONTENT";
- auto m = common_chat_parse(in, false, params);
- assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
- assert_equals(variant, std::string("CONTENT"), m.content);
- assert_equals(variant, std::string("REASONING"), m.reasoning_content);
- }
- // variant: thinking not forced open + missing reasoning + no tool calls
- {
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
- params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- params.parse_tool_calls = true;
- const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
- const std::string in = "CONTENT";
- auto m = common_chat_parse(in, false, params);
- assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
- assert_equals(variant, std::string("CONTENT"), m.content);
- assert_equals(variant, std::string(""), m.reasoning_content);
- }
-}
-
-static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
- common_chat_msg_parser builder(input, parse_as_partial, {});
- auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
- assert_equals(true, js.has_value());
- assert_equals(is_partial, js->is_partial);
- assert_equals(expected, js->value.dump());
-}
-
-static void test_json_with_dumped_args_no_args() {
- // Normal JSON, nothing to heal, nothing to dump
- test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
- // Full json is args
- test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
-
- // If the arguments are further down, don't heal partial content.
- for (const auto & src : barely_healable_jsons) {
- test(src, true, {{"arguments"}}, {}, "{}");
- }
- // But heal content that isn't partial.
- test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
-}
-
-static void test_json_with_dumped_args() {
-
- // Partial content.
- test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
- test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
- test("{\"content\": ", true, {}, {{"content"}}, "{}");
-
- // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
- test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
- for (const auto & src : barely_healable_jsons) {
- test(src, true, {{}}, {}, src);
- }
-
- // Full JSON w/ args
- for (auto parse_as_partial : {true, false}) {
- test_with_args(
- R"({"name": "python", "args": {"arg1": 1}})",
- R"({"name":"python","args":"{\"arg1\":1}"})",
- parse_as_partial,
- /* is_partial= */ false
- );
- }
-
- // Partial JSON w/ partial args
- test_with_args(
- R"({"foo": "bar", "args": {")",
- R"({"foo":"bar","args":"{\""})"
- );
- // Partial args broken in object key
- test_with_args(
- R"({"foo": "bar", "args": {"ar)",
- R"({"foo":"bar","args":"{\"ar"})"
- );
- // Partial args broken after object key
- test_with_args(
- R"({"foo": "bar", "args": {"arg1")",
- R"({"foo":"bar","args":"{\"arg1\""})"
- );
- // Partial args broken before object value
- test_with_args(
- R"({"foo": "bar", "args": {"arg1":)",
- R"({"foo":"bar","args":"{\"arg1\":"})"
- );
- // Partial args broken before object value (space)
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": )",
- R"({"foo":"bar","args":"{\"arg1\":"})"
- );
- // Partial args broken in object value that may not be complete (int)
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": 1)",
- R"({"foo":"bar","args":"{\"arg1\":"})"
- );
- // Partial args broken in object value that is complete (int)
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": 1 )",
- R"({"foo":"bar","args":"{\"arg1\":1"})"
- );
- // Partial args broken in object value that is incomplete (string)
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": ")",
- R"({"foo":"bar","args":"{\"arg1\":\""})"
- );
- // Partial args broken in object value that is complete (string)
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "1")",
- R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
- );
- // Partial args broken on array opening
- test_with_args(
- R"({"foo": "bar", "args": [)",
- R"({"foo":"bar","args":"["})"
- );
- // Partial args broken on array value that is incomplete (int)
- test_with_args(
- R"({"foo": "bar", "args": [1)",
- R"({"foo":"bar","args":"["})"
- );
- // Partial args broken on array value that is complete (int)
- test_with_args(
- R"({"foo": "bar", "args": [1 )",
- R"({"foo":"bar","args":"[1"})"
- );
- // Partial args broken on array value that is complete (string)
- test_with_args(
- R"({"foo": "bar", "args": ["1")",
- R"({"foo":"bar","args":"[\"1\""})"
- );
- // Partial args broken after array value
- test_with_args(
- R"({"foo": "bar", "args": [1,)",
- R"({"foo":"bar","args":"[1,"})"
- );
- // Partial args broken on nested array
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": [)",
- R"({"foo":"bar","args":"{\"arg1\":["})"
- );
-
- // Unicode tests
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\u)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\u0)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\u00)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\u000)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\u0000)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud8)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud80)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
- );
- test_with_args(
- R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
- R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
- );
-}
-
-static void test_positions() {
- {
- common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
- assert_equals<size_t>(0, builder.pos());
- assert_throws([&]() { builder.move_to(100); });
- assert_equals<size_t>(0, builder.pos());
- assert_throws([&]() { builder.move_back(1); });
- assert_equals<size_t>(0, builder.pos());
-
- builder.move_to(8);
- assert_equals<size_t>(8, builder.pos());
- builder.move_back(1);
- assert_equals<size_t>(7, builder.pos());
- assert_equals("world!", builder.consume_rest());
-
- builder.move_to(0);
- assert_equals<size_t>(0, builder.pos());
-
- assert_throws([&]() { builder.finish(); });
- assert_equals<size_t>(0, builder.pos());
-
- builder.move_to(builder.input().size());
- builder.finish();
- }
- {
- common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
-
- builder.move_to(builder.input().size());
- assert_equals<size_t>(builder.input().size(), builder.pos());
- builder.finish();
- }
-}
-
-int main() {
- test_positions();
- test_json_with_dumped_args_no_args();
- test_json_with_dumped_args();
- test_reasoning();
- test_regex();
- test_deepseek_v3_1_tool_calls();
- std::cout << "All tests passed!\n";
- return 0;
-}
-#include <string>
-#include <iostream>
-#include <numeric>
-
-#include "chat-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "common.h"
#include "peg-parser.h"
#include "testing.h"
#include "peg-parser/simple-tokenize.h"
+
+#include <iostream>
+#include <numeric>
+#include <string>
+
#include "nlohmann/json.hpp"
using json = nlohmann::ordered_json;
static json create_tools();
static void test_example_native(testing & t);
static void test_example_qwen3_coder(testing & t);
+static void test_example_qwen3_non_coder(testing & t);
static void test_command7_parser_compare(testing & t);
+static void test_prefix_tool_names(testing & t);
+static void test_tagged_peg_parser(testing & t);
-int main(int argc, char *argv[]) {
+int main(int argc, char * argv[]) {
testing t(std::cout);
if (argc >= 2) {
t.set_filter(argv[1]);
t.test("native", test_example_native);
t.test("qwen3 coder", test_example_qwen3_coder);
+ t.test("qwen3 non-coder", test_example_qwen3_non_coder);
t.test("comparison", test_command7_parser_compare);
+ t.test("prefix tool names", test_prefix_tool_names);
+ t.test("tagged peg parser", test_tagged_peg_parser);
return t.summary();
}
json tools = json::array();
json tool_weather = {
- {"type", "function"},
- {"function", {
- {"name", "get_current_weather"},
- {"description", "Get the current weather in a given location"},
- {"parameters", {
- {"type", "object"},
- {"properties", {
- {"location", {
- {"type", "string"},
- {"description", "The city and state, e.g. San Francisco, CA"}
- }},
- {"unit", {
- {"type", "string"},
- {"enum", {"celsius", "fahrenheit"}},
- {"description", "The temperature unit to use. Infer this from the users location."}
- }}
- }},
- {"required", {"location", "unit"}},
- }},
- }}
+ { "type", "function" },
+ { "function",
+ {
+ { "name", "get_current_weather" },
+ { "description", "Get the current weather in a given location" },
+ { "parameters",
+ {
+ { "type", "object" },
+ { "properties",
+ { { "location",
+ { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+ { "unit",
+ { { "type", "string" },
+ { "enum", { "celsius", "fahrenheit" } },
+ { "description",
+ "The temperature unit to use. Infer this from the users location." } } } } },
+ { "required", { "location", "unit" } },
+ } },
+ } }
};
tools.push_back(tool_weather);
json tool_forecast = {
- {"type", "function"},
- {"function", {
- {"name", "get_forecast"},
- {"description", "Get the weather forecast for a given location"},
- {"parameters", {
- {"type", "object"},
- {"properties", {
- {"location", {
- {"type", "string"},
- {"description", "The city and state, e.g. San Francisco, CA"}
- }},
- {"unit", {
- {"type", "string"},
- {"enum", {"celsius", "fahrenheit"}},
- {"description", "The temperature unit to use. Infer this from the users location."}
- }},
- {"days", {
- {"type", "integer"},
- {"description", "Number of days to forecast (1-10)"},
- {"minimum", 1},
- {"maximum", 10}
- }}
- }},
- {"required", {"location", "unit"}},
- }},
- }}
+ { "type", "function" },
+ { "function",
+ {
+ { "name", "get_forecast" },
+ { "description", "Get the weather forecast for a given location" },
+ { "parameters",
+ {
+ { "type", "object" },
+ { "properties",
+ { { "location",
+ { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+ { "unit",
+ { { "type", "string" },
+ { "enum", { "celsius", "fahrenheit" } },
+ { "description", "The temperature unit to use. Infer this from the users location." } } },
+ { "days",
+ { { "type", "integer" },
+ { "description", "Number of days to forecast (1-10)" },
+ { "minimum", 1 },
+ { "maximum", 10 } } } } },
+ { "required", { "location", "unit" } },
+ } },
+ } }
};
tools.push_back(tool_forecast);
json tool_search = {
- {"type", "function"},
- {"function", {
- {"name", "search_knowledge_base"},
- {"description", "Search the internal technical documentation knowledge base."},
- {"parameters", {
- {"type", "object"},
- {"properties", {
- {"query", {
- {"type", "string"},
- {"description", "The search query string."}
- }},
- {"max_results", {
- {"type", "integer"},
- {"description", "The maximum number of results to return."},
- {"default", 5}
- }},
- {"category", {
- {"type", "string"},
- {"enum", {"api", "troubleshooting", "billing", "general"}},
- {"description", "Filter search by specific category."}
- }}
- }},
- {"required", {"query", "category"}},
- {"additionalProperties", false}
- }},
- {"strict", true}
- }}
+ { "type", "function" },
+ { "function",
+ { { "name", "search_knowledge_base" },
+ { "description", "Search the internal technical documentation knowledge base." },
+ { "parameters",
+ { { "type", "object" },
+ { "properties",
+ { { "query", { { "type", "string" }, { "description", "The search query string." } } },
+ { "max_results",
+ { { "type", "integer" },
+ { "description", "The maximum number of results to return." },
+ { "default", 5 } } },
+ { "category",
+ { { "type", "string" },
+ { "enum", { "api", "troubleshooting", "billing", "general" } },
+ { "description", "Filter search by specific category." } } } } },
+ { "required", { "query", "category" } },
+ { "additionalProperties", false } } },
+ { "strict", true } } }
};
tools.push_back(tool_search);
struct tool_argument {
std::string name;
std::string type;
- bool is_required;
- json schema;
+ bool is_required;
+ json schema;
};
struct tool_definition {
- std::string name;
+ std::string name;
std::vector<tool_argument> arguments;
- json schema;
+ json schema;
};
// Test fictitious model output that emits arguments as JSON.
static void test_example_native(testing & t) {
struct test_case {
// Parameters
- std::string name;
- json tools;
+ std::string name;
+ json tools;
common_chat_tool_choice tool_choice;
common_reasoning_format reasoning_format;
- json json_schema;
- bool parallel_tool_calls;
- bool thinking_forced_open;
- std::string input;
+ json json_schema;
+ bool parallel_tool_calls;
+ bool thinking_forced_open;
+ std::string input;
// Expect
- std::string expect_reasoning;
- std::string expect_content;
+ std::string expect_reasoning;
+ std::string expect_content;
std::vector<common_chat_tool_call> expect_tool_calls;
};
auto build_parser = [](const test_case & tc) {
- return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+ return build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
- auto reasoning = p.eps();
+ auto reasoning = p.eps();
if (tc.thinking_forced_open) {
// If thinking is forced open, expect a closing tag
reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
// tool calling parser
if (tc.tools.is_array() && !tc.tools.empty()) {
- auto tools = p.choice();
- for (const auto & tool : tc.tools) {
- const auto & function = tool.at("function");
- std::string name = function.at("name");
- const auto & schema = function.at("parameters");
-
- auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
- auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
+ auto tool_call =
+ p.standard_json_tools("<tool_call>[", "]</tool_call>", tc.tools, tc.parallel_tool_calls,
+ tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
- tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
- };
-
- auto parallel_calls = p.eps();
- if (tc.parallel_tool_calls) {
- parallel_calls = p.zero_or_more("," << tools);
- }
-
- auto tool_call = p.trigger_rule("tool-call",
- p.sequence({
- p.literal("<tool_call>["),
- tools,
- parallel_calls,
- p.literal("]</tool_call>")
- })
- );
-
- return p.sequence({
- (reasoning_in_content ? p.eps() : reasoning),
- p.content(p.until("<tool_call>")),
- p.optional(p.space() + tool_call),
- p.space(),
- p.end()
- });
+ return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("<tool_call>")),
+ p.optional(p.space() + tool_call), p.space(), p.end() });
}
// response_format parser
if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
- return p.sequence({
- (reasoning_in_content ? p.eps() : reasoning),
- p.content(p.schema(p.json(), "response-output", tc.json_schema)),
- p.space(),
- p.end()
- });
+ return p.sequence({ (reasoning_in_content ? p.eps() : reasoning),
+ p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(),
+ p.end() });
}
// Content-only parser
- return p.sequence({
- (reasoning_in_content ? p.eps() : reasoning),
- p.content(p.rest()),
- p.end()
- });
+ return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() });
});
};
std::vector<test_case> test_cases = std::vector<test_case>{
{
- /* .name = */ "content with thinking_forced_open = false",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ false,
- /* .input = */ (
- "<think>The user said hello, I must say hello back</think>\nHello"
- ),
- /* .expect_reasoning = */ "The user said hello, I must say hello back",
- /* .expect_content = */ "Hello",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "content with thinking_forced_open = false",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ false,
+ /* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+ /* .expect_reasoning = */ "The user said hello, I must say hello back",
+ /* .expect_content = */ "Hello",
+ /* .expect_tool_calls = */ {},
+ },
{
- /* .name = */ "content with thinking_forced_open = false and no reasoning",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ false,
- /* .input = */ (
- "Hello"
- ),
- /* .expect_reasoning = */ "",
- /* .expect_content = */ "Hello",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "content with thinking_forced_open = false and no reasoning",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ false,
+ /* .input = */ ("Hello"),
+ /* .expect_reasoning = */ "",
+ /* .expect_content = */ "Hello",
+ /* .expect_tool_calls = */ {},
+ },
{
- /* .name = */ "content with thinking_forced_open = false and reasoning_format = none",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "<think>The user said hello, I must say hello back</think>\nHello"
- ),
- /* .expect_reasoning = */ "",
- /* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "content with thinking_forced_open = false and reasoning_format = none",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .input = */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+ /* .expect_reasoning = */ "",
+ /* .expect_content = */ "<think>The user said hello, I must say hello back</think>\nHello",
+ /* .expect_tool_calls = */ {},
+ },
{
- /* .name = */ "content with thinking_forced_open = true",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "The user said hello, I must say hello back</think>\nHello"
- ),
- /* .expect_reasoning = */ "The user said hello, I must say hello back",
- /* .expect_content = */ "Hello",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "content with thinking_forced_open = true",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
+ /* .expect_reasoning = */ "The user said hello, I must say hello back",
+ /* .expect_content = */ "Hello",
+ /* .expect_tool_calls = */ {},
+ },
{
- /* .name = */ "content with thinking_forced_open = true and reasoning_format = none",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "The user said hello, I must say hello back</think>\nHello"
- ),
- /* .expect_reasoning = */ "",
- /* .expect_content = */ "The user said hello, I must say hello back</think>\nHello",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "content with thinking_forced_open = true and reasoning_format = none",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .input = */ ("The user said hello, I must say hello back</think>\nHello"),
+ /* .expect_reasoning = */ "",
+ /* .expect_content = */ "The user said hello, I must say hello back</think>\nHello",
+ /* .expect_tool_calls = */ {},
+ },
{
- /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls",
- /* .tools = */ create_tools(),
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "I must get the weather in New York</think>\n"
- "<tool_call>["
- R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
- "]</tool_call>"
- ),
- /* .expect_reasoning = */ "I must get the weather in New York",
- /* .expect_content = */ "",
- /* .expect_tool_calls = */ {{
+ /* .name = */ "tools with tool_choice = auto and no parallel_tool_calls",
+ /* .tools = */ create_tools(),
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .input = */
+ ("I must get the weather in New York</think>\n"
+ "<tool_call>["
+ R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+ "]</tool_call>"),
+ /* .expect_reasoning = */ "I must get the weather in New York",
+ /* .expect_content = */ "",
+ /* .expect_tool_calls = */
+ { {
/* .name = */ "get_current_weather",
/* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
/* .id = */ "",
- }},
- },
+ } },
+ },
{
- /* .name = */ "tools with tool_choice = auto and parallel_tool_calls",
- /* .tools = */ create_tools(),
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {},
- /* .parallel_tool_calls = */ true,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
- "<tool_call>["
- R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
- ", "
- R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
- ", "
- R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
- ", "
- R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
- "]</tool_call>"
- ),
- /* .expect_reasoning = */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
- /* .expect_content = */ "Let me search that for you.",
- /* .expect_tool_calls = */ {{
- /* .name = */ "get_current_weather",
- /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
- /* .id = */ "",
- }, {
- /* .name = */ "get_current_weather",
- /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
- /* .id = */ "",
- }, {
- /* .name = */ "get_forecast",
- /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
- /* .id = */ "",
- }, {
- /* .name = */ "get_forecast",
- /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
- /* .id = */ "",
- }},
- },
+ /* .name = */ "tools with tool_choice = auto and parallel_tool_calls",
+ /* .tools = */ create_tools(),
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */ {},
+ /* .parallel_tool_calls = */ true,
+ /* .thinking_forced_open = */ true,
+ /* .input = */
+ ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
+ "search that for you."
+ "<tool_call>["
+ R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+ ", "
+ R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
+ ", "
+ R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
+ ", "
+ R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
+ "]</tool_call>"),
+ /* .expect_reasoning = */
+ "I must get the weather in New York and San Francisco and a 3 day forecast of each.", /* .expect_content = */ "Let me search that for you.",
+ /* .expect_tool_calls = */
+ { {
+ /* .name = */ "get_current_weather",
+ /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
+ /* .id = */ "",
+ },
+ {
+ /* .name = */ "get_current_weather",
+ /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
+ /* .id = */ "",
+ },
+ {
+ /* .name = */ "get_forecast",
+ /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
+ /* .id = */ "",
+ },
+ {
+ /* .name = */ "get_forecast",
+ /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
+ /* .id = */ "",
+ } },
+ },
{
- /* .name = */ "response_format with thinking_forced_open = true",
- /* .tools = */ {},
- /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .json_schema = */ {
- {"type", "object"},
- {"properties", {
- {"invoice_number", {{"type", "string"}}},
- {"amount", {{"type", "number"}}},
- {"due_date", {{"type", "string"}}}
- }},
- {"required", {"invoice_number", "amount", "due_date"}}
- },
- /* .parallel_tool_calls = */ false,
- /* .thinking_forced_open = */ true,
- /* .input = */ (
- "I must produce the invoice in the requested format</think>\n"
- R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
- ),
- /* .expect_reasoning = */ "I must produce the invoice in the requested format",
- /* .expect_content = */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
- /* .expect_tool_calls = */ {},
- },
+ /* .name = */ "response_format with thinking_forced_open = true",
+ /* .tools = */ {},
+ /* .tool_choice = */ COMMON_CHAT_TOOL_CHOICE_NONE,
+ /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
+ /* .json_schema = */
+ { { "type", "object" },
+ { "properties",
+ { { "invoice_number", { { "type", "string" } } },
+ { "amount", { { "type", "number" } } },
+ { "due_date", { { "type", "string" } } } } },
+ { "required", { "invoice_number", "amount", "due_date" } } },
+ /* .parallel_tool_calls = */ false,
+ /* .thinking_forced_open = */ true,
+ /* .input = */
+ ("I must produce the invoice in the requested format</think>\n"
+ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
+ /* .expect_reasoning = */ "I must produce the invoice in the requested format",
+ /* .expect_content = */
+ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls = */ {},
+ },
};
for (const auto & tc : test_cases) {
t.test(tc.name, [&](testing & t) {
- auto parser = build_parser(tc);
- auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+ auto parser = build_parser(tc);
+ auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
auto grammar = build_grammar([&](const common_grammar_builder & builder) {
- for (auto const & def : tc.tools) {
- auto function = def.at("function");
+ for (const auto & def : tc.tools) {
+ auto function = def.at("function");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
};
});
t.log("Grammar:");
- for (auto const & line : string_split(grammar, "\n")) {
+ for (const auto & line : string_split(grammar, "\n")) {
t.log(line);
}
common_peg_parse_context ctx(tc.input, false);
- auto result = parser.parse(ctx);
+ auto result = parser.parse(ctx);
t.assert_true("success", result.success());
common_chat_msg msg;
- auto mapper = common_chat_peg_native_mapper(msg);
+ auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);
t.assert_equal("content equal", tc.expect_content, msg.content);
}
static void test_example_qwen3_coder(testing & t) {
- auto tools = create_tools();
- auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+ auto tools = create_tools();
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto content = p.rule("content", p.content(p.until("<tool_call>")));
std::vector<common_peg_parser> tool_parsers;
- for (auto const & def : tools) {
- auto function = def.at("function");
- std::string name = function.at("name");
- auto parameters = function.at("parameters");
- auto properties = parameters.at("properties");
+ for (const auto & def : tools) {
+ auto function = def.at("function");
+ std::string name = function.at("name");
+ auto parameters = function.at("parameters");
+ auto properties = parameters.at("properties");
std::set<std::string> required_properties;
if (function.contains("required")) {
std::vector<common_peg_parser> arg_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required_properties.find(param_name) != required_properties.end();
- auto type = param_schema.value("type", "object");
-
- auto arg = p.tool_arg(p.sequence({
- p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
- (type == "string" ?
- p.tool_arg_string_value(
- p.schema(
- p.until_one_of({
- "</parameter>\n<parameter=",
- "</parameter>\n</function>"
- }),
- "tool-" + name + "-arg-" + param_name + "-schema",
- param_schema,
- true
- )
- ) : p.tool_arg_json_value(
- p.schema(
- p.json(),
- "tool-" + name + "-arg-" + param_name + "-schema",
- param_schema
- )
- )
- ),
- p.tool_arg_close(
- "</parameter>\n" +
- p.peek(p.literal("<parameter=") | p.literal("</function>"))
- )
- }));
-
- arg_parsers.push_back(is_required ?
- p.rule("tool-" + name + "-arg-" + param_name, arg) :
- p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+ auto type = param_schema.value("type", "object");
+
+ auto arg = p.tool_arg(
+ p.sequence({ p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
+ (type == "string" ?
+ p.tool_arg_string_value(p.schema(
+ p.until_one_of({ "</parameter>\n<parameter=", "</parameter>\n</function>" }),
+ "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+ p.tool_arg_json_value(p.schema(
+ p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))),
+ p.tool_arg_close("</parameter>\n" +
+ p.peek(p.literal("<parameter=") | p.literal("</function>"))) }));
+
+ arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) :
+ p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
}
- tool_parsers.push_back(p.rule("tool-" + name,
- p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
- << p.sequence(arg_parsers)
- << p.tool_close(p.literal("</function>"))
- ));
+ tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
+ << p.sequence(arg_parsers)
+ << p.tool_close(p.literal("</function>"))));
};
- auto tool_call = p.trigger_rule("tool-call",
- "<tool_call>"
- << p.choice(tool_parsers)
- << "</tool_call>"
- );
+ auto tool_call = p.trigger_rule("tool-call", "<tool_call>" << p.choice(tool_parsers) << "</tool_call>");
return content + p.zero_or_more(p.space() + tool_call) + p.end();
});
auto grammar = build_grammar([&](const common_grammar_builder & builder) {
- for (auto const & def : tools) {
- auto function = def.at("function");
+ for (const auto & def : tools) {
+ auto function = def.at("function");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
};
});
t.log("Grammar:");
- for (auto const & line : string_split(grammar, "\n")) {
+ for (const auto & line : string_split(grammar, "\n")) {
t.log(line);
}
- t.test("incremental parsing", [&](testing &t) {
+ t.test("incremental parsing", [&](testing & t) {
std::string input =
"Let me search the knowledge base for cat pictures."
"<tool_call>\n"
}
common_chat_msg msg;
- auto mapper = common_chat_peg_constructed_mapper(msg);
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ //t.log("Input: " + input);
+ t.log("===========================================");
+ t.log("Iteration " + std::to_string(in.size()));
+ t.log("Reasoning: " + msg.reasoning_content);
+ t.log("Content : " + msg.content);
+ for (const auto & tc : msg.tool_calls) {
+ t.log("Tool name: " + tc.name);
+ t.log("Tool args: " + tc.arguments);
+ }
+
+ try {
+ // This shouldn't emit any runtime errors
+ auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+ } catch (const std::exception & e) {
+ t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+ t.assert_true(std::string("failed with ") + e.what(), false);
+ }
+
+ prev = msg;
+ }
+ });
+}
+
+static void test_example_qwen3_non_coder(testing & t) {
+ auto tools = create_tools();
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ // tool calling parser using standard JSON format
+ auto tool_call = p.standard_json_tools("<tool_call>", "</tool_call>", tools, true, false);
+
+ return p.sequence({ p.content(p.until("<tool_call>")), p.optional(p.space() + tool_call), p.end() });
+ });
+
+ auto grammar = build_grammar([&](const common_grammar_builder & builder) {
+ for (const auto & def : tools) {
+ auto function = def.at("function");
+ auto parameters = function.at("parameters");
+ builder.resolve_refs(parameters);
+ };
+ parser.build_grammar(builder);
+ });
+
+ t.log("Grammar:");
+ for (const auto & line : string_split(grammar, "\n")) {
+ t.log(line);
+ }
+
+ t.test("tool call parsing", [&](testing & t) {
+ std::string input =
+ "I need to get the weather.\n"
+ "<tool_call>"
+ "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+ "\"fahrenheit\"}}"
+ "</tool_call>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ t.assert_true("success", result.success());
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("content", "I need to get the weather.\n", msg.content);
+ t.assert_equal("reasoning", "", msg.reasoning_content);
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+ t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}",
+ msg.tool_calls[0].arguments);
+ }
+ });
+
+ t.test("incremental parsing", [&](testing & t) {
+ std::string input =
+ "I need to get the weather.\n"
+ "<tool_call>"
+ "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+ "\"fahrenheit\"}}"
+ "</tool_call>";
+
+ std::vector<std::string> tokens = simple_tokenize(input);
+
+ common_chat_msg prev;
+ for (auto it = tokens.begin(); it != tokens.end(); it++) {
+ std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+ common_peg_parse_context ctx(in, it + 1 < tokens.end());
+
+ auto result = parser.parse(ctx);
+ if (!t.assert_equal("not fail", false, result.fail())) {
+ t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);
//t.log("Input: " + input);
try {
// This shouldn't emit any runtime errors
auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
- } catch(const std::exception & e) {
+ } catch (const std::exception & e) {
t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
t.assert_true(std::string("failed with ") + e.what(), false);
}
}
void test_command7_parser_compare(testing & t) {
- auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
- auto thinking = p.reasoning_block(
- "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
+ auto parser = build_chat_peg_parser([](common_chat_peg_builder & p) {
+ auto thinking =
+ p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
- auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
+ auto tool_call_name =
+ p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
- auto tool_call = p.rule("tool-call", p.tool(
- p.tool_open(p.literal("{"))
- << tool_call_fields
- << p.zero_or_more( p.literal(",") << tool_call_fields)
- << p.tool_close(p.literal("}"))
- ));
-
- auto tool_calls = p.rule("tool-calls",
- "<|START_ACTION|>"
- << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
- << "<|END_ACTION|>");
+ auto tool_call =
+ p.rule("tool-call", p.tool(p.tool_open(p.literal("{"))
+ << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields)
+ << p.tool_close(p.literal("}"))));
+
+ auto tool_calls = p.rule(
+ "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
+ << "<|END_ACTION|>");
return p.optional(thinking) << (tool_calls | response) + p.end();
});
- auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
+ auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial,
+ bool print_results) {
common_peg_parse_context ctx(input, is_partial);
- auto result = p.parse(ctx);
+ auto result = p.parse(ctx);
common_chat_msg msg;
- auto mapper = common_chat_peg_native_mapper(msg);
+ auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);
if (print_results) {
}
};
- auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
- // Original common_chat_combinator_parser taken from chat.cpp
- common_chat_parser_params params;
- params.format = COMMON_CHAT_FORMAT_GENERIC;
- params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- params.reasoning_in_content = false;
- params.thinking_forced_open = false;
- common_chat_msg_parser builder(
- input,
- /* .is_partial = */ need_more_input,
- params
- );
-
- builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
- static const common_regex start_action_regex("<\\|START_ACTION\\|>");
- static const common_regex end_action_regex("<\\|END_ACTION\\|>");
- static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
- static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
- if (auto res = builder.try_find_regex(start_action_regex)) {
- // If we didn't extract thoughts, prelude includes them.
- auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
- for (const auto & tool_call : tool_calls.value) {
- std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
- std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
- std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
- if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- }
- if (tool_calls.is_partial) {
- throw common_chat_msg_partial_exception("incomplete tool call");
- }
- builder.consume_regex(end_action_regex);
- } else if (auto res = builder.try_find_regex(start_response_regex)) {
- if (!builder.try_find_regex(end_response_regex)) {
- builder.add_content(builder.consume_rest());
- throw common_chat_msg_partial_exception(end_response_regex.str());
- }
- } else {
- builder.add_content(builder.consume_rest());
- }
-
- if (print_results) {
- std::cout << "== Parsed (legacy) ==\n";
- std::cout << "=== Reasoning ===\n";
- std::cout << builder.result().reasoning_content << "\n";
- std::cout << "\n\n=== Content ===\n";
- std::cout << builder.result().content << "\n";
- std::cout << "\n\n=== Tool Calls ===\n";
- for (const auto & tc : builder.result().tool_calls) {
- std::cout << "id: " << tc.id << "\n";
- std::cout << "name: " << tc.name << "\n";
- std::cout << "args: " << tc.arguments << "\n";
- }
- }
- };
-
- std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
- "budget of $4000 for a two-week stay, we need to:\n\n"
- "1. Identify key historical sites and modern attractions in Japan.\n"
- "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
- "3. Determine the best modes of transportation for getting around Japan.\n"
- "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
- "overspending.\n"
- "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
- "to attractions.";
-
- std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
- "call_0",
- "plan_trip",
- nlohmann::json::parse(R"({
+ std::string reasoning =
+ "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
+ "budget of $4000 for a two-week stay, we need to:\n\n"
+ "1. Identify key historical sites and modern attractions in Japan.\n"
+ "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
+ "3. Determine the best modes of transportation for getting around Japan.\n"
+ "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
+ "overspending.\n"
+ "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
+ "to attractions.";
+
+ std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {
+ { "call_0", "plan_trip", nlohmann::json::parse(R"({
"destination": "Japan",
"duration": 14,
"budget": 4000,
"accommodation_preferences": "affordable",
"transportation_preferences": "efficient",
"meal_preferences": "local cuisine"
- })")
- }};
+ })") }
+ };
std::vector<std::string> tokens;
auto json = nlohmann::json::array();
for (const auto & tc : tool_calls) {
- auto tc_json = nlohmann::json::object();
+ auto tc_json = nlohmann::json::object();
tc_json["tool_call_id"] = std::get<0>(tc);
- tc_json["tool_name"] = std::get<1>(tc);
- tc_json["parameters"] = std::get<2>(tc);
+ tc_json["tool_name"] = std::get<1>(tc);
+ tc_json["parameters"] = std::get<2>(tc);
json.push_back(tc_json);
}
std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
- // Run tests
- t.test("legacy_parse", [&](testing & /* t */) {
- test_legacy(input, false, false);
- });
+ t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); });
+ t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100);
+ t.bench(
+ "current_parse_benchmark incremental",
+ [&]() {
+ std::string in;
+ for (auto i = 0u; i < tokens.size(); i++) {
+ in += tokens[i];
+ test_current(parser, in, i + 1 < tokens.size(), false);
+ }
+ },
+ 20);
+}
+
+// Test that tool names that are proper prefixes of other tool names don't cause
+// premature matching during incremental parsing.
+// For example, "special_function" should not match when parsing "special_function_with_opt".
+static void test_prefix_tool_names(testing & t) {
+ // Create tools where one name is a proper prefix of another
+ json tools = json::array();
- t.test("current_parse", [&](testing & /* t */) {
- test_current(parser, input, false, false);
+ json tool_short = {
+ { "type", "function" },
+ { "function",
+ {
+ { "name", "special_function" },
+ { "description", "A special function" },
+ { "parameters",
+ {
+ { "type", "object" },
+ { "properties",
+ {
+ { "arg1", { { "type", "integer" } } },
+ } },
+ { "required", { "arg1" } },
+ } },
+ } }
+ };
+ tools.push_back(tool_short);
+
+ json tool_long = {
+ { "type", "function" },
+ { "function",
+ {
+ { "name", "special_function_with_opt" },
+ { "description", "A special function with optional params" },
+ { "parameters",
+ {
+ { "type", "object" },
+ { "properties",
+ {
+ { "arg1", { { "type", "integer" } } },
+ { "arg2", { { "type", "integer" } } },
+ } },
+ { "required", { "arg1" } },
+ } },
+ } }
+ };
+ tools.push_back(tool_long);
+
+ // Use standard_constructed_tools which had the prefix matching bug
+ std::map<std::string, std::string> markers = {
+ { "tool_call_start_marker", "<tool_call>" },
+ { "tool_call_end_marker", "</tool_call>" },
+ { "function_opener", "<function=" },
+ { "function_closer", "</function>" },
+ { "function_name_suffix", ">" },
+ { "parameter_key_prefix", "<param=" },
+ { "parameter_key_suffix", ">" },
+ { "parameter_closer", "</param>" },
+ };
+
+ auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+ auto content = p.rule("content", p.content(p.until("<tool_call>")));
+ auto tool_call = p.standard_constructed_tools(markers, tools, false, false);
+ return content + p.zero_or_more(p.space() + tool_call) + p.end();
});
- // Run benchmarks
- t.bench("legacy_parse_benchmark complete", [&]() {
- test_legacy(input, false, false);
+ // Test parsing the long tool name - this should NOT trigger the short tool name
+ t.test("parse long tool name", [&](testing & t) {
+ std::string input =
+ "Let me call the function."
+ "<tool_call>"
+ "<function=special_function_with_opt>"
+ "<param=arg1>42</param>"
+ "</function>"
+ "</tool_call>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ t.assert_true("success", result.success());
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("content", "Let me call the function.", msg.content);
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name);
+ }
});
- t.bench("legacy_parse_benchmark incremental", [&]() {
- std::string in;
- for (auto i = 0u; i < tokens.size(); i++) {
- in += tokens[i];
+ // Test incremental parsing - the key test case
+ // This ensures that when incrementally parsing "special_function_with_opt",
+ // we don't prematurely emit "special_function" as a tool call
+ t.test("incremental parse long tool name", [&](testing & t) {
+ std::string input =
+ "Let me call the function."
+ "<tool_call>"
+ "<function=special_function_with_opt>"
+ "<param=arg1>42</param>"
+ "</function>"
+ "</tool_call>";
+
+ std::vector<std::string> tokens = simple_tokenize(input);
+
+ common_chat_msg prev;
+ for (auto it = tokens.begin(); it != tokens.end(); it++) {
+ std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+ common_peg_parse_context ctx(in, it + 1 < tokens.end());
+ auto result = parser.parse(ctx);
+
+ if (!t.assert_equal("not fail", false, result.fail())) {
+ t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+ return;
+ }
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ // The critical check: during incremental parsing, we should never
+ // see "special_function" as the tool name when parsing "special_function_with_opt"
+ for (const auto & tc : msg.tool_calls) {
+ if (!t.assert_equal("tool name should not be short prefix", false,
+ tc.name == "special_function")) {
+ t.log("Premature tool name match at input: " + in);
+ return;
+ }
+ }
try {
- test_legacy(in, i + 1 < tokens.size(), false);
- } catch (common_chat_msg_partial_exception & /* e */) {
- // Do nothing, this is expected
+ auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+ } catch (const std::exception & e) {
+ t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+ t.assert_true(std::string("diff failed with ") + e.what(), false);
+ return;
}
+
+ prev = msg;
}
- }, 20);
- t.bench("current_parse_benchmark complete", [&]() {
- test_current(parser, input, false, false);
- }, 100);
+ // Final check: the complete parse should have the correct tool name
+ t.assert_equal("final tool calls count", 1u, prev.tool_calls.size());
+ if (!prev.tool_calls.empty()) {
+ t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name);
+ }
+ });
- t.bench("current_parse_benchmark incremental", [&]() {
- std::string in;
- for (auto i = 0u; i < tokens.size(); i++) {
- in += tokens[i];
- test_current(parser, in, i + 1 < tokens.size(), false);
+ // Test parsing the short tool name still works
+ t.test("parse short tool name", [&](testing & t) {
+ std::string input =
+ "Let me call the function."
+ "<tool_call>"
+ "<function=special_function>"
+ "<param=arg1>42</param>"
+ "</function>"
+ "</tool_call>";
+
+ common_peg_parse_context ctx(input, false);
+ auto result = parser.parse(ctx);
+
+ t.assert_true("success", result.success());
+
+ common_chat_msg msg;
+ auto mapper = common_chat_peg_mapper(msg);
+ mapper.from_ast(ctx.ast, result);
+
+ t.assert_equal("content", "Let me call the function.", msg.content);
+ t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+ if (!msg.tool_calls.empty()) {
+ t.assert_equal("tool name", "special_function", msg.tool_calls[0].name);
}
- }, 20);
+ });
+}
+
+static void test_tagged_peg_parser(testing & t) {
+ t.test("basic tag extraction", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.tag("greeting", p.until(" ")) + " " + p.tag("name", p.rest()) + p.end();
+ });
+
+ auto result = parser.parse_and_extract("Hello World");
+ t.assert_true("success", result.result.success());
+ t.assert_equal("greeting tag", "Hello", result.tags.at("greeting"));
+ t.assert_equal("name tag", "World", result.tags.at("name"));
+ });
+
+ t.test("duplicate tags overwrite", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.tag("item", p.until(",")) + "," + p.tag("item", p.rest()) + p.end();
+ });
+
+ auto result = parser.parse_and_extract("first,second");
+ t.assert_true("success", result.result.success());
+ t.assert_equal("item tag", "second", result.tags.at("item"));
+ });
+
+ t.test("no tags extracted", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.rest() + p.end();
+ });
+
+ auto result = parser.parse_and_extract("Hello");
+ t.assert_true("success", result.result.success());
+ t.assert_equal("empty tags", 0u, result.tags.size());
+ });
+
+ t.test("structured extraction", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ auto header = p.tag("header", p.until("\n"));
+ auto body = p.tag("body", p.rest());
+ return header + "\n" + body + p.end();
+ });
+
+ auto result = parser.parse_and_extract("Title\nBody content here");
+ t.assert_true("success", result.result.success());
+ t.assert_equal("header", "Title", result.tags.at("header"));
+ t.assert_equal("body", "Body content here", result.tags.at("body"));
+ });
+
+ t.test("partial parse", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.tag("prefix", p.until(":")) + ":" + p.tag("value", p.rest()) + p.end();
+ });
+
+ auto result = parser.parse_and_extract("key:val", true);
+ t.assert_true("not fail", !result.result.fail());
+ t.assert_equal("prefix tag", "key", result.tags.at("prefix"));
+ t.assert_equal("value tag", "val", result.tags.at("value"));
+ });
+
+ t.test("find in the middle", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+ });
+
+ std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call>{ \"fun_name\" : { \"arg\" : 1 }</tool_call>";
+ auto result = parser.parse_anywhere_and_extract(tpl);
+ t.assert_true("success", result.result.success());
+ });
+
+ t.test("fail find in the middle", [&](testing & t) {
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+ });
+
+ std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call><fun=fun_name><arg name=arg>1</arg></tool_call>";
+ auto result = parser.parse_anywhere_and_extract(tpl);
+ t.assert_true("failure", result.result.fail());
+ });
+
+ t.test("find function tag with name", [&](testing &t) {
+ std::string haystack = "\n<tool_call>\n<function=foofoo>\n<parameter=first>\nXXXX\n</parameter>\n<parameter=second>\nYYYY\n</parameter>\n</function>\n</tool_call>\n";
+ auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+ std::string needle = "foofoo";
+ return p.tag("fun_marker", p.choice({
+ p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", needle })) + p.literal(needle) +
+ p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+ p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", needle })) + p.literal(needle) +
+ p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+ });
+ auto result = parser.parse_anywhere_and_extract(haystack);
+ t.assert_true("success", result.result.success());
+ t.assert_equal("fun_pre should be '<function='", "<function=", result.tags["fun_pre"]);
+ t.assert_equal("fun_post should be '>'", ">", result.tags["fun_post"]);
+ });
}
#include <string>
+#include <utility>
#include <vector>
#include <sstream>
#include <regex>
using json = nlohmann::ordered_json;
-int main_automated_tests(void);
+static int main_automated_tests(void);
-void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
-void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
+static void run_multiple(const std::string& dir_path, bool stop_on_first_failure, const json& input, bool use_common = false);
+static void run_single(const std::string& contents, json input, bool use_common = false, const std::string & output_path = "");
-
-
-std::string HELP = R"(
+static std::string HELP = R"(
Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
Options:
-h, --help Show this help message and exit.
+ --with-tools Add a tool and a tool call to the default JSON input
--json <path> Path to the JSON input file.
--stop-on-first-fail Stop testing on the first failure (default: false).
--no-common Use direct Jinja engine instead of common chat templates (default: use common).
If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
)";
-std::string DEFAULT_JSON = R"({
+static std::string DEFAULT_JSON = R"({
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, how are you?"
+ },
+ {
+ "role": "assistant",
+ "content": "I am fine, thank you!"
+ }
+ ],
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "add_generation_prompt": true
+})";
+
+static std::string DEFAULT_JSON_WITH_TOOLS = R"({
"messages": [
{
"role": "user",
{
"role": "assistant",
"content": "I am fine, thank you!"
+ },
+ {
+ "role": "user",
+ "content": "Call a tool!"
+ },
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "id": "call00001",
+ "type": "function",
+ "function": {
+ "name": "test",
+ "arguments": { "arg": "hello" }
+ }
+ }
+ ]
+ }
+ ],
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "test",
+ "description": "Test",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "arg": {
+ "type": "string"
+ }
+ }
+ },
+ "required": ["arg"]
+ }
}
],
"bos_token": "<s>",
"add_generation_prompt": true
})";
+
int main(int argc, char ** argv) {
std::vector<std::string> args(argv, argv + argc);
std::string tmpl_path;
std::string json_path;
std::string output_path;
+ std::string & json_to_use = DEFAULT_JSON;
bool stop_on_first_fail = false;
bool use_common = true;
if (args[i] == "--help" || args[i] == "-h") {
std::cout << HELP << "\n";
return 0;
- } else if (args[i] == "--json" && i + 1 < args.size()) {
+ }
+ if (args[i] == "--json" && i + 1 < args.size()) {
json_path = args[i + 1];
i++;
+ } else if (args[i] == "--with-tools") {
+ json_to_use = DEFAULT_JSON_WITH_TOOLS;
} else if (args[i] == "--stop-on-first-fail") {
stop_on_first_fail = true;
} else if (args[i] == "--output" && i + 1 < args.size()) {
std::istreambuf_iterator<char>());
input_json = json::parse(content);
} else {
- input_json = json::parse(DEFAULT_JSON);
+ input_json = json::parse(json_to_use);
}
std::filesystem::path p(tmpl_path);
return 0;
}
-void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
+void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) {
std::vector<std::string> failed_tests;
// list all files in models/templates/ and run each
common_chat_templates_inputs inputs;
inputs.use_jinja = true;
inputs.messages = messages;
- inputs.tools = tools;
+ inputs.tools = std::move(tools);
inputs.add_generation_prompt = true;
auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
output = normalize_newlines(output);
jinja::runtime runtime(ctx);
const jinja::value results = runtime.execute(ast);
- auto parts = runtime.gather_string_parts(results);
+ auto parts = jinja::runtime::gather_string_parts(results);
std::cout << "\n=== RESULTS ===\n";
for (const auto & part : parts->as_string().parts) {
}
-void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
+void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) {
jinja::enable_debug(true);
jinja::value_string output_parts;
supported_tmpl.resize(res);
res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
std::cout << "Built-in chat templates:\n";
- for (auto tmpl : supported_tmpl) {
+ for (const auto *tmpl : supported_tmpl) {
std::cout << " " << tmpl << "\n";
}
}
std::vector<common_chat_msg> messages;
+ messages.reserve(conversation.size());
for (const auto & msg : conversation) {
messages.push_back(simple_msg(msg.role, msg.content));
}
}
}
- // TODO: llama_chat_format_single will be deprecated, remove these tests later
-
- // test llama_chat_format_single for system message
- std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
- std::vector<common_chat_msg> chat2;
- auto sys_msg = simple_msg("system", "You are a helpful assistant");
-
- auto fmt_sys = [&](std::string tmpl_str) {
- auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
- auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
- std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
- std::cout << "-------------------------\n";
- return output;
- };
- assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
- assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
- assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
- assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
- assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
- assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
- assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
- assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
- assert(fmt_sys("gemma") == ""); // for gemma, system message is merged with user message
- assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
- assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
-
-
- // test llama_chat_format_single for user message
- std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
- chat2.push_back(simple_msg("system", "You are a helpful assistant"));
- chat2.push_back(simple_msg("user", "Hello"));
- chat2.push_back(simple_msg("assistant", "I am assistant"));
- auto new_msg = simple_msg("user", "How are you");
-
- auto fmt_single = [&](const std::string & tmpl_str) {
- auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
- auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
- std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
- std::cout << "-------------------------\n";
- return output;
- };
- assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
- assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
- assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
- assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
- assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
- assert(fmt_single("llama2") == "[INST] How are you [/INST]");
- assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
- assert(fmt_single("gemma") == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
- assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
- // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
-
std::cout << "\nOK: All tests passed successfully.\n";
return 0;
//
// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
//
+#include "../src/llama-grammar.h"
+#include "../src/unicode.h"
+#include "chat-auto-parser.h"
#include "chat.h"
-
+#include "common.h"
+#include "ggml.h"
#include "log.h"
-#include "../src/unicode.h"
-#include "../src/llama-grammar.h"
-
-#include <nlohmann/json.hpp>
-
+#include <algorithm>
+#include <exception>
#include <fstream>
-#include <iostream>
#include <functional>
+#include <iostream>
+#include <nlohmann/json.hpp>
+#include <set>
+#include <stdexcept>
#include <string>
using json = nlohmann::ordered_json;
os << "}";
return os;
}
+
// operator<< for vector<common_chat_msg_diff>:
static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
os << "[\n";
os << "]";
return os;
}
+
static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
os << "{ role: " << msg.role << "; ";
os << "content: " << msg.content << "; ";
os << "reasoning_content: " << msg.reasoning_content << "; ";
os << "tool_calls: [\n";
for (const auto & tool_call : msg.tool_calls) {
- os << " { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
+ os << " { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id
+ << " },\n";
}
os << "]";
os << "}";
try {
tool_call.arguments = json::parse(tool_call.arguments).dump();
} catch (const std::exception &) {
- // Do nothing
}
}
return normalized;
}
-
-template <>
-bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
return normalize(expected) == normalize(actual);
}
template <class T> static void assert_equals(const T & expected, const T & actual) {
if (!equals(expected, actual)) {
- std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
- std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
- std::cerr << std::flush;
+ std::ostringstream oss_expected;
+ oss_expected << expected;
+ std::ostringstream oss_actual;
+ oss_actual << actual;
+ LOG_ERR("Expected: %s\n", oss_expected.str().c_str());
+ LOG_ERR("Actual: %s\n", oss_actual.str().c_str());
+ common_log_flush(common_log_main());
throw std::runtime_error("Test failed");
}
}
static std::string read_file(const std::string & path) {
- std::cerr << "# Reading: " << path << '\n' << std::flush;
std::ifstream fs(path, std::ios_base::binary);
if (!fs.is_open()) {
fs = std::ifstream("../" + path, std::ios_base::binary);
llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0));
}
+// Helper to format a code point as a readable string
+static std::string format_codepoint(uint32_t cp) {
+ if (cp >= 32 && cp < 127) {
+ return std::string("'") + static_cast<char>(cp) + "'";
+ } else if (cp == '\n') {
+ return "'\\n'";
+ } else if (cp == '\r') {
+ return "'\\r'";
+ } else if (cp == '\t') {
+ return "'\\t'";
+ } else {
+ return "U+" + std::to_string(cp);
+ }
+}
+
+// Helper to format expected element from grammar stack
+static std::string format_expected_element(const llama_grammar_rules & /* rules*/, const llama_grammar_element * elem) {
+ if (!elem) {
+ return "<end>";
+ }
+
+ switch (elem->type) {
+ case LLAMA_GRETYPE_END:
+ return "<end of rule>";
+ case LLAMA_GRETYPE_ALT:
+ return "<alternative>";
+ case LLAMA_GRETYPE_RULE_REF:
+ {
+ // Find rule name - just show rule ID for now
+ return "<rule-" + std::to_string(elem->value) + ">";
+ }
+ case LLAMA_GRETYPE_CHAR:
+ {
+ std::string result;
+ const llama_grammar_element * pos = elem;
+ bool first = true;
+
+ do {
+ if (!first) {
+ result += " | ";
+ }
+ first = false;
+
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+ // Range like [a-z]
+ result += "[" + format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value) + "]";
+ pos += 2;
+ } else {
+ result += format_codepoint(pos->value);
+ pos += 1;
+ }
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+ return result;
+ }
+ case LLAMA_GRETYPE_CHAR_NOT:
+ {
+ std::string result = "[^";
+ const llama_grammar_element * pos = elem;
+ bool first = true;
+
+ do {
+ if (!first) {
+ result += " ";
+ }
+ first = false;
+
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+ result += format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value);
+ pos += 2;
+ } else {
+ result += format_codepoint(pos->value);
+ pos += 1;
+ }
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+ return result + "]";
+ }
+ case LLAMA_GRETYPE_CHAR_ANY:
+ return "<any char>";
+ case LLAMA_GRETYPE_TOKEN:
+ return "<token-" + std::to_string(elem->value) + ">";
+ case LLAMA_GRETYPE_TOKEN_NOT:
+ return "<not-token-" + std::to_string(elem->value) + ">";
+ default:
+ return "<unknown>";
+ }
+}
+
+// Get description of what the grammar expects at current position
+static std::string get_expected_description(const llama_grammar_rules & rules, const llama_grammar_stacks & stacks) {
+ if (stacks.empty()) {
+ return "<no valid continuations>";
+ }
+
+ std::string result;
+ std::set<std::string> seen;
+
+ for (const auto & stack : stacks) {
+ if (stack.empty()) {
+ if (seen.insert("<end>").second) {
+ if (!result.empty()) {
+ result += " OR ";
+ }
+ result += "<end>";
+ }
+ continue;
+ }
+
+ const llama_grammar_element * elem = stack.back();
+ std::string desc = format_expected_element(rules, elem);
+ if (seen.insert(desc).second) {
+ if (!result.empty()) {
+ result += " OR ";
+ }
+ result += desc;
+ }
+ }
+
+ return result;
+}
+
+// Result of a detailed grammar match attempt
+struct grammar_match_result {
+ bool success = false; // Did the string fully match the grammar?
+ size_t matched_bytes = 0; // Bytes successfully matched before failure
+ size_t matched_codepoints = 0; // Codepoints successfully matched before failure
+ size_t total_bytes = 0; // Total bytes in input
+ size_t total_codepoints = 0; // Total codepoints in input
+ std::string matched_prefix; // The portion that was successfully matched
+ std::string failing_char; // The character that caused failure (if any)
+ std::string expected_description; // What the grammar expected at failure point
+ bool incomplete = false; // True if matched all input but grammar expects more
+};
+
+// Detailed version of match_string that returns failure information
+static grammar_match_result match_string_detailed(const std::string & input, llama_grammar * grammar) {
+ grammar_match_result result;
+ result.total_bytes = input.size();
+
+ const auto cpts = unicode_cpts_from_utf8(input);
+ result.total_codepoints = cpts.size();
+
+ auto & stacks_cur = llama_grammar_get_stacks(grammar);
+ const auto & rules = llama_grammar_get_rules(grammar);
+
+ size_t byte_pos = 0;
+
+ for (size_t i = 0; i < cpts.size(); i++) {
+ const auto & cpt = cpts[i];
+
+ // Get expected before accepting (for error reporting)
+ std::string expected_before = get_expected_description(rules, stacks_cur);
+
+ llama_grammar_accept(grammar, cpt);
+
+ // Calculate byte position for this codepoint
+ size_t cpt_bytes = 0;
+ if (cpt < 0x80) {
+ cpt_bytes = 1;
+ } else if (cpt < 0x800) {
+ cpt_bytes = 2;
+ } else if (cpt < 0x10000) {
+ cpt_bytes = 3;
+ } else {
+ cpt_bytes = 4;
+ }
+
+ if (stacks_cur.empty()) {
+ // Grammar failed to match at this point
+ result.matched_bytes = byte_pos;
+ result.matched_codepoints = i;
+ result.matched_prefix = input.substr(0, byte_pos);
+ result.failing_char = format_codepoint(cpt);
+ result.expected_description = expected_before;
+ result.incomplete = false;
+ return result;
+ }
+
+ byte_pos += cpt_bytes;
+ }
+
+ // All input matched - check if grammar is complete
+ result.matched_bytes = input.size();
+ result.matched_codepoints = cpts.size();
+ result.matched_prefix = input;
+
+ if (std::any_of(stacks_cur.begin(), stacks_cur.end(), [](const auto & stack) { return stack.empty(); })) {
+ // An empty stack means that the grammar has been completed
+ result.success = true;
+ result.incomplete = false;
+ } else {
+ // Grammar expects more input
+ result.success = false;
+ result.incomplete = true;
+ result.expected_description = get_expected_description(rules, stacks_cur);
+ }
+
+ return result;
+}
+
// TODO: extract to common helper (copied from test-grammar-integration.cpp)
static bool match_string(const std::string & input, llama_grammar * grammar) {
const auto cpts = unicode_cpts_from_utf8(input);
auto json_obj = json::parse(json_str);
return json_obj.dump();
} catch (const std::exception & e) {
- std::cerr << "Failed to parse JSON: " << e.what() << '\n';
- return json_str;
+ return ""; // ignore parial JSON contents for comparison purposes
}
}
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
+
+static void assert_msg_equals(const common_chat_msg & expected,
+ const common_chat_msg & actual,
+ bool ignore_whitespace_differences = false) {
assert_equals(expected.role, actual.role);
if (ignore_whitespace_differences) {
assert_equals(string_strip(expected.content), string_strip(actual.content));
}
}
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool{
/* .name = */ "special_function",
/* .description = */ "I'm special",
/* .parameters = */ R"({
"required": ["arg1"]
})",
};
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param{
/* .name = */ "special_function_with_opt",
/* .description = */ "I'm special but have optional stuff",
/* .parameters = */ R"({
"required": ["arg1"]
})",
};
-common_chat_tool python_tool {
+static common_chat_tool empty_args_tool{
+ /* .name = */ "empty_args",
+ /* .description = */ "A tool that takes no arguments",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {}
+ })",
+};
+static common_chat_tool python_tool{
/* .name = */ "python",
/* .description = */ "an ipython interpreter",
/* .parameters = */ R"({
"required": ["code"]
})",
};
-common_chat_tool todo_list_tool {
+
+static common_chat_tool html_tool{
+ /* .name = */ "html",
+ /* .description = */ "an html validator",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "markup": {
+ "type": "string",
+ "description": "HTML markup to validate."
+ }
+ },
+ "required": ["markup"]
+ })",
+};
+
+static common_chat_tool get_time_tool{
+ /* .name = */ "get_time",
+ /* .description = */ "Get the current time in a city",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "city": {
+ "type": "string",
+ "description": "City name"
+ }
+ },
+ "required": ["city"]
+ })",
+};
+
+static common_chat_tool get_weather_tool{
+ /* .name = */ "get_weather",
+ /* .description = */ "Get the current weather in a city",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "city": {
+ "type": "string",
+ "description": "City name"
+ }
+ },
+ "required": ["city"]
+ })",
+};
+
+static common_chat_tool todo_list{
/* .name = */ "todo_list",
/* .description = */ "Create or update the todo list",
/* .parameters = */ R"({
"required": ["todos"]
})",
};
-common_chat_tool code_interpreter_tool {
- /* .name = */ "code_interpreter",
- /* .description = */ "an ipython interpreter",
+
+static common_chat_tool edit_tool{
+ /* .name = */ "edit",
+ /* .description = */ "Edit file",
/* .parameters = */ R"({
"type": "object",
"properties": {
- "code": {
+ "filename": {
"type": "string",
- "description": "Python code to execute."
+ "description": "Path of file to edit"
+ },
+ "oldString": {
+ "type": "string",
+ "description": "String to replace"
+ },
+ "newString": {
+ "type": "string",
+ "description": "New (replacement) value"
}
},
- "required": ["code"]
+ "required": ["filename", "oldString", "newString"]
})",
};
-std::vector<common_chat_tool> tools { special_function_tool, special_function_tool_with_optional_param, python_tool };
-std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
-struct delta_data {
- std::string delta;
- common_chat_params params;
+static common_chat_tool magic_tool{
+ /* .name = */ "magic",
+ /* .description = */ "Magic tool that takes a hash",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "ref": {
+ "type": "string"
+ }
+ },
+ "required": ["name", "ref"]
+ })",
+};
+
+static common_chat_tool magic_int_tool{
+ /* .name = */ "magic_int",
+ /* .description = */ "Magic tool that takes a hash",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "ref": {
+ "type": "integer"
+ },
+ "name": {
+ "type": "string"
+ }
+ },
+ "required": ["ref"]
+ })",
+};
+
+static common_chat_tool amount_tool{
+ /* .name = */ "amount",
+ /* .description = */ "Amount converter",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "orig": {
+ "type": "number"
+ }
+ },
+ "required": ["orig"]
+ })",
+};
+
+static common_chat_tool imaginary_number_tool{
+ /* .name = */ "imaginary_number",
+ /* .description = */ "Imaginary number converter",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "number": {
+ "type": "object",
+ "properties": {
+ "real": {
+ "type": "number"
+ },
+ "imaginary": {
+ "type": "number"
+ }
+ },
+ "required": ["real", "imaginary"]
+ }
+ },
+ "required": ["number"]
+ })",
+};
+
+static common_chat_tool string_param_tool{
+ /* .name = */ "string_param",
+ /* .description = */ "Tool with string parameter for testing",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string",
+ "description": "A text parameter"
+ }
+ },
+ "required": []
+ })",
+};
+
+static common_chat_tool quoted_unquoted_tool{
+ /* .name = */ "quoted_unquoted",
+ /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "quoted": {
+ "type": "string",
+ "description": "Quoted value"
+ },
+ "unquoted": {
+ "type": "string",
+ "description": "Unquoted value"
+ }
+ },
+ "required": ["quoted", "unquoted"]
+ })",
+};
+
+
+static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
+ python_tool, html_tool, todo_list };
+
+const common_chat_msg message_user{
+ "user",
+ "Hey there!",
+ /* .content_parts = */ {},
+ /* .tool_calls = */ {},
+ /* .reasoning_content = */ "",
+ /* .tool_name = */ "",
+ /* .tool_call_id = */ "",
+};
+
+const common_chat_msg message_user_parts{
+ "user",
+ /* .content = */ "",
+ /* .content_parts = */
+ {
+ { "text", "Hey" },
+ { "text", "there" },
+ },
+ /* .tool_calls = */
+ { },
+ /* .reasoning_content = */
+ "",
+ /* .tool_name = */ "",
+ /* .tool_call_id = */ "",
};
-static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
+static common_chat_msg simple_assist_msg(const std::string & content,
+ const std::string & reasoning_content = "",
+ const std::string & tool_name = "",
+ const std::string & arguments = "",
+ const std::string & id = "") {
common_chat_msg msg;
- msg.role = "assistant";
- msg.content = content;
+ msg.role = "assistant";
+ msg.content = content;
msg.reasoning_content = reasoning_content;
- if (!tool_name.empty()) {
+ if (!tool_name.empty() || !id.empty()) {
msg.tool_calls.push_back({ tool_name, arguments, id });
}
return msg;
}
-static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
- const common_chat_msg & user_message,
- const common_chat_msg & delta_message,
+static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) {
+ return simple_assist_msg("", "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name,
+ const std::string & arguments,
+ const std::string & reasoning) {
+ return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls(
+ const std::string & reasoning,
+ const std::string & content,
+ const std::vector<std::pair<std::string, std::string>> & tool_calls) {
+ common_chat_msg msg;
+ msg.role = "assistant";
+ msg.content = content;
+ msg.reasoning_content = reasoning;
+ for (const auto & [name, args] : tool_calls) {
+ msg.tool_calls.push_back({ name, args, "" });
+ }
+ return msg;
+}
+
+static common_chat_msg message_with_content_and_tool_call(const std::string & content,
+ const std::string & tool_name,
+ const std::string & arguments) {
+ return simple_assist_msg(content, "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning,
+ const std::string & tool_name,
+ const std::string & arguments) {
+ return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_empty = simple_assist_msg("");
+const common_chat_msg message_assist_thoughts_unparsed_deepseek =
+ simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_md =
+ simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
+const common_chat_msg message_assist_thoughts_unparsed_md_partial =
+ simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
+
+const common_chat_msg message_assist_thoughts_unparsed_r7b =
+ simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_magistral =
+ simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
+const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
+const common_chat_msg message_assist_thoughts_unopened_unparsed =
+ simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
+const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt =
+ simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt =
+ simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
+const common_chat_msg message_assist_call_content =
+ simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function");
+const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
+const common_chat_msg message_assist_call_thoughts =
+ simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_thoughts_unparsed =
+ simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_thoughts_content =
+ simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_id =
+ simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
+const common_chat_msg message_assist_call_idx =
+ simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
+const common_chat_msg message_assist_thoughts_call_idx =
+ simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
+const common_chat_msg message_assist_thoughts_partial_call =
+ simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0");
+const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
+const common_chat_msg message_assist_call_python_lines =
+ simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
+const common_chat_msg message_assist_call_python_lines_unclosed =
+ simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
+const common_chat_msg message_assist_json_content =
+ simple_assist_msg("{\n \"response\": \"Hello, world!\\nWhat's up?\"\n}");
+
+struct delta_data {
+ std::string delta;
+ common_chat_params params;
+};
+
+static delta_data init_delta(const struct common_chat_templates * tmpls,
+ const std::vector<std::string> & end_tokens,
+ const common_chat_msg & user_message,
+ const common_chat_msg & delta_message,
const std::vector<common_chat_tool> & tools,
- const common_chat_tool_choice & tool_choice) {
+ const common_chat_tool_choice & tool_choice) {
common_chat_templates_inputs inputs;
inputs.parallel_tool_calls = true;
inputs.messages.push_back(user_message);
gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
the parsed message is the same as the test_message
*/
-static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
- const common_chat_msg & test_message,
- const std::vector<common_chat_tool> & tools = {},
- const std::string & expected_delta = "",
- bool expect_grammar_triggered = true,
- bool test_grammar_if_triggered = true,
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
- bool ignore_whitespace_differences = false
- ) {
+static void test_templates(const struct common_chat_templates * tmpls,
+ const std::vector<std::string> & end_tokens,
+ const common_chat_msg & test_message,
+ const std::vector<common_chat_tool> & tools = {},
+ const std::string & expected_delta = "",
+ bool expect_grammar_triggered = true,
+ bool test_grammar_if_triggered = true,
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
+ bool ignore_whitespace_differences = false) {
common_chat_msg user_message;
- user_message.role = "user";
+ user_message.role = "user";
user_message.content = "Hello, world!";
- for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
+ common_chat_templates_inputs inputs_tools;
+ inputs_tools.messages = { message_user };
+ inputs_tools.tools = { special_function_tool };
+
+ common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools);
+
+ for (const auto & tool_choice :
+ std::vector<common_chat_tool_choice>{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) {
auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
if (!expected_delta.empty()) {
if (ignore_whitespace_differences) {
if (expect_grammar_triggered) {
// TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
- common_chat_parser_params params;
- params.format = data.params.format;
- params.reasoning_format = reasoning_format;
- const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
+ common_chat_parser_params parser_params;
+ parser_params.format = data.params.format;
+ parser_params.reasoning_format = reasoning_format;
+ if (!parser_params.parser.empty()) {
+ parser_params.parser = common_peg_arena();
+ parser_params.parser.load(params.parser);
+ }
+ const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, parser_params);
assert_msg_equals(test_message, msg, ignore_whitespace_differences);
}
throw std::runtime_error("Failed to build grammar");
}
auto earliest_trigger_pos = std::string::npos;
- auto constrained = data.delta;
+ auto constrained = data.delta;
for (const auto & trigger : data.params.grammar_triggers) {
- size_t pos = std::string::npos;
+ size_t pos = std::string::npos;
std::smatch match;
switch (trigger.type) {
case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
- {
- const auto & word = trigger.value;
- pos = constrained.find(word);
- break;
- }
+ {
+ const auto & word = trigger.value;
+ pos = constrained.find(word);
+ break;
+ }
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
- {
- const auto & pattern = trigger.value;
- if (std::regex_search(constrained, match, std::regex(pattern))) {
- pos = match.position(1);
+ {
+ const auto & pattern = std::regex(trigger.value);
+ if (std::regex_search(constrained, match, pattern)) {
+ pos = match.position(pattern.mark_count());
+ }
+ break;
}
- break;
- }
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
- {
- const auto & pattern = trigger.value;
- if (std::regex_match(constrained, match, std::regex(pattern))) {
- auto mpos = std::string::npos;
- for (size_t i = 1; i < match.size(); ++i) {
- if (match[i].length() > 0) {
- mpos = match.position(i);
- break;
+ {
+ const auto & pattern = trigger.value;
+ if (std::regex_match(constrained, match, std::regex(pattern))) {
+ auto mpos = std::string::npos;
+ for (size_t i = 1; i < match.size(); ++i) {
+ if (match[i].length() > 0) {
+ mpos = match.position(i);
+ break;
+ }
}
+ if (mpos == std::string::npos) {
+ mpos = match.position(0);
+ }
+ pos = mpos;
}
- if (mpos == std::string::npos) {
- mpos = match.position(0);
- }
- pos = mpos;
+ break;
}
- break;
- }
default:
throw std::runtime_error("Unknown trigger type");
}
}
auto grammar_triggered = false;
if (earliest_trigger_pos != std::string::npos) {
- constrained = constrained.substr(earliest_trigger_pos);
+ constrained = constrained.substr(earliest_trigger_pos);
grammar_triggered = true;
}
if (data.params.grammar_lazy) {
if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
- "\n\nConstrained: " + constrained +
- "\n\nGrammar: " + data.params.grammar);
+ "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar);
}
}
}
static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
auto len = s.size();
- if (len == 0) return 0;
+ if (len == 0) {
+ return 0;
+ }
auto i = len;
for (size_t back = 0; back < 4 && i > 0; ++back) {
--i;
unsigned char c = s[i];
if ((c & 0x80) == 0) {
return len;
- } else if ((c & 0xC0) == 0xC0) {
+ }
+ if ((c & 0xC0) == 0xC0) {
size_t expected_len = 0;
- if ((c & 0xE0) == 0xC0) expected_len = 2;
- else if ((c & 0xF0) == 0xE0) expected_len = 3;
- else if ((c & 0xF8) == 0xF0) expected_len = 4;
- else return i;
- if (len - i >= expected_len) {
- return len;
+ if ((c & 0xE0) == 0xC0) {
+ expected_len = 2;
+ } else if ((c & 0xF0) == 0xE0) {
+ expected_len = 3;
+ } else if ((c & 0xF8) == 0xF0) {
+ expected_len = 4;
} else {
return i;
}
+ if (len - i >= expected_len) {
+ return len;
+ }
+ return i;
}
}
return len - std::min(len, size_t(3));
return s.substr(0, utf8_truncate_safe_len(s));
};
- auto merged = simple_assist_msg("");
+ auto merged = simple_assist_msg("");
auto last_msg = parse_msg("");
for (size_t i = 1; i <= raw_message.size(); ++i) {
auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
- if (curr_msg == simple_assist_msg("")) continue;
- LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
- for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
+ if (curr_msg == simple_assist_msg("")) {
+ continue;
+ }
+ LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({ curr_msg }).dump().c_str());
+ for (auto diff : common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
if (!diff.reasoning_content_delta.empty()) {
merged.reasoning_content += diff.reasoning_content_delta;
}
if (diff.tool_call_index != std::string::npos) {
if (!diff.tool_call_delta.name.empty()) {
- merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
+ merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" });
}
if (!diff.tool_call_delta.arguments.empty()) {
GGML_ASSERT(!merged.tool_calls.empty());
merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
}
}
- LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
+ LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({ merged }).dump().c_str());
}
assert_msg_equals(curr_msg, merged, true);
last_msg = curr_msg;
assert_msg_equals(expected, merged, true);
}
-const common_chat_msg message_user {
- "user",
- "Hey there!",
- /* .content_parts = */ {},
- /* .tool_calls = */ {},
- /* .reasoning_content = */ "",
- /* .tool_name = */ "",
- /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_user_parts {
- "user",
- /* .content = */ "",
- /* .content_parts = */ {
- { "text", "Hey" },
- { "text", "there" },
- },
- /* .tool_calls = */ {},
- /* .reasoning_content = */ "",
- /* .tool_name = */ "",
- /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_empty = simple_assist_msg("");
-const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
-const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
-
-const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
-const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
-const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
-const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_noopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_withopt = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
-const common_chat_msg message_assist_call_content = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_empty_args = simple_assist_msg("", "", "special_function");
-const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
-const common_chat_msg message_assist_call_thoughts = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_thoughts_unparsed = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_thoughts_content = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_id = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
-const common_chat_msg message_assist_call_idx = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
-const common_chat_msg message_assist_thoughts_call_idx = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
-const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
-const common_chat_msg message_assist_call_python_lines = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
-const common_chat_msg message_assist_call_python_lines_unclosed = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
-const common_chat_msg message_assist_call_code_interpreter = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
-
-// Use for PEG parser implementations
-struct peg_test_case {
- common_chat_templates_inputs params;
- std::string input;
- common_chat_msg expect;
+// Use for PEG parser implementations
+struct peg_test_case {
+ common_chat_templates_inputs params;
+ std::string input;
+ common_chat_msg expect;
+ bool is_partial = false;
};
struct make_peg_parser {
common_chat_params params_;
- common_peg_arena arena_;
-
- make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
- params_ = common_chat_templates_apply(tmpls, inputs);
+ common_peg_arena arena_;
+ bool detailed_debug_;
+
+ make_peg_parser(common_chat_templates * tmpls,
+ const common_chat_templates_inputs & inputs,
+ bool detailed_debug = false) {
+ detailed_debug_ = detailed_debug;
+ params_ = common_chat_templates_apply(tmpls, inputs);
arena_.load(params_.parser);
}
- common_chat_msg parse(const std::string & msg, bool is_partial) {
+ common_chat_msg parse(const std::string & msg, bool is_partial) const {
common_chat_parser_params parser_params;
parser_params.format = params_.format;
+ parser_params.debug = detailed_debug_;
return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
}
};
-static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
+static void test_peg_parser(common_chat_templates * tmpls,
+ const std::function<void(peg_test_case &)> & init,
+ bool detailed_debug) {
+ // UTF-8-safe truncation helper (same as in test_parser_with_streaming)
+ constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
+ auto len = s.size();
+ if (len == 0) {
+ return 0;
+ }
+ auto i = len;
+ for (size_t back = 0; back < 4 && i > 0; ++back) {
+ --i;
+ unsigned char c = s[i];
+ if ((c & 0x80) == 0) {
+ return len;
+ }
+ if ((c & 0xC0) == 0xC0) {
+ size_t expected_len = 0;
+ if ((c & 0xE0) == 0xC0) {
+ expected_len = 2;
+ } else if ((c & 0xF0) == 0xE0) {
+ expected_len = 3;
+ } else if ((c & 0xF8) == 0xF0) {
+ expected_len = 4;
+ } else {
+ return i;
+ }
+ if (len - i >= expected_len) {
+ return len;
+ }
+ return i;
+ }
+ }
+ return len - std::min(len, size_t(3));
+ };
+
peg_test_case tc;
init(tc);
if (tc.params.messages.empty()) {
- tc.params.messages = {message_user};
+ tc.params.messages = { message_user };
}
if (tc.expect.role.empty()) {
tc.expect.role = "assistant";
}
- auto parser = make_peg_parser(tmpls, tc.params);
+ auto parser = make_peg_parser(tmpls, tc.params, detailed_debug);
+ if (detailed_debug) {
+ LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str());
+ }
common_chat_msg msg_accum;
common_chat_msg msg_prev;
msg_accum.role = msg_prev.role = "assistant";
for (size_t i = 1; i <= tc.input.size(); ++i) {
- auto is_partial = i < tc.input.size();
- common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
+ auto is_partial = i < tc.input.size() || tc.is_partial;
+ // Use UTF-8 safe truncation to avoid corrupting multi-byte characters
+ size_t safe_len = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i));
+ std::string prefix = tc.input.substr(0, safe_len);
+ common_chat_msg msg_current = parser.parse(prefix, is_partial);
for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
if (!diff.reasoning_content_delta.empty()) {
msg_accum.content += diff.content_delta;
}
if (diff.tool_call_index != std::string::npos) {
+ // During partial parsing, a new tool call may appear with empty name initially
+ // The name gets filled in as more input is parsed
+ while (msg_accum.tool_calls.size() <= diff.tool_call_index) {
+ msg_accum.tool_calls.push_back({ "", "", "" });
+ }
+ // Always update name and id from diff (may change during incremental parsing), but only if the delta
+ // actually contains them
if (!diff.tool_call_delta.name.empty()) {
- msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
+ msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name;
+ }
+ if (!diff.tool_call_delta.id.empty()) {
+ msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id;
}
if (!diff.tool_call_delta.arguments.empty()) {
- msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
+ msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments;
}
}
}
- assert_msg_equals(msg_current, msg_accum, true);
+ try {
+ assert_msg_equals(msg_current, msg_accum, true);
+ } catch (std::exception & e) {
+ throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str());
+ }
+
msg_prev = msg_current;
}
- assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+ if (!tc.is_partial) {
+ assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+ }
assert_msg_equals(tc.expect, msg_accum, true);
+
+ // Test grammar if present in params
+ if (!parser.params_.grammar.empty()) {
+ auto grammar = build_grammar(parser.params_.grammar);
+ if (!grammar) {
+ throw std::runtime_error("Failed to build grammar: " + parser.params_.grammar);
+ }
+
+ // Find the earliest trigger position to determine the constrained portion
+ auto earliest_trigger_pos = std::string::npos;
+ for (const auto & trigger : parser.params_.grammar_triggers) {
+ size_t pos = std::string::npos;
+ std::smatch match;
+ switch (trigger.type) {
+ case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
+ {
+ const auto & word = trigger.value;
+ pos = tc.input.find(word);
+ break;
+ }
+ case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
+ {
+ const auto & pattern = std::regex(trigger.value);
+ if (std::regex_search(tc.input, match, pattern)) {
+ pos = match.position(pattern.mark_count());
+ }
+ break;
+ }
+ case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
+ {
+ const auto & pattern = trigger.value;
+ if (std::regex_match(tc.input, match, std::regex(pattern))) {
+ auto mpos = std::string::npos;
+ for (size_t i = 1; i < match.size(); ++i) {
+ if (match[i].length() > 0) {
+ mpos = match.position(i);
+ break;
+ }
+ }
+ if (mpos == std::string::npos) {
+ mpos = match.position(0);
+ }
+ pos = mpos;
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown trigger type");
+ }
+ if (pos != std::string::npos) {
+ if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
+ earliest_trigger_pos = pos;
+ }
+ }
+ }
+
+ // Determine the constrained portion of input to test against grammar
+ std::string constrained = tc.input;
+ bool grammar_triggered = false;
+ if (earliest_trigger_pos != std::string::npos) {
+ constrained = tc.input.substr(earliest_trigger_pos);
+ grammar_triggered = true;
+ } else if (!parser.params_.grammar_lazy) {
+ // For non-lazy grammars, the entire input should match
+ grammar_triggered = true;
+ }
+
+ // Test the constrained portion against the grammar
+ if (grammar_triggered && !tc.is_partial) {
+ auto result = match_string_detailed(constrained, grammar.get());
+ if (!result.success) {
+ std::string error_msg;
+ if (result.incomplete) {
+ error_msg =
+ "Grammar matched all input but expects more:\n\n"
+ ">>> Input: " + tc.input +
+ "\n\n>>> Constrained: " + constrained +
+ "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+ std::to_string(result.matched_codepoints) + " codepoints): " +
+ (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+ "\n\n>>> Expected next: " + result.expected_description +
+ "\n\n>>> Grammar: " + parser.params_.grammar;
+ } else {
+ error_msg =
+ "Grammar match failed:\n\n"
+ ">>> Input: " + tc.input +
+ "\n\n>>> Constrained: " + constrained +
+ "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+ std::to_string(result.matched_codepoints) + " codepoints): " +
+ (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+ "\n\n>>> Failing character: " + result.failing_char +
+ "\n\n>>> Expected: " + result.expected_description +
+ "\n\n>>> Grammar: " + parser.params_.grammar;
+ }
+ throw std::runtime_error(error_msg);
+ }
+ }
+ }
+}
+
+// Global template filter for --template flag
+static std::string g_template_filter;
+
+// Fluent builder for PEG parser tests
+class peg_test_builder;
+
+class peg_tester {
+ common_chat_templates_ptr tmpls_;
+ std::string template_path_;
+ bool detailed_debug_;
+ friend class peg_test_builder;
+
+ public:
+ explicit peg_tester(const std::string & template_path, const bool detailed_debug = false) :
+ tmpls_(read_templates(template_path)),
+ template_path_(template_path),
+ detailed_debug_(detailed_debug) {}
+
+ const std::string & template_path() const { return template_path_; }
+
+ peg_test_builder test(const std::string & input);
+};
+
+class peg_test_builder {
+ peg_tester & tester_;
+ peg_test_case tc_;
+
+ public:
+ peg_test_builder(peg_tester & tester, const std::string & input) : tester_(tester) { tc_.input = input; }
+
+ // Parameter setters
+ peg_test_builder & reasoning_format(common_reasoning_format fmt) {
+ tc_.params.reasoning_format = fmt;
+ return *this;
+ }
+
+ peg_test_builder & tools(std::vector<common_chat_tool> tools) {
+ tc_.params.tools = std::move(tools);
+ return *this;
+ }
+
+ peg_test_builder & enable_thinking(bool val) {
+ tc_.params.enable_thinking = val;
+ return *this;
+ }
+
+ peg_test_builder & parallel_tool_calls(bool val) {
+ tc_.params.parallel_tool_calls = val;
+ return *this;
+ }
+
+ peg_test_builder & json_schema(const std::string & schema) {
+ tc_.params.json_schema = schema;
+ return *this;
+ }
+
+ peg_test_builder & is_partial(bool val) {
+ tc_.is_partial = val;
+ return *this;
+ }
+
+ // Expect setters
+ peg_test_builder & expect(const common_chat_msg & msg) {
+ tc_.expect = msg;
+ return *this;
+ }
+
+ peg_test_builder & expect_content(const std::string & content) {
+ tc_.expect.content = content;
+ return *this;
+ }
+
+ peg_test_builder & expect_reasoning(const std::string & reasoning) {
+ tc_.expect.reasoning_content = reasoning;
+ return *this;
+ }
+
+ peg_test_builder & expect_tool_calls(std::vector<common_chat_tool_call> calls) {
+ tc_.expect.tool_calls = std::move(calls);
+ return *this;
+ }
+
+ // Execute the test
+ void run() {
+ // Check template filter
+ if (!g_template_filter.empty()) {
+ // Case-insensitive substring match
+ std::string template_path_lower = tester_.template_path();
+ std::string filter_lower = g_template_filter;
+ std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(),
+ ::tolower);
+ std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower);
+ if (template_path_lower.find(filter_lower) == std::string::npos) {
+ // Skip this test
+ return;
+ }
+ }
+ LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str());
+ test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_);
+ }
+};
+
+peg_test_builder peg_tester::test(const std::string & input) {
+ return peg_test_builder(*this, input);
}
static void test_msgs_oaicompat_json_conversion() {
- printf("[%s]\n", __func__);
+ LOG_DBG("%s\n", __func__);
std::vector<common_chat_msg> msgs{
message_user,
message_user_parts,
message_assist_call_id,
message_assist_call_idx,
message_assist_call_python,
- message_assist_call_code_interpreter,
};
for (const auto & msg : msgs) {
- auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
- auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
+ auto oai_json = common_chat_msgs_to_json_oaicompat({ msg });
+ auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
assert_equals((size_t) 1, msgs2.size());
- auto msg2 = msgs2[0];
+ const auto & msg2 = msgs2[0];
assert_msg_equals(msg, msg2);
}
- assert_equals(
- std::string(
- "[\n"
- " {\n"
- " \"role\": \"user\",\n"
- " \"content\": [\n"
- " {\n"
- " \"type\": \"text\",\n"
- " \"text\": \"Hey\"\n"
- " },\n"
- " {\n"
- " \"type\": \"text\",\n"
- " \"text\": \"there\"\n"
- " }\n"
- " ]\n"
- " }\n"
- "]"
- ),
- common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));
-
- assert_equals(
- std::string(
- "[\n"
- " {\n"
- " \"role\": \"assistant\",\n"
- " \"content\": \"\",\n"
- " \"tool_calls\": [\n"
- " {\n"
- " \"type\": \"function\",\n"
- " \"function\": {\n"
- " \"name\": \"python\",\n"
- " \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
- " }\n"
- " }\n"
- " ]\n"
- " }\n"
- "]"
- ),
- common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));
+ assert_equals(std::string("[\n"
+ " {\n"
+ " \"role\": \"user\",\n"
+ " \"content\": [\n"
+ " {\n"
+ " \"type\": \"text\",\n"
+ " \"text\": \"Hey\"\n"
+ " },\n"
+ " {\n"
+ " \"type\": \"text\",\n"
+ " \"text\": \"there\"\n"
+ " }\n"
+ " ]\n"
+ " }\n"
+ "]"),
+ common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2));
+
+ // Note: content is "" instead of null due to workaround for templates that render null as "None"
+ assert_equals(std::string("[\n"
+ " {\n"
+ " \"role\": \"assistant\",\n"
+ " \"content\": \"\",\n"
+ " \"tool_calls\": [\n"
+ " {\n"
+ " \"type\": \"function\",\n"
+ " \"function\": {\n"
+ " \"name\": \"python\",\n"
+ " \"arguments\": {\n"
+ " \"code\": \"print('hey')\"\n"
+ " }\n"
+ " }\n"
+ " }\n"
+ " ]\n"
+ " }\n"
+ "]"),
+ common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2));
auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
assert_equals<size_t>(1, res.size());
}
static void test_tools_oaicompat_json_conversion() {
- printf("[%s]\n", __func__);
+ LOG_DBG("%s\n", __func__);
std::vector<common_chat_tool> tools{
special_function_tool,
python_tool,
- code_interpreter_tool,
};
for (const auto & tool : tools) {
- auto oai_json = common_chat_tools_to_json_oaicompat({tool});
- auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
+ auto oai_json = common_chat_tools_to_json_oaicompat({ tool });
+ auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
assert_equals((size_t) 1, tools2.size());
auto tool2 = tools2[0];
assert_equals(tool.name, tool2.name);
assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
}
- assert_equals(
- std::string(
- "[\n"
- " {\n"
- " \"type\": \"function\",\n"
- " \"function\": {\n"
- " \"name\": \"special_function\",\n"
- " \"description\": \"I'm special\",\n"
- " \"parameters\": {\n"
- " \"type\": \"object\",\n"
- " \"properties\": {\n"
- " \"arg1\": {\n"
- " \"type\": \"integer\",\n"
- " \"description\": \"The arg.\"\n"
- " }\n"
- " },\n"
- " \"required\": [\n"
- " \"arg1\"\n"
- " ]\n"
- " }\n"
- " }\n"
- " }\n"
- "]"
- ),
- common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));
+ assert_equals(std::string("[\n"
+ " {\n"
+ " \"type\": \"function\",\n"
+ " \"function\": {\n"
+ " \"name\": \"special_function\",\n"
+ " \"description\": \"I'm special\",\n"
+ " \"parameters\": {\n"
+ " \"type\": \"object\",\n"
+ " \"properties\": {\n"
+ " \"arg1\": {\n"
+ " \"type\": \"integer\",\n"
+ " \"description\": \"The arg.\"\n"
+ " }\n"
+ " },\n"
+ " \"required\": [\n"
+ " \"arg1\"\n"
+ " ]\n"
+ " }\n"
+ " }\n"
+ " }\n"
+ "]"),
+ common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2));
+}
+
+static void test_template_output_peg_parsers(bool detailed_debug) {
+ LOG_DBG("%s\n", __func__);
+
+ // JSON schemas
+ const char * invoice_schema = R"({
+ "type": "object",
+ "properties": {
+ "amount": {"type": "number"},
+ "date": {"type": "string"}
+ }
+ })";
{
- auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
- R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
- assert_equals((size_t) 1, tools_no_params.size());
- assert_equals(std::string("test_func"), tools_no_params[0].name);
- assert_equals(std::string("A test"), tools_no_params[0].description);
- assert_equals(std::string("{}"), tools_no_params[0].parameters);
+ // Ministral-3-14B-Reasoning-2512
+ auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug);
+
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+ .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+ .run();
+
+ tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect(message_assist_thoughts)
+ .run();
+
+ tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "[THINK]I'm\nthinking[/THINK]"
+ R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+
+ tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
+ R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
+
+ tst.test(
+ "[THINK]I need to output the invoice details in JSON[/THINK]"
+ "```json\n"
+ R"({"amount": 123.45, "date": "2025-12-03"})"
+ "\n```")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .json_schema(invoice_schema)
+ .expect_reasoning("I need to output the invoice details in JSON")
+ .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+ .run();
}
+
{
- auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
- R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
- assert_equals((size_t) 1, tools_no_desc.size());
- assert_equals(std::string("test_func"), tools_no_desc[0].name);
- assert_equals(std::string(""), tools_no_desc[0].description);
+ // NVIDIA Nemotron-3 Nano
+ auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
+
+ tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run();
+
+ tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+ .enable_thinking(false)
+ .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+ .expect_content("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+ .run();
+
+ tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect(message_assist_thoughts)
+ .run();
+
+ tst.test(
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n1\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(false)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "I'm\nthinking\n</think>\n"
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n1\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+
+ tst.test(
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n1\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>\n"
+ "<tool_call>\n"
+ "<function=special_function_with_opt>\n"
+ "<parameter=arg1>\n1\n</parameter>\n"
+ "<parameter=arg2>\n2\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(false)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
+
+ tst.test(
+ "<tool_call>\n"
+ "<function=python>\n"
+ "<parameter=code>\n"
+ "def hello():\n"
+ " print(\"Hello, world!\")\n"
+ "\n"
+ "hello()\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(false)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({
+ python_tool
+ })
+ .expect_tool_calls({
+ { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+ })
+ .run();
+
+ tst.test(
+ "I need to output the invoice details in JSON\n"
+ "</think>\n"
+ R"({"amount": 123.45, "date": "2025-12-03"})")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .enable_thinking(true)
+ .json_schema(invoice_schema)
+ .expect_reasoning("I need to output the invoice details in JSON")
+ .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+ .run();
}
+
{
- auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
- R"([{"type": "function", "function": {"name": "test_func"}}])"));
- assert_equals((size_t) 1, tools_minimal.size());
- assert_equals(std::string("test_func"), tools_minimal[0].name);
- assert_equals(std::string(""), tools_minimal[0].description);
- assert_equals(std::string("{}"), tools_minimal[0].parameters);
+ // CohereForAI Command-R 7B (2024-tool_use)
+ auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+
+ tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+
+ tst.test(
+ "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+ "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
+
+ tst.test(
+ "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+ "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+ .expect(message_assist_thoughts_unparsed_r7b)
+ .run();
+
+ tst.test(
+ "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+ "<|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+ "]<|END_ACTION|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .expect(message_assist_thoughts_call_idx)
+ .run();
+
+ tst.test(
+ "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+ "<|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", ")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .is_partial(true)
+ .expect(message_assist_thoughts_partial_call)
+ .run();
+
+ tst.test(
+ "<|START_THINKING|><|END_THINKING|>"
+ "<|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+ "]<|END_ACTION|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_idx)
+ .run();
}
-}
-
-// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961
-struct test_parser_params {
- common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
- common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
- bool reasoning_in_content = false;
- bool thinking_forced_open = false;
- bool parse_tool_calls = true;
-};
-static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) {
- common_chat_parser_params params;
- params.format = syntax.format;
- params.reasoning_format = syntax.reasoning_format;
- params.reasoning_in_content = syntax.reasoning_in_content;
- params.thinking_forced_open = syntax.thinking_forced_open;
- params.parse_tool_calls = syntax.parse_tool_calls;
- return common_chat_parse(input, is_partial, params);
-}
+ {
+ // Google Gemma 2 2B - does not support tool calling
+ auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja");
-static void test_template_output_parsers() {
- printf("[%s]\n", __func__);
+ tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
- common_chat_templates_inputs inputs_no_tools;
- inputs_no_tools.messages = {message_user};
+ tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).run();
+ }
- common_chat_templates_inputs inputs_tools;
- inputs_tools.messages = {message_user};
- inputs_tools.tools = {special_function_tool};
+ {
+ // Qwen-QwQ-32B (reasoning model)
+ auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja");
- common_chat_templates_inputs inputs_tools_builtin;
- inputs_tools_builtin.messages = {message_user};
- inputs_tools_builtin.tools = {python_tool};
+ // QwQ always has thinking forced open - input starts after the <think>\n in the prompt
+ tst.test("Let me think about this...\n</think>\nThe answer is 42.")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect(simple_assist_msg("The answer is 42.", "Let me think about this..."))
+ .run();
+ tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
+ }
{
- // Not supported yet
- auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+ // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
+ auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug);
+
+ tst.test(
+ "<tool_call>\n"
+ "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+ "</tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "Hello, world!\nWhat's up?<tool_call>\n"
+ "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+ "</tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call_content)
+ .run();
+
+ // Note: Hermes template doesn't support thinking/reasoning natively
+ // Note: We only support one tool calling format per template, no alternate formats
}
{
- auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
- std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };
+ // Test simple content-only template
+ auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug);
- for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
- auto params = common_chat_templates_apply(tmpls.get(), inputs);
- assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format);
- assert_equals(false, params.thinking_forced_open);
- }
-
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_COMMAND_R7B}));
- assert_msg_equals(message_assist,
- test_chat_parse(
- "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_COMMAND_R7B}));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ true,
- /* .thinking_forced_open = */ false,
- }));
- assert_msg_equals(message_assist_thoughts_unparsed_r7b,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_COMMAND_R7B}));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_call_idx,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_ACTION|>[\n"
- " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
- "]<|END_ACTION|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_no_content,
- test_chat_parse(
- "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
- "<|START_ACTION|>[\n"
- " {\"tool_call_id\": \"0\", \"tool_name\": \"special",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
- "<|START_THINKING|><|END_THINKING|>"
- "<|START_ACTION|>[\n"
- " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
- "]<|END_ACTION|>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- COMMON_REASONING_FORMAT_DEEPSEEK);
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "<|START_RESPONSE|>Hello, world!\n"
- "What's up?<|END_RESPONSE|>",
- /* expect_grammar_triggered= */ false);
- }
- // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future
- {
- auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
- std::vector<std::string> end_tokens{ "<end_of_turn>" };
-
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_GENERIC,
- common_chat_templates_apply(
- read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
- inputs_tools)
- .format);
-
- // Generic tool calls doesn't generate / parse content-only messages symmetrically.
-
- assert_equals(
- simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
- test_chat_parse(
- "{ \"tool_call\" : { \"name\" : \"t",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ false,
- }));
- assert_equals(
- message_assist_empty,
- test_chat_parse(
- "{ \"tool_call\" : { \"name\" : \"t",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GENERIC}));
-
- assert_equals(
- simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","),
- test_chat_parse(
- R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GENERIC}));
-
- assert_equals(
- message_assist_call_empty_args,
- test_chat_parse(
- "{ \"tool_call\" : { \"name\" : \"special_function\"",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GENERIC}));
- assert_equals(
- message_assist_call_cutoff_args,
- test_chat_parse(
- "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GENERIC}));
-
- assert_msg_equals(message_assist,
- test_chat_parse(
- "{\n"
- " \"response\": \"Hello, world!\\nWhat's up?\"\n"
- "}",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GENERIC}));
-#if 0
- test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
- "{\n"
- " \"tool_calls\": [\n"
- " {\n"
- " \"name\": \"special_function\",\n"
- " \"arguments\": {\n"
- " \"arg1\": 1\n"
- " },\n"
- " \"id\": \"123456789\"\n"
- " }\n"
- " ],\n"
- " \"content\": \"\"\n"
- "}");
-#endif
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
}
{
- auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
- std::vector<std::string> end_tokens{ "</s>" };
+ // IBM Granite (reasoning and tool calling model)
+ auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug);
+
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
- assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+ tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(
- tmpls.get(), end_tokens, message_assist_call_id, tools,
- "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
+ // TODO: pending support for WRAPPED_WITH_REASONING
+ // tst.test("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>")
+ // .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ // .expect(message_assist_thoughts)
+ // .run();
}
+
{
- assert_msg_equals(
- simple_assist_msg("Réponse", "raisonnement"),
- test_chat_parse(
- message_assist_thoughts_unparsed_magistral.content,
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
+ // ByteDance-Seed-OSS (reasoning and tool calling model)
+ auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);
+
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ tst.test("<seed:think>I'm thinking about the answer</seed:think>\nHello, world!")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
+ .run();
+
+ tst.test(
+ "<seed:tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>1</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "<seed:tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>1</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>\n"
+ "<seed:tool_call>\n"
+ "<function=special_function_with_opt>\n"
+ "<parameter=arg1>1</parameter>\n"
+ "<parameter=arg2>2</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>")
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
+
+ tst.test(
+ "<seed:tool_call>\n"
+ "<function=todo_list>\n"
+ "<parameter=todos>[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>")
+ .tools({
+ todo_list
+ })
+ .expect_tool_calls({
+ { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+ })
+ .run();
+
+ // tool call with inside quotes
+ tst.test(
+ "<seed:tool_call>\n"
+ "<function=edit>\n"
+ "<parameter=filename>\n"
+ "foo.cpp\n"
+ "</parameter>\n"
+ "<parameter=oldString>"
+ "def foo(arg = \"14\"):\n"
+ " return arg + \"bar\"\n"
+ "\n"
+ "</parameter>\n"
+ "<parameter=newString>"
+ "def foo(arg = \"15\"):\n"
+ " pass\n"
+ "\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</seed:tool_call>")
+ .tools({
+ edit_tool
+ })
+ .expect_tool_calls({
+ { "edit", "{\"filename\": \"foo.cpp\", "
+ "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n return arg + \\\"bar\\\"\\n\", "
+ "\"newString\": \"def foo(arg = \\\"15\\\"):\\n pass\\n\"}", {}
+ }
+ })
+ .run();
}
- {
- auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
- std::vector<std::string> end_tokens{ "<|im_end|>" };
- assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- }
{
- auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
- std::vector<std::string> end_tokens{ "<|im_end|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- assert_equals(
- COMMON_CHAT_FORMAT_HERMES_2_PRO,
- common_chat_templates_apply(
- read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
- inputs_tools)
- .format);
- assert_equals(
- COMMON_CHAT_FORMAT_HERMES_2_PRO,
- common_chat_templates_apply(
- read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
- inputs_tools)
- .format);
-
- // Test parsing
- assert_msg_equals(
- simple_assist_msg("", "", "python", ""),
- test_chat_parse(
- "```json\n"
- "<function_call> { \"name\" : \"python\"",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- simple_assist_msg("Let's call something\n"),
- test_chat_parse(
- "Let's call something\n"
- "<tool_call>{\"name\"",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(
- simple_assist_msg("Let's call something\n"),
- test_chat_parse(
- "Let's call something\n"
- "<tool_call>{\"name",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- // QwQ-32B's template adds a trailing <think> if add_generation_prompt
- "I'm\nthinking</think>\n"
- "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<tool_call>\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "Hello, world!\nWhat's up?<tool_call>\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<function=special_function>{\"arg1\": 1}</function>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<function name=\"special_function\">\n"
- "{\"arg1\": 1}\n"
- "</function>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<tool>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<tools>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tools>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<response>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</response>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```xml\n"
- "<response>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</response>\n"
- "```",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```xml\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "```",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "```",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "```",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```json\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "```",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "```json\n"
- "\n"
- " <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
- " </function_call> \n"
- "``` ",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<json>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</json>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<xml>\n"
- " {\n"
- " \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
- " }\n"
- "</xml>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "<JSON>\n"
- " {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</JSON>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(
- message_assist_call,
- test_chat_parse(
- "{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
- // Test multiple tool calls
- common_chat_msg message_assist_multiple_calls;
- message_assist_multiple_calls.role = "assistant";
- message_assist_multiple_calls.content = "";
- message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
- message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
-
- assert_msg_equals(
- message_assist_multiple_calls,
- test_chat_parse(
- "<tool_call>\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
- "</tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
- assert_msg_equals(
- message_assist_multiple_calls,
- test_chat_parse(
- "<function=special_function>{\"arg1\": 1}</function>\n"
- "<function=python>{\"code\":\"print('hello')\"}</function>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
- assert_msg_equals(
- simple_assist_msg(
- "This is not a tool call:",
- "",
- "special_function",
- "{\"arg1\": 1}"),
- test_chat_parse(
- "This is not a tool call:\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
- // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
- // test_chat_parse(
- // "I'm\nthinking</think>Hello, world!\nWhat's up?",
- // COMMON_CHAT_FORMAT_HERMES_2_PRO));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_unparsed_md,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ true,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ false,
- }));
- assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ true,
- /* .thinking_forced_open = */ false,
- }));
- assert_msg_equals(message_assist_thoughts_unopened_unparsed,
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<tool_call>\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool_call>");
-
- // Test multiple tool calls with template
- common_chat_msg message_assist_multiple_calls_template;
- message_assist_multiple_calls_template.role = "assistant";
- message_assist_multiple_calls_template.content = "";
- message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
- message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
-
- test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
- "<tool_call>\n"
- "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
- "</tool_call>");
-
- test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
- "<tool_call>\n"
- "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
- "</tool_call>");
- assert_msg_equals(
- simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
- test_chat_parse(
- "<think><tool_call>nah uhg</tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
+ // Qwen3-Coder (tool calling with XML-style format)
+ auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug);
+
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ tst.test(
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n"
+ "1\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ tst.test(
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n"
+ "1\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>\n"
+ "<tool_call>\n"
+ "<function=special_function_with_opt>\n"
+ "<parameter=arg1>\n"
+ "1\n"
+ "</parameter>\n"
+ "<parameter=arg2>\n"
+ "2\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
+
+ // Test with code content (multiline)
+ tst.test(
+ "<tool_call>\n"
+ "<function=python>\n"
+ "<parameter=code>\n"
+ "def hello():\n"
+ " print(\"Hello, world!\")\n"
+ "\n"
+ "hello()\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .tools({
+ python_tool
+ })
+ .expect_tool_calls({
+ { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+ })
+ .run();
+
+ // Test with code content (asian unicode chars)
+ tst.test(
+ "<tool_call>\n"
+ "<function=python>\n"
+ "<parameter=code>\n"
+ "格\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .tools({
+ python_tool
+ })
+ .expect_tool_calls({
+ { "python", "{\"code\": \"格\"}", {} },
+ })
+ .run();
+
+ // Test with HTML tag content
+ tst.test(
+ "<tool_call>\n"
+ "<function=html>\n"
+ "<parameter=markup>\n"
+ "<html>\n"
+ " <head>\n"
+ " <title>Hello!</title>\n"
+ " </head>\n"
+ "</html>\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .tools({
+ html_tool
+ })
+ .expect_tool_calls({
+ { "html", "{\"markup\": \"<html>\\n <head>\\n <title>Hello!</title>\\n </head>\\n</html>\"}", {} },
+ })
+ .run();
+
+ // Test with TODO list (array of objects)
+ tst.test(
+ "<tool_call>\n"
+ "<function=todo_list>\n"
+ "<parameter=todos>\n"
+ "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .tools({
+ todo_list
+ })
+ .expect_tool_calls({
+ { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+ })
+ .run();
}
{
- auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
- std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
- common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
- assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
- common_chat_templates_apply(
- read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
- inputs_tools_builtin)
- .format);
-
- assert_equals(
- message_assist_call,
- test_chat_parse(
- "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LLAMA_3_X}));
-
- // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
- "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
- test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
- "<|python_tag|>python.call(code=\"print('hey')\")");
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+ tst.test(
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": "
+ "\"XYZCITY\"}<|tool▁call▁end|><|tool▁calls▁end|>")
+ .tools({ get_time_tool })
+ .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+ .run();
}
- {
- auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
- std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
- assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
- }
{
- auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
- std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
- common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
- common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
- common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
- for (auto is_partial : { false, true }) {
- assert_equals(
- message_assist_call,
- test_chat_parse(
- "<function=special_function>{\"arg1\": 1}</function>",
- is_partial,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
- }
-
- assert_equals(
- message_assist_call,
- test_chat_parse(
- "<function=special_function>{\"arg1\": 1}<",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<function=special_function>{\"arg1\": 1}</function>");
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+ tst.test(
+ "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": "
+ "\"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ get_time_tool })
+ .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING"))
+ .run();
}
+
{
- auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
- std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- assert_msg_equals(
- simple_assist_msg(
- "Hello, world!\nnono\nWhat's up?",
- "",
- "special_function",
- "{\"arg1\": 1}"),
- test_chat_parse(
- "all\n"
- "Hello, world!\n"
- "nono\n"
- "What's up?>>>special_function\n"
- "{\"arg1\": 1}\n",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
- assert_msg_equals(message_assist_call_python_lines,
- test_chat_parse(
- "python\n"
- "# This is a program:\n"
- "print('hey')",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
- assert_msg_equals(message_assist_call_python_lines_unclosed,
- test_chat_parse(
- "python\n"
- "# This is a program:\n"
- "print('hey')",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "special_function\n"
- "{\"arg1\": 1} \n ",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
- assert_msg_equals(message_assist,
- test_chat_parse(
- "all\n"
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-
- test_templates(tmpls.get(), end_tokens, message_assist, {},
- "all\n"
- "Hello, world!\n"
- "What's up?",
- /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "special_function\n"
- "{\"arg1\": 1}");
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+ tst.test(
+ "REASONING</think>CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": "
+ "\"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": "
+ "\"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>")
+ .tools({
+ get_time_tool, get_weather_tool
+ })
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .parallel_tool_calls(true)
+ .expect(message_with_reasoning_content_and_multiple_tool_calls(
+ "REASONING", "CONTENT",
+ { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } }))
+ .run();
}
- {
- auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
- std::vector<std::string> end_tokens{ "<|eot_id|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
- }
{
- // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt.
- auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
- std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
-
- for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
- auto params = common_chat_templates_apply(tmpls.get(), inputs);
- assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
- assert_equals(true, params.thinking_forced_open);
- }
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- assert_msg_equals(
- simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- assert_msg_equals(
- simple_assist_msg("", "I need to remember the correct syntax. It starts with <|tool▁calls▁begin|> and ends with"),
- test_chat_parse(
- "I need to remember the correct syntax. It starts with <|tool▁calls▁begin|> and ends with",
- /* is_partial= */ true,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_unopened_unparsed,
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- assert_msg_equals(message_assist_thoughts,
- // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- // "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
- // "```json\n"
- // "{\"arg1\": 1}\n"
- // // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic)
- // "```<|tool▁call▁end|>",
- // /* expect_grammar_triggered= */ true,
- // /* test_grammar_if_triggered= */ false);
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+ tst.test("REASONING</think>\nCONTENT")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(simple_assist_msg("CONTENT", "REASONING\n"))
+ .run();
}
+
{
- // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
- auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
- std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
-
- assert_msg_equals(message_assist_call_thoughts_unparsed,
- test_chat_parse(
- "<think>I'm\nthinking</think>\n\n"
- "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
- "```json\n"
- "{\"arg1\": 1}\n"
- "```<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<|tool▁calls|>function<|tool▁sep|>special_function\n"
- "```json\n"
- "{\"arg1\": 1}\n"
- "```<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>\n\n"
- "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
- "```json\n"
- "{\"arg1\": 1}\n"
- "```<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
- "```json\n"
- "{\"arg1\": 1}\n"
- "```<|tool▁call▁end|><|tool▁calls▁end|>");
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+ tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run();
}
+
+ // GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
{
- auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
- std::vector<std::string> end_tokens{ "<|end_of_text|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
- assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(
- message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GRANITE}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
- test_chat_parse(
- "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(message_assist_empty,
- test_chat_parse(
- "<think",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(message_assist_empty,
- test_chat_parse(
- "<think",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(message_assist_thoughts_no_content,
- test_chat_parse(
- "<think>I'm\nthinking",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
- assert_msg_equals(
- message_assist_empty,
- test_chat_parse(
- "<think>I'm\nthinking</think><response",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GRANITE}));
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(
- message_assist_call_empty_args,
- test_chat_parse(
- "<|tool_call|>[{\"name\": \"special_function\"",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(
- message_assist_call_cutoff_args,
- test_chat_parse(
- "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_GRANITE}));
- assert_msg_equals(
- message_assist_call_cutoff_args,
- test_chat_parse(
- "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test parsing tool calls with thinking
- assert_msg_equals(
- message_assist_call_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "Hello, world!\nWhat's up?",
- /* expect_grammar_triggered= */ false);
- // TODO @ngxson : generic tool call should be removed in the future
-#if 0
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
- "{\n"
- " \"tool_calls\": [\n"
- " {\n"
- " \"name\": \"special_function\",\n"
- " \"arguments\": {\n"
- " \"arg1\": 1\n"
- " },\n"
- " \"id\": \"123456789\"\n"
- " }\n"
- " ],\n"
- " \"content\": \"\"\n"
- "}",
- /* expect_grammar_triggered= */ false
- );
-#endif
- }
+ auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug);
+ tst.test(
+ "<tool_call>special_function\n"
+ "<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n"
+ "</tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
+
+ // GLM-4.7-Flash tests - format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
+ // Note: Template uses forced-open thinking mode (prompt ends with <think>)
{
- auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
- std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthink",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
-
- // Test parse_tool_calls == false
- assert_msg_equals(
- simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ false,
- }));
- assert_msg_equals(
- simple_assist_msg("", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
- /* is_partial= */ true,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ false,
- }));
- assert_msg_equals(
- simple_assist_msg("", "I'm\nthinking"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ false,
- }));
-
- // Test reasoning formats
- assert_msg_equals(
- simple_assist_msg(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
- }));
-
- assert_msg_equals(
- simple_assist_msg(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- /* .reasoning_in_content = */ true,
- }));
-
- // Test tool calling in role header
- assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
- test_chat_parse(
- "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
- "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
- }));
- }
+ auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug);
+
+ // Pure content (no reasoning)
+ tst.test("Hello, world!\nWhat's up?")
+ .enable_thinking(false)
+ .expect(message_assist)
+ .run();
+
+ // Reasoning with content (forced-open mode - input starts after <think>)
+ tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
+
+ // Tool call without reasoning
+ tst.test(
+ "<tool_call>special_function"
+ "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+ "</tool_call>")
+ .enable_thinking(false)
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ // Tool call with reasoning (forced-open mode)
+ tst.test(
+ "I'm\nthinking</think>"
+ "<tool_call>special_function"
+ "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+
+ tst.test(
+ "<tool_call>special_function"
+ "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+ "</tool_call>"
+ "<tool_call>special_function_with_opt"
+ "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+ "<arg_key>arg2</arg_key><arg_value>2</arg_value>"
+ "</tool_call>")
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
+ }
+
+ // Kimi-K2-Thinking tests - custom parser
+ // Unique feature: tool call ID embeds function name as functions.<name>:<counter>
{
- // Seed-OSS format tests
- auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
- std::vector<std::string> end_tokens{ "<seed:eos>" };
-
- assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-
- // Test simple reasoning content
- assert_msg_equals(
- simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
- test_chat_parse(
- "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test budget reflection tags
- common_chat_msg msg_budget_reflect;
- msg_budget_reflect.role = "assistant";
- msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
- msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
- assert_msg_equals(
- msg_budget_reflect,
- test_chat_parse(
- "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
- "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
- "I need to calculate this step by step.",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test tool calls with Seed-OSS format
- common_chat_msg msg_tool_call;
- msg_tool_call.role = "assistant";
- msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
- assert_msg_equals(
- msg_tool_call,
- test_chat_parse(
- "<seed:tool_call>\n"
- "<function=calculate_sum>\n"
- "<parameter=numbers>[1, 2, 3]</parameter>\n"
- "</function>\n"
- "</seed:tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_SEED_OSS}));
-
- // Test reasoning + tool call combination
- common_chat_msg msg_reasoning_tool;
- msg_reasoning_tool.role = "assistant";
- msg_reasoning_tool.content = "";
- msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
- msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
- assert_msg_equals(
- msg_reasoning_tool,
- test_chat_parse(
- "<seed:think>I need to calculate the sum of these numbers</seed:think>"
- "<seed:tool_call>\n"
- "<function=calculate_sum>\n"
- "<parameter=numbers>[1, 2, 3]</parameter>\n"
- "</function>\n"
- "</seed:tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test deltas: the number of tool calls in partial parses should never decrease
- std::string tool_msg = "<seed:tool_call>\n"
- "<function=fun>\n"
- "<parameter=smth>[1, 2, 3]</parameter>\n"
- "</function>";
- std::size_t previousToolCalls = 0;
- for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
- auto partial = tool_msg.substr(0, i);
- auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
- if (partial_res.tool_calls.size() < previousToolCalls) {
- throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
- }
- previousToolCalls = partial_res.tool_calls.size();
- }
-
- // Test multiple parameters in tool call
- common_chat_msg msg_multi_param;
- msg_multi_param.role = "assistant";
- msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
- assert_msg_equals(
- msg_multi_param,
- test_chat_parse(
- "<seed:tool_call>\n"
- "<function=process_data>\n"
- "<parameter=input>test</parameter>\n"
- "<parameter=format>json</parameter>\n"
- "</function>\n"
- "</seed:tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_SEED_OSS}));
-
- // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
- assert_msg_equals(
- simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
- test_chat_parse(
- "<seed:tool_call>\n"
- "<function=calculate_sum>\n"
- "<parameter=numbers>[1,\n",
- /* is_partial= */ true,
- {COMMON_CHAT_FORMAT_SEED_OSS}));
-
- // Test incomplete reasoning tag
- assert_msg_equals(
- simple_assist_msg("", "I was thinking"),
- test_chat_parse(
- "<seed:think>I was thinking",
- /* is_partial= */ true,
+ auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug);
+
+ // Basic content only
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ // Single tool call
+ tst.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ special_function_tool })
+ .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+ .run();
+
+ // Single tool call with reasoning
+ tst.test(
+ "<think>I'm thinking about this</think>"
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(simple_assist_msg("", "I'm thinking about this", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+ .run();
+
+ // Tool call with content
+ tst.test(
+ "Hello, world!\nWhat's up?"
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ special_function_tool })
+ .expect(simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+ .run();
+
+ // Multiple tool calls (parallel) - tests the indexing behavior
+ tst.test(
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+ "<|tool_call_begin|>functions.special_function_with_opt:1<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|>"
+ "<|tool_calls_section_end|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", "functions.special_function_with_opt:1" },
+ })
+ .run();
+
+ // Multiple tool calls with reasoning
+ tst.test(
+ "<think>I need to call two functions</think>"
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+ "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+ "<|tool_calls_section_end|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, python_tool
+ })
+ .expect_reasoning("I need to call two functions")
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+ { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+ })
+ .run();
+
+ // Python tool with multiline code
+ tst.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.python:0<|tool_call_argument_begin|>"
+ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ python_tool })
+ .expect_tool_calls({
+ { "python", "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}", "functions.python:0" },
+ })
+ .run();
+
+ // Tool call with empty arguments
+ tst.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.empty_args:0<|tool_call_argument_begin|>"
+ "{}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ empty_args_tool })
+ .expect(simple_assist_msg("", "", "empty_args", "{}", "functions.empty_args:0"))
+ .run();
+
+ // Partial tool call (streaming)
+ tst.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": ")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .is_partial(true)
+ .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ", "functions.special_function:0"))
+ .run();
+
+ // Three tool calls to verify counter continues incrementing
+ tst.test(
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+ "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print(1)\"}<|tool_call_end|>"
+ "<|tool_call_begin|>functions.html:2<|tool_call_argument_begin|>{\"markup\": \"<p>test</p>\"}<|tool_call_end|>"
+ "<|tool_calls_section_end|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, python_tool, html_tool
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+ { "python", "{\"code\": \"print(1)\"}", "functions.python:1" },
+ { "html", "{\"markup\": \"<p>test</p>\"}", "functions.html:2" },
+ })
+ .run();
+
+ // Multiple tool calls with reasoning, call *inside thinking block*
+ tst.test(
+ "<think>I need to call two functions"
+ "<|tool_calls_section_begin|>"
+ "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+ "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+ "<|tool_calls_section_end|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, python_tool
+ })
+ .expect_reasoning("I need to call two functions")
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+ { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+ })
+ .run();
+
+ // Multiple tool calls with reasoning, call *inside thinking block* and *without section markers or end markers
+ tst.test(
+ "<think>I need to call two functions"
+ "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}"
+ "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, python_tool
+ })
+ .expect_reasoning("I need to call two functions")
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+ { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+ })
+ .run();
+
+ // Real life test - execute_command
+ tst.test("<|tool_call_begin|>functions.execute_command:0<|tool_call_argument_begin|>{\"command\": \"ls -lah\""
+ ", \"cwd\": \"/home/jarvis/development/exllamav3\", \"timeout\": 10}")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .parallel_tool_calls(true)
+ .tools({
+ {
+ /* .name = */ "execute_command",
+ /* .description = */ "Execute shell command",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "Shell command to execute"
+ },
+ "cwd": {
+ "type": "string",
+ "description": "Working directory"
+ },
+ "timeout": {
+ "type": "integer",
+ "description": "The timeout in seconds"
+ }
+ },
+ "required": ["command"]
+ })"
+ }
+ }).
+ expect_tool_calls({
{
- /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test content without reasoning
- assert_msg_equals(
- simple_assist_msg("This is a simple response without reasoning."),
- test_chat_parse(
- "This is a simple response without reasoning.",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_SEED_OSS}));
+ "execute_command",
+ R"({"command": "ls -lah", "cwd": "/home/jarvis/development/exllamav3", "timeout": 10})",
+ "functions.execute_command:0"
+ }
+ })
+ .run();
}
- {
- auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
- std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
-
- assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
- // Test parsing tool calls with thinking
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test tool calls with extra content
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_NEMOTRON_V2}
- ));
-
- // Test tool calls with extra content AND thinking
- assert_msg_equals(message_assist_call_thoughts_content,
- test_chat_parse(
- "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "Hello, world!\nWhat's up?\n",
- /* expect_grammar_triggered= */ false);
-
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
- /* expect_grammar_triggered= */ true
- );
- }
{
- auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
- std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
-
- for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
- auto params = common_chat_templates_apply(tmpls.get(), inputs);
- assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
- assert_equals(true, params.thinking_forced_open);
- }
-
- test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
- assert_msg_equals(
- simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
- test_chat_parse(
- "I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- }));
- // variant: thinking forced open, reasoning_format none
- assert_msg_equals(
- simple_assist_msg("REASONING</think>ok", ""),
- test_chat_parse(
- "REASONING</think>ok",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ true,
- }));
- // variant: happy path for when it works as the model card says it should
- assert_msg_equals(
- simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
- test_chat_parse(
- "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ true,
- }));
- // variant: simple + thinking open
- assert_msg_equals(
- simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
- test_chat_parse(
- "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ true,
- }));
- // variant: simple + multiple tool calls
- common_chat_msg message_assist_multiple_calls;
- message_assist_multiple_calls.role = "assistant";
- message_assist_multiple_calls.content = "CONTENT";
- message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
- message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
- assert_msg_equals(
- message_assist_multiple_calls,
- test_chat_parse(
- "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ true,
- }));
- // variant: thinking forced open + tool call in reasoning content
- assert_msg_equals(
- simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
- test_chat_parse(
- "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ true,
- }));
- // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
- // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
- // to make tool calls in reasoning content according to the model card, but it does sometimes, so
- // add the reasoning content as regular content and parse the tool calls.
- assert_msg_equals(
- simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
- test_chat_parse(
- "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ true,
- }));
- // variant: thinking forced open + tool call in reasoning content + no closing think + partial
- assert_msg_equals(
- simple_assist_msg("", "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>", "", ""),
- test_chat_parse(
- "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>",
- /* is_partial= */ true,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ true,
- /* .parse_tool_calls = */ true,
- }));
- // variant: thinking not forced open + missing reasoning + no tool calls
- assert_msg_equals(
- simple_assist_msg("CONTENT", ""),
- test_chat_parse(
- "CONTENT",
- /* is_partial= */ false,
- {
- COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* .reasoning_in_content = */ false,
- /* .thinking_forced_open = */ false,
- /* .parse_tool_calls = */ true,
- }));
- }
+ auto kimi_id_special_func_tool_call =
+ simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0");
+
+ // Kimi-K2 old template
+ auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ special_function_tool })
+ .expect(kimi_id_special_func_tool_call)
+ .run();
+
+ // Kimi-K2-Instruct
+ auto tst2 = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug);
+ tst2.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst2.test(
+ "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+ "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+ .tools({ special_function_tool })
+ .expect(kimi_id_special_func_tool_call)
+ .run();
+ }
+
+ // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
+ // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
{
- auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
- std::vector<std::string> end_tokens{ "<|assistant_end|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_APERTUS}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_APERTUS}));
-
- // Test parsing tool calls with thinking
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test tool calls with extra content
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_APERTUS}
- ));
-
- // Test tool calls with extra content AND thinking
- assert_msg_equals(message_assist_call_thoughts_content,
- test_chat_parse(
- "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "Hello, world!\nWhat's up?",
- /* expect_grammar_triggered= */ false);
-
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
- /* expect_grammar_triggered= */ true
- );
-
- // TODO @ngxson : not sure why this fails, but not very important for now
- // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+ auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug);
+ tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
}
- {
- // LFM2 format tests
- auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
- std::vector<std::string> end_tokens{ "<|im_end|>" };
-
- auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
- common_chat_templates_inputs inputs;
- inputs.messages = {
- std::invoke([&]() -> common_chat_msg {
- common_chat_msg msg;
- msg.role = "system";
- msg.content = "force json schema.\n";
- return msg;
- }),
- message_user,
- };
- inputs.tools = {special_function_tool};
- return inputs;
- });
-
- {
- auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
- assert_equals(false, params.grammar_lazy);
- assert_equals(std::string(R"(<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
- }
-
- {
- auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
- assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
- assert_equals(false, params.grammar_lazy);
- assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
- assert_equals(true, params.grammar.empty());
- }
-
- {
- auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
- assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
- assert_equals(true, params.grammar_lazy);
- assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
- assert_equals(false, params.grammar.empty());
- }
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test single tool call with JSON format
- common_chat_msg msg_single_tool_call;
- msg_single_tool_call.role = "assistant";
- msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
- assert_msg_equals(
- msg_single_tool_call,
- test_chat_parse(
- "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test tool call with string argument
- common_chat_msg msg_tool_call_string;
- msg_tool_call_string.role = "assistant";
- msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
- assert_msg_equals(
- msg_tool_call_string,
- test_chat_parse(
- "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test tool call with multiple arguments
- common_chat_msg msg_multi_args;
- msg_multi_args.role = "assistant";
- msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
- assert_msg_equals(
- msg_multi_args,
- test_chat_parse(
- "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test multiple tool calls in single array
- common_chat_msg msg_multiple_tools;
- msg_multiple_tools.role = "assistant";
- msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
- msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
- assert_msg_equals(
- msg_multiple_tools,
- test_chat_parse(
- "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test tool call with content before
- common_chat_msg msg_content_before_tool;
- msg_content_before_tool.role = "assistant";
- msg_content_before_tool.content = "Let me check the weather for you.";
- msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
- assert_msg_equals(
- msg_content_before_tool,
- test_chat_parse(
- "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test tool call with content after
- common_chat_msg msg_content_after_tool;
- msg_content_after_tool.role = "assistant";
- msg_content_after_tool.content = "Here's the result.";
- msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
- assert_msg_equals(
- msg_content_after_tool,
- test_chat_parse(
- "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Test tool call with newlines (common in LLM output)
- common_chat_msg msg_tool_call_newlines;
- msg_tool_call_newlines.role = "assistant";
- msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
- assert_msg_equals(
- msg_tool_call_newlines,
- test_chat_parse(
- "<|tool_call_start|>[{\n \"name\": \"get_current_time\",\n \"arguments\": {\n \"location\": \"Paris\"\n }\n}]<|tool_call_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
- // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
- // Unlike other formats, LFM2 template does not render tool calls in conversation history,
- // so we don't use test_templates() for tool call generation. Instead, the parsing tests
- // above verify edge cases and format variations for the tool call output format.
+ // MiniMax-M2 tests - XML invoke format with parameter tags
+ // Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
+ {
+ auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
+ tst.test(
+ "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
+ "name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
}
+ // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format
+ // Format: <TOOLCALL>[{"name": "func", "arguments": {...}}]</TOOLCALL>
{
- auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
- std::vector<std::string> end_tokens{ "[e~[" };
-
- assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
- // Test parsing tool calls with thinking
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test tool calls with extra content
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_MINIMAX_M2}
- ));
-
- // Test tool calls with extra content AND thinking
- assert_msg_equals(message_assist_call_thoughts_content,
- test_chat_parse(
- "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test streaming
- test_parser_with_streaming(message_assist_call_thoughts_content,
- "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(message_assist_call_thoughts_unparsed,
- "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
- test_parser_with_streaming(message_assist_call_thoughts_content,
- "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(message_assist_call_withopt,
- "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
-
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "Hello, world!\nWhat's up?",
- /* expect_grammar_triggered= */ false);
-
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
- /* ignore_whitespace_differences= */ true
- );
-
- // Test template generation for tools with optional parameters
- test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
- "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
- /* ignore_whitespace_differences= */ true
- );
- test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
- "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
- /* ignore_whitespace_differences= */ true
- );
+ auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug);
+ tst.test("<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL><SPECIAL_12>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
}
+ // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION)
{
- auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
- std::vector<std::string> end_tokens{ "<|assistant|>", "<|observation|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GLM_4_5}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }), true);
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GLM_4_5}), true);
-
- // Test parsing tool calls with thinking
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }), true);
-
- // Test tool calls with extra content
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_GLM_4_5}
- ), true);
-
- // Test tool calls with extra content AND thinking
- assert_msg_equals(message_assist_call_thoughts_content,
- test_chat_parse(
- "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }), true);
-
- // Test streaming
- test_parser_with_streaming(message_assist_call_thoughts_content,
- "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(message_assist_call_thoughts_unparsed,
- "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
- test_parser_with_streaming(message_assist_call_withopt,
- "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(
- simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
- "<tool_call>complex_function\n"
- "<arg_key>name</arg_key>\n"
- "<arg_value>John Doe</arg_value>\n"
- "<arg_key>age</arg_key>\n"
- "<arg_value>30</arg_value>\n"
- "<arg_key>active</arg_key>\n"
- "<arg_value>true</arg_value>\n"
- "<arg_key>score</arg_key>\n"
- "<arg_value>95.5</arg_value>\n"
- "</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
- test_parser_with_streaming(
- simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
- "<tool_call>web_search\n"
- "<arg_key>query</arg_key>\n"
- "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
- "<arg_key>limit</arg_key>\n"
- "<arg_value>3</arg_value>\n"
- "<arg_key>type</arg_key>\n"
- "<arg_value>text</arg_value>\n"
- "</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-
- // Test interleaved thinking
- test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
- "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
- "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
-
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "\n<think></think>\nHello, world!\nWhat's up?",
- /* expect_grammar_triggered= */ false);
-
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ false,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
-
- // Test template generation for tools with optional parameters
- test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
- "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ false,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
- test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
- "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ false,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
+ auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+ tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+ tst.test(
+ "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+ "<|START_ACTION|>[\n"
+ " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+ "]<|END_ACTION|>")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .expect(message_assist_thoughts_call_idx)
+ .run();
+ }
+ // CohereForAI-c4ai-command-r-plus (uses markdown code block format)
+ {
+ auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug);
+ tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run();
+ // Tool calls: Action: followed by JSON code block
+ tst.test(
+ "Action:\n"
+ "```json\n"
+ "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n"
+ "```")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
+
+ // mistralai-Mistral-Nemo-Instruct-2407.jinja
+ {
+ auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]")
+ .tools({ special_function_tool })
+ .expect(message_assist_call_id)
+ .run();
}
-
{
- auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
- std::vector<std::string> end_tokens{ "<|im_end|>" };
-
- assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
- assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Test parsing regular content
- assert_msg_equals(message_assist,
- test_chat_parse(
- "Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_KIMI_K2}));
-
- // Test parsing content with thinking
- assert_msg_equals(message_assist_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
- }));
-
- // Test parsing tool calls
- assert_msg_equals(message_assist_call,
- test_chat_parse(
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_KIMI_K2}));
-
- // Test parsing tool calls with thinking
- assert_msg_equals(message_assist_call_thoughts,
- test_chat_parse(
- "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test tool calls with extra content
- assert_msg_equals(message_assist_call_content,
- test_chat_parse(
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {COMMON_CHAT_FORMAT_KIMI_K2}
- ));
-
- // Test tool calls with extra content AND thinking
- assert_msg_equals(message_assist_call_thoughts_content,
- test_chat_parse(
- "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
- /* is_partial= */ false,
- {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }));
-
- // Test streaming
- test_parser_with_streaming(message_assist_call_thoughts_content,
- "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(message_assist_call_thoughts_unparsed,
- "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
- test_parser_with_streaming(message_assist_call_thoughts_content,
- "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(message_assist_call_withopt,
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
- }); });
- test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
- "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
- /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(
- simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
- "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
- "<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
- test_parser_with_streaming(
- simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
- "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
- "<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
- test_parser_with_streaming(
- simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
- "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
- "<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
- test_parser_with_streaming(
- simple_assist_msg(
- "Let me start by examining the relevant files to understand the current implementation.", "",
- "read_file",
- "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
- "Let me start by examining the relevant files to understand the current implementation."
- "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
- "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
- "<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
- auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
- multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
- multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
- multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
- multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" });
- test_parser_with_streaming(multi_tool_msg,
- "<think>I'm thinking.</think>Let me call multiple tools."
- "<|tool_calls_section_begin|>"
- "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
- "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
- "<|tool_call_end|>"
- "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
- "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
- "<|tool_call_end|>"
- "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
- "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
- "<|tool_call_end|>"
- "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
- "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}"
- "<|tool_call_end|>"
- "<|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- COMMON_CHAT_FORMAT_KIMI_K2,
- COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(
- simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
- "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
- "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
- "<|tool_call_end|><|tool_calls_section_end|>",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- COMMON_CHAT_FORMAT_KIMI_K2,
- COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
- test_parser_with_streaming(
- simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
- "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
- "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
- "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
- [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
- COMMON_CHAT_FORMAT_KIMI_K2,
- COMMON_REASONING_FORMAT_DEEPSEEK
- }); });
-
- // Test template rendering
- common_chat_templates_inputs conversation_with_tools = inputs_tools;
- conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
- conversation_with_tools.messages.push_back({
- "tool",
- "Tool response 1",
- /* .content_parts = */ {},
- /* .tool_calls = */ {},
- /* .reasoning_content = */ "",
- /* .tool_name = */ "complex_function",
- /* .tool_call_id = */ "",
- });
- conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
- conversation_with_tools.messages.push_back({
- "tool",
- "Tool response 2",
- /* .content_parts = */ {},
- /* .tool_calls = */ {},
- /* .reasoning_content = */ "",
- /* .tool_name = */ "web_search",
- /* .tool_call_id = */ "",
- });
- conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
- conversation_with_tools.messages.push_back({
- "tool",
- "Tool response 3",
- /* .content_parts = */ {},
- /* .tool_calls = */ {},
- /* .reasoning_content = */ "",
- /* .tool_name = */ "read_file",
- /* .tool_call_id = */ "",
- });
- assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
-
- // Test template generation for regular content
- test_templates(tmpls.get(), end_tokens, message_assist, tools,
- "<think></think>Hello, world!\nWhat's up?",
- /* expect_grammar_triggered= */ false);
-
- // Test template generation for tool calls
- test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
- "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
-
- // Test template generation for tools with optional parameters
- test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
- "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
- test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
- "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
- /* expect_grammar_triggered= */ true,
- /* test_grammar_if_triggered= */ true,
- /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
- /* ignore_whitespace_differences= */ true
- );
+ auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("<function=special_function>{\"arg1\": 1}</function>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
+ // Functionary v3.2 - recipient-based format: >>>recipient\n{content}
+ {
+ auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
+ tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test(">>>special_function\n{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
}
+ // FireFunction
{
- // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
- // but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
- // schema-aware parameter parsing.
- auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
- assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
- // Grammar and PEG parser should be generated with thinking_forced_open
- {
- common_chat_templates_inputs inputs;
- inputs.messages = { message_user };
- inputs.tools = { special_function_tool };
- auto params = common_chat_templates_apply(tmpls.get(), inputs);
- assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
- assert_equals(true, params.thinking_forced_open);
- assert_equals(false, params.grammar.empty());
- assert_equals(false, params.parser.empty());
- auto grammar = build_grammar(params.grammar);
- GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar");
- }
+ auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
}
-}
-static void test_template_output_peg_parsers() {
- printf("[%s]\n", __func__);
+ // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking)
+ // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+ {
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?")
+ .enable_thinking(true) // Forced open
+ .expect(message_assist)
+ .run();
+ tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
+ }
+ // llama-cpp DeepSeek R1 template (always forced-open thinking)
+ {
+ auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
+ tst.test(
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>")
+ .tools({ special_function_tool })
+ .parallel_tool_calls(true)
+ .expect(message_assist_call)
+ .run();
+ }
+ // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking)
+ // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+ {
+ auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run();
+ tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .expect(message_assist_thoughts)
+ .run();
+ tst.test(
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
+ "```json\n{\"arg1\": 1}```<|tool▁call▁end|><|tool▁calls▁end|>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
+
+ // MiMo-VL / Hermes 3 / Qwen 2.5 (Common <tool_call> JSON format)
+ for (const auto & path :
+ { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+ "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) {
+ auto tst = peg_tester(path, detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
+
+ // Apriel 1.5
+ {
+ auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("<tool_calls>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</tool_calls>")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ }
- // JSON schemas
- const char * invoice_schema = R"({
- "type": "object",
- "properties": {
- "amount": {"type": "number"},
- "date": {"type": "string"}
- }
- })";
+ // Apriel 1.6 Thinker (reasoning-only support)
+ {
+ auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ // Implicit reasoning start (forced open)
+ tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect(message_assist_thoughts)
+ .run();
+
+ // Reasoning + Tool calls
+ tst.test(
+ "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
+ "{\"arg1\": 1}}]</tool_calls>")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+ }
+
+ // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...}
+ {
+ auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call_id)
+ .run();
+ }
+ // Devstral
+ {
+ auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+ tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+ tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call_content)
+ .run();
+ }
{
- // Ministral-3-14B-Reasoning-2512
- auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
-
- // Test basic message
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "Hello, world!\nWhat's up?";
- t.expect = message_assist;
- });
-
- // Test basic message and reasoning with reasoning_format = none
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
- t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
- });
-
- // Test basic message and reasoning with reasoning_format = auto
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
- t.expect = message_assist_thoughts;
- });
-
- // Test tool call
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call;
- });
-
- // Test tool call with reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "[THINK]I'm\nthinking[/THINK]"
- R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call_thoughts;
- });
-
- // Test parallel tool calls
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
- R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ {},
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ {},
- }};
- });
-
- // Test response format
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
- "```json\n"
- R"({"amount": 123.45, "date": "2025-12-03"})"
- "\n```";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.json_schema = invoice_schema;
-
- t.expect.reasoning_content = "I need to output the invoice details in JSON";
- t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
- });
+ // Llama 3.1
+ auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
}
{
- // Qwen3-Coder
- auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
-
- // Test basic message
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "Hello, world!\nWhat's up?";
- t.expect = message_assist;
- });
-
- // Test tool call
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.tools = {special_function_tool};
- t.expect = message_assist_call;
- });
-
- // Test parallel tool calls
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "<function=special_function_with_opt>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "<parameter=arg2>\n"
- "2\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ {},
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with string parameter
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with JSON parameter
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=todo_list>\n"
- "<parameter=todos>\n"
- "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.tools = {todo_list_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "todo_list",
- /* .arguments = */ "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with string parameter and no closing </parameter> tag
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</function>\n"
- "</tool_call>";
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test response format
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = R"({"amount": 123.45, "date": "2025-12-03"})";
- t.params.json_schema = invoice_schema;
-
- t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
- });
+ // Llama 3.2
+ auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
}
{
- // NVIDIA Nemotron-3 Nano
- auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
-
- // Test basic message
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "Hello, world!\nWhat's up?";
- t.expect = message_assist;
- });
-
- // Test basic message and reasoning with reasoning_format = none
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
- t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
- });
-
- // Test basic message and reasoning with reasoning_format = auto
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
- t.params.enable_thinking = true;
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
- t.expect = message_assist_thoughts;
- });
-
- // Test tool call
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.enable_thinking = false;
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call;
- });
-
- // Test tool call with reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "I'm\nthinking\n</think>\n"
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call_thoughts;
- });
-
- // Test parallel tool calls
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "<function=special_function_with_opt>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "<parameter=arg2>\n"
- "2\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.enable_thinking = false;
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ {},
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with string parameter
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.enable_thinking = false;
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with string parameter and no closing </parameter> tag
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</function>\n"
- "</tool_call>";
- t.params.enable_thinking = false;
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test response format
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "I need to output the invoice details in JSON\n"
- "</think>\n"
- R"({"amount": 123.45, "date": "2025-12-03"})";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.json_schema = invoice_schema;
-
- t.expect.reasoning_content = "I need to output the invoice details in JSON";
- t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
- });
+ // Llama 3.3
+ auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug);
+ tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run();
}
+ // GPT-OSS format tests
{
- // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open)
- // Unlike Nemotron, Step-3.5-Flash always emits <think> regardless of enable_thinking,
- // so all inputs must include a </think> delimiter.
- auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-
- // Test basic message with reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
- t.expect = message_assist_thoughts;
- });
-
- // Test basic message without thinking content
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "</think>\nHello, world!\nWhat's up?";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
- t.expect = message_assist;
- });
-
- // Test tool call without thinking content
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "</think>\n"
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call;
- });
-
- // Test tool call with thinking
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "I'm\nthinking\n</think>\n"
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {special_function_tool};
-
- t.expect = message_assist_call_thoughts;
- });
-
- // Test parallel tool calls with thinking
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "I'm\nthinking\n</think>\n"
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "<function=special_function_with_opt>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "<parameter=arg2>\n"
- "2\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.reasoning_content = "I'm\nthinking";
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ {},
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ {},
- }};
- });
-
- // Test parallel tool calls without thinking content
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "</think>\n"
- "<tool_call>\n"
- "<function=special_function>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>\n"
- "<tool_call>\n"
- "<function=special_function_with_opt>\n"
- "<parameter=arg1>\n"
- "1\n"
- "</parameter>\n"
- "<parameter=arg2>\n"
- "2\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ {},
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with code string parameter
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "</think>\n"
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</parameter>\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test tool call with string parameter and no closing </parameter> tag
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "</think>\n"
- "<tool_call>\n"
- "<function=python>\n"
- "<parameter=code>\n"
- "def hello():\n"
- " print(\"Hello, world!\")\n"
- "\n"
- "hello()\n"
- "</function>\n"
- "</tool_call>";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.tools = {python_tool};
-
- t.expect.tool_calls = {{
- /* .name = */ "python",
- /* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
- /* .id = */ {},
- }};
- });
-
- // Test response format (JSON schema with thinking)
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input =
- "I need to output the invoice details in JSON\n"
- "</think>\n"
- R"({"amount": 123.45, "date": "2025-12-03"})";
- t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
- t.params.json_schema = invoice_schema;
-
- t.expect.reasoning_content = "I need to output the invoice details in JSON";
- t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
- });
+ auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug);
+
+ // Basic content only - final channel
+ tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ // Basic content only - commentary channel
+ tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+ // Analysis channel (reasoning) with final channel (content)
+ tst.test(
+ "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+ "up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect(message_assist_thoughts)
+ .run();
+
+ // Analysis channel only (partial) - still works when reasoning format is set
+ tst.test("<|channel|>analysis<|message|>I'm\nthinking")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .is_partial(true)
+ .expect_reasoning("I'm\nthinking")
+ .run();
+
+ // Reasoning format none - reasoning stays in content
+ tst.test(
+ "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+ "up?")
+ .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+ .expect_content(
+ "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
+ .run();
+
+ // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
+ tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON"
+ tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON"
+ tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ // Tool call in commentary channel (channel header variant)
+ tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}")
+ .tools({ special_function_tool })
+ .expect(message_assist_call)
+ .run();
+
+ // Tool call with reasoning + content (analysis first, then tool call)
+ tst.test(
+ "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n"
+ "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+
+ // Tool calling with extra channel before
+ tst.test(
+ "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary"
+ " to=functions.special_function <|message|>{\"arg1\": 1}")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({ special_function_tool })
+ .expect(message_assist_call_thoughts)
+ .run();
+
+ // Reasoning after final channel
+ // Tool calling after final channel
+ tst.test(
+ "<|channel|>final<|message|><|end|>"
+ "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..."
+ )
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .expect_reasoning("Thinking about edit...")
+ .expect_content("")
+ .run();
+
+ // Tool calling after final channel
+ tst.test(
+ "<|channel|>final<|message|><|end|>"
+ "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>"
+ "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
+ "<|message|>{\"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
+ )
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({
+ {
+ /* .name = */ "edit",
+ /* .description = */ "Edit a file",
+ /* .parameters = */ R"({
+ "type": "object",
+ "properties": {
+ "oldString": {
+ "type": "string",
+ "description": "Old string to replace."
+ },
+ "newString": {
+ "type": "string",
+ "description": "New replacement string."
+ },
+ "replaceAll": {
+ "type": "boolean",
+ "description": "Whether to replace all occurences."
+ }
+ },
+ "required": ["oldString", "newString"]
+ })",
+ }
+ })
+ .expect_reasoning("Thinking about edit...")
+ .expect_tool_calls({
+ { "edit", R"({"oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} }
+ })
+ .run();
+
+ // Parallel tool calls
+ tst.test(
+ " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n"
+ "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, "
+ "\"arg2\": 2}")
+ .parallel_tool_calls(true)
+ .tools({
+ special_function_tool, special_function_tool_with_optional_param
+ })
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 1})", {} },
+ { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+ })
+ .run();
}
{
- // Solar-Open-100B
- auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
-
- // Test basic message
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|content|>Hello, world!\nWhat's up?";
- t.expect = message_assist;
- });
-
- // Test basic message and reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
- t.expect = message_assist_thoughts;
- });
-
- // Test basic message and reasoning_effort = low
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|content|>Hello, world!\nWhat's up?";
- t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
- t.expect = message_assist;
- });
-
- // Test tool call
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|tool_calls|>"
- "<|tool_call:begin|>123456789"
- "<|tool_call:name|>special_function"
- "<|tool_call:args|>{\"arg1\":1}"
- "<|tool_call:end|>";
-
- t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
- t.params.tools = {special_function_tool};
- t.expect = message_assist_call_id;
- });
-
- // Test tool call with reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|think|>I'm\nthinking<|end|>"
- "<|begin|>assistant<|tool_calls|>"
- "<|tool_call:begin|>0"
- "<|tool_call:name|>special_function"
- "<|tool_call:args|>{\"arg1\":1}"
- "<|tool_call:end|>";
-
- t.params.tools = {special_function_tool};
- t.expect = message_assist_thoughts_call_idx;
- });
-
- // Test tool call with reasoning and tool_choice = required
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|think|>I'm\nthinking<|end|>"
- "<|begin|>assistant<|tool_calls|>"
- "<|tool_call:begin|>0"
- "<|tool_call:name|>special_function"
- "<|tool_call:args|>{\"arg1\":1}"
- "<|tool_call:end|>";
-
- t.params.tools = {special_function_tool};
- t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- t.expect = message_assist_thoughts_call_idx;
- });
-
- // Test tool call without reasoning and tool_choice = required
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|tool_calls|>"
- "<|tool_call:begin|>0"
- "<|tool_call:name|>special_function"
- "<|tool_call:args|>{\"arg1\":1}"
- "<|tool_call:end|>";
-
- t.params.tools = {special_function_tool};
- t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
- t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
- t.expect = message_assist_call_idx;
- });
-
- // Test parallel tool calls
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|think|>I'm\nthinking<|end|>"
- "<|begin|>assistant<|tool_calls|>"
- "<|tool_call:begin|>0"
- "<|tool_call:name|>special_function"
- "<|tool_call:args|>{\"arg1\":1}"
- "<|tool_call:end|>"
- "<|tool_call:begin|>1"
- "<|tool_call:name|>special_function_with_opt"
- "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
- "<|tool_call:end|>";
-
- t.params.parallel_tool_calls = true;
- t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
- t.expect.reasoning_content = "I'm\nthinking";
- t.expect.tool_calls = {{
- /* .name = */ "special_function",
- /* .arguments = */ R"({"arg1": 1})",
- /* .id = */ "0",
- }, {
- /* .name = */ "special_function_with_opt",
- /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
- /* .id = */ "1",
- }};
- });
-
- // Test response format
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
- "<|begin|>assistant<|content|>"
- R"({"amount": 123.45, "date": "2025-12-03"})";
-
- t.params.json_schema = invoice_schema;
-
- t.expect.reasoning_content = "I need to output the invoice details in JSON";
- t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
- });
-
- // Test response format no reasoning
- test_peg_parser(tmpls.get(), [&](auto & t) {
- t.input = "<|content|>"
- R"({"amount": 123.45, "date": "2025-12-03"})";
-
- t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
- t.params.json_schema = invoice_schema;
-
- t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
- });
+ auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
+ tst.test("I was thinking</think>\nNow I'm not.").
+ enable_thinking(true).
+ reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
+ expect_reasoning("I was thinking").
+ expect_content("Now I'm not.")
+ .run();
+
+ // Test that numeric-looking string values are coerced to strings per the schema
+ tst.test(
+ "Let me call the magic tool\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=magic>\n"
+ "<parameter=name>\nfooBar\n</parameter>\n"
+ "<parameter=ref>\n5123123\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ magic_tool })
+ .expect_reasoning("Let me call the magic tool")
+ .expect_tool_calls({
+ { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} },
+ })
+ .run();
+
+ // Test that numeric values are correctly interpreted as numbers when schema calls for number
+ tst.test(
+ "Let me call the special function\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=special_function>\n"
+ "<parameter=arg1>\n42555916\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool })
+ .expect_reasoning("Let me call the special function")
+ .expect_tool_calls({
+ { "special_function", R"({"arg1": 42555916})", {} },
+ })
+ .run();
+
+ tst.test(
+ "Let me call the special function with opt\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=special_function_with_opt>\n"
+ "<parameter=arg1>\n42555916\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ special_function_tool_with_optional_param })
+ .expect_reasoning("Let me call the special function with opt")
+ .expect_tool_calls({
+ { "special_function_with_opt", R"({"arg1": 42555916})", {} },
+ })
+ .run();
+
+ tst.test(
+ "Let me call the magic_int function\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=magic_int>\n"
+ "<parameter=ref>\n42555916\n</parameter>\n"
+ "<parameter=name>\nbaz\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ magic_int_tool })
+ .expect_reasoning("Let me call the magic_int function")
+ .expect_tool_calls({
+ { "magic_int", R"({"ref": 42555916, "name": "baz"})", {} },
+ })
+ .run();
+
+ tst.test(
+ "Call string_param with empty text\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=string_param>\n"
+ "<parameter=text>\n\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ string_param_tool })
+ .expect_reasoning("Call string_param with empty text")
+ .expect_tool_calls({
+ { "string_param", R"({"text": ""})", {} },
+ })
+ .run();
+
+ tst.test(
+ "Test simple quoted unquoted\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=quoted_unquoted>\n"
+ "<parameter=quoted>\n\"foo\"\n</parameter>\n"
+ "<parameter=unquoted>\nfoo\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ quoted_unquoted_tool })
+ .expect_reasoning("Test simple quoted unquoted")
+ .expect_tool_calls({
+ { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} },
+ })
+ .run();
+
+ tst.test(
+ "Test complex quoted unquoted\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=quoted_unquoted>\n"
+ "<parameter=quoted>\n\"printf(\\\"foo\\\");\"\n</parameter>\n"
+ "<parameter=unquoted>\nprintf(\"foo\");\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ quoted_unquoted_tool })
+ .expect_reasoning("Test complex quoted unquoted")
+ .expect_tool_calls({
+ { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} }
+ })
+ .run();
+
+ tst.test(
+ "Test negative number\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=magic_int>\n"
+ "<parameter=ref>\n-14\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ magic_int_tool })
+ .expect_reasoning("Test negative number")
+ .expect_tool_calls({
+ { "magic_int", R"({ "ref" : -14 })", {} }
+ })
+ .run();
+
+ tst.test(
+ "Test decimal number\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=amount>\n"
+ "<parameter=orig>\n3.14\n</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ amount_tool })
+ .expect_reasoning("Test decimal number")
+ .expect_tool_calls({
+ { "amount", R"({ "orig" : 3.14 })", {} }
+ })
+ .run();
+
+ tst.test(
+ "Test imaginary number\n"
+ "</think>\n"
+ "<tool_call>\n"
+ "<function=imaginary_number>\n"
+ "<parameter=number>\n"
+ "{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
+ "</parameter>\n"
+ "</function>\n"
+ "</tool_call>")
+ .enable_thinking(true)
+ .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+ .tools({ imaginary_number_tool })
+ .expect_reasoning("Test imaginary number")
+ .expect_tool_calls({
+ { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
+ })
+ .run();
+
}
}
static void test_msg_diffs_compute() {
- printf("[%s]\n", __func__);
+ LOG_DBG("%s\n", __func__);
{
common_chat_msg msg1;
common_chat_msg_diff diff;
diff.content_delta = "Hello, world!";
- assert_equals(
- {diff},
- common_chat_msg_diff::compute_diffs(msg1, msg2));
+ assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
}
{
common_chat_msg msg1;
common_chat_msg_diff diff;
diff.content_delta = " world!";
- assert_equals(
- {diff},
- common_chat_msg_diff::compute_diffs(msg1, msg2));
+ assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
}
{
common_chat_msg msg0;
common_chat_msg msg1;
- msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
+ msg1.tool_calls = {
+ { "special_function", "{\"ar", /* .id = */ "123" }
+ };
common_chat_msg msg2;
- msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
+ msg2.tool_calls = {
+ { "special_function", "{\"arg1\": 1}", /* .id = */ "123" }
+ };
common_chat_msg_diff diff01;
- diff01.tool_call_index = 0;
- diff01.tool_call_delta.name = "special_function";
- diff01.tool_call_delta.id = "123";
+ diff01.tool_call_index = 0;
+ diff01.tool_call_delta.name = "special_function";
+ diff01.tool_call_delta.id = "123";
diff01.tool_call_delta.arguments = "{\"ar";
- assert_equals(
- {diff01},
- common_chat_msg_diff::compute_diffs(msg0, msg1));
+ assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1));
common_chat_msg_diff diff12;
- diff12.tool_call_index = 0;
+ diff12.tool_call_index = 0;
// Note: neither id nor name change here.
diff12.tool_call_delta.arguments = "g1\": 1}";
- assert_equals(
- {diff12},
- common_chat_msg_diff::compute_diffs(msg1, msg2));
+ assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2));
}
{
common_chat_msg msg0;
};
common_chat_msg_diff diff1;
- diff1.tool_call_index = 0;
- diff1.tool_call_delta.name = "f1";
- diff1.tool_call_delta.id = "123";
+ diff1.tool_call_index = 0;
+ diff1.tool_call_delta.name = "f1";
+ diff1.tool_call_delta.id = "123";
diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
common_chat_msg_diff diff2;
- diff2.tool_call_index = 1;
- diff2.tool_call_delta.name = "f2";
- diff2.tool_call_delta.id = "222";
+ diff2.tool_call_index = 1;
+ diff2.tool_call_delta.name = "f2";
+ diff2.tool_call_delta.id = "222";
diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
- assert_equals(
- {diff1, diff2},
- common_chat_msg_diff::compute_diffs(msg0, msg2));
+ assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2));
}
}
int main(int argc, char ** argv) {
- common_log_set_verbosity_thold(999);
+ bool detailed_debug = false;
+ bool only_run_filtered = false;
+
+ // Check for --template flag
+ for (int i = 1; i < argc; i++) {
+ std::string arg = argv[i];
+ if (arg == "--template" && i + 1 < argc) {
+ g_template_filter = argv[++i];
+ // Only run PEG parser tests with the filter
+ only_run_filtered = true;
+ }
+ if (arg == "--detailed") {
+ detailed_debug = true;
+ common_log_set_verbosity_thold(999);
+ }
+ }
+
+ if (only_run_filtered) {
+ test_template_output_peg_parsers(detailed_debug);
+ std::cout << "\n[chat] All template tests passed!" << '\n';
+ return 0;
+ }
- // try {
#ifndef _WIN32
- if (argc > 1) {
- common_chat_templates_inputs inputs;
- common_chat_msg msg;
- msg.role = "user";
- msg.content = "Hey";
- inputs.messages = {msg};
- inputs.tools = { special_function_tool };
-
- std::cout << "| Template | Format |\n";
- std::cout << "|----------|--------|\n";
-
- for (int i = 1; i < argc; i++) {
- try {
- std::string path = argv[i];
- if (path.rfind(".jinja") != path.size() - 6) {
- std::cerr << "Skipping non-jinja file: " << path << '\n';
- continue;
- }
- auto tmpls = read_templates(path);
- auto parts = string_split(path, "/");
- auto name = parts[parts.size() - 1];
- auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
- std::cout << "| " << name << " | " << format << " |\n";
- } catch (const std::exception & e) {
- std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
+ if (argc > 1) {
+ common_chat_templates_inputs inputs;
+ common_chat_msg msg;
+ msg.role = "user";
+ msg.content = "Hey";
+ inputs.messages = { msg };
+ inputs.tools = { special_function_tool };
+
+ std::cout << "| Template | Format |\n";
+ std::cout << "|----------|--------|\n";
+
+ for (int i = 1; i < argc; i++) {
+ try {
+ std::string path = argv[i];
+ if (path.rfind(".jinja") != path.size() - 6) {
+ std::cerr << "Skipping non-jinja file: " << path << '\n';
+ continue;
}
+ auto tmpls = read_templates(path);
+ auto parts = string_split(path, "/");
+ const auto & name = parts[parts.size() - 1];
+ const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
+ std::cout << "| " << name << " | " << format << " |\n";
+ } catch (const std::exception & e) {
+ std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
}
- } else
-#endif
- {
- test_msg_diffs_compute();
- test_msgs_oaicompat_json_conversion();
- test_tools_oaicompat_json_conversion();
- test_template_output_parsers();
- test_template_output_peg_parsers();
- std::cout << "\n[chat] All tests passed!" << '\n';
}
- return 0;
- // } catch (const std::exception & e) {
- // std::cerr << "Error: " << e.what() << '\n';
- // return 1;
- // }
+ } else
+#endif
+ {
+ test_msg_diffs_compute();
+ test_msgs_oaicompat_json_conversion();
+ test_tools_oaicompat_json_conversion();
+ test_template_output_peg_parsers(detailed_debug);
+ std::cout << "\n[chat] All tests passed!" << '\n';
+ }
+ return 0;
}
)"""
});
+ test({
+ SUCCESS,
+ "description only (no type) treated as unconstrained",
+ R"""({"description": "The 0-based index of the last line to be retrieved (inclusive). If None, read until the end of the file."})""",
+ R"""(
+ array ::= "[" space ( value ("," space value)* )? "]" space
+ boolean ::= ("true" | "false") space
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+ decimal-part ::= [0-9]{1,16}
+ integral-part ::= [0] | [1-9] [0-9]{0,15}
+ null ::= "null" space
+ number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+ object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+ root ::= value
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ string ::= "\"" char* "\"" space
+ value ::= object | array | string | number | boolean | null
+ )"""
+ });
+
test({
SUCCESS,
"literal string with escapes",
t.test("json", test_json_parser);
t.test("gbnf", test_gbnf_generation);
t.test("serialization", test_json_serialization);
+ t.test("python-dict", test_python_dict_parser);
return t.summary();
}
add_subdirectory(server)
endif()
add_subdirectory(tokenize)
+ add_subdirectory(parser)
add_subdirectory(tts)
add_subdirectory(mtmd)
if (GGML_RPC)
+#include "chat.h"
#include "common.h"
#include "arg.h"
#include "console.h"
inputs.use_jinja = chat_params.use_jinja;
inputs.parallel_tool_calls = false;
inputs.add_generation_prompt = true;
- inputs.enable_thinking = chat_params.enable_thinking;
+ inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+ inputs.enable_thinking = common_chat_templates_support_enable_thinking(chat_params.tmpls.get());
// Apply chat template to the list of messages
return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
--- /dev/null
+if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
+ # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API
+ set(TARGET llama-debug-template-parser)
+ add_executable(${TARGET} debug-template-parser.cpp)
+ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+ target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+ if(LLAMA_TOOLS_INSTALL)
+ install(TARGETS ${TARGET} RUNTIME)
+ endif()
+endif()
+
+set(TARGET llama-template-analysis)
+add_executable(${TARGET} template-analysis.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+ install(TARGETS ${TARGET} RUNTIME)
+endif()
--- /dev/null
+#include "../src/llama-grammar.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "gguf.h"
+#include "jinja/runtime.h"
+#include "log.h"
+
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include <string>
+
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+using json = nlohmann::ordered_json;
+
+enum class output_mode {
+ ANALYSIS, // Only output analysis results (default)
+ TEMPLATE, // Only output rendered template
+ BOTH // Output both
+};
+
+enum class input_message_type {
+ NONE, // Don't render any message scenarios (only analysis)
+ CONTENT_ONLY, // Simple assistant message with content
+ REASONING_CONTENT, // Message with reasoning_content + content
+ TOOL_CALL_ONLY, // Message with tool_calls only
+ CONTENT_TOOL_CALL, // Message with content + tool_calls
+ REASONING_TOOL_CALL, // Message with reasoning_content + tool_calls
+ CONTENT_FAKE_TOOL_CALL, // Message with content but no actual tool_calls (for testing)
+ ALL // Render all scenarios
+};
+
+struct debug_options {
+ std::string template_path;
+ bool with_tools = true;
+ bool generation_prompt = true;
+ bool enable_reasoning = true;
+ bool debug_jinja = false;
+ bool force_tool_call = false;
+ output_mode mode = output_mode::BOTH;
+ input_message_type input_message = input_message_type::NONE;
+};
+
+static std::string read_file(const std::string & path) {
+ std::ifstream fin(path, std::ios::binary);
+ if (!fin.is_open()) {
+ throw std::runtime_error("Could not open file: " + path);
+ }
+ std::ostringstream buf;
+ buf << fin.rdbuf();
+ return buf.str();
+}
+
+static std::string read_gguf_chat_template(const std::string & path) {
+ struct gguf_init_params params = { /*no_alloc =*/true, // We only need metadata, not tensor data
+ /*ctx=*/nullptr };
+
+ struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+ if (ctx == nullptr) {
+ throw std::runtime_error("Could not open GGUF file: " + path);
+ }
+
+ const char * key = "tokenizer.chat_template";
+ int64_t key_id = gguf_find_key(ctx, key);
+
+ if (key_id == -1) {
+ gguf_free(ctx);
+ throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
+ }
+
+ const char * template_str = gguf_get_val_str(ctx, key_id);
+ if (template_str == nullptr) {
+ gguf_free(ctx);
+ throw std::runtime_error("GGUF file contains chat template key but value is null");
+ }
+
+ std::string result = template_str;
+ gguf_free(ctx);
+ return result;
+}
+
+static void print_usage(const char * program_name) {
+ LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
+ LOG_ERR("\nOptions:\n");
+ LOG_ERR(" --no-tools Disable tool definitions\n");
+ LOG_ERR(" --force-tool-call Set tool calls to forced\n");
+ LOG_ERR(" --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
+ LOG_ERR(" --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n");
+ LOG_ERR(" --output=MODE Output mode: analysis, template, both (default: both)\n");
+ LOG_ERR(" --debug-jinja Enable Jinja fine-grained debug\n");
+ LOG_ERR(" --input-message=TYPE Message type to render:\n");
+ LOG_ERR(" content_only, reasoning_content, tool_call_only,\n");
+ LOG_ERR(" content_tool_call, reasoning_tool_call,\n");
+ LOG_ERR(" content_fake_tool_call, all\n");
+ LOG_ERR("\nExamples:\n");
+ LOG_ERR(" %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
+ LOG_ERR(" %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
+}
+
+static bool parse_bool_option(const std::string & value) {
+ return value == "1" || value == "true" || value == "yes";
+}
+
+static bool parse_options(int argc, char ** argv, debug_options & opts) {
+ if (argc < 2) {
+ print_usage(argv[0]);
+ return false;
+ }
+
+ opts.template_path = argv[1];
+
+ for (int i = 2; i < argc; ++i) {
+ std::string arg = argv[i];
+
+ if (arg == "--force-tool-call") {
+ opts.force_tool_call = true;
+ } else if (arg == "--debug-jinja") {
+ opts.debug_jinja = true;
+ } else if (arg == "--no-tools") {
+ opts.with_tools = false;
+ } else if (arg.rfind("--generation-prompt=", 0) == 0) {
+ opts.generation_prompt = parse_bool_option(arg.substr(20));
+ } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
+ opts.enable_reasoning = parse_bool_option(arg.substr(19));
+ } else if (arg.rfind("--output=", 0) == 0) {
+ std::string mode = arg.substr(9);
+ if (mode == "analysis") {
+ opts.mode = output_mode::ANALYSIS;
+ } else if (mode == "template") {
+ opts.mode = output_mode::TEMPLATE;
+ } else if (mode == "both") {
+ opts.mode = output_mode::BOTH;
+ } else {
+ LOG_ERR("Unknown output mode: %s\n", mode.c_str());
+ return false;
+ }
+ } else if (arg.rfind("--input-message=", 0) == 0) {
+ std::string type = arg.substr(16);
+ if (type == "content_only") {
+ opts.input_message = input_message_type::CONTENT_ONLY;
+ } else if (type == "reasoning_content") {
+ opts.input_message = input_message_type::REASONING_CONTENT;
+ } else if (type == "tool_call_only") {
+ opts.input_message = input_message_type::TOOL_CALL_ONLY;
+ } else if (type == "content_tool_call") {
+ opts.input_message = input_message_type::CONTENT_TOOL_CALL;
+ } else if (type == "reasoning_tool_call") {
+ opts.input_message = input_message_type::REASONING_TOOL_CALL;
+ } else if (type == "content_fake_tool_call") {
+ opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
+ } else if (type == "all") {
+ opts.input_message = input_message_type::ALL;
+ } else {
+ LOG_ERR("Unknown input message type: %s\n", type.c_str());
+ return false;
+ }
+ } else {
+ LOG_ERR("Unknown option: %s\n", arg.c_str());
+ print_usage(argv[0]);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static json build_user_message() {
+ return json{
+ { "role", "user" },
+ { "content", "Hello, please help me with a task." }
+ };
+}
+
+static json build_content_only_message() {
+ return json{
+ { "role", "assistant" },
+ { "content", "Hello! I'm here to help you with your task." }
+ };
+}
+
+static json build_reasoning_content_message() {
+ return json{
+ { "role", "assistant" },
+ { "content", "Hello! I'm here to help you with your task." },
+ { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
+ };
+}
+
+static json build_tool_call_only_message() {
+ return json{
+ { "role", "assistant" },
+ { "content", nullptr },
+ { "tool_calls",
+ json::array({ json{
+ { "type", "function" },
+ { "function", json{ { "name", "test_function_name" },
+ { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
+ { "id", "123456789" } } }) }
+ };
+}
+
+static json build_content_tool_call_message() {
+ return json{
+ { "role", "assistant" },
+ { "content", "I'll help you by calling a function." },
+ { "tool_calls",
+ json::array({ json{
+ { "type", "function" },
+ { "function",
+ json{ { "name", "test_function_name" },
+ { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+ };
+}
+
+static json build_reasoning_tool_call_message() {
+ return json{
+ { "role", "assistant" },
+ { "content", nullptr },
+ { "reasoning_content", "I need to call a function to help with this task." },
+ { "tool_calls",
+ json::array({ json{
+ { "type", "function" },
+ { "function",
+ json{ { "name", "test_function_name" },
+ { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+ };
+}
+
+static json build_content_fake_tool_call_message() {
+ // This message has content but NO tool_calls field
+ // It's used to test if a template renders tool definitions but not tool calls
+ return json{
+ { "role", "assistant" },
+ { "content", "I'll help you by calling a function." }
+ };
+}
+
+static json build_tools_definition() {
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["param1"] = json::object({
+ { "type", "string" },
+ { "description", "First parameter" }
+ });
+ parameters_schema["properties"]["param2"] = json::object({
+ { "type", "string" },
+ { "description", "Second parameter" }
+ });
+ parameters_schema["required"] = json::array({ "param1" });
+
+ return json::array({
+ json{ { "type", "function" },
+ { "function", json{ { "name", "test_function_name" },
+ { "description", "A test function for debugging" },
+ { "parameters", parameters_schema } } } }
+ });
+}
+
+static void render_scenario(const common_chat_template & tmpl,
+ const std::string & scenario_name,
+ const json & messages,
+ const json & tools,
+ bool add_generation_prompt,
+ bool enable_thinking) {
+ LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
+ LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
+ enable_thinking ? "true" : "false");
+
+ // When add_generation_prompt is true, add a trailing user message to trigger the prompt
+ json final_messages = messages;
+ if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
+ final_messages.push_back(json{
+ { "role", "user" },
+ { "content", "Now please continue with another response." }
+ });
+ }
+
+ LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
+
+ try {
+ autoparser::templates_params inputs;
+ inputs.messages = final_messages;
+ inputs.add_generation_prompt = add_generation_prompt;
+ inputs.extra_context["enable_thinking"] = enable_thinking;
+
+ if (!tools.is_null() && tools.is_array() && !tools.empty()) {
+ inputs.tools = tools;
+ }
+
+ std::string output = common_chat_template_direct_apply(tmpl, inputs);
+
+ LOG_ERR("\n--- Rendered Output ---\n");
+ LOG_ERR("%s\n", output.c_str());
+ LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
+ } catch (const std::exception & e) {
+ LOG_ERR("Rendering failed: %s\n", e.what());
+ }
+}
+
+static void render_all_scenarios(const common_chat_template & tmpl,
+ const json & tools,
+ bool add_generation_prompt,
+ bool enable_thinking,
+ input_message_type message_type) {
+ json user_msg = build_user_message();
+
+ auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
+ if (message_type == input_message_type::ALL || message_type == type) {
+ json messages = json::array({ user_msg, assistant_msg });
+ render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
+ }
+ };
+
+ render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
+ render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
+ render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
+ render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
+ render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
+ render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
+ build_content_fake_tool_call_message());
+
+ // Also render with add_generation_prompt=true to show the prompt ending
+ if (message_type == input_message_type::ALL) {
+ LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
+
+ json prompt_messages = json::array({ user_msg });
+ render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
+
+ // With enable_thinking toggled
+ render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
+ }
+}
+
+int main(int argc, char ** argv) {
+ // Set log level to most verbose to capture all debug output
+ common_log_set_verbosity_thold(99);
+
+ debug_options opts;
+ if (!parse_options(argc, argv, opts)) {
+ return 1;
+ }
+
+ if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
+ jinja::enable_debug(true);
+ }
+
+ std::string template_source;
+ try {
+ // Check if the file is a GGUF file
+ if (opts.template_path.size() >= 5 &&
+ opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
+ template_source = read_gguf_chat_template(opts.template_path);
+ } else {
+ template_source = read_file(opts.template_path);
+ }
+ } catch (const std::exception & e) {
+ LOG_ERR("Error reading template: %s\n", e.what());
+ return 1;
+ }
+
+ LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
+ LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
+ opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
+
+ try {
+ common_chat_template chat_template(template_source, "", "");
+
+ // Build tools definition
+ json tools = opts.with_tools ? build_tools_definition() : json();
+
+ // Render template scenarios if requested
+ if (opts.input_message != input_message_type::NONE &&
+ (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
+ LOG_ERR("\n");
+ LOG_ERR("================================================================================\n");
+ LOG_ERR(" TEMPLATE RENDERING OUTPUT\n");
+ LOG_ERR("================================================================================\n");
+
+ render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
+ opts.input_message);
+ }
+
+ // Output analysis if requested
+ if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
+ LOG_ERR("\n");
+ LOG_ERR("================================================================================\n");
+ LOG_ERR(" TEMPLATE ANALYSIS\n");
+ LOG_ERR("================================================================================\n");
+
+ autoparser::autoparser analysis;
+ analysis.analyze_template(chat_template);
+
+ // Generate Parser
+ autoparser::templates_params params;
+ params.messages = json::array({ build_user_message() });
+ params.reasoning_format =
+ opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
+ params.enable_thinking = opts.enable_reasoning;
+ params.add_generation_prompt = opts.generation_prompt;
+
+ if (opts.with_tools) {
+ params.tools = tools;
+ params.tool_choice = opts.force_tool_call ? COMMON_CHAT_TOOL_CHOICE_REQUIRED : COMMON_CHAT_TOOL_CHOICE_AUTO;
+ } else {
+ params.tools = json();
+ params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+ }
+ params.parallel_tool_calls = false;
+
+ auto parser_data = autoparser::peg_generator::generate_parser(chat_template, params, analysis);
+
+ LOG_ERR("\n=== Generated Parser ===\n");
+ common_peg_arena arena;
+ arena.load(parser_data.parser);
+ LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
+
+ LOG_ERR("\n=== Generated Grammar ===\n");
+ LOG_ERR("%s\n", parser_data.grammar.c_str());
+
+ LOG_ERR("\n=== Generated Lazy Grammar ===\n");
+ LOG_ERR("%d\n", parser_data.grammar_lazy);
+
+ LOG_ERR("\n=== Generated Grammar Triggers ===\n");
+ for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
+ LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
+ }
+
+ LOG_ERR("\n=== Preserved Tokens ===\n");
+ for (const std::string & token : parser_data.preserved_tokens) {
+ LOG_ERR(" '%s'\n", token.c_str());
+ }
+
+ if (!parser_data.grammar.empty()) {
+ LOG_ERR("\n=== Verifying created grammar ===\n");
+ auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
+ parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
+ if (grammar != nullptr) {
+ LOG_ERR("\n=== Grammar successfully created ===\n");
+ }
+ }
+ }
+ } catch (const std::exception & e) {
+ LOG_ERR("Analysis failed: %s\n", e.what());
+ return 1;
+ }
+
+ return 0;
+}
--- /dev/null
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat.h"
+#include "log.h"
+#include "jinja/caps.h"
+#include "jinja/runtime.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+// ANSI color codes - using 256-color palette for brighter colors (all bold)
+#define ANSI_RESET "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers
+#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers
+#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels
+#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences
+#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences
+#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message)
+#define ANSI_BOLD "\033[1m" // Standalone bold
+#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix
+#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix
+
+// All template paths extracted from tests/test-chat.cpp
+static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
+ "models/templates/Apertus-8B-Instruct.jinja",
+ "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
+ "models/templates/ByteDance-Seed-OSS.jinja",
+ "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
+ "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
+ "models/templates/GLM-4.6.jinja",
+ "models/templates/GLM-4.7-Flash.jinja",
+ "models/templates/Kimi-K2-Instruct.jinja",
+ "models/templates/Kimi-K2-Thinking.jinja",
+ "models/templates/MiMo-VL.jinja",
+ "models/templates/MiniMax-M2.jinja",
+ "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
+ "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
+ "models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
+ "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
+ "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+ "models/templates/Qwen-QwQ-32B.jinja",
+ "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
+ "models/templates/Qwen3-Coder.jinja",
+ "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
+ "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
+ "models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
+ "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
+ "models/templates/google-gemma-2-2b-it.jinja",
+ "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
+ "models/templates/llama-cpp-deepseek-r1.jinja",
+ "models/templates/meetkai-functionary-medium-v3.1.jinja",
+ "models/templates/meetkai-functionary-medium-v3.2.jinja",
+ "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
+ "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
+ "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
+ "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
+ "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
+ "models/templates/moonshotai-Kimi-K2.jinja",
+ "models/templates/openai-gpt-oss-120b.jinja",
+ "models/templates/unsloth-Apriel-1.5.jinja",
+ "models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
+};
+
+struct analysis_options {
+ std::vector<std::string> template_paths;
+ bool analyze_all = false;
+};
+
+static std::string read_file(const std::string & path) {
+ std::ifstream fin(path, std::ios::binary);
+ if (!fin.is_open()) {
+ throw std::runtime_error("Could not open file: " + path);
+ }
+ std::ostringstream buf;
+ buf << fin.rdbuf();
+ return buf.str();
+}
+
+static void print_usage(const char * program_name) {
+ LOG_ERR("Usage: %s [options]\n", program_name);
+ LOG_ERR("\nOptions:\n");
+ LOG_ERR(" --template <name> Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
+ LOG_ERR(" --template-file <path> Analyze custom template file\n");
+ LOG_ERR(" --all Analyze all templates from test suite\n");
+ LOG_ERR("\nExamples:\n");
+ LOG_ERR(" %s --all\n", program_name);
+ LOG_ERR(" %s --template deepseek\n", program_name);
+ LOG_ERR(" %s --template-file my-template.jinja\n", program_name);
+}
+
+static bool parse_options(int argc, char ** argv, analysis_options & opts) {
+ if (argc < 2) {
+ print_usage(argv[0]);
+ return false;
+ }
+
+ for (int i = 1; i < argc; ++i) {
+ std::string arg = argv[i];
+
+ if (arg == "--all") {
+ opts.analyze_all = true;
+ } else if (arg == "--template") {
+ if (i + 1 >= argc) {
+ LOG_ERR("--template requires an argument\n");
+ return false;
+ }
+ std::string pattern = argv[++i];
+ std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
+
+ // Find matching templates
+ bool found = false;
+ for (const auto & path : ALL_TEMPLATE_PATHS) {
+ std::string path_lower = path;
+ std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower);
+ if (path_lower.find(pattern) != std::string::npos) {
+ opts.template_paths.push_back(path);
+ found = true;
+ }
+ }
+
+ if (!found) {
+ LOG_ERR("No templates found matching: %s\n", pattern.c_str());
+ return false;
+ }
+ } else if (arg == "--template-file") {
+ if (i + 1 >= argc) {
+ LOG_ERR("--template-file requires an argument\n");
+ return false;
+ }
+ opts.template_paths.push_back(argv[++i]);
+ } else {
+ LOG_ERR("Unknown option: %s\n", arg.c_str());
+ print_usage(argv[0]);
+ return false;
+ }
+ }
+
+ if (opts.analyze_all) {
+ opts.template_paths = ALL_TEMPLATE_PATHS;
+ }
+
+ if (opts.template_paths.empty()) {
+ LOG_ERR("No templates specified\n");
+ print_usage(argv[0]);
+ return false;
+ }
+
+ return true;
+}
+
+static json build_tools_definition() {
+ json parameters_schema = json::object();
+ parameters_schema["type"] = "object";
+ parameters_schema["properties"] = json::object();
+ parameters_schema["properties"]["param1"] = json::object({
+ { "type", "string" },
+ { "description", "First parameter" }
+ });
+ parameters_schema["properties"]["param2"] = json::object({
+ { "type", "string" },
+ { "description", "Second parameter" }
+ });
+ parameters_schema["required"] = json::array({ "param1", "param2" });
+
+ return json::array({
+ json{ { "type", "function" },
+ { "function", json{ { "name", "test_function_name" },
+ { "description", "A test function for debugging" },
+ { "parameters", parameters_schema } } } }
+ });
+}
+
+// Helper to create a tool call with arguments as JSON object
+static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
+ return json{
+ {"id", id},
+ {"type", "function"},
+ {"function", json{
+ {"name", name},
+ {"arguments", args_object} // Pass as JSON object, not serialized string
+ }}
+ };
+}
+
+// Helper functions to create repeating message definitions
+static json make_user_msg() {
+ return json{
+ {"role", "user"},
+ {"content", "Hello, please help me."}
+ };
+}
+
+static json make_user_msg2() {
+ return json{
+ {"role", "user"},
+ {"content", "Thank you."}
+ };
+}
+
+static json make_user_msg2_continue() {
+ return json{
+ {"role", "user"},
+ {"content", "Continue."}
+ };
+}
+
+static json make_assistant_no_tool() {
+ return json{
+ {"role", "assistant"},
+ {"content", "Let me help you."}
+ };
+}
+
+static json make_assistant_one_tool() {
+ return json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })}
+ };
+}
+
+static json make_assistant_two_tools() {
+ return json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+ build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+ })}
+ };
+}
+
+static json make_assistant_no_reasoning() {
+ return json{
+ {"role", "assistant"},
+ {"content", "I can help you with that."}
+ };
+}
+
+static json make_assistant_with_reasoning() {
+ return json{
+ {"role", "assistant"},
+ {"content", "I can help you with that."},
+ {"reasoning_content", "The user is asking for help. I should respond positively."}
+ };
+}
+
+static json make_assistant_one_tool_with_reasoning() {
+ return json{
+ {"role", "assistant"},
+ {"content", nullptr},
+ {"tool_calls", json::array({
+ build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+ })},
+ {"reasoning_content", "I need to call the tool first."}
+ };
+}
+
+static void print_diff_split(const std::string & title, const diff_split & diff) {
+ LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
+ LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str());
+ LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str());
+ LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str());
+ LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str());
+}
+
+static void check_reasoning_variables(const common_chat_template & tmpl) {
+ LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
+
+ try {
+ // Create a list of candidate reasoning/thinking variable names to probe
+ std::vector<std::string> candidate_vars = {
+ "enable_reasoning",
+ "use_reasoning",
+ "reasoning_enabled",
+ "has_reasoning",
+ "reasoning_mode",
+ "reasoning_format",
+ "reasoning_active",
+ "with_reasoning",
+ "use_thinking",
+ "thinking_enabled",
+ "has_thinking",
+ "thinking_mode",
+ "thinking_format",
+ "thinking_active",
+ "with_thinking",
+ "enable_reason",
+ "reason_enabled",
+ "enable_think",
+ "think_enabled",
+ };
+
+ jinja::context ctx;
+ ctx.is_get_stats = true;
+
+ json messages = json::array({
+ json{
+ {"role", "user"},
+ {"content", "Test message"}
+ },
+ json{
+ {"role", "assistant"},
+ {"content", "Response"},
+ {"reasoning_content", "Some reasoning"}
+ }
+ });
+
+ // Set up base context
+ jinja::global_from_json(ctx, json{
+ {"messages", messages},
+ {"tools", json::array()},
+ {"bos_token", ""},
+ {"eos_token", ""},
+ {"add_generation_prompt", false},
+ {"enable_thinking", true} // Already passed, so we'll exclude this from results
+ }, true);
+
+ // Add candidate variables as undefined to probe which ones are accessed
+ for (const auto & var_name : candidate_vars) {
+ ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
+ }
+
+ try {
+ jinja::runtime runtime(ctx);
+ runtime.execute(tmpl.prog);
+ } catch (const std::exception & e) {
+ // Execution may fail, that's okay - we just want to see what variables were accessed
+ }
+
+ // Check which candidate variables were accessed (stats.used = true)
+ std::vector<std::string> accessed_vars;
+ for (const auto & var_name : candidate_vars) {
+ auto val = ctx.get_val(var_name);
+ if (!val->is_undefined()) {
+ // Variable was overwritten, skip it
+ continue;
+ }
+ if (val->stats.used) {
+ accessed_vars.push_back(var_name);
+ }
+ }
+
+ if (accessed_vars.empty()) {
+ LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
+ } else {
+ LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
+ for (const auto & var : accessed_vars) {
+ LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
+ }
+ }
+
+ } catch (const std::exception & e) {
+ LOG_ERR("Error checking reasoning variables: %s\n", e.what());
+ }
+}
+
+static void analyze_template(const std::string & template_path) {
+ LOG_ERR("\n");
+ LOG_ERR("%s", ANSI_PURPLE);
+ LOG_ERR("================================================================================\n");
+ LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str());
+ LOG_ERR("================================================================================\n");
+ LOG_ERR("%s", ANSI_RESET);
+
+ std::string template_source;
+ try {
+ template_source = read_file(template_path);
+ } catch (const std::exception & e) {
+ LOG_ERR("Error reading template: %s\n", e.what());
+ return;
+ }
+
+ try {
+ common_chat_template chat_template(template_source, "", "");
+ json tools = build_tools_definition();
+
+ // ===== CAPABILITIES ANALYSIS =====
+ LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
+ auto caps = chat_template.original_caps();
+ LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
+ LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
+ LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
+ LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
+ LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false");
+ LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false");
+
+ // ===== DIFFERENTIAL ANALYSIS =====
+
+ // Test 1: With and without tools (single user message)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_no_tools;
+ params_no_tools.messages = json::array({ user_msg });
+ params_no_tools.add_generation_prompt = false;
+ params_no_tools.tools = json::array();
+
+ autoparser::templates_params params_with_tools = params_no_tools;
+ params_with_tools.tools = tools;
+
+ std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
+ std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
+
+ auto diff = calculate_diff_split(output_no_tools, output_with_tools);
+ print_diff_split("Diff: With vs Without Tools (single user message)", diff);
+ }
+
+ // Test 2: With and without add_generation_prompt (single user message)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_no_prompt;
+ params_no_prompt.messages = json::array({ user_msg });
+ params_no_prompt.add_generation_prompt = false;
+ params_no_prompt.tools = json::array();
+
+ autoparser::templates_params params_with_prompt = params_no_prompt;
+ params_with_prompt.add_generation_prompt = true;
+
+ std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
+ std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
+
+ auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
+ print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
+ }
+
+ // Test 3: Assistant with reasoning_content (user, assistant)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_no_reasoning;
+ params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
+ params_no_reasoning.add_generation_prompt = false;
+ params_no_reasoning.enable_thinking = true;
+
+ autoparser::templates_params params_with_reasoning = params_no_reasoning;
+ params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
+
+ std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+ std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+ auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+ print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
+ }
+
+ // Test 4: Assistant with reasoning_content (user, assistant, user)
+ {
+ json user_msg = make_user_msg();
+ json user_msg2 = make_user_msg2();
+
+ autoparser::templates_params params_no_reasoning;
+ params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
+ params_no_reasoning.add_generation_prompt = false;
+ params_no_reasoning.enable_thinking = true;
+
+ autoparser::templates_params params_with_reasoning = params_no_reasoning;
+ params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
+
+ std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+ std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+ auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+ print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
+ }
+
+ // Test 5: Tool call in last assistant message (user, assistant)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_no_tool;
+ params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
+ params_no_tool.add_generation_prompt = false;
+ params_no_tool.tools = tools;
+
+ autoparser::templates_params params_with_tool = params_no_tool;
+ params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+
+ std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+ std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+ auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+ print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
+ }
+
+ // Test 6: Tool call in last assistant message (user, assistant, user)
+ {
+ json user_msg = make_user_msg();
+ json user_msg2 = make_user_msg2_continue();
+
+ autoparser::templates_params params_no_tool;
+ params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
+ params_no_tool.add_generation_prompt = false;
+ params_no_tool.tools = tools;
+
+ autoparser::templates_params params_with_tool = params_no_tool;
+ params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+
+ std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+ std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+ auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+ print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
+ }
+
+ // Test 7: One vs two tool calls (user, assistant)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_one_tool;
+ params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+ params_one_tool.add_generation_prompt = false;
+ params_one_tool.tools = tools;
+
+ autoparser::templates_params params_two_tools = params_one_tool;
+ params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
+
+ std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+ std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+ auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+ print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
+ }
+
+ // Test 8: One vs two tool calls (user, assistant, user)
+ {
+ json user_msg = make_user_msg();
+ json user_msg2 = make_user_msg2_continue();
+
+ autoparser::templates_params params_one_tool;
+ params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+ params_one_tool.add_generation_prompt = false;
+ params_one_tool.tools = tools;
+
+ autoparser::templates_params params_two_tools = params_one_tool;
+ params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
+
+ std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+ std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+ auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+ print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
+ }
+
+ // Test 9: Tool call with vs without reasoning_content (user, assistant)
+ {
+ json user_msg = make_user_msg();
+
+ autoparser::templates_params params_no_reasoning;
+ params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
+ params_no_reasoning.add_generation_prompt = false;
+ params_no_reasoning.tools = tools;
+ params_no_reasoning.enable_thinking = true;
+
+ autoparser::templates_params params_with_reasoning = params_no_reasoning;
+ params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
+
+ std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+ std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+ auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+ print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
+ }
+
+ // Check reasoning variables
+ check_reasoning_variables(chat_template);
+
+ } catch (const std::exception & e) {
+ LOG_ERR("Analysis failed: %s\n", e.what());
+ }
+}
+
+int main(int argc, char ** argv) {
+ // Set log level to capture all output
+ common_log_set_verbosity_thold(99);
+
+ analysis_options opts;
+ if (!parse_options(argc, argv, opts)) {
+ return 1;
+ }
+
+ LOG_ERR("\n");
+ LOG_ERR("%s", ANSI_PURPLE);
+ LOG_ERR("================================================================================\n");
+ LOG_ERR(" TEMPLATE ANALYSIS TOOL\n");
+ LOG_ERR("================================================================================\n");
+ LOG_ERR("%s", ANSI_RESET);
+ LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
+
+ for (const auto & path : opts.template_paths) {
+ analyze_template(path);
+ }
+
+ LOG_ERR("\n");
+ LOG_ERR("%s", ANSI_GREEN);
+ LOG_ERR("================================================================================\n");
+ LOG_ERR(" ANALYSIS COMPLETE\n");
+ LOG_ERR("================================================================================\n");
+ LOG_ERR("%s", ANSI_RESET);
+
+ return 0;
+}
return this._addRule(ruleName, out.join(''));
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
+ } else if (schemaType === undefined && typeof schema === 'object' && !Array.isArray(schema) && schema !== null) {
+ // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+ // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+ return this._addRule(ruleName, this._addPrimitive('value', PRIMITIVE_RULES['value']));
} else {
if (!(schemaType in PRIMITIVE_RULES)) {
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
-#include "server-common.h"
#include "server-task.h"
+#include "chat.h"
#include "common.h"
+#include "json-schema-to-grammar.h"
#include "llama.h"
-#include "chat.h"
#include "sampling.h"
#include "speculative.h"
-#include "json-schema-to-grammar.h"
+#include "server-common.h"
using json = nlohmann::ordered_json;
common_chat_msg task_result_state::update_chat_msg(
const std::string & text_added,
bool is_partial,
- std::vector<common_chat_msg_diff> & diffs) {
+ std::vector<common_chat_msg_diff> & diffs,
+ bool filter_tool_calls) {
generated_text += text_added;
auto msg_prv_copy = chat_msg;
SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
if (!new_msg.empty()) {
new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
chat_msg = new_msg;
- diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
+ auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
+
+ if (!filter_tool_calls) {
+ diffs = std::move(all_diffs);
+ } else {
+ for (auto & d : all_diffs) {
+ // If this is a new type of delta, flush all currently pending tool call names
+ for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+ if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
+ continue;
+ }
+ if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
+ common_chat_msg_diff header;
+ header.tool_call_index = i;
+ header.tool_call_delta.id = chat_msg.tool_calls[i].id;
+ header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+ diffs.push_back(std::move(header));
+ sent_tool_call_names.insert(i);
+ }
+ }
+
+ if (d.tool_call_index == std::string::npos) {
+ diffs.push_back(std::move(d));
+ } else {
+ size_t i = d.tool_call_index;
+ if (sent_tool_call_names.count(i)) {
+ if (!d.tool_call_delta.arguments.empty()) {
+ d.tool_call_delta.name = "";
+ d.tool_call_delta.id = "";
+ diffs.push_back(std::move(d));
+ }
+ } else {
+ // Not sent yet.
+ if (!d.tool_call_delta.arguments.empty() || !is_partial) {
+ d.tool_call_delta.name = chat_msg.tool_calls[i].name;
+ d.tool_call_delta.id = chat_msg.tool_calls[i].id;
+ diffs.push_back(std::move(d));
+ sent_tool_call_names.insert(i);
+ } else {
+ // Suppress
+ }
+ }
+ }
+ }
+ // Final check at EOF
+ if (!is_partial) {
+ for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+ if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
+ common_chat_msg_diff header;
+ header.tool_call_index = i;
+ header.tool_call_delta.id = chat_msg.tool_calls[i].id;
+ header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+ diffs.push_back(std::move(header));
+ sent_tool_call_names.insert(i);
+ }
+ }
+ }
+ }
}
return chat_msg;
}
common_chat_msg chat_msg;
std::string generated_text; // append new chunks of generated text here
std::vector<std::string> generated_tool_call_ids;
+ std::unordered_set<size_t> sent_tool_call_names;
// for OpenAI Responses and Anthropic streaming API:
// track output item / content block state across chunks
common_chat_msg update_chat_msg(
const std::string & text_added,
bool is_partial,
- std::vector<common_chat_msg_diff> & diffs);
+ std::vector<common_chat_msg_diff> & diffs,
+ bool filter_tool_calls = false);
};
struct server_task {
assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
# assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
- assert expected_function_name == tool_call["function"]["name"]
+ assert expected_function_name == tool_call["function"]["name"], f'Expected tool name to be {tool_call["function"]["name"]} in {choice["message"]}'
actual_arguments = tool_call["function"]["arguments"]
- assert isinstance(actual_arguments, str)
+ assert isinstance(actual_arguments, dict) or isinstance(actual_arguments, str), f'Expected arguments to be a dict or str, got: {actual_arguments}'
if argument_key is not None:
- actual_arguments = json.loads(actual_arguments)
- assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
+ if (isinstance(actual_arguments, str)):
+ actual_arguments = json.loads(actual_arguments)
+ assert argument_key in actual_arguments, f"tool arguments: {actual_arguments}, expected: {argument_key}"
@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
@pytest.mark.parametrize("template_name,tool,argument_key", [
- ("google-gemma-2-2b-it", TEST_TOOL, "success"),
- ("google-gemma-2-2b-it", TEST_TOOL, "success"),
+ ("Qwen3-Coder", TEST_TOOL, "success"),
+ ("Qwen3-Coder", TEST_TOOL, "success"),
("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"),
("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"),
("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"),