Autoparser - complete refactoring of parser architecture (#18675)

author Piotr Wilkin (ilintar) <redacted>

Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)

committer GitHub <redacted>

Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)
author Piotr Wilkin (ilintar) <redacted>
Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)
committer GitHub <redacted>
Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt

index 27ca335be37d168ae8b08800e24c5e124a903327..51bff1c44bfae71a6838be585503689cee227c16 100644 (file)
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -47,10 +47,10 @@ add_library(${TARGET} STATIC
      arg.cpp
      arg.h
      base64.hpp
-    chat-parser.cpp
-    chat-parser.h
-    chat-parser-xml-toolcall.h
-    chat-parser-xml-toolcall.cpp
+    chat-auto-parser-generator.cpp
+    chat-auto-parser-helpers.cpp
+    chat-auto-parser.h
+    chat-diff-analyzer.cpp
      chat-peg-parser.cpp
      chat-peg-parser.h
      chat.cpp
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp

new file mode 100644 (file)

index 0000000..080bce5
--- /dev/null
+++ b/common/chat-auto-parser-generator.cpp
@@ -0,0 +1,413 @@
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "json-schema-to-grammar.h"
+#include "nlohmann/json.hpp"
+
+#include <stdexcept>
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+// Invoke fn on every entry of tools that is a function tool (type == "function" and has a "function" member); other entries are skipped
+static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
+    for (const auto & tool : tools) {
+        if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
+            continue;
+        }
+        fn(tool);
+    }
+}
+
+namespace autoparser {
+
+parser_build_context::parser_build_context(common_chat_peg_builder & p, const templates_params & inputs) :
+    p(p),
+    inputs(inputs),
+    reasoning_parser(p.eps()) {}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
+                                                  const struct templates_params & inputs) {
+    // Run differential analysis to extract template structure
+    struct autoparser autoparser;
+    autoparser.analyze_template(tmpl);
+    return generate_parser(tmpl, inputs, autoparser);
+}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
+                                                  const struct templates_params & inputs,
+                                                  const autoparser &              autoparser) {
+    // Build the parser using the analysis results
+    auto parser = autoparser.build_parser(inputs);
+
+    // Create the result structure
+    common_chat_params data;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = autoparser.preserved_tokens;
+    data.parser           = parser.save();
+
+    // Build grammar if tools are present
+    bool has_tools =
+        autoparser.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
+    std::string trigger_marker = !autoparser.tools.format.section_start.empty() ? autoparser.tools.format.section_start :
+                                                                                autoparser.tools.format.per_call_start;
+    bool        include_grammar =
+        has_tools && ((inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO && !trigger_marker.empty()) ||
+                      inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
+
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        // Set grammar triggers based on tool section markers (fall back to per-call markers)
+        if (data.grammar_lazy) {  // only do triggers on lazy grammar
+            data.grammar_triggers = {
+                { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
+            };
+        }
+    }
+
+    return data;
+}
+
+common_peg_arena autoparser::build_parser(const templates_params & inputs) const {
+    if (!analysis_complete) {
+        throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
+    }
+    return build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // If the template uses Python dict format (single-quoted strings in JSON structures),
+        // pre-register a json-string rule that accepts both quote styles. This must happen
+        // before any call to p.json() so that all JSON parsing inherits the flexible rule.
+        if (tools.format.uses_python_dicts) {
+            p.rule("json-string", [&]() { return p.choice({ p.double_quoted_string(), p.single_quoted_string() }); });
+        }
+
+        parser_build_context ctx(p, inputs);
+        bool                 extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+        bool                 enable_thinking   = inputs.enable_thinking;
+
+        ctx.extracting_reasoning = extract_reasoning && enable_thinking && reasoning.mode != reasoning_mode::NONE;
+        ctx.content              = &content;
+
+        // Build reasoning parser
+        ctx.reasoning_parser = reasoning.build_parser(ctx);
+
+        bool has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+        bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+
+        if (has_response_format) {
+            return ctx.reasoning_parser + p.space() +
+                   p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
+        }
+
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
+            return tools.build_parser(ctx);
+        }
+
+        return content.build_parser(ctx);
+    });
+}
+
+common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (!ctx.extracting_reasoning) {
+        return p.eps();
+    }
+
+    bool thinking_forced_open   = (mode == reasoning_mode::FORCED_OPEN);
+    bool thinking_forced_closed = (mode == reasoning_mode::FORCED_CLOSED);
+
+    if (thinking_forced_open || thinking_forced_closed) {
+        // Thinking is forced open OR forced closed with enable_thinking=true
+        // In both cases, expect only the closing tag (opening was in template)
+        return p.reasoning(p.until(end)) + end;
+    }
+    if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
+        // Standard tag-based reasoning OR tools-only mode (reasoning appears with tools)
+        // Both use the same tag-based pattern if markers are available
+        if (!start.empty() && !end.empty()) {
+            return p.optional(start + p.reasoning(p.until(end)) + end);
+        }
+    } else if (mode == reasoning_mode::DELIMITER) {
+        return p.optional(p.reasoning(p.until(end)) + end);
+    }
+
+    return p.eps();
+}
+
+common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (is_always_wrapped()) {
+        if (ctx.extracting_reasoning) {
+            return ctx.reasoning_parser + start + p.content(p.until(end)) + end + p.end();
+        }
+        return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
+    }
+    return ctx.reasoning_parser + p.content(p.rest()) + p.end();
+}
+
+common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (is_always_wrapped()) {
+        return p.optional(start + p.content(p.until(end)) + end);
+    }
+    return p.eps();
+}
+
+common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const {
+    switch (format.mode) {
+        case tool_format::JSON_NATIVE:
+            return build_tool_parser_json_native(ctx);
+        case tool_format::TAG_WITH_JSON:
+            return build_tool_parser_tag_json(ctx);
+        case tool_format::TAG_WITH_TAGGED:
+            return build_tool_parser_tag_tagged(ctx);
+        default:
+            GGML_ABORT("Unable to create tool parser");
+    }
+}
+
+common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    // Prefix name/args fields with "<function_field>." when function_field is set, is not "function", and name_field has no dot yet
+    std::string name_field = format.name_field;
+    std::string args_field = format.args_field;
+
+    if (!format.function_field.empty() && format.function_field != "function" &&
+        name_field.find('.') == std::string::npos) {
+        name_field = format.function_field + "." + name_field;
+        args_field = format.function_field + "." + args_field;
+    }
+
+    auto tools_parser = p.standard_json_tools(
+        format.section_start, format.section_end, inputs.tools, inputs.parallel_tool_calls,
+        inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
+        format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
+
+    // Handle content wrappers if present
+    if (ctx.content && ctx.content->is_always_wrapped()) {
+        auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
+        return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
+    }
+
+    std::string tool_start = "{";
+    if (!format.section_start.empty()) {
+        tool_start = format.section_start;
+    } else if (!format.per_call_start.empty()) {
+        tool_start = format.per_call_start;
+    }
+
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser +
+           p.end();
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_choice = p.choice();
+
+    foreach_function(inputs.tools, [&](const json & tool) {
+        const auto & func   = tool.at("function");
+        std::string  name   = func.at("name");
+        const auto & schema = func.at("parameters");
+
+        // Build call_id parser based on position (if supported)
+        common_peg_parser call_id_section = p.eps();
+        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+            !call_id.suffix.empty()) {
+            call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
+        }
+
+        auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
+                           call_id_section + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
+        if (!function.close.empty()) {
+            func_parser = func_parser + function.close;
+        }
+        func_parser = p.atomic(func_parser);
+
+        tool_choice |= p.rule("tool-" + name, func_parser);
+    });
+
+    auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_calls = p.eps();
+
+    if (!format.per_call_start.empty()) {
+        auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end;
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+        } else {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call);
+        }
+        if (!format.section_start.empty()) {
+            tool_calls = p.trigger_rule("tool-calls",
+                                        p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+                                            (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+        }
+    } else {
+        std::string separator = ", ";  // Default
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice +
+                                                         p.zero_or_more(separator + tool_choice) + format.section_end);
+        } else {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice + format.section_end);
+        }
+    }
+
+    if (!require_calls) {
+        tool_calls = p.optional(tool_calls);
+    }
+
+    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
+    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+           p.end();
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_choice = p.choice();
+
+    foreach_function(inputs.tools, [&](const json & tool) {
+        const auto & func   = tool.at("function");
+        std::string  name   = func.at("name");
+        const auto & params = func.at("parameters");
+
+        if (!params.contains("properties") || !params.at("properties").is_object()) {
+            return;
+        }
+
+        const auto &          properties = params.at("properties");
+        std::set<std::string> required;
+        if (params.contains("required") && params.at("required").is_array()) {
+            params.at("required").get_to(required);
+        }
+
+        // Build parser for each argument
+        std::vector<common_peg_parser> arg_parsers;
+        for (const auto & [param_name, param_schema] : properties.items()) {
+            bool        is_required = required.find(param_name) != required.end();
+            std::string type        = "object";
+            auto        type_obj    = param_schema.contains("type") ? param_schema.at("type") : json::object();
+            if (type_obj.is_string()) {
+                type_obj.get_to(type);
+            } else if (type_obj.is_object()) {
+                if (type_obj.contains("type") && type_obj.at("type").is_string()) {
+                    type_obj.at("type").get_to(type);
+                }
+            }
+
+            auto arg = p.tool_arg(
+                p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
+                                arguments.name_suffix) +
+                arguments.value_prefix +
+                (type == "string" ? p.tool_arg_string_value(p.schema(p.until(arguments.value_suffix),
+                                                                     "tool-" + name + "-arg-" + param_name + "-schema",
+                                                                     param_schema, true)) :
+                                    p.tool_arg_json_value(p.schema(
+                                        p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, format.uses_python_dicts)) +
+                                        p.space()) +
+                p.tool_arg_close(p.literal(arguments.value_suffix)));
+
+            if (is_required) {
+                arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
+            } else {
+                arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+            }
+        }
+
+        // Build arg sequence with space() between consecutive args
+        common_peg_parser args_seq = p.eps();
+        for (size_t i = 0; i < arg_parsers.size(); i++) {
+            if (i > 0) {
+                args_seq = args_seq + p.space();
+            }
+            args_seq = args_seq + arg_parsers[i];
+        }
+
+        // Build call_id parser based on position (if supported)
+        common_peg_parser call_id_section = p.eps();
+        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+            !call_id.suffix.empty()) {
+            call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
+        }
+
+        auto func_parser = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix) +
+                           call_id_section + p.space() + args_seq;
+
+        if (!function.close.empty()) {
+            func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close));
+        } else if (!format.per_call_end.empty()) {
+            // When there's no func_close but there is a per_call_end marker, use peek() to ensure
+            // we only emit tool_close when we can actually see the closing marker. This prevents
+            // premature closing during partial parsing when we've seen e.g. "</" which could be
+            // either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
+            func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end)));
+        } else {
+            func_parser =
+                func_parser + p.tool_close(p.space());  // force this to process tool closing callbacks in mapper
+        }
+
+        func_parser = p.atomic(func_parser);
+        tool_choice |= p.rule("tool-" + name, func_parser);
+    });
+
+    auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_calls = p.eps();
+
+    if (!format.per_call_start.empty()) {
+        auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+        } else {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call);
+        }
+        if (!format.section_start.empty()) {
+            tool_calls = p.trigger_rule("tool-calls",
+                                        p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+                                            (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+        }
+    } else {
+        std::string separator = ", ";  // Default
+
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + p.space() + tool_choice +
+                                                         p.zero_or_more(separator + tool_choice) + p.space() +
+                                                         format.section_end);
+        } else {
+            tool_calls = p.trigger_rule(
+                "tool-call", format.section_start + p.space() + tool_choice + p.space() + format.section_end);
+        }
+    }
+
+    if (!require_tools) {
+        tool_calls = p.optional(tool_calls);
+    }
+
+    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
+    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+           p.end();
+}
+
+}  // namespace autoparser
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp

new file mode 100644 (file)

index 0000000..1519d8b
--- /dev/null
+++ b/common/chat-auto-parser-helpers.cpp
@@ -0,0 +1,347 @@
+#include "chat-auto-parser-helpers.h"
+
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+
+#include <cctype>
+#include <numeric>
+
+using json = nlohmann::ordered_json;
+
+std::string trim_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    if (start == str.length()) {
+        return "";
+    }
+
+    size_t end = str.length() - 1;
+    while (end > start && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+
+    return str.substr(start, end - start + 1);
+}
+
+std::string trim_leading_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    return str.substr(start);
+}
+
+std::string trim_trailing_whitespace(const std::string & str) {
+    if (str.empty()) {
+        return "";
+    }
+
+    size_t end = str.length() - 1;
+    while (end > 0 && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+
+    // Scan stopped at index 0: if that char is whitespace too, the whole string is whitespace, so return an empty string
+    if (end == 0 && std::isspace(static_cast<unsigned char>(str[0]))) {
+        return "";
+    }
+
+    return str.substr(0, end + 1);
+}
+
+std::string trim_trailing_newlines(const std::string & str) {
+    size_t end = str.length();
+    while (end > 0 && str[end - 1] == '\n') {
+        end--;
+    }
+
+    return str.substr(0, end);
+}
+
+static size_t common_prefix_len(const std::string & left, const std::string & right) {
+    size_t prefix_len = 0;
+    size_t min_len    = std::min(left.length(), right.length());
+    while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) {
+        prefix_len++;
+    }
+    return prefix_len;
+}
+
+static size_t common_suffix_len(const std::string & left, const std::string & right) {
+    size_t suffix_len = 0;
+    size_t min_len    = std::min(left.length(), right.length());
+    while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) {
+        suffix_len++;
+    }
+    return suffix_len;
+}
+
+diff_split calculate_diff_split(const std::string & left, const std::string & right) {
+    diff_split result;
+
+    auto left_seg = segmentize_markers(left);
+    auto right_seg = segmentize_markers(right);
+
+    if (left_seg.empty()) {
+        result.right = right;
+        return result;
+    }
+    if (right_seg.empty()) {
+        result.left = left;
+        return result;
+    }
+
+    auto left_start = left_seg.begin();
+    auto left_end = --left_seg.end();
+    auto right_start = right_seg.begin();
+    auto right_end = --right_seg.end();
+
+    auto test = [&] () {
+        return left_start != left_end && right_start != right_end;
+    };
+
+    bool left_fully_consumed = false;
+    bool right_fully_consumed = false;
+
+    while (test()) {
+        bool advanced = false;
+        if (*left_start == *right_start) {
+            result.prefix.append(left_start->value);
+            left_start++;
+            right_start++;
+            advanced = true;
+        }
+        if (*left_end == *right_end) {
+            result.suffix = left_end->value + result.suffix;
+            if (left_start != left_end) {
+                left_end--;
+            } else {
+                left_fully_consumed = true;
+            }
+            if (right_start != right_end) {
+                right_end--;
+            } else {
+                right_fully_consumed = true;
+            }
+            advanced = true;
+        }
+        if (!advanced) {
+            break;
+        }
+    }
+
+    if (left_start == left_end && right_start != right_end) {
+        if (*left_start == *right_end) {
+            result.suffix = right_end->value + result.suffix;
+            right_end--;
+            left_fully_consumed = true;
+        } else if (*left_start == *right_start) {
+            result.prefix.append(right_start->value);
+            right_start++;
+            left_fully_consumed = true;
+        }
+    } else if (right_start == right_end && left_start != left_end) {
+        if (*left_end == *right_start) {
+            result.suffix = left_end->value + result.suffix;
+            left_end--;
+            right_fully_consumed = true;
+        } else if (*left_start == *right_start) {
+            result.prefix.append(left_start->value);
+            left_start++;
+            right_fully_consumed = true;
+        }
+    } else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) {
+        result.prefix.append(right_start->value);
+        left_fully_consumed = true;
+        right_fully_consumed = true;
+    }
+
+    auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); };
+
+    bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT;
+    bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT;
+
+    std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment);
+    std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment);
+
+    size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0;
+    // avoid overlaps between prefix and suffix
+    size_t prefix_len = can_have_text_prefix ? common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len),
+        remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0;
+
+    result.prefix.append(remainder_left.substr(0, prefix_len));
+    result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix;
+    result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len);
+    result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len);
+
+    if (result.left == "" && result.right == "") {
+        // degenerate case, no diff
+        result.prefix = left;
+        result.suffix = "";
+        // pick prefix = all as representation
+    }
+    return result;
+}
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common prefix of left and right
+    size_t common_prefix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) {
+        common_prefix_len++;
+    }
+
+    // If there's no common prefix, return empty string
+    if (common_prefix_len == 0) {
+        return "";
+    }
+
+    // Find the common prefix in the full string
+    std::string common_prefix = left.substr(0, common_prefix_len);
+    size_t      pos           = full.find(common_prefix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything before the common prefix
+    return full.substr(0, pos);
+}
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common suffix of left and right (compare from the end)
+    size_t common_suffix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_suffix_len < min_len &&
+           left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) {
+        common_suffix_len++;
+    }
+
+    // If there's no common suffix, return empty string
+    if (common_suffix_len == 0) {
+        return "";
+    }
+
+    // Extract the common suffix
+    std::string common_suffix = left.substr(left.length() - common_suffix_len);
+
+    // Find the last occurrence of the common suffix in the full string
+    size_t pos = full.rfind(common_suffix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything after the common suffix
+    return full.substr(pos + common_suffix_len);
+}
+
+// TODO: segmentize will treat a JSON array inside tags as a marker: <calls>[{ "fun": { ... } }]</calls> yields three markers.
+// This has not turned out to be a problem anywhere so far, but it is noted here in case it ever does.
+// Marker contents might need some restrictions as well (like "no { }").
+std::vector<segment> segmentize_markers(const std::string & text) {
+    std::vector<segment> retval;
+    bool in_marker = false;
+    char marker_opener = '\0';
+
+    auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; };
+    auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); };
+
+    size_t last_border = 0;
+
+    for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) {
+        if (!in_marker && is_marker_opener(text[cur_pos])) {
+            if (last_border < cur_pos) {
+                retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border)));
+            }
+            last_border = cur_pos;
+            in_marker = true;
+            marker_opener = text[cur_pos];
+        } else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) {
+            // no emptiness check needed: last_border (the opener position) is always < cur_pos, so the marker is never empty
+                retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1)));
+            last_border = cur_pos + 1;
+            in_marker = false;
+            marker_opener = '\0';
+        }
+    }
+    if (last_border < text.length()) {
+            retval.push_back(segment(segment_type::TEXT, text.substr(last_border)));
+    }
+    return retval;
+}
+
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments) {
+    std::vector<segment> result;
+    for (const auto & seg : segments) {
+        if (!trim_whitespace(seg.value).empty()) {
+            result.push_back(seg);
+        }
+    }
+    return result;
+}
+
+namespace autoparser {
+
+std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
+    templates_params tmpl_params;
+    tmpl_params.messages              = params.messages;
+    tmpl_params.tools                 = params.tools;
+    tmpl_params.add_generation_prompt = params.add_generation_prompt;
+    tmpl_params.enable_thinking       = params.enable_thinking;
+
+    if (params.extra_context) {
+        tmpl_params.extra_context = *params.extra_context;
+    }
+    tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
+
+    try {
+        return common_chat_template_direct_apply(tmpl, tmpl_params);
+    } catch (const std::exception & e) {
+        LOG_DBG("Template application failed: %s\n", e.what());
+        return "";
+    }
+}
+
+std::optional<compare_variants_result> compare_variants(
+    const common_chat_template &                   tmpl,
+    const template_params &                        params_A,
+    const std::function<void(template_params &)> & params_modifier) {
+    // Create variant B by copying A
+    template_params params_B = params_A;
+
+    // Apply modifier to create variant B
+    if (params_modifier) {
+        params_modifier(params_B);
+    }
+
+    // Apply template to both variants
+    std::string output_A = apply_template(tmpl, params_A);
+    std::string output_B = apply_template(tmpl, params_B);
+
+    // Check for template application failures
+    if (output_A.empty() || output_B.empty()) {
+        return std::nullopt;
+    }
+
+    // Calculate diff and return result with both outputs
+    compare_variants_result result;
+    result.diff     = calculate_diff_split(output_A, output_B);
+    result.output_A = output_A;
+    result.output_B = output_B;
+
+    return result;
+}
+
+}  // namespace autoparser
+
diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h

new file mode 100644 (file)

index 0000000..6e3df79
--- /dev/null
+++ b/common/chat-auto-parser-helpers.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "chat-auto-parser.h"
+#include <functional>
+#include <optional>
+#include <string>
+
+std::string trim_whitespace(const std::string & str);
+std::string trim_leading_whitespace(const std::string & str);
+std::string trim_trailing_whitespace(const std::string & str);
+std::string trim_trailing_newlines(const std::string & str);
+
+// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
+// mismatched part on the left, mismatched part on the right) between two strings
+// account for markers - align prefix and suffix endings so that they end on markers
+// * eg.:
+// calculate_diff_split("<html><body><div></div></body></html>", "<html><body><p>Something</p></body></html>") ->
+//  { "prefix": "<html><body>" (not: "<html><body><"), "suffix": "</body></html>", "left": "<div></div>", "right": "<p>Something</p>" }
+// calculate_diff_split("<html><body>Something</body></html>", "<html><body></body></html>") ->
+//  { "prefix": "<html><body>", "suffix": "</body></html>", "left": "Something", "right": "" }
+diff_split calculate_diff_split(const std::string & left, const std::string & right);
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+// Returns empty string if there's no common prefix
+// * eg.:
+// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a "
+// until_common_prefix("<tool_call>", "<something>", "<something_else>") -> ""
+// until_common_prefix("some text", "1234", "abcd") -> ""
+// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one "
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+// Returns empty string if there's no common suffix
+// Mirror function of `until_common_prefix`
+// * eg.:
+// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call"
+// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four"
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);
+
+// Segmentize text into markers and non-marker fragments
+// * eg.:
+// segmentize_markers("<html><head><title>The site title</title><body><div>Here's some <b>content</b></div></body></html>") ->
+//  [ (MARKER, "<html>"), (MARKER, "<head>"), (MARKER, "<title>"), (TEXT, "The site title"), (MARKER, "</title>"),
+//    (MARKER, "<body>"), (MARKER, "<div>"), (TEXT, "Here's some "), (MARKER, "<b>"), (TEXT, "content"), (MARKER, "</b>"),
+//    (MARKER, "</div>"), (MARKER, "</body>"), (MARKER, "</html>")
+//  ]
+// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") ->
+//  [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ]
+std::vector<segment> segmentize_markers(const std::string & text);
+
+// Prune whitespace-only segments from a vector of segments
+// * eg.:
+// segmentize_markers("<tool_call>\n<function=foo>\n<arg=bar>\n   \n</arg>\n</function>\n</tool_call>") ->
+//  X = [ (MARKER, "<tool_call>"), (TEXT, "\n"), (MARKER, "<function=foo>"), (TEXT, "\n"), (MARKER, "<arg=bar>"), (TEXT, "\n   \n"),
+//        (MARKER, "</arg>"), (TEXT, "\n"), (MARKER, "</function>"), (TEXT, "\n"), (MARKER, "</tool_call>") ]
+// prune_whitespace_segments(X) -> [ (MARKER, "<tool_call>"), (MARKER, "<function=foo>"), (MARKER, "<arg=bar>"), (MARKER, "</arg>"),
+//                                   (MARKER, "</function>"), (MARKER, "</tool_call>") ]
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
+
+namespace autoparser {
+
+// Apply a template with the given parameters, returning the rendered string (empty on failure)
+std::string apply_template(const common_chat_template & tmpl, const template_params & params);
+
+// Factorized differential comparison function
+// Takes base params and a single modifier lambda to create variant B
+// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
+std::optional<compare_variants_result> compare_variants(
+    const common_chat_template &                   tmpl,
+    const template_params &                        params_A,
+    const std::function<void(template_params &)> & params_modifier);
+
+}  // namespace autoparser
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h

new file mode 100644 (file)

index 0000000..52c6488
--- /dev/null
+++ b/common/chat-auto-parser.h
@@ -0,0 +1,433 @@
+#pragma once
+
+#include "chat.h"
+#include "common.h"
+#include "jinja/caps.h"
+#include "peg-parser.h"
+
+#include <chrono>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+class common_chat_peg_builder;
+
+// ============================================================================
+// Parameters for template application (low-level, used by diff analysis)
+// ============================================================================
+struct template_params {
+    json                messages;                               // copied by apply_template into templates_params::messages
+    json                tools;                                  // copied by apply_template into templates_params::tools
+    bool                add_generation_prompt = false;
+    bool                enable_thinking       = true;          // apply_template also mirrors this into extra_context["enable_thinking"]
+    std::optional<json> extra_context         = std::nullopt;  // when unset, apply_template copies no extra context
+};
+
+struct diff_split {
+    std::string prefix;  // longest common prefix of the two compared strings
+    std::string suffix;  // longest common suffix (excluding the prefix)
+    std::string left;    // mismatched part of the left string
+    std::string right;   // mismatched part of the right string
+    // `other` is taken by const reference so const objects and temporaries can be compared too
+    bool operator==(const diff_split & other) const {
+        return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
+    }
+};
+
+// Result of compare_variants containing diff and original outputs
+struct compare_variants_result {
+    diff_split  diff;
+    std::string output_A;
+    std::string output_B;
+};
+
+namespace autoparser {
+
+// ============================================================================
+// High-level params for parser generation
+// ============================================================================
+
+struct templates_params {
+    json                                  messages;
+    json                                  tools;
+    common_chat_tool_choice               tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    json                                  json_schema;
+    bool                                  parallel_tool_calls = true;
+    common_reasoning_format               reasoning_format    = COMMON_REASONING_FORMAT_AUTO;
+    bool                                  stream              = true;
+    std::string                           grammar;
+    bool                                  add_generation_prompt = false;
+    bool                                  enable_thinking       = true;
+    std::chrono::system_clock::time_point now                   = std::chrono::system_clock::now();
+    json                                  extra_context;
+    bool                                  add_bos       = false;
+    bool                                  add_eos       = false;
+    bool                                  is_inference  = true;
+    bool                                  add_inference = false;
+    bool                                  mark_input    = true;  // whether to mark input strings in the jinja context
+};
+
+// ============================================================================
+// Analysis Result Enums
+// ============================================================================
+
+// Reasoning handling mode (derived from R1-R3 comparisons)
+enum class reasoning_mode {
+    NONE,           // No reasoning markers detected
+    TAG_BASED,      // Standard tag-based: <think>...</think>
+    DELIMITER,      // Delimiter-based: [BEGIN FINAL RESPONSE] (reasoning ends at delimiter)
+    FORCED_OPEN,    // Template ends with open reasoning tag (empty start, non-empty end)
+    FORCED_CLOSED,  // Template ends with open reasoning tag on enabled thinking but
+                    // with both opened and closed tag for disabled thinking
+    TOOLS_ONLY      // Only reason on tool calls, not on normal content
+};
+
+inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
+    // Pick the enumerator's printable name; values outside the enum keep "UNKNOWN"
+    const char * label = "UNKNOWN";
+    if (mode == reasoning_mode::NONE) {
+        label = "NONE";
+    } else if (mode == reasoning_mode::TAG_BASED) {
+        label = "TAG_BASED";
+    } else if (mode == reasoning_mode::DELIMITER) {
+        label = "DELIMITER";
+    } else if (mode == reasoning_mode::FORCED_OPEN) {
+        label = "FORCED_OPEN";
+    } else if (mode == reasoning_mode::FORCED_CLOSED) {
+        label = "FORCED_CLOSED";
+    } else if (mode == reasoning_mode::TOOLS_ONLY) {
+        label = "TOOLS_ONLY";
+    }
+    return os << label;
+}
+
+// Content wrapping mode (derived from C1 comparison)
+enum class content_mode {
+    PLAIN,                   // No content markers
+    ALWAYS_WRAPPED,          // Content always wrapped with markers
+    WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+};
+
+inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
+    // Pick the enumerator's printable name; values outside the enum keep "UNKNOWN"
+    const char * label = "UNKNOWN";
+    if (mode == content_mode::PLAIN) {
+        label = "PLAIN";
+    } else if (mode == content_mode::ALWAYS_WRAPPED) {
+        label = "ALWAYS_WRAPPED";
+    } else if (mode == content_mode::WRAPPED_WITH_REASONING) {
+        label = "WRAPPED_WITH_REASONING";
+    }
+    return os << label;
+}
+
+// Call ID position in tool calls (for non-JSON formats)
+enum class call_id_position {
+    NONE,                   // No call ID support detected
+    PRE_FUNC_NAME,          // Call ID before function name: [CALL_ID]id[FUNC]name{args}
+    BETWEEN_FUNC_AND_ARGS,  // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
+    POST_ARGS,              // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
+};
+
+inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
+    // Pick the enumerator's printable name; values outside the enum keep "UNKNOWN"
+    const char * label = "UNKNOWN";
+    if (pos == call_id_position::NONE) {
+        label = "NONE";
+    } else if (pos == call_id_position::PRE_FUNC_NAME) {
+        label = "PRE_FUNC_NAME";
+    } else if (pos == call_id_position::BETWEEN_FUNC_AND_ARGS) {
+        label = "BETWEEN_FUNC_AND_ARGS";
+    } else if (pos == call_id_position::POST_ARGS) {
+        label = "POST_ARGS";
+    }
+    return os << label;
+}
+
+// Tool call format classification (derived from T1-T5, A1-A3 comparisons)
+enum class tool_format {
+    NONE,             // No tool support detected
+    JSON_NATIVE,      // Pure JSON: {"name": "X", "arguments": {...}}
+    TAG_WITH_JSON,    // Tag-based with JSON args: <function=X>{...}</function>
+    TAG_WITH_TAGGED,  // Tag-based with tagged args: <param=key>value</param>
+};
+
+inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
+    // Pick the enumerator's printable name; values outside the enum keep "UNKNOWN"
+    const char * label = "UNKNOWN";
+    if (format == tool_format::NONE) {
+        label = "NONE";
+    } else if (format == tool_format::JSON_NATIVE) {
+        label = "JSON_NATIVE";
+    } else if (format == tool_format::TAG_WITH_JSON) {
+        label = "TAG_WITH_JSON";
+    } else if (format == tool_format::TAG_WITH_TAGGED) {
+        label = "TAG_WITH_TAGGED";
+    }
+    return os << label;
+}
+
+// ============================================================================
+// Sub-structs for tool analysis
+// ============================================================================
+
+struct tool_format_analysis {
+    tool_format mode = tool_format::NONE;
+
+    std::string section_start;   // e.g., "<tool_call>", "[TOOL_CALLS]", ""
+    std::string section_end;     // e.g., "</tool_call>", ""
+    std::string per_call_start;  // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
+    std::string per_call_end;    // e.g., "<|tool_call_end|>", ""
+
+    bool fun_name_is_key = false;       // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
+    bool tools_array_wrapped = false;   // Tool calls wrapped in JSON array [...]
+    bool uses_python_dicts = false;     // Tool call args use Python dict format (single-quoted strings)
+
+    std::string              function_field = "function";
+    std::string              name_field     = "name";
+    std::string              args_field     = "arguments";
+    std::string              id_field;
+    std::string              gen_id_field;
+    std::vector<std::string> parameter_order;
+};
+
+struct tool_function_analysis {
+    std::string name_prefix;  // e.g., "<function=", "\"name\": \"", "functions."
+    std::string name_suffix;  // e.g., ">", "\"", ":0"
+    std::string close;        // e.g., "</function>", "" (for tag-based)
+};
+
+struct tool_arguments_analysis {
+    std::string start;          // e.g., "<|tool_call_argument_begin|>", "<args>"
+    std::string end;            // e.g., "<|tool_call_argument_end|>", "</args>"
+    std::string name_prefix;   // e.g., "<param=", "<arg_key>", "\""
+    std::string name_suffix;   // e.g., ">", "</arg_key>", "\":"
+    std::string value_prefix;  // e.g., "", "<arg_value>", ""
+    std::string value_suffix;  // e.g., "</param>", "</arg_value>", ""
+    std::string separator;     // e.g., "", "\n", ","
+};
+
+struct tool_id_analysis {
+    call_id_position pos = call_id_position::NONE;
+
+    std::string prefix;  // e.g., "[CALL_ID]" (marker before call ID value)
+    std::string suffix;  // e.g., "" (marker after call ID value, before next section)
+};
+
+// ============================================================================
+// Parser build context (shared interface for build_parser methods)
+// ============================================================================
+
+struct analyze_content;
+
+struct parser_build_context {
+    common_chat_peg_builder & p;
+    const templates_params &          inputs;
+    common_peg_parser                 reasoning_parser;
+    bool                              extracting_reasoning = false;
+    const analyze_content *           content              = nullptr;
+
+    parser_build_context(common_chat_peg_builder & p, const templates_params & inputs);
+};
+
+// ============================================================================
+// Base class for analyzers with parser building
+// ============================================================================
+
+struct analyze_base {
+    virtual ~analyze_base() = default;
+    virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0;
+
+  protected:
+    const common_chat_template * tmpl = nullptr;
+
+    analyze_base() = default;
+    explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {}
+};
+
+// ============================================================================
+// Reasoning analyzer
+// ============================================================================
+
+struct analyze_reasoning : analyze_base {
+    reasoning_mode mode = reasoning_mode::NONE;
+
+    std::string start;  // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
+    std::string end;    // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
+
+    analyze_reasoning() = default;
+    analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+  private:
+    // Look for reasoning markers in rendered content
+    void compare_reasoning_presence();
+
+    // Compare generation prompt with enable_thinking=true vs false
+    void compare_thinking_enabled();
+
+    // Check if reasoning is always possible or only in tool calls
+    void compare_reasoning_scope();
+};
+
+// ============================================================================
+// Content analyzer
+// ============================================================================
+
+struct analyze_content : analyze_base {
+    content_mode mode = content_mode::PLAIN;
+
+    std::string start;  // e.g., "<response>", ">>>all\n", ""
+    std::string end;    // e.g., "</response>", ""
+
+    bool requires_nonnull_content = false;
+
+    analyze_content() = default;
+    analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning);
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+    bool is_always_wrapped() const;
+    common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
+};
+
+// ============================================================================
+// Tool analyzer
+// ============================================================================
+
+struct analyze_tools : analyze_base {
+    tool_format_analysis    format;
+    tool_function_analysis  function;
+    tool_arguments_analysis arguments;
+    tool_id_analysis        call_id;
+
+    analyze_tools() = default;
+    analyze_tools(const common_chat_template & tmpl,
+                  const jinja::caps &          caps,
+                  const analyze_reasoning &    reasoning);
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+  private:
+    // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
+    void analyze_tool_calls(const analyze_reasoning & reasoning);
+
+    // Analyze format based on the positions of the function name and argument name needles in the haystack
+    void analyze_tool_call_format(const std::string &       haystack,
+                                  const std::string &       fun_name_needle,
+                                  const std::string &       arg_name_needle,
+                                  const analyze_reasoning & reasoning);
+
+    // Analyze specifics of JSON native format (entire tool call is a JSON object)
+    void analyze_tool_call_format_json_native(const std::string & clean_haystack,
+                                              const std::string & fun_name_needle,
+                                              const std::string & arg_name_needle);
+
+    // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
+    void analyze_tool_call_format_non_json(const std::string & clean_haystack,
+                                           const std::string & fun_name_needle);
+
+    // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
+    void check_per_call_markers();
+
+    // Extract function name markers
+    void extract_function_markers();
+
+    // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
+    void analyze_arguments();
+
+    // Extract argument name markers
+    void extract_argument_name_markers();
+
+    // Extract argument value markers
+    void extract_argument_value_markers();
+
+    // Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
+    void extract_argument_separator();
+
+    // Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
+    void extract_args_markers();
+
+    // Extract call ID markers, if present
+    void extract_call_id_markers();
+
+    // Per-format tool parser builders
+    common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const;
+    common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const;
+    common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const;
+};
+
+// ============================================================================
+// Main autoparser class
+// ============================================================================
+
+struct autoparser {
+    jinja::caps          jinja_caps;
+    analyze_reasoning    reasoning;
+    analyze_content      content;
+    analyze_tools        tools;
+    bool                 analysis_complete = false;
+
+    // Preserved tokens for tokenizer (union of all non-empty markers)
+    std::vector<std::string> preserved_tokens;
+
+    autoparser() = default;
+
+    // Run full differential analysis on a template
+    void analyze_template(const common_chat_template & tmpl);
+
+    // Build the PEG parser for this template
+    common_peg_arena build_parser(const templates_params & inputs) const;
+
+  private:
+    // Collect tokens from entire analysis to preserve
+    void collect_preserved_tokens();
+};
+
+// ============================================================================
+// Parser generator
+// ============================================================================
+
+class peg_generator {
+  public:
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct templates_params & inputs);
+
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct templates_params & inputs,
+                                              const autoparser &              autoparser);
+};
+
+}  // namespace autoparser
+
+enum segment_type { TEXT, MARKER };
+
+inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
+    const char * label = "UNKNOWN";
+    if (type == segment_type::TEXT) {
+        label = "TEXT";
+    } else if (type == segment_type::MARKER) {
+        label = "MARKER";
+    }
+    // Anything that is not a named enumerator keeps the "UNKNOWN" label
+    return os << label;
+}
+
+struct segment {
+    segment_type type;   // TEXT or MARKER
+    std::string  value;  // text carried by this segment
+
+    segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
+
+    // Two segments are equal only when both their kind and their text match
+    bool operator==(const segment & other) const { return type == other.type && value == other.value; }
+
+    bool operator!=(const segment & other) const {
+        const bool same = (*this == other);
+        return !same;
+    }
+};
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp

new file mode 100644 (file)

index 0000000..4068340
--- /dev/null
+++ b/common/chat-diff-analyzer.cpp
@@ -0,0 +1,1330 @@
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <algorithm>
+
+#define ANSI_RESET  "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m"
+#define ANSI_ORANGE "\033[1m\x1b[38;5;214m"
+#define ANSI_RED    "\033[1m\x1b[38;5;196m"
+
+using json = nlohmann::ordered_json;
+
+namespace autoparser {
+
+static const std::string FUN_FIRST = "FFF_FIRST_FUN_F";
+static const std::string FUN_SECOND = "SSS_SECOND_FUN_S";
+static const std::string ARG_FIRST = "AA_ARG_FST_AA";
+static const std::string ARG_SECOND = "BB_ARG_SND_BB";
+static const std::string USER_MSG = "U_USER_MSG Hello END_U";
+static const std::string ASSISTANT_MSG = "A_ASST_MSG I can help END_A";
+static const std::string THINKING_CONTENT = "REASON_PART I am thinking END_R";
+
+static std::vector<std::function<void(const common_chat_template & tmpl, autoparser &)>> workarounds(
+    { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
+      // support reasoning on them
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
+              tmpl.src.find("reasoning_content") == std::string::npos &&
+              analysis.reasoning.mode == reasoning_mode::NONE) {
+              analysis.reasoning.mode  = reasoning_mode::FORCED_OPEN;
+              analysis.reasoning.start = "<think>";
+              analysis.reasoning.end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
+          }
+      },
+      // Granite 3.3, with separate reasoning and content markers
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
+                            "<response></response>") != std::string::npos) {
+              analysis.reasoning.mode  = reasoning_mode::TAG_BASED;
+              analysis.reasoning.start = "<think>";
+              analysis.reasoning.end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              analysis.content.mode  = content_mode::WRAPPED_WITH_REASONING;
+              analysis.content.start = "<response>";
+              analysis.content.end   = "</response>";
+              analysis.preserved_tokens.push_back("<response>");
+              analysis.preserved_tokens.push_back("</response>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET);
+          }
+      },
+      // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
+              tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) {
+              analysis.content.mode  = content_mode::ALWAYS_WRAPPED;
+              analysis.content.start = "<|CHATBOT_TOKEN|>";
+              analysis.content.end   = "<|END_OF_TURN_TOKEN|>";
+              analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>");
+              analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET);
+          }
+      },
+      // Functionary - no tool call section delimiter
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", "
+                            "\"code_interpreter\") | list | length > 0") != std::string::npos) {
+              analysis.content.mode                = content_mode::PLAIN;
+              analysis.content.end                 = "";
+              analysis.tools.function.name_prefix  = "";
+              analysis.tools.format.section_start  = "";
+              analysis.tools.format.section_end    = "";
+              analysis.tools.format.per_call_start = "<function=";
+              analysis.tools.format.per_call_end   = "</function>";
+              analysis.tools.function.close        = "";
+              analysis.preserved_tokens.clear();
+              analysis.preserved_tokens.push_back("<|eot_id|>");
+              analysis.preserved_tokens.push_back("<|eom_id|>");
+              analysis.preserved_tokens.push_back("<function=");
+              analysis.preserved_tokens.push_back(">");
+              analysis.preserved_tokens.push_back("</function>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET);
+          }
+      },
+      // DeepSeek-R1-Distill-Qwen
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find(
+                  "{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>'") !=
+              std::string::npos) {
+              analysis.tools.format.section_start  = "<｜tool▁calls▁begin｜>";
+              analysis.tools.format.section_end    = "<｜tool▁calls▁end｜>";
+              analysis.tools.format.per_call_start = "<｜tool▁call▁begin｜>function";
+              analysis.tools.function.name_prefix  = "<｜tool▁sep｜>";
+              analysis.tools.format.per_call_end   = "<｜tool▁call▁end｜>";
+              analysis.tools.function.close        = "```";
+          }
+      }
+    });
+
+// Common JSON structures
+static json params_schema = {
+    { "type",       "object"                                                           },
+    { "properties",
+     { { ARG_FIRST, { { "type", "string" }, { "description", "First argument" } } },
+        { ARG_SECOND, { { "type", "string" }, { "description", "Second argument" } } } } },
+    { "required",   json::array({})                                                    }
+};
+
+static json tools = json::array({
+    { { "type", "function" },
+     { "function",
+        json{ { "name", FUN_FIRST }, { "description", "Test function foo" }, { "parameters", params_schema } } } },
+    { { "type", "function" },
+     { "function",
+        json{ { "name", FUN_SECOND }, { "description", "Test function bar" }, { "parameters", params_schema } } } }
+});
+
+static json user_msg = json{
+    { "role",    "user"  },
+    { "content", USER_MSG }
+};
+
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call00001") {
+    return json{  // object with "id", "type" and a nested "function" entry
+        { "id",       id                                              },
+        { "type",     "function"                                      },
+        { "function", json{ { "name", name }, { "arguments", args } } }  // args is embedded as given
+    };
+}
+
+static json first_tool_call_zero_args         = build_tool_call(FUN_FIRST, json::object(), "call00001");
+static json first_tool_call_one_arg           = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "XXXX" }}, "call00001");
+static json first_tool_call_one_arg_other_val = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "YYYY" }}, "call00001");
+static json first_tool_call_other_arg         = build_tool_call(FUN_FIRST, {{ ARG_SECOND, "YYYY" }}, "call00001");
+
+static json first_tool_call =
+    build_tool_call(FUN_FIRST, json{{ ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, "call00001");
+static json second_tool_call =
+    build_tool_call(FUN_SECOND, json{ { ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, "call00002");
+static json first_tool_call_alt_id =
+    build_tool_call(FUN_FIRST, json{{ ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, "call99999");
+
+template <typename T>
+static std::string mode_to_str(T mode) {
+    std::ostringstream buffer;
+    buffer << mode;  // uses whichever operator<< overload exists for T
+    return buffer.str();
+}
+
+void autoparser::analyze_template(const common_chat_template & tmpl) {  // runs all analyzers, applies workarounds, logs the result
+    jinja_caps = tmpl.original_caps();
+    reasoning = analyze_reasoning(tmpl, jinja_caps.supports_tool_calls);
+    content = analyze_content(tmpl, reasoning);
+    tools = analyze_tools(jinja_caps.supports_tool_calls ? analyze_tools(tmpl, jinja_caps, reasoning) : analyze_tools());  // default-constructed (tool_format::NONE) without tool-call support
+    collect_preserved_tokens();
+    // Workarounds run after token collection; each may patch the analysis and edit (or clear) preserved_tokens itself
+    for (auto & workaround : workarounds) {
+        workaround(tmpl, *this);
+    }
+
+    LOG_DBG("\n--- Reasoning & Content Structure ---\n");
+    LOG_DBG("reasoning_mode: %s\n", mode_to_str(reasoning.mode).c_str());
+    LOG_DBG("reasoning_start: '%s'\n", reasoning.start.c_str());
+    LOG_DBG("reasoning_end: '%s'\n", reasoning.end.c_str());
+    LOG_DBG("content_mode: %s\n", mode_to_str(content.mode).c_str());
+    LOG_DBG("content_start: '%s'\n", content.start.c_str());
+    LOG_DBG("content_end: '%s'\n", content.end.c_str());
+
+    LOG_DBG("\n--- Tool Call Structure ---\n");
+    LOG_DBG("tool_mode: %s\n", mode_to_str(tools.format.mode).c_str());
+    LOG_DBG("supports_tools: %s\n", jinja_caps.supports_tools ? "true" : "false");
+    LOG_DBG("supports_parallel_calls: %s\n", jinja_caps.supports_parallel_tool_calls ? "true" : "false");
+    LOG_DBG("tool_section_start: '%s'\n", tools.format.section_start.c_str());
+    LOG_DBG("tool_section_end: '%s'\n", tools.format.section_end.c_str());
+    LOG_DBG("per_call_start: '%s'\n", tools.format.per_call_start.c_str());
+    LOG_DBG("per_call_end: '%s'\n", tools.format.per_call_end.c_str());
+    LOG_DBG("func_name_prefix: '%s'\n", tools.function.name_prefix.c_str());
+    LOG_DBG("func_name_suffix: '%s'\n", tools.function.name_suffix.c_str());
+    LOG_DBG("func_close: '%s'\n", tools.function.close.c_str());
+    LOG_DBG("python_dict_format: %s\n", tools.format.uses_python_dicts ? "true" : "false");
+    LOG_DBG("arg_name_prefix: '%s'\n", tools.arguments.name_prefix.c_str());
+    LOG_DBG("arg_name_suffix: '%s'\n", tools.arguments.name_suffix.c_str());
+    LOG_DBG("arg_value_prefix: '%s'\n", tools.arguments.value_prefix.c_str());
+    LOG_DBG("arg_value_suffix: '%s'\n", tools.arguments.value_suffix.c_str());
+    LOG_DBG("name_field: '%s'\n", tools.format.name_field.c_str());
+    LOG_DBG("args_field: '%s'\n", tools.format.args_field.c_str());
+    LOG_DBG("id_field: '%s'\n", tools.format.id_field.c_str());
+    LOG_DBG("gen_id_field: '%s'\n", tools.format.gen_id_field.c_str());
+    LOG_DBG("parameter_order: '%s'\n", std::accumulate(tools.format.parameter_order.begin(), tools.format.parameter_order.end(),
+        std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
+        ).c_str());  // parameter_order entries joined with ", "
+
+    LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET);
+    analysis_complete = true;
+}
+
+void autoparser::collect_preserved_tokens() {
+    auto add_token = [this](const std::string & org_token) {
+        std::string token = trim_whitespace(org_token);
+        if (!token.empty()) {
+            // Avoid duplicates
+            if (std::find(preserved_tokens.begin(), preserved_tokens.end(), token) == preserved_tokens.end()) {
+                preserved_tokens.push_back(token);
+            }
+        }
+    };
+
+    add_token(reasoning.start);
+    add_token(reasoning.end);
+    add_token(content.start);
+    add_token(content.end);
+    add_token(tools.format.section_start);
+    add_token(tools.format.section_end);
+    add_token(tools.format.per_call_start);
+    add_token(tools.format.per_call_end);
+    add_token(tools.function.name_prefix);
+    add_token(tools.function.name_suffix);
+    add_token(tools.function.close);
+    add_token(tools.arguments.start);
+    add_token(tools.arguments.end);
+    add_token(tools.arguments.name_prefix);
+    add_token(tools.arguments.name_suffix);
+    add_token(tools.arguments.separator);
+    add_token(tools.arguments.value_prefix);
+    add_token(tools.arguments.value_suffix);
+    add_token(tools.call_id.prefix);
+    add_token(tools.call_id.suffix);
+}
+
+// Phase 1 of the differential analysis: runs the reasoning comparisons against the template.
+// The reasoning-scope comparison involves tool calls, so it is only run when supports_tools is set.
+analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET);
+    LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET);
+
+    compare_reasoning_presence();
+    compare_thinking_enabled();
+
+    if (!supports_tools) {
+        return;
+    }
+    compare_reasoning_scope();
+}
+
+void analyze_reasoning::compare_reasoning_presence() {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", USER_MSG }
+    };
+
+    json assistant_no_reasoning = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"                },
+        { "content",           ASSISTANT_MSG              },
+        { "reasoning_content", THINKING_CONTENT           }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    const std::string reasoning_content = THINKING_CONTENT;
+
+    if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
+        auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
+        });
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        // try the more aggressive parse first, if it fails, fall back to the delimiter one
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (!result.result.success()) {
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+        }
+        if (result.result.success()) {
+            if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
+                if (parser_wrapped.parse_anywhere_and_extract(diff.right).result.success()) { // both tags in the diff = no forced close
+                    mode = reasoning_mode::TAG_BASED;
+                } else {
+                    mode = reasoning_mode::FORCED_CLOSED;
+                }
+                start = trim_whitespace(result.tags["pre"]);
+                end   = result.tags["post"];
+            } else if (!result.tags["post"].empty()) {
+                mode = reasoning_mode::DELIMITER;
+                end = result.tags["post"];
+            }
+        }
+    }
+}
+
+void analyze_reasoning::compare_thinking_enabled() {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", USER_MSG }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg });
+    params.add_generation_prompt = true;
+    params.enable_thinking       = false;
+
+    auto comparison = compare_variants(*tmpl, params, [&](template_params & p) { p.enable_thinking = true; });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    std::string left_trimmed = trim_whitespace(diff.left);
+
+    if (left_trimmed.empty() && !diff.right.empty()) {
+        std::string right_trimmed = trim_whitespace(diff.right);
+
+        if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
+            if (start.empty()) {
+                start = right_trimmed;
+                mode  = reasoning_mode::FORCED_OPEN;
+            }
+        }
+    }
+
+    if (start.empty() && !end.empty()) {
+        mode = reasoning_mode::DELIMITER;
+    }
+
+    // Check for FORCED_CLOSED: when enable_thinking=false produces both start and end markers,
+    // but enable_thinking=true produces only the start marker
+    if (!comparison->output_A.empty() && !comparison->output_B.empty()) {
+        auto parser_start = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.literal(start) + p.space() + p.literal(end) + p.rest();
+        });
+        auto parser_start_end = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.literal(start)) + p.space() + p.negate(p.literal(end)) + p.rest();
+        });
+        if (!start.empty() && parser_start_end.parse_anywhere_and_extract(comparison->output_A).result.success() &&
+            parser_start.parse_anywhere_and_extract(comparison->output_B).result.success()) {
+            mode = reasoning_mode::FORCED_CLOSED;
+        } else if (!end.empty()) { // we extract the starting marker now since we didn't get it earlier
+            auto result = parser_start_end.parse_anywhere_and_extract(comparison->output_A);
+            if (result.result.success()) {
+                start = result.tags["pre"];
+                mode  = reasoning_mode::FORCED_CLOSED;
+            }
+        }
+    }
+
+    if (start.empty() && end.empty()) {  // we might still have the case of "just open" and "just close"
+        if (!diff.left.empty() && !diff.right.empty()) {
+            auto seg_A = segmentize_markers(trim_trailing_whitespace(diff.left));
+            auto seg_B = segmentize_markers(trim_trailing_whitespace(diff.right));
+            if (seg_A.size() == 1 && seg_B.size() == 1) {
+                mode = reasoning_mode::FORCED_CLOSED;
+                start = seg_B[0].value;
+                end = seg_A[0].value;
+            }
+        }
+    }
+}
+
+void analyze_reasoning::compare_reasoning_scope() {
+    json assistant_reasoning_content = json{
+        { "role",              "assistant"      },
+        { "content",           ASSISTANT_MSG    },
+        { "reasoning_content", THINKING_CONTENT }
+    };
+
+    json assistant_reasoning_tools = json{
+        { "role",              "assistant"                                                                  },
+        { "content",           nullptr                                                                      },
+        { "reasoning_content", THINKING_CONTENT                                                             },
+        { "tool_calls",
+         json::array({ build_tool_call(FUN_FIRST, json{ { ARG_FIRST, "VVVV" }, { ARG_SECOND, "XXXX" } }) }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_reasoning_content });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    std::string reasoning_content = THINKING_CONTENT;
+
+    // Check if reasoning only appears in variant B (with tools)
+    bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos;
+    bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos;
+
+    if (!reasoning_in_A && reasoning_in_B) {
+        mode = reasoning_mode::TOOLS_ONLY;
+        LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__);
+
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space()));
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (result.result.success()) {
+            start = result.tags["pre"];
+            end = result.tags["post"];
+        } else {
+            auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
+            });
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+            if (result.result.success()) {
+                end = result.tags["post"];
+            } else {
+                LOG_DBG(ANSI_ORANGE "%s: Unable to extracft reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
+                mode = reasoning_mode::NONE;
+            }
+        }
+    }
+}
+
+analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET);
+
+    json assistant_content_only = json{
+        { "role",    "assistant"     },
+        { "content", ASSISTANT_MSG   }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"      },
+        { "content",           ""               },
+        { "reasoning_content", THINKING_CONTENT }
+    };
+
+    template_params params_content_only;
+    params_content_only.messages              = json::array({ user_msg, assistant_content_only });
+    params_content_only.add_generation_prompt = false;
+    params_content_only.enable_thinking       = true;
+    params_content_only.tools                 = tools;
+
+    auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_tools });
+    });
+
+    auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_reasoning });
+    });
+
+    if (!comparison_with_tools || !comparison_with_reasoning) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+    }
+
+    const auto & diff_tools     = comparison_with_tools->diff;
+    const auto & diff_reasoning = comparison_with_reasoning->diff;
+
+    std::string response = ASSISTANT_MSG;
+
+    bool found_plain_content = false;
+    if (trim_whitespace(diff_tools.left) == response) {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            return p.space() + diff_reasoning.left + p.space() + p.optional(p.marker()) + p.space() + p.end();
+        });
+        if (parser.parse_and_extract(diff_reasoning.left).result.success()) {
+            // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
+            mode = content_mode::PLAIN;
+            found_plain_content = true;
+        } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) {
+            auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.literal(reasoning.end) + p.space() + p.literal(response);
+            });
+            if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) {
+                mode = content_mode::PLAIN;
+                found_plain_content = true;
+            }
+        }
+    }
+    if (!found_plain_content) {
+        std::string rdiff = diff_reasoning.left;
+        if (!reasoning.end.empty() && rdiff.find(reasoning.end) != std::string::npos) {
+            rdiff = rdiff.substr(rdiff.find(reasoning.end) + reasoning.end.length());
+        }
+        // Take the more promising diff
+        std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker()) + p.space() + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(pure_content);
+        start = result.tags["pre"];
+        end = result.tags["post"];
+        // TODO: WRAPPED_WITH_REASONING
+    }
+
+    // Determine content mode
+    if (!start.empty() || !end.empty()) {
+        mode = content_mode::ALWAYS_WRAPPED;
+        // TODO: END_DELIMITED content mode - delimited at end but not at start?
+    }
+}
+
+// True only when content was classified as ALWAYS_WRAPPED and both wrapping markers are known.
+bool analyze_content::is_always_wrapped() const {
+    if (mode != content_mode::ALWAYS_WRAPPED) {
+        return false;
+    }
+    return !(start.empty() || end.empty());
+}
+
+analyze_tools::analyze_tools(const common_chat_template & tmpl,
+                             const jinja::caps &          caps,
+                             const analyze_reasoning &    reasoning)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET);
+
+    analyze_tool_calls(reasoning);
+
+    if (format.mode != tool_format::NONE && format.mode != tool_format::JSON_NATIVE) {
+        if (caps.supports_parallel_tool_calls) {
+            check_per_call_markers();
+        }
+        extract_function_markers();
+        if (format.mode == tool_format::TAG_WITH_TAGGED) {
+            analyze_arguments();
+        }
+        extract_argument_separator();
+        extract_args_markers();
+        extract_call_id_markers();
+    }
+}
+
+void analyze_tools::analyze_tool_calls(const analyze_reasoning & reasoning) {
+    json assistant_no_tools = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_tools });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    std::string tool_section = diff.right;
+
+    if (tool_section.empty()) {
+        return;
+    }
+
+    analyze_tool_call_format(tool_section, FUN_FIRST, ARG_FIRST, reasoning);
+}
+
+void analyze_tools::analyze_tool_call_format(const std::string &       haystack,
+                                             const std::string &       fun_name_needle,
+                                             const std::string &       arg_name_needle,
+                                             const analyze_reasoning & reasoning) {
+    if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) {
+        return;
+    }
+
+    enum class json_quote_style { NONE, DOUBLE_QUOTES, SINGLE_QUOTES };
+
+    auto in_json_haystack = [&haystack](const std::string & needle) -> json_quote_style {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) << p.choice({
+                p.tag("sq", p.literal("'") + p.literal(needle) + p.literal("'")),
+                p.tag("dq", p.literal("\"") + p.literal(needle) + p.literal("\"")) });
+        });
+        auto result = parser.parse_anywhere_and_extract(haystack);
+        if (!result.result.success()) {
+            return json_quote_style::NONE;
+        }
+        return result.tags.count("sq") && !result.tags["sq"].empty()
+            ? json_quote_style::SINGLE_QUOTES
+            : json_quote_style::DOUBLE_QUOTES;
+    };
+
+    auto fun_quote = in_json_haystack(fun_name_needle);
+    auto arg_quote = in_json_haystack(arg_name_needle);
+
+    if (fun_quote != json_quote_style::NONE) {
+        // no need to check further, we're in JSON land
+        format.mode = tool_format::JSON_NATIVE;
+        format.uses_python_dicts = (fun_quote == json_quote_style::SINGLE_QUOTES);
+    } else if (arg_quote != json_quote_style::NONE) {
+        format.mode = tool_format::TAG_WITH_JSON;
+        format.uses_python_dicts = (arg_quote == json_quote_style::SINGLE_QUOTES);
+    } else {
+        format.mode = tool_format::TAG_WITH_TAGGED;
+    }
+
+    // first, remove any reasoning markers
+    std::string clean_haystack = haystack;
+    if (!reasoning.start.empty()) {
+        auto pos = haystack.find(reasoning.start);
+        if (pos != std::string::npos) {
+            clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + reasoning.start.length());
+        }
+    }
+    if (!reasoning.end.empty()) {
+        auto pos = clean_haystack.find(reasoning.end);
+        if (pos != std::string::npos) {
+            clean_haystack = clean_haystack.substr(0, pos) + clean_haystack.substr(pos + reasoning.end.length());
+        }
+    }
+
+    if (format.mode == tool_format::JSON_NATIVE) {
+        analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle);
+    } else {
+        analyze_tool_call_format_non_json(clean_haystack, fun_name_needle);
+    }
+    // always relax whitespace requirements on ending markers since they don't influence content
+    format.section_end  = trim_whitespace(format.section_end);
+    format.per_call_end = trim_whitespace(format.per_call_end);
+}
+
+void analyze_tools::analyze_tool_call_format_json_native(const std::string & clean_haystack,
+                                                         const std::string & fun_name_needle,
+                                                         const std::string & arg_name_needle) {
+    // we might not have the typical OpenAI tool calling structure
+    int  json_start     = clean_haystack.find_first_of('{');
+    int  json_end       = clean_haystack.find_last_of('}');
+    std::string cut     = clean_haystack.substr(json_start, json_end - json_start + 1);
+    json call_struct    = json::parse(cut);
+    auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
+        if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) {
+            format.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) {
+            format.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().dump().find(arg_name_needle) !=
+                   std::string::npos) {  // handle both string and JSON obj variants
+            format.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.key().find("id") != std::string::npos) {
+            // heuristics for generated id field
+            format.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        }
+    };
+    for (const auto & el : call_struct.items()) {
+        if (el.key() == fun_name_needle) {
+            format.fun_name_is_key = true;
+            // When function name is the key, there's no name field and args are direct
+            format.name_field.clear();
+            format.args_field.clear();
+            // Don't register this element - the function name IS the key, not a field
+        } else {
+            if (el.value().is_object() &&
+                el.value().dump().find(arg_name_needle) == std::string::npos) {  // not the args object
+                format.function_field = el.key();
+                for (const auto & subel : el.value().items()) {
+                    register_field(el.key(), subel);
+                }
+            }
+            // Register this element as a potential field
+            register_field("", el);
+        }
+    }
+    auto array_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("pre", p.literal("[") + p.space()) + p.literal(cut) + p.tag("post", p.space() + p.literal("]"));
+    });
+
+    auto ar_parse_res = array_parser.parse_anywhere_and_extract(clean_haystack);
+    if (ar_parse_res.result.success()) {
+        format.tools_array_wrapped = true;
+        json_start -= ar_parse_res.tags["pre"].length();
+        json_end += ar_parse_res.tags["post"].length();
+    }
+    json_end++; // we want to move past the closing char for end marker extraction
+
+    std::vector<std::pair<size_t, std::string>> located_params;
+    if (!format.name_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.name_field), format.name_field });
+    }
+    if (!format.args_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.args_field), format.args_field });
+    }
+    if (!format.id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.id_field), format.id_field });
+    }
+    if (!format.gen_id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.gen_id_field), format.gen_id_field });
+    }
+    std::sort(located_params.begin(), located_params.end());
+    for (auto & pair : located_params) {
+        format.parameter_order.push_back(pair.second);
+    }
+    // we can immediately extract tool calling markers too
+    format.section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start));
+    format.section_end   = trim_whitespace(clean_haystack.substr(json_end));
+    // When tools_array_wrapped is true, the closing bracket is part of the array structure,
+    // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets.
+    if (format.tools_array_wrapped && format.section_end == "]") {
+        format.section_end.clear();
+    }
+}
+
+// Extracts section and per-call markers for tool calls that are not rendered as native JSON.
+//   clean_haystack  - tool-call text with reasoning markers already removed
+//   fun_name_needle - function name used in the probe call
+// Writes format.section_start / section_end / per_call_start / per_call_end and, when two
+// leading markers are found, prepends the detected function-tag prefix to function.name_prefix.
+void analyze_tools::analyze_tool_call_format_non_json(const std::string & clean_haystack,
+                                                      const std::string & fun_name_needle) {
+    // first, let's find out if the function is inside a tag or standalone
+    // NOTE(review): in the "[" alternative the parenthesis of p.negate(...) also encloses
+    // p.until("]") + p.literal("]"), unlike the "<" alternative where negate only covers the
+    // lookahead; as written the "[" variant of "fun_post" never consumes the closing bracket.
+    // Looks unintended - confirm.
+    auto fun_marker_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("fun_marker", p.choice({
+            p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", fun_name_needle })) + p.literal(fun_name_needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+            p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", fun_name_needle })) + p.literal(fun_name_needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+    });
+    auto fun_res = fun_marker_parser.parse_anywhere_and_extract(clean_haystack);
+    // Fall back to the bare function name when it is not enclosed in a <...> or [...] tag
+    std::string fun_marker = fun_name_needle;
+    if (fun_res.result.success()) {
+        fun_marker = fun_res.tags["fun_marker"];
+    }
+    // now, consume up to two markers, then treat everything up to the function marker as function name prefix
+    auto per_tool_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("sec_start", p.marker() + p.space()) + p.tag("call_start", p.marker() + p.space()) +
+            p.tag("fun_pre", p.until(fun_marker)) + fun_marker + p.tag("rest", p.rest());
+    });
+    // Single-marker variant: one section marker directly followed by the function marker
+    auto section_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("sec_start", p.marker() + p.space()) + fun_marker + p.tag("rest", p.rest());
+    });
+    auto result = per_tool_parser.parse_anywhere_and_extract(clean_haystack);
+    tagged_parse_result result_end;
+    if (result.result.success()) {
+        // Two opening markers were found, so look for two closing markers at the end of the rest
+        auto double_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("call_end", p.marker() + p.space()) + p.tag("sec_end", p.marker() + p.space()) + p.end();
+        });
+        result_end = double_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+        // NOTE(review): the prefix comes from fun_res ("fun_pre" of the tag parser); the "fun_pre"
+        // tag captured by per_tool_parser itself is never read - confirm which one is intended.
+        function.name_prefix = fun_res.tags["fun_pre"] + function.name_prefix;
+    } else {
+        result = section_parser.parse_anywhere_and_extract(clean_haystack);
+        auto single_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("sec_end", p.marker() + p.space()) + p.end();
+        });
+        result_end = single_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+    }
+    // Tags are read regardless of whether the parsers above matched
+    format.per_call_start = result.tags["call_start"];
+    format.per_call_end = result_end.tags["call_end"];
+    format.section_start = result.tags["sec_start"];
+    format.section_end = result_end.tags["sec_end"];
+}
+
+void analyze_tools::check_per_call_markers() {
+    json assistant_one_tool = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_two_tools = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call, second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_tool });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto one_vs_two = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); });
+
+    if (!one_vs_two) {
+        LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right);
+
+    std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right);
+    if (!format.section_start.empty() &&
+        second_tool_content.find(format.section_start) == 0) {
+        format.per_call_start = format.section_start;
+        format.per_call_end   = format.section_end;
+        format.section_start.clear();
+        format.section_end.clear();
+    }
+}
+
+void analyze_tools::extract_function_markers() {
+    json assistant_nocall = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG },
+    };
+
+    json assistant_foofoo = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_barbar = json{
+        { "role",       "assistant"                       },
+        { "content",    ""                                },
+        { "tool_calls", json::array({ second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_foofoo });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (diff.left.find(FUN_FIRST) != std::string::npos && diff.right.find(FUN_SECOND) != std::string::npos) {
+        std::string prefix_marker;
+        if (!format.per_call_start.empty()) {
+            prefix_marker = format.per_call_start;
+        } else {
+            prefix_marker = format.section_start;
+        }
+        if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) {
+            function.name_prefix =
+                diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size());
+        }
+
+        // Extract name prefix/suffix from diff.left (stop at the next marker boundary)
+        auto name_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.until(FUN_FIRST)) + p.literal(FUN_FIRST) +
+                   p.tag("post", p.zero_or_more(p.negate(p.marker()) + p.any()));
+        });
+        auto name_result = name_parser.parse_and_extract(diff.left);
+        if (name_result.result.success()) {
+            function.name_prefix += name_result.tags["pre"];
+            function.name_suffix = name_result.tags["post"];
+        }
+
+        // Extend name_suffix with content from diff.suffix before args begin
+        if (format.mode == tool_format::TAG_WITH_JSON) {
+            // For JSON: name_suffix extends to the first non-marker { or [, including any
+            // markers along the way. Only applies if there's at least one marker after
+            // the JSON content (matching the original "stop < seg_suf.size() - 1" guard).
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                auto non_json = p.marker() | (p.negate(p.literal("{")) + p.negate(p.literal("[")) + p.any());
+                auto after_json = p.zero_or_more(p.negate(p.marker()) + p.any()) + p.marker();
+                return p.tag("ext", p.zero_or_more(non_json)) + after_json;
+            });
+            auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+            if (suf_result.result.success()) {
+                function.name_suffix += suf_result.tags["ext"];
+            }
+        } else {
+            // For tagged: name_suffix extends to the first marker (arg marker)
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                return p.tag("ext", p.zero_or_more(p.negate(p.marker()) + p.any()));
+            });
+            auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+            if (suf_result.result.success()) {
+                function.name_suffix += suf_result.tags["ext"];
+            }
+        }
+
+        // Extract the closer (between last arg and call/section end marker)
+        std::string suffix_marker;
+        if (!format.per_call_end.empty()) {
+            suffix_marker = format.per_call_end;
+        } else {
+            suffix_marker = format.section_end;
+        }
+        std::string closer_suffix;
+        if (suffix_marker.empty()) {
+            // we'll have to rely on an extra diff with no-calls version
+            auto notool_comp = compare_variants(
+                *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
+            auto nt_diff  = notool_comp->diff;
+            closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
+        } else {
+            closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
+        }
+        if (!closer_suffix.empty()) {
+            if (format.mode == tool_format::TAG_WITH_TAGGED) {
+                // After last arg value, skip the closing arg marker, rest is closer
+                auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                    return p.until("YYYY") + p.literal("YYYY") + p.space() +
+                           p.marker() + p.space() +
+                           p.tag("close", p.rest());
+                });
+                auto close_result = closer_parser.parse_and_extract(closer_suffix);
+                if (close_result.result.success()) {
+                    function.close = close_result.tags["close"];
+                }
+            } else if (format.mode == tool_format::TAG_WITH_JSON) {
+                // After last arg value, find end of JSON args, rest is closer
+                auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                    return p.until("YYYY") + p.literal("YYYY") + p.tag("post_val", p.rest());
+                });
+                auto close_result = closer_parser.parse_and_extract(closer_suffix);
+                if (close_result.result.success()) {
+                    const auto & post = close_result.tags["post_val"];
+                    size_t pos = post.find_last_of("}]");
+                    if (pos != std::string::npos && pos < post.size() - 1) {
+                        function.close = trim_leading_whitespace(post.substr(pos + 1));
+                    }
+                }
+            }
+        }
+        function.close = trim_leading_whitespace(function.close);
+    }
+}
+
+// Phase 4 of the tool analysis: logs the phase banner, then runs the argument
+// name-marker extraction followed by the argument value-marker extraction.
+void analyze_tools::analyze_arguments() {
+    LOG_DBG(ANSI_ORANGE "Phase 4: Argument analysis\n" ANSI_RESET);
+
+    // Name markers go first: extract_argument_value_markers() reads
+    // arguments.name_suffix, which extract_argument_name_markers() may set.
+    extract_argument_name_markers();
+    extract_argument_value_markers();
+}
+
+// Detects the text that surrounds an argument name.
+//
+// The same conversation is rendered twice, once with first_tool_call_one_arg and once
+// with first_tool_call_other_arg, and the two outputs are diffed. On a recognised
+// pattern this fills arguments.name_prefix / arguments.name_suffix; if the template
+// cannot be applied, or neither pattern matches, they are left untouched.
+void analyze_tools::extract_argument_name_markers() {
+    json msg_with_first_name = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json msg_with_second_name = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_other_arg }) }
+    };
+
+    template_params base;
+    base.messages              = json::array({ user_msg, msg_with_first_name });
+    base.tools                 = tools;
+    base.add_generation_prompt = false;
+    base.enable_thinking       = true;
+
+    auto comparison = compare_variants(*tmpl, base, [&](template_params & variant) {
+        variant.messages = json::array({ user_msg, msg_with_second_name });
+    });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    // Both sides must contribute something, otherwise there is nothing to compare.
+    if (diff.left.empty() || diff.right.empty()) {
+        return;
+    }
+
+    // Parse both sides to find ARG_FIRST/ARG_SECOND and capture the text around them.
+    auto first_name_parser = build_tagged_peg_parser([&](common_peg_parser_builder & b) {
+        return b.tag("pre", b.until(ARG_FIRST)) + b.literal(ARG_FIRST) +
+               b.tag("suffix", b.until_one_of({"\"", "X"}));
+    });
+    auto second_name_parser = build_tagged_peg_parser([&](common_peg_parser_builder & b) {
+        return b.tag("pre", b.until(ARG_SECOND)) + b.literal(ARG_SECOND) +
+               b.tag("suffix", b.until_one_of({"\"", "Y"}));
+    });
+    auto first_parse  = first_name_parser.parse_anywhere_and_extract(diff.left);
+    auto second_parse = second_name_parser.parse_anywhere_and_extract(diff.right);
+
+    // Case 1: the name sits inside a shared wrapper (e.g. a JSON key) - both sides
+    // show the same non-empty text before the name and the same text after it.
+    const bool name_is_wrapped =
+        first_parse.result.success() && second_parse.result.success() &&
+        !first_parse.tags["pre"].empty() &&
+        first_parse.tags["pre"] == second_parse.tags["pre"] &&
+        first_parse.tags["suffix"] == second_parse.tags["suffix"];
+    if (name_is_wrapped) {
+        arguments.name_prefix = trim_whitespace(first_parse.tags["pre"]);
+        arguments.name_suffix = trim_leading_whitespace(first_parse.tags["suffix"]);
+        return;
+    }
+
+    // Case 2: each differing region begins directly with the argument name.
+    const bool diff_starts_with_names =
+        diff.left.compare(0, ARG_FIRST.length(), ARG_FIRST) == 0 &&
+        diff.right.compare(0, ARG_SECOND.length(), ARG_SECOND) == 0;
+    if (!diff_starts_with_names) {
+        return;
+    }
+
+    // Prefix: the last marker in diff.prefix plus whatever follows it up to the end;
+    // the whole of diff.prefix is used when that parse fails.
+    auto prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & b) {
+        auto last_marker = b.marker() + b.zero_or_more(b.negate(b.marker()) + b.any()) + b.end();
+        return b.zero_or_more(b.negate(last_marker) + b.any()) + b.tag("name_prefix", last_marker);
+    });
+    auto prefix_parse = prefix_parser.parse_and_extract(diff.prefix);
+    if (prefix_parse.result.success()) {
+        arguments.name_prefix = prefix_parse.tags["name_prefix"];
+    } else {
+        arguments.name_prefix = diff.prefix;
+    }
+
+    // Suffix: everything after ARG_FIRST up to and including the first marker (plus
+    // optional whitespace). That marker may live in diff.left or in diff.suffix, so
+    // the two are concatenated before parsing.
+    const std::string after_name = diff.left.substr(ARG_FIRST.length()) + diff.suffix;
+    auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & b) {
+        return b.tag("suffix", b.zero_or_more(b.negate(b.marker()) + b.any()) +
+                               b.marker() + b.space());
+    });
+    auto suffix_parse = suffix_parser.parse_anywhere_and_extract(after_name);
+    if (suffix_parse.result.success()) {
+        arguments.name_suffix = suffix_parse.tags["suffix"];
+    }
+}
+
+// Detects the text that surrounds an argument value.
+//
+// Renders first_tool_call_one_arg and first_tool_call_one_arg_other_val and diffs the
+// outputs. Only when the differing regions are exactly "XXXX" and "YYYY" does it derive
+// arguments.value_prefix from the text before the value and arguments.value_suffix from
+// the text after it; otherwise both are left untouched.
+void analyze_tools::extract_argument_value_markers() {
+    json msg_value_x = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json msg_value_y = json{
+        { "role",       "assistant"                                        },
+        { "content",    ""                                                 },
+        { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) }
+    };
+
+    template_params base;
+    base.messages              = json::array({ user_msg, msg_value_x });
+    base.tools                 = tools;
+    base.add_generation_prompt = false;
+    base.enable_thinking       = true;
+
+    auto comparison = compare_variants(*tmpl, base, [&](template_params & variant) {
+        variant.messages = json::array({ user_msg, msg_value_y });
+    });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    // The analysis only applies when the diff isolates exactly the two sample values.
+    if (diff.left != "XXXX" || diff.right != "YYYY") {
+        return;
+    }
+
+    // --- value prefix ---------------------------------------------------------------
+    // Drop everything up to and including the last "<argument name><name suffix>".
+    const std::string name_tail     = ARG_FIRST + arguments.name_suffix;
+    std::string       before_value  = diff.prefix;
+    const size_t      name_tail_pos = before_value.rfind(name_tail);
+    if (name_tail_pos != std::string::npos) {
+        before_value = before_value.substr(name_tail_pos + name_tail.size());
+    }
+    if (!before_value.empty()) {
+        // Keep the last marker plus any trailing non-marker text up to the end;
+        // use the whole remainder when that parse fails.
+        auto prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & b) {
+            auto last_marker = b.marker() + b.zero_or_more(b.negate(b.marker()) + b.any()) + b.end();
+            return b.zero_or_more(b.negate(last_marker) + b.any()) + b.tag("val_prefix", last_marker);
+        });
+        auto prefix_parse = prefix_parser.parse_and_extract(before_value);
+        if (prefix_parse.result.success()) {
+            arguments.value_prefix = prefix_parse.tags["val_prefix"];
+        } else {
+            arguments.value_prefix = before_value;
+        }
+    }
+
+    // --- value suffix ---------------------------------------------------------------
+    // Truncate the trailing text at the first occurrence of the first non-empty
+    // terminator among: function.close, format.per_call_end, format.section_end.
+    std::string terminator;
+    if (!function.close.empty()) {
+        terminator = function.close;
+    } else if (!format.per_call_end.empty()) {
+        terminator = format.per_call_end;
+    } else {
+        terminator = format.section_end;
+    }
+
+    std::string after_value = diff.suffix;
+    if (!terminator.empty()) {
+        const size_t terminator_pos = after_value.find(terminator);
+        if (terminator_pos != std::string::npos) {
+            after_value = after_value.substr(0, terminator_pos);
+        }
+    }
+
+    after_value = trim_leading_whitespace(after_value);
+    if (!after_value.empty()) {
+        arguments.value_suffix = after_value;
+    }
+}
+
+// Detects the text that separates two consecutive arguments.
+//
+// Renders first_tool_call_one_arg against first_tool_call (presumably the two-argument
+// variant - confirm against its definition) and, when the second rendering contributes
+// any differing text, stores until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND)
+// in arguments.separator.
+void analyze_tools::extract_argument_separator() {
+    json msg_single_arg = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json msg_full_call = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params base;
+    base.messages              = json::array({ user_msg, msg_single_arg });
+    base.tools                 = tools;
+    base.add_generation_prompt = false;
+    base.enable_thinking       = true;
+
+    auto comparison = compare_variants(*tmpl, base, [&](template_params & variant) {
+        variant.messages = json::array({ user_msg, msg_full_call });
+    });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    // Nothing extra on the right-hand side means there is no separator to extract.
+    if (diff.right.empty()) {
+        return;
+    }
+
+    arguments.separator = until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND);
+}
+
+// Detects the markers that open and close the argument list of a tool call.
+//
+// Renders first_tool_call_zero_args against first_tool_call_one_arg and diffs the
+// outputs. Skipped entirely for tool_format::JSON_NATIVE. Otherwise the shared prefix
+// and suffix of the diff are narrowed to the region between the nearest section/call
+// start and end markers, and arguments.start / arguments.end are derived from that
+// region via until_common_prefix / after_common_suffix. Both fields are left
+// untouched when neither value comes out non-empty.
+void analyze_tools::extract_args_markers() {
+    json assistant_no_args = json{
+        { "role",       "assistant"},
+        { "content",    ""         },
+        { "tool_calls", json::array({ first_tool_call_zero_args }) }
+    };
+
+    json assistant_with_args = json{
+        { "role",       "assistant"},
+        { "content",    ""         },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_args });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (format.mode == tool_format::JSON_NATIVE) {
+        return;
+    }
+
+    const std::string prefix_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
+    const std::string suffix_marker = !format.section_end.empty() ? format.section_end : format.per_call_end;
+
+    // The markers might also appear earlier in the tools section as an example or
+    // somewhere else, so look for the occurrences closest to the differing region.
+    //
+    // Only skip past the start marker when it was actually found: adding its length
+    // to a fallback offset of 0 would drop unrelated characters and could push
+    // substr() past the end of diff.prefix (std::out_of_range).
+    size_t prefix_cut_pos = 0;
+    if (!prefix_marker.empty()) {
+        const size_t found = diff.prefix.rfind(prefix_marker);
+        if (found != std::string::npos) {
+            prefix_cut_pos = found + prefix_marker.size();
+        }
+    }
+
+    // Without an end marker (or when it is absent) the whole suffix is kept.
+    size_t suffix_cut_pos = diff.suffix.size();
+    if (!suffix_marker.empty()) {
+        const size_t found = diff.suffix.find(suffix_marker);
+        if (found != std::string::npos) {
+            suffix_cut_pos = found;
+        }
+    }
+
+    const std::string prefix_cut = diff.prefix.substr(prefix_cut_pos);
+    const std::string suffix_cut = diff.suffix.substr(0, suffix_cut_pos);
+    std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":");
+    std::string args_end   = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}");
+
+    if (args_start.empty() && args_end.empty()) {
+        return;
+    }
+
+    // If the function name is still part of the start text, keep only what follows it.
+    const size_t find_fun = args_start.find(FUN_FIRST);
+    if (find_fun != std::string::npos) {
+        args_start = args_start.substr(find_fun + FUN_FIRST.size());
+    }
+    arguments.start = args_start;
+    arguments.end   = args_end;
+}
+
+// Locates the markers that surround a tool call id and records where the id sits
+// relative to the function name and the arguments.
+//
+// Renders first_tool_call against first_tool_call_alt_id and diffs the outputs. If the
+// two renderings do not differ, nothing is changed. Otherwise, depending on whether
+// FUN_FIRST and the first '{' appear before or after the differing region, call_id.pos
+// is set to BETWEEN_FUNC_AND_ARGS, POST_ARGS or PRE_FUNC_NAME and call_id.prefix /
+// call_id.suffix are filled from the neighbouring markers. Finally,
+// format.per_call_end is cleared when it starts with the detected call_id.suffix.
+void analyze_tools::extract_call_id_markers() {
+    json assistant_id1 = json{
+        { "role",       "assistant" },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_id2 = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_alt_id }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_id1 });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    // Identical renderings: the template does not emit the call id at all.
+    if (diff.left.empty() && diff.right.empty()) {
+        return;
+    }
+
+    // NOTE(review): presumably the ids carried by first_tool_call and
+    // first_tool_call_alt_id - confirm against their definitions.
+    const std::string id_value_1 = "call00001";
+    const std::string id_value_2 = "call99999";
+
+    // The part both ids share ends up in diff.prefix rather than in the differing
+    // region, so it is used below as an anchor when searching the prefix.
+    const auto first_difference =
+        std::mismatch(id_value_1.begin(), id_value_1.end(), id_value_2.begin(), id_value_2.end());
+    const std::string common_id_part(id_value_1.begin(), first_difference.first);
+
+    // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS)
+    // or in the suffix (call_id is PRE_FUNC_NAME)
+    const std::string func_name           = FUN_FIRST;
+    const size_t      func_name_in_prefix = diff.prefix.rfind(func_name);
+    const size_t      func_name_in_suffix = diff.suffix.find(func_name);
+
+    // Helper: find the last marker in a string (returns just the marker, not trailing text)
+    auto find_last_marker = [](const std::string & str) -> std::string {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            auto last = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+            return p.zero_or_more(p.negate(last) + p.any()) + p.tag("m", p.marker());
+        });
+        auto res = parser.parse_anywhere_and_extract(str);
+        return res.result.success() ? res.tags["m"] : "";
+    };
+
+    // Helper: find the first marker in a string
+    auto find_first_marker = [](const std::string & str) -> std::string {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("m", p.marker());
+        });
+        auto res = parser.parse_anywhere_and_extract(str);
+        return res.result.success() ? res.tags["m"] : "";
+    };
+
+    if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) {
+        // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS
+        // Check if args indicator "{" is in prefix (after the function name) or in suffix
+        const size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix);
+        const size_t args_in_suffix = diff.suffix.find('{');
+
+        // A successful find() always returns an index inside diff.prefix, so "not in
+        // prefix" is fully expressed by the npos comparison.
+        if (args_in_suffix != std::string::npos && args_in_prefix == std::string::npos) {
+            // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS
+            call_id.pos = call_id_position::BETWEEN_FUNC_AND_ARGS;
+
+            // Find call_id_prefix: marker immediately preceding common_id_part (no intervening markers)
+            const std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length());
+            auto id_prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.tag("prefix", p.marker()) +
+                       p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal(common_id_part)) + p.any()) +
+                       p.literal(common_id_part);
+            });
+            auto id_res = id_prefix_parser.parse_anywhere_and_extract(after_func);
+            if (id_res.result.success()) {
+                call_id.prefix = id_res.tags["prefix"];
+            } else {
+                // Fallback: use the last marker in after_func
+                call_id.prefix = find_last_marker(after_func);
+            }
+
+            // Extract call_id_suffix: the first marker in the suffix before args "{"
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal("{")) + p.any()) +
+                       p.tag("suffix", p.marker());
+            });
+            auto suf_res = suffix_parser.parse_anywhere_and_extract(diff.suffix);
+            if (suf_res.result.success()) {
+                call_id.suffix = suf_res.tags["suffix"];
+            }
+        } else if (args_in_prefix != std::string::npos) {
+            // Args are in prefix, so call_id is POST_ARGS
+            call_id.pos = call_id_position::POST_ARGS;
+
+            // Extract last marker between args closing brace and the ID
+            const std::string after_args    = diff.prefix.substr(args_in_prefix);
+            const size_t      closing_brace = after_args.rfind('}');
+            if (closing_brace != std::string::npos) {
+                const std::string between_args_and_id = after_args.substr(closing_brace + 1);
+                call_id.prefix = find_last_marker(between_args_and_id);
+            }
+
+            // call_id_suffix: first marker in diff.suffix
+            call_id.suffix = find_first_marker(diff.suffix);
+        }
+    } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) {
+        // Function name is only in suffix - call_id is PRE_FUNC_NAME
+        call_id.pos = call_id_position::PRE_FUNC_NAME;
+
+        // call_id_prefix: last marker in diff.prefix
+        call_id.prefix = find_last_marker(diff.prefix);
+
+        // call_id_suffix: first marker in the portion of diff.suffix before func_name
+        const std::string before_func = diff.suffix.substr(0, func_name_in_suffix);
+        call_id.suffix = find_first_marker(before_func);
+    }
+
+    // When call_id is detected, per_call_end may have been incorrectly set to include
+    // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix.
+    // compare() checks only the leading characters instead of searching the whole string.
+    if (call_id.pos != call_id_position::NONE && !call_id.suffix.empty() &&
+        format.per_call_end.compare(0, call_id.suffix.size(), call_id.suffix) == 0) {
+        format.per_call_end.clear();
+    }
+}
+
+}  // namespace autoparser
diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp

deleted file mode 100644 (file)

index ba359fd..0000000
--- a/common/chat-parser-xml-toolcall.cpp
+++ /dev/null
@@ -1,879 +0,0 @@
-#include "chat.h"
-#include "chat-parser.h"
-#include "common.h"
-#include "json-partial.h"
-#include "json-schema-to-grammar.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-class xml_toolcall_syntax_exception : public std::runtime_error {
-  public:
-    xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-template<typename T>
-inline void sort_uniq(std::vector<T> &vec) {
-    std::sort(vec.begin(), vec.end());
-    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
-}
-
-template<typename T>
-inline bool all_space(const T &str) {
-    return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
-}
-
-static size_t utf8_truncate_safe(const std::string_view s) {
-    size_t len = s.size();
-    if (len == 0) return 0;
-    size_t i = len;
-    for (size_t back = 0; back < 4 && i > 0; ++back) {
-        --i;
-        unsigned char c = s[i];
-        if ((c & 0x80) == 0) {
-            return len;
-        } else if ((c & 0xC0) == 0xC0) {
-            size_t expected_len = 0;
-            if ((c & 0xE0) == 0xC0) expected_len = 2;
-            else if ((c & 0xF0) == 0xE0) expected_len = 3;
-            else if ((c & 0xF8) == 0xF0) expected_len = 4;
-            else return i;
-            if (len - i >= expected_len) {
-                return len;
-            } else {
-                return i;
-            }
-        }
-    }
-    return len - std::min(len, size_t(3));
-}
-
-inline void utf8_truncate_safe_resize(std::string &s) {
-    s.resize(utf8_truncate_safe(s));
-}
-
-inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
-    return s.substr(0, utf8_truncate_safe(s));
-}
-
-static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
-    if (literal1.size() == 0) return builder.try_find_literal(literal2);
-    const auto saved_pos = builder.pos();
-    while (auto res = builder.try_find_literal(literal1)) {
-        builder.consume_spaces();
-        const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
-        if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
-            if (res->prelude.size() != res->groups[0].begin - saved_pos) {
-                res->prelude = builder.str({saved_pos, res->groups[0].begin});
-            }
-            builder.move_to(builder.pos() + match_len);
-            res->groups[0].end = builder.pos();
-            GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
-            return res;
-        }
-        builder.move_to(res->groups[0].begin + 1);
-    }
-    builder.move_to(saved_pos);
-    return std::nullopt;
-}
-
-/**
- * make a GBNF that accept any strings except those containing any of the forbidden strings.
- */
-std::string make_gbnf_excluding(std::vector<std::string> forbids) {
-    constexpr auto charclass_escape = [](unsigned char c) -> std::string {
-        if (c == '\\' || c == ']' || c == '^' || c == '-') {
-            std::string s = "\\";
-            s.push_back((char)c);
-            return s;
-        }
-        if (isprint(c)) {
-            return std::string(1, (char)c);
-        }
-        char buf[16];
-        snprintf(buf, 15, "\\x%02X", c);
-        return std::string(buf);
-    };
-    constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
-        std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
-        int i = l;
-        while (i < r) {
-            const std::string &s = forbids[i];
-            if ((int)s.size() == depth) {
-                ++i;
-                continue;
-            }
-            unsigned char c = (unsigned char)s[depth];
-            int j = i;
-            while (j < r && (int)forbids[j].size() > depth &&
-                   (unsigned char)forbids[j][depth] == c) {
-                ++j;
-            }
-            children.push_back({c, {i, j}});
-            i = j;
-        }
-        std::vector<std::string> alts;
-        if (!children.empty()) {
-            std::string cls;
-            for (auto &ch : children) cls += charclass_escape(ch.first);
-            alts.push_back(std::string("[^") + cls + "]");
-        }
-        for (auto &ch : children) {
-            std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
-            if (!childExpr.empty()) {
-                std::string quoted_ch = "\"";
-                if (ch.first == '\\') quoted_ch += "\\\\";
-                else if (ch.first == '"') quoted_ch += "\\\"";
-                else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
-                else {
-                    char buf[16];
-                    snprintf(buf, 15, "\\x%02X", ch.first);
-                    quoted_ch += buf;
-                }
-                quoted_ch += "\"";
-                std::string branch = quoted_ch + std::string(" ") + childExpr;
-                alts.push_back(branch);
-            }
-        }
-        if (alts.empty()) return "";
-        std::ostringstream oss;
-        oss << "( ";
-        for (size_t k = 0; k < alts.size(); ++k) {
-            if (k) oss << " | ";
-            oss << alts[k];
-        }
-        oss << " )";
-        return oss.str();
-    };
-    if (forbids.empty()) return "( . )*";
-    sort(forbids.begin(), forbids.end());
-    std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
-    if (expr.empty()) {
-        std::string cls;
-        for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
-        expr = std::string("( [^") + cls + "] )";
-    }
-    if (forbids.size() == 1)
-        return expr + "*";
-    else
-        return std::string("( ") + expr + " )*";
-}
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.tool_sep.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    std::string key_val_sep = form.key_val_sep;
-    if (form.key_val_sep2) {
-        key_val_sep += "\n";
-        key_val_sep += *form.key_val_sep2;
-    }
-    GGML_ASSERT(!key_val_sep.empty());
-
-    if (tools.is_array() && !tools.empty()) {
-        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
-            auto string_arg_val = form.last_val_end ?
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
-
-            std::vector<std::string> tool_rules;
-            for (const auto & tool : tools) {
-                if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
-                    LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
-                    continue;
-                }
-                const auto & function = tool.at("function");
-                if (!function.contains("name") || !function.at("name").is_string()) {
-                    LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
-                    continue;
-                }
-                if (!function.contains("parameters") || !function.at("parameters").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
-                    continue;
-                }
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                struct parameter_rule {
-                    std::string symbol_name;
-                    bool is_required;
-                };
-                std::vector<parameter_rule> arg_rules;
-                if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
-                    LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
-                    continue;
-                } else {
-                    std::vector<std::string> requiredParameters;
-                    if (parameters.contains("required")) {
-                        try { parameters.at("required").get_to(requiredParameters); }
-                        catch (const std::runtime_error&) {
-                            LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
-                        }
-                    }
-                    sort_uniq(requiredParameters);
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        std::string quoted_key = key;
-                        bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
-                        if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
-                            quoted_key = gbnf_format_literal(key);
-                            quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
-                        }
-                        arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
-                            gbnf_format_literal(form.key_start) + " " +
-                            gbnf_format_literal(quoted_key) + " " +
-                            gbnf_format_literal(key_val_sep) + " " +
-                            ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
-                                    (form.raw_argval ?
-                                            string_arg_val :
-                                            "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
-                                    ) :
-                                    builder.add_schema(name + "-arg-" + key, value)
-                            )
-                        ), required});
-                    }
-                }
-
-                auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
-                decltype(next_arg_with_sep) next_arg = "\"\"";
-                for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
-                    std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
-                    next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg
-                    );
-                    include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
-                    next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
-                            include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
-                    );
-                }
-
-                std::string quoted_name = name;
-                if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
-                    quoted_name = gbnf_format_literal(name);
-                    quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
-                }
-                quoted_name = gbnf_format_literal(quoted_name);
-                // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-                if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
-                    quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                        gbnf_format_literal(form.tool_start) + " " +
-                        quoted_name + " " +
-                        gbnf_format_literal(form.tool_sep) + " " +
-                        next_arg
-                ));
-            }
-
-            auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
-            auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
-            auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
-            auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
-            builder.add_rule("root",
-                (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
-                tool_call_multiple_with_end  + "?" +
-                (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
-            );
-        });
-
-        // grammar trigger for tool call
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
-    }
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.key_val_sep.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    // Helper to choose return false or throw error
-    constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
-        LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
-        if (recovery) {
-            builder.move_to(start_pos);
-            return false;
-        } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
-    };
-    // Drop substring from needle to end from a JSON
-    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
-        auto pos = json_str.rfind(needle);
-        if (pos == std::string::npos) {
-            return false;
-        }
-        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
-            unsigned char ch = static_cast<unsigned char>(json_str[i]);
-            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
-                return false;
-            }
-        }
-        if (pos != 0 && json_str[pos - 1] == '"') {
-            --pos;
-        }
-        json_str.resize(pos);
-        return true;
-    };
-    // Helper to generate a partial argument JSON
-    constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
-        auto rest = builder.consume_rest();
-        utf8_truncate_safe_resize(rest);
-        set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
-        auto tool_str = arguments.dump();
-        if (partial_json(tool_str)) {
-            if (builder.add_tool_call(function_name, "", tool_str)) {
-                return;
-            }
-        }
-        LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
-    };
-    // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
-    constexpr auto try_find_close = [](
-            common_chat_msg_parser & builder,
-            const std::string & end,
-            const std::optional<std::string> & alt_end,
-            const std::string & end_next,
-            const std::optional<std::string> & alt_end_next
-    ) {
-        auto saved_pos = builder.pos();
-        auto tc = builder.try_find_literal(end);
-        auto val_end_size = end.size();
-        if (alt_end) {
-            auto pos_1 = builder.pos();
-            builder.move_to(saved_pos);
-            auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
-            if (alt_end_next) {
-                builder.move_to(saved_pos);
-                auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
-                if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
-                    tc2 = tc3;
-                }
-            }
-            if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
-                tc = tc2;
-                tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
-                builder.move_to(tc->groups[0].end);
-                val_end_size = alt_end->size();
-            } else {
-                builder.move_to(pos_1);
-            }
-        }
-        return std::make_pair(val_end_size, tc);
-    };
-    // Helper to find a val_end or last_val_end, returns matched pattern size
-    const auto try_find_val_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
-    };
-    // Helper to find a tool_end or last_tool_end, returns matched pattern size
-    const auto try_find_tool_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
-    };
-
-    bool recovery = true;
-    const auto start_pos = builder.pos();
-    if (!all_space(form.scope_start)) {
-        if (auto tc = builder.try_find_literal(form.scope_start)) {
-            if (all_space(tc->prelude)) {
-                if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
-                    throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
-            } else {
-                builder.move_to(start_pos);
-                return false;
-            }
-        } else return false;
-    }
-    while (auto tc = builder.try_find_literal(form.tool_start)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                    gbnf_format_literal(form.tool_start).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            builder.move_to(tc->groups[0].begin - tc->prelude.size());
-            break;
-        }
-
-        // Find tool name
-        auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
-        if (!func_name) {
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-        if (!func_name) {
-            // Partial tool name not supported
-            throw common_chat_msg_partial_exception("incomplete tool_call");
-        }
-        // If the model generate multiple tool call and the first tool call has no argument
-        if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
-            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-
-        // Parse tool name
-        builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
-        std::string function_name = string_strip(func_name->prelude);
-        // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-        if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
-            if (string_starts_with(function_name, "functions.")) {
-                static const std::regex re(":\\d+$");
-                if (std::regex_search(function_name, re)) {
-                    function_name = function_name.substr(10, function_name.rfind(":") - 10);
-                }
-            }
-        }
-
-        // Argument JSON
-        json arguments = json::object();
-
-        // Helper to generate a partial argument JSON
-        const auto gen_partial_args = [&](auto set_partial_arg) {
-            gen_partial_json(set_partial_arg, arguments, builder, function_name);
-        };
-
-        // Parse all arg_key/arg_value pairs
-        while (auto tc = builder.try_find_literal(form.key_start)) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                        gbnf_format_literal(form.key_start).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                builder.move_to(tc->groups[0].begin - tc->prelude.size());
-                break;
-            }
-            if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
-                auto tool_call_arg = arguments.dump();
-                if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-                    tool_call_arg.resize(tool_call_arg.size() - 1);
-                }
-                builder.add_tool_call(function_name, "", tool_call_arg);
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
-            }
-
-            // Parse arg_key
-            auto key_res = builder.try_find_literal(form.key_val_sep);
-            if (!key_res) {
-                gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
-                throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
-            }
-            if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
-                gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
-            }
-            auto &key = key_res->prelude;
-            recovery = false;
-
-            // Parse arg_value
-            if (form.key_val_sep2) {
-                if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
-                    if (!all_space(tc->prelude)) {
-                        LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
-                                gbnf_format_literal(tc->prelude).c_str(),
-                                gbnf_format_literal(form.key_val_sep).c_str(),
-                                gbnf_format_literal(*form.key_val_sep2).c_str()
-                        );
-                        return return_error(builder, start_pos, false);
-                    }
-                    if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
-                    }
-                } else {
-                    gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
-                }
-            }
-            auto val_start = builder.pos();
-
-            // Test if arg_val is a partial JSON
-            std::optional<common_json> value_json = std::nullopt;
-            if (!form.raw_argval || !*form.raw_argval) {
-                try { value_json = builder.try_consume_json(); }
-                catch (const std::runtime_error&) { builder.move_to(val_start); }
-                // TODO: Delete this when json_partial adds top-level support for null/true/false
-                if (builder.pos() == val_start) {
-                    const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
-                    builder.consume_spaces();
-                    std::string_view sv = utf8_truncate_safe_view(builder.input());
-                    sv.remove_prefix(builder.pos());
-                    std::string rest = "a";
-                    if (sv.size() < 6) rest = sv;
-                    if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
-                        value_json = {123, {"123", "123"}};
-                        builder.consume_rest();
-                    } else {
-                        builder.move_to(val_start);
-                    }
-                }
-            }
-
-            // If it is a JSON and followed by </arg_value>, parse as json
-            // cannot support streaming because it may be a plain text starting with JSON
-            if (value_json) {
-                auto json_end = builder.pos();
-                builder.consume_spaces();
-                if (builder.pos() == builder.input().size()) {
-                    if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
-                        arguments[key] = value_json->json;
-                        auto json_str = arguments.dump();
-                        if (!value_json->healing_marker.json_dump_marker.empty()) {
-                            GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
-                            json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
-                        } else {
-                            GGML_ASSERT(json_str.back() == '}');
-                            json_str.resize(json_str.size() - 1);
-                        }
-                        builder.add_tool_call(function_name, "", json_str);
-                    } else {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    }
-                    LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
-                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
-                }
-                builder.move_to(json_end);
-                auto [val_end_size, tc] = try_find_val_end();
-                if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
-                    if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
-                    } else arguments[key] = value_json->json;
-                } else builder.move_to(val_start);
-            }
-
-            // If not, parse as plain text
-            if (val_start == builder.pos()) {
-                if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
-                    auto &value_str = value_plain->prelude;
-                    if (form.trim_raw_argval) value_str = string_strip(value_str);
-                    if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
-                        throw common_chat_msg_partial_exception(
-                                "Expected " + gbnf_format_literal(form.val_end) +
-                                " after " + gbnf_format_literal(form.key_val_sep) +
-                                (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                        );
-                    }
-                    arguments[key] = value_str;
-                } else {
-                    if (form.trim_raw_argval) {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
-                    } else {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
-                    }
-                    throw common_chat_msg_partial_exception(
-                            "Expected " + gbnf_format_literal(form.val_end) +
-                            " after " + gbnf_format_literal(form.key_val_sep) +
-                            (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                    );
-                }
-            }
-        }
-
-        // Consume closing tag
-        if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
-            if (!all_space(tc->prelude)) {
-                LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                        gbnf_format_literal(form.tool_end).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                return return_error(builder, start_pos, recovery);
-            }
-            if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
-                // Add the parsed tool call
-                if (!builder.add_tool_call(function_name, "", arguments.dump())) {
-                    throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
-                }
-                recovery = false;
-                continue;
-            }
-        }
-
-        auto tool_call_arg = arguments.dump();
-        if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-            tool_call_arg.resize(tool_call_arg.size() - 1);
-        }
-        builder.add_tool_call(function_name, "", tool_call_arg);
-        throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
-    }
-    if (auto tc = builder.try_find_literal(form.scope_end)) {
-        if (!all_space(tc->prelude)) {
-            LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                    gbnf_format_literal(form.scope_end).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            return return_error(builder, start_pos, recovery);
-        }
-    } else {
-        if (all_space(form.scope_end)) return true;
-        builder.consume_spaces();
-        if (builder.pos() == builder.input().size())
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                gbnf_format_literal(form.scope_end).c_str(),
-                gbnf_format_literal(builder.consume_rest()).c_str()
-        );
-        return return_error(builder, start_pos, recovery);
-    }
-
-    return true;
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
-    auto pos = pos_;
-    auto tsize = result_.tool_calls.size();
-    try { return parse_xml_tool_calls(*this, form); }
-    catch (const xml_toolcall_syntax_exception&) {}
-    move_to(pos);
-    result_.tool_calls.resize(tsize);
-    return false;
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
-    constexpr auto rstrip = [](std::string &s) {
-        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
-    };
-    // Erase substring from l to r, along with additional spaces nearby
-    constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
-        while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
-        ++l;
-        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
-        if (l < r) str[l] = '\n';
-        if (l + 1 < r) str[l + 1] = '\n';
-        if (l != 0) l += 2;
-        str.erase(l, r - l);
-        return l;
-    };
-    constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
-            for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
-                }
-            }
-        }
-        if (content.size() > best_match) {
-            content.erase(best_match);
-        }
-    };
-    const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
-        return trim_suffix(content, {
-            start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
-            form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
-            form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
-            form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
-            form.scope_end
-        });
-    };
-
-
-    // Trim leading spaces without affecting keyword matching
-    static const common_regex spaces_regex("\\s*");
-    {
-        auto tc = builder.consume_regex(spaces_regex);
-        auto spaces = builder.str(tc.groups[0]);
-        auto s1 = spaces.size();
-        trim_potential_partial_word(spaces);
-        auto s2 = spaces.size();
-        builder.move_to(builder.pos() - (s1 - s2));
-    }
-
-    // Parse content
-    bool reasoning_unclosed = builder.syntax().thinking_forced_open;
-    std::string unclosed_reasoning_content("");
-    for (;;) {
-        auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
-        std::string content;
-        std::string tool_call_start;
-
-        if (tc) {
-            content = std::move(tc->prelude);
-            tool_call_start = builder.str(tc->groups[0]);
-            LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
-        } else {
-            content = builder.consume_rest();
-            utf8_truncate_safe_resize(content);
-        }
-
-        // Handle unclosed think block
-        if (reasoning_unclosed) {
-            if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
-                unclosed_reasoning_content += content;
-                if (!(form.allow_toolcall_in_think && tc)) {
-                    unclosed_reasoning_content += tool_call_start;
-                    continue;
-                }
-            } else {
-                reasoning_unclosed = false;
-                std::string reasoning_content;
-                if (pos == std::string::npos) {
-                    reasoning_content = std::move(content);
-                } else {
-                    reasoning_content = content.substr(0, pos);
-                    content.erase(0, pos + end_think.size());
-                }
-                if (builder.pos() == builder.input().size() && all_space(content)) {
-                    rstrip(reasoning_content);
-                    trim_potential_partial_word(reasoning_content);
-                    rstrip(reasoning_content);
-                    if (reasoning_content.empty()) {
-                        rstrip(unclosed_reasoning_content);
-                        trim_potential_partial_word(unclosed_reasoning_content);
-                        rstrip(unclosed_reasoning_content);
-                        if (unclosed_reasoning_content.empty()) continue;
-                    }
-                }
-                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                    builder.add_content(start_think);
-                    builder.add_content(unclosed_reasoning_content);
-                    builder.add_content(reasoning_content);
-                    if (builder.pos() != builder.input().size() || !all_space(content))
-                        builder.add_content(end_think);
-                } else {
-                    builder.add_reasoning_content(unclosed_reasoning_content);
-                    builder.add_reasoning_content(reasoning_content);
-                }
-                unclosed_reasoning_content.clear();
-            }
-        }
-
-        // Handle multiple think block
-        bool toolcall_in_think = false;
-        for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
-            if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
-                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                    auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
-                    builder.add_reasoning_content(reasoning_content);
-                    think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
-                } else {
-                    think_start = think_end + end_think.size() - 1;
-                }
-            } else {
-                // This <tool_call> start is in thinking block, skip this tool call
-                // This <tool_call> start is in thinking block
-                if (form.allow_toolcall_in_think) {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size());
-                } else {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
-                }
-                reasoning_unclosed = true;
-                content.resize(think_start);
-                toolcall_in_think = true;
-            }
-        }
-
-        if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-            rstrip(content);
-            // Handle unclosed </think> token from content: delete all </think> token
-            if (auto pos = content.rfind(end_think); pos != std::string::npos) {
-                while (pos != std::string::npos) {
-                    pos = erase_spaces(content, pos, pos + end_think.size() - 1);
-                    pos = content.rfind(end_think, pos);
-                }
-            }
-            // Strip if needed
-            if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
-                content = string_strip(content);
-            }
-        }
-
-        // remove potential partial suffix
-        if (builder.pos() == builder.input().size() && builder.is_partial()) {
-            if (unclosed_reasoning_content.empty()) {
-                rstrip(content);
-                trim_potential_partial_word(content);
-                rstrip(content);
-            } else {
-                rstrip(unclosed_reasoning_content);
-                trim_potential_partial_word(unclosed_reasoning_content);
-                rstrip(unclosed_reasoning_content);
-            }
-        }
-
-        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
-        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                builder.add_reasoning_content(unclosed_reasoning_content);
-            } else {
-                if (content.empty()) {
-                    content = start_think + unclosed_reasoning_content;
-                } else {
-                    content += "\n\n" + start_think;
-                    content += unclosed_reasoning_content;
-                }
-            }
-            unclosed_reasoning_content.clear();
-        }
-
-        // Add content
-        if (!content.empty()) {
-            // If there are multiple content blocks
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
-                builder.add_content("\n\n");
-            }
-            builder.add_content(content);
-        }
-
-        // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
-        if (toolcall_in_think && !form.allow_toolcall_in_think) {
-            continue;
-        }
-
-        // There is no tool call and all content is parsed
-        if (!tc) {
-            GGML_ASSERT(builder.pos() == builder.input().size());
-            GGML_ASSERT(unclosed_reasoning_content.empty());
-            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
-            break;
-        }
-
-        builder.move_to(tc->groups[0].begin);
-        if (builder.try_consume_xml_tool_calls(form)) {
-            auto end_of_tool = builder.pos();
-            builder.consume_spaces();
-            if (builder.pos() != builder.input().size()) {
-                builder.move_to(end_of_tool);
-                if (!builder.result().content.empty()) {
-                    builder.add_content("\n\n");
-                }
-            }
-        } else {
-            static const common_regex next_char_regex(".");
-            auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
-            rstrip(c);
-            builder.add_content(c);
-        }
-    }
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- */
-void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
-    parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
-}
diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h

deleted file mode 100644 (file)

index b309fb6..0000000
--- a/common/chat-parser-xml-toolcall.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include "chat.h"
-
-#include <nlohmann/json.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-
-// Sample config:
-// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
-// GLM 4.5   (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
-struct xml_tool_call_format {
-    std::string scope_start; // <minimax:tool_call>\n  // \n                      // can be empty
-    std::string tool_start;  // <invoke name=\"        // <tool_call>
-    std::string tool_sep;    // \">\n                  // \n                      // can be empty only for parse_xml_tool_calls
-    std::string key_start;   // <parameter name=\"     // <arg_key>
-    std::string key_val_sep; // \">                    // </arg_key>\n<arg_value>
-    std::string val_end;     // </parameter>\n         // </arg_value>\n
-    std::string tool_end;    // </invoke>\n            // </tool_call>\n
-    std::string scope_end;   // </minimax:tool_call>   //                         // can be empty
-    // Set this if there can be dynamic spaces inside key_val_sep.
-    // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
-    std::optional<std::string> key_val_sep2 = std::nullopt;
-    // Set true if argval should only be raw string. e.g. Hello "world" hi
-    // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
-    // Defaults to std::nullopt, both will be allowed.
-    std::optional<bool> raw_argval = std::nullopt;
-    std::optional<std::string> last_val_end = std::nullopt;
-    std::optional<std::string> last_tool_end = std::nullopt;
-    bool trim_raw_argval = false;
-    bool allow_toolcall_in_think = false;
-};
-
-// make a GBNF that accept any strings except those containing any of the forbidden strings.
-std::string make_gbnf_excluding(std::vector<std::string> forbids);
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp

deleted file mode 100644 (file)

index 060578f..0000000
--- a/common/chat-parser.cpp
+++ /dev/null
@@ -1,1649 +0,0 @@
-#include "chat-parser.h"
-#include "chat-peg-parser.h"
-#include "common.h"
-#include "log.h"
-#include "peg-parser.h"
-#include "regex-partial.h"
-
-#include <algorithm>
-#include <cctype>
-#include <optional>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <vector>
-
-using json = nlohmann::ordered_json;
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
-                                                const common_regex &     prefix,
-                                                size_t                   rstrip_prefix = 0) {
-    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
-    if (auto res = builder.try_find_regex(prefix)) {
-        builder.move_back(rstrip_prefix);
-        auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
-        if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call array");
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
-    std::string arguments;
-    if (builder.is_partial()) {
-        arguments = (json{
-                         { "code", code + builder.healing_marker() }
-        })
-                        .dump();
-        auto idx = arguments.find(builder.healing_marker());
-        if (idx != std::string::npos) {
-            arguments.resize(idx);
-        }
-    } else {
-        arguments = (json{
-                         { "code", code }
-        })
-                        .dump();
-    }
-    return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
-    common_chat_msg_parser &            builder,
-    const std::optional<common_regex> & block_open,
-    const std::optional<common_regex> & function_regex_start_only,
-    const std::optional<common_regex> & function_regex,
-    const common_regex &                close_regex,
-    const std::optional<common_regex> & block_close,
-    bool                                allow_raw_python = false,
-    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
-        nullptr) {
-    auto parse_tool_calls = [&]() {
-        size_t from  = std::string::npos;
-        auto   first = true;
-        while (true) {
-            auto start_pos = builder.pos();
-            auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
-                       function_regex                     ? builder.try_find_regex(*function_regex, from) :
-                                                            std::nullopt;
-
-            if (res) {
-                std::string name;
-                if (get_function_name) {
-                    name = get_function_name(*res);
-                } else {
-                    GGML_ASSERT(res->groups.size() == 2);
-                    name = builder.str(res->groups[1]);
-                }
-                first = false;
-                if (name.empty()) {
-                    // get_function_name signalled us that we should skip this match and treat it as content.
-                    from = res->groups[0].begin + 1;
-                    continue;
-                }
-                from = std::string::npos;
-
-                auto maybe_raw_python = name == "python" && allow_raw_python;
-                if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
-                    if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
-                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
-                            throw common_chat_msg_partial_exception("incomplete tool call");
-                        }
-                        builder.consume_regex(close_regex);
-                    }
-                    continue;
-                }
-                if (maybe_raw_python) {
-                    auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-                    if (!builder.add_tool_call(name, "", arguments)) {
-                        throw common_chat_msg_partial_exception("incomplete tool call");
-                    }
-                    return;
-                }
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            } else {
-                builder.move_to(start_pos);
-            }
-            break;
-        }
-        if (block_close) {
-            builder.consume_regex(*block_close);
-        }
-        builder.consume_spaces();
-        builder.add_content(builder.consume_rest());
-    };
-    if (block_open) {
-        if (auto res = builder.try_find_regex(*block_open)) {
-            parse_tool_calls();
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-    } else {
-        parse_tool_calls();
-    }
-}
-
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
-    : input_(input), is_partial_(is_partial), syntax_(syntax)
-{
-    result_.role = "assistant";
-
-    while (true) {
-        std::string id = std::to_string(std::rand());
-        if (input.find(id) == std::string::npos) {
-            healing_marker_ = id;
-            break;
-        }
-    }
-}
-
-std::string common_chat_msg_parser::str(const common_string_range & rng) const {
-    GGML_ASSERT(rng.begin <= rng.end);
-    return input_.substr(rng.begin, rng.end - rng.begin);
-}
-
-void common_chat_msg_parser::add_content(const std::string &content) {
-    result_.content += content;
-}
-
-void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
-    result_.reasoning_content += reasoning_content;
-}
-
-bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
-    if (name.empty()) {
-        return false;
-    }
-
-    common_chat_tool_call tool_call;
-    tool_call.name = name;
-    tool_call.arguments = arguments;
-    tool_call.id = id;
-
-    // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
-    result_.tool_calls.emplace_back(tool_call);
-
-    return true;
-}
-bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
-    std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
-    std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
-    std::string arguments = "";
-    if (tool_call.contains("arguments")) {
-        if (tool_call.at("arguments").is_object()) {
-            arguments = tool_call.at("arguments").dump();
-        } else {
-            arguments = tool_call.at("arguments");
-        }
-    }
-
-    return add_tool_call(name, id, arguments);
-}
-
-bool common_chat_msg_parser::add_tool_calls(const json & arr) {
-    for (const auto & item : arr) {
-        if (!add_tool_call(item)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
-    if (!tool_call.is_object() || tool_call.size() != 1) {
-        return false;
-    }
-
-    // Get the tool name (the single key in the object)
-    auto it = tool_call.begin();
-    std::string name = it.key();
-
-    if (name.empty()) {
-        return false;
-    }
-
-    // Get the arguments (the nested object)
-    const json & args_json = it.value();
-    std::string arguments = "";
-
-    if (args_json.is_object()) {
-        arguments = args_json.dump();
-    } else if (args_json.is_string()) {
-        arguments = args_json;
-    } else if (!args_json.is_null()) {
-        // For other types, convert to string representation
-        arguments = args_json.dump();
-    }
-
-    return add_tool_call(name, "", arguments);
-}
-void common_chat_msg_parser::finish() {
-    if (!is_partial_ && pos_ != input_.size()) {
-        throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
-    }
-}
-
-bool common_chat_msg_parser::consume_spaces() {
-    const auto length = input_.size();
-    auto consumed = false;
-    while (pos_ < length && std::isspace(input_[pos_])) {
-        ++pos_;
-        consumed = true;
-    }
-    return consumed;
-}
-
-bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
-    auto pos = pos_;
-    for (auto i = 0u; i < literal.size(); ++i) {
-        if (pos >= input_.size()) {
-            return false;
-        }
-        if (input_[pos] != literal[i]) {
-            return false;
-        }
-        ++pos;
-    }
-    pos_ = pos;
-    return true;
-}
-
-std::optional<common_chat_msg_parser::find_regex_result>  common_chat_msg_parser::try_find_literal(const std::string & literal) {
-    auto idx = input_.find(literal, pos_);
-    if (idx != std::string::npos) {
-        find_regex_result res;
-        res.prelude = input_.substr(pos_, idx - pos_);
-        auto end = idx + literal.size();
-        res.groups.emplace_back(common_string_range{idx, end});
-        move_to(end);
-        return res;
-    }
-    if (is_partial_) {
-        idx = string_find_partial_stop(input_, literal);
-        if (idx != std::string::npos && idx >= pos_) {
-            find_regex_result res;
-            res.prelude = input_.substr(pos_, idx - pos_);
-            auto end = input_.size();
-            res.groups.emplace_back(common_string_range{idx, end});
-            move_to(end);
-            return res;
-        }
-    }
-    return std::nullopt;
-}
-
-void common_chat_msg_parser::consume_literal(const std::string & literal) {
-    if (!try_consume_literal(literal)) {
-        throw common_chat_msg_partial_exception(literal);
-    }
-}
-
-bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
-    std::string pending_reasoning_prefix;
-
-    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-        return false;
-    }
-
-    auto set_reasoning_prefix = [&](size_t prefix_pos) {
-        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
-            return;
-        }
-        if (prefix_pos + start_think.size() > input_.size()) {
-            pending_reasoning_prefix.clear();
-            return;
-        }
-        // Capture the exact literal that opened the reasoning section so we can
-        // surface it back to callers. This ensures formats that force the
-        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
-        // instead of dropping it during parsing.
-        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
-    };
-
-    auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
-        auto stripped_reasoning = string_strip(reasoning);
-        if (stripped_reasoning.empty()) {
-            return;
-        }
-        if (syntax_.reasoning_in_content) {
-            add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
-            add_content(stripped_reasoning);
-            if (closed) {
-                add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
-            }
-        } else {
-            if (!pending_reasoning_prefix.empty()) {
-                add_reasoning_content(pending_reasoning_prefix);
-                pending_reasoning_prefix.clear();
-            }
-            add_reasoning_content(stripped_reasoning);
-        }
-    };
-
-    const size_t saved_pos = pos_;
-    const size_t saved_content_size = result_.content.size();
-    const size_t saved_reasoning_size = result_.reasoning_content.size();
-
-    auto restore_state = [&]() {
-        move_to(saved_pos);
-        result_.content.resize(saved_content_size);
-        result_.reasoning_content.resize(saved_reasoning_size);
-    };
-
-    // Allow leading whitespace to be preserved as content when reasoning is present at the start
-    size_t cursor = pos_;
-    size_t whitespace_end = cursor;
-    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
-        ++whitespace_end;
-    }
-
-    if (whitespace_end >= input_.size()) {
-        restore_state();
-        if (syntax_.thinking_forced_open) {
-            auto rest = input_.substr(saved_pos);
-            if (!rest.empty()) {
-                handle_reasoning(rest, /* closed */ !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-        return false;
-    }
-
-    cursor = whitespace_end;
-    const size_t remaining = input_.size() - cursor;
-    const size_t start_prefix = std::min(start_think.size(), remaining);
-    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
-
-    if (has_start_tag && start_prefix < start_think.size()) {
-        move_to(input_.size());
-        return true;
-    }
-
-    if (has_start_tag) {
-        if (whitespace_end > pos_) {
-            add_content(input_.substr(pos_, whitespace_end - pos_));
-        }
-        set_reasoning_prefix(cursor);
-        cursor += start_think.size();
-    } else if (syntax_.thinking_forced_open) {
-        cursor = whitespace_end;
-    } else {
-        restore_state();
-        return false;
-    }
-    while (true) {
-        if (cursor >= input_.size()) {
-            move_to(input_.size());
-            return true;
-        }
-
-        size_t end_pos = input_.find(end_think, cursor);
-        if (end_pos == std::string::npos) {
-            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
-            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
-            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
-            if (reasoning_end > cursor) {
-                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-
-        if (end_pos > cursor) {
-            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
-        } else {
-            handle_reasoning("", /* closed */ true);
-        }
-
-        cursor = end_pos + end_think.size();
-
-        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
-            ++cursor;
-        }
-
-        const size_t next_remaining = input_.size() - cursor;
-        if (next_remaining == 0) {
-            move_to(cursor);
-            return true;
-        }
-
-        const size_t next_prefix = std::min(start_think.size(), next_remaining);
-        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
-            if (next_prefix < start_think.size()) {
-                move_to(input_.size());
-                return true;
-            }
-            set_reasoning_prefix(cursor);
-            cursor += start_think.size();
-            continue;
-        }
-
-        move_to(cursor);
-        return true;
-    }
-}
-
-std::string common_chat_msg_parser::consume_rest() {
-    auto rest = input_.substr(pos_);
-    pos_ = input_.size();
-    return rest;
-}
-
-// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
-    auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
-    pos_ = m.groups[0].end;
-
-    if (add_prelude_to_content) {
-        add_content(prelude);
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    return find_regex_result{prelude, m.groups};
-}
-
-common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
-    if (auto result = try_consume_regex(regex)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception(regex.str());
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
-    auto m = regex.search(input_, pos_);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    if (m.groups[0].begin != pos_) {
-        // Didn't match at the current position.
-        return std::nullopt;
-    }
-    pos_ = m.groups[0].end;
-
-    return find_regex_result {
-        /* .prelude = */ "",
-        m.groups,
-    };
-}
-
-std::optional<common_json> common_chat_msg_parser::try_consume_json() {
-    auto it = input_.cbegin() + pos_;
-    const auto end = input_.cend();
-    common_json result;
-    if (!common_json_parse(it, end, healing_marker_, result)) {
-        return std::nullopt;
-    }
-    pos_ = std::distance(input_.cbegin(), it);
-    if (result.healing_marker.marker.empty()) {
-        // No healing marker, just return the parsed json
-        return result;
-    }
-    if (!is_partial()) {
-        throw common_chat_msg_partial_exception("JSON");
-    }
-    return result;
-}
-
-common_json common_chat_msg_parser::consume_json() {
-    if (auto result = try_consume_json()) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    auto partial = try_consume_json();
-    if (!partial) {
-        return std::nullopt;
-    }
-    auto is_arguments_path = [&](const std::vector<std::string> & path) {
-        return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
-    };
-    auto is_content_path = [&](const std::vector<std::string> & path) {
-        return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
-    };
-
-    if (partial->healing_marker.marker.empty()) {
-        if (args_paths.empty()) {
-            // No arguments to dump, and JSON was parsed fully.
-            return consume_json_result {
-                partial->json,
-                /* .is_partial = */ false,
-            };
-        }
-        if (is_arguments_path({})) {
-            // Entire JSON is the arguments and was parsed fully.
-            return consume_json_result {
-                partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
-                /* .is_partial = */ false,
-            };
-        }
-    }
-
-    LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-
-    auto found_healing_marker = false;
-    std::vector<std::string> path;
-    std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
-        if (is_arguments_path(path)) {
-            auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
-            if (is_partial() && !partial->healing_marker.marker.empty()) {
-                auto idx = arguments.find(partial->healing_marker.json_dump_marker);
-                if (idx != std::string::npos) {
-                    arguments.resize(idx);
-                    found_healing_marker = true;
-                }
-                if (arguments == "\"") {
-                    // This happens because of completing `:"$magic` after `"arguments"`
-                    arguments = "";
-                }
-            }
-            return arguments;
-        }
-        if (is_content_path(path)) {
-            if (!j.is_string()) {
-                throw std::runtime_error("Content path must be a string");
-            }
-            std::string str = j;
-            auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
-            if (idx != std::string::npos) {
-                str.resize(idx);
-                found_healing_marker = true;
-            }
-            return str;
-        }
-        if (j.is_object()) {
-            auto obj = json::object();
-            for (const auto & p : j.items()) {
-                const auto & key = p.key();
-                const auto & value = p.value();
-                const std::string key_str = key; // NOLINT
-                auto idx = key_str.find(healing_marker_);
-                if (idx != std::string::npos) {
-                    found_healing_marker = true;
-                    break;
-                }
-                path.push_back(key_str);
-                if (value.is_string()) {
-                    const std::string value_str = value;
-                    if (value_str.find(healing_marker_) != std::string::npos) {
-                        found_healing_marker = true;
-                        if (is_content_path(path)) {
-                            if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
-                                // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
-                                obj[key] = remove_unsupported_healings_and_dump_args(value);
-                            }
-                        }
-                        break;
-                    }
-                    obj[key] = value;
-                } else {
-                    obj[key] = remove_unsupported_healings_and_dump_args(value);
-                }
-                path.pop_back();
-            }
-            return obj;
-        }
-        if (j.is_array()) {
-            auto arr = json::array();
-            for (const auto & value : j) {
-                if (value.is_string()) {
-                    std::string str = value;
-                    auto idx = str.find(healing_marker_);
-                    if (idx != std::string::npos) {
-                        // Don't heal array values that aren't in the arguments.
-                        found_healing_marker = true;
-                        break;
-                    }
-                }
-                arr.push_back(remove_unsupported_healings_and_dump_args(value));
-            }
-            return arr;
-        }
-        return j;
-    };
-
-    auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
-    LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-    return consume_json_result {
-        cleaned,
-        /* .is_partial = */ found_healing_marker,
-    };
-}
-
-void common_chat_msg_parser::clear_tools() {
-    result_.tool_calls.clear();
-}
-
-/**
- * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
- * to reduce incremental compile time for parser changes.
- */
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const std::vector<std::vector<std::string>> content_paths = {
-        {"response"},
-    };
-    static const std::vector<std::vector<std::string>> args_paths = {
-        {"tool_call", "arguments"},
-        {"tool_calls", "arguments"},
-    };
-    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
-    if (data.value.contains("tool_calls")) {
-        if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        }
-    } else if (data.value.contains("tool_call")) {
-        if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    } else if (data.value.contains("response")) {
-        const auto & response = data.value.at("response");
-        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
-        if (data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete response");
-        }
-    } else {
-        throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
-    }
-}
-
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-    if (auto res = builder.try_find_regex(start_action_regex)) {
-        // If we didn't extract thoughts, prelude includes them.
-        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
-        for (const auto & tool_call : tool_calls.value) {
-            std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-            std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-        if (tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_regex(end_action_regex);
-    } else if (auto res = builder.try_find_regex(start_response_regex)) {
-        if (!builder.try_find_regex(end_response_regex)) {
-            builder.add_content(builder.consume_rest());
-            throw common_chat_msg_partial_exception(end_response_regex.str());
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex function_regex(
-        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
-    static const common_regex close_regex("\\}\\s*");
-
-    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
-    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
-    if (with_builtin_tools) {
-        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
-        if (auto res = builder.try_find_regex(builtin_call_regex)) {
-            auto fun_res = builder.consume_regex(function_name_regex);
-            auto function_name = builder.str(fun_res.groups[1]);
-
-            common_healing_marker healing_marker;
-            json args = json::object();
-            while (true) {
-                if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
-                    auto arg_name = builder.str(arg_res->groups[1]);
-                    auto partial = builder.consume_json();
-                    args[arg_name] = partial.json;
-                    healing_marker.marker = partial.healing_marker.marker;
-                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
-                    builder.consume_spaces();
-                    if (!builder.try_consume_literal(",")) {
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-            builder.consume_literal(")");
-            builder.consume_spaces();
-
-            auto arguments = args.dump();
-            if (!builder.add_tool_call(function_name, "", arguments)) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            return;
-        }
-    }
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ function_regex,
-        /* function_regex= */ std::nullopt,
-        close_regex,
-        std::nullopt);
-
-}
-
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?function<｜tool▁sep｜>([^\n]+)\n```json\n");
-    static const common_regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");
-
-    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
-    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        // </think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>NAME\n```json\nJSON\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_deepseek_v3_1_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            // <｜tool▁call▁begin｜>NAME<｜tool▁sep｜>JSON<｜tool▁call▁end｜>
-            common_chat_parse_deepseek_v3_1_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</invoke>",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\":";
-        form.val_end     = ",";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.allow_toolcall_in_think = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
-}
-
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
-    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
-    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
-    static const common_regex start_regex("<\\|start\\|>assistant");
-    static const common_regex analysis_regex("<\\|channel\\|>analysis");
-    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
-    static const common_regex preamble_regex("<\\|channel\\|>commentary");
-    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
-    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
-    auto consume_end = [&](bool include_end = false) {
-        if (auto res = builder.try_find_literal("<|end|>")) {
-            return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
-        }
-        return builder.consume_rest();
-    };
-
-    auto handle_tool_call = [&](const std::string & name) {
-        if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
-            if (builder.syntax().parse_tool_calls) {
-                if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            } else if (args->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    };
-
-    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
-        auto match = regex.search(input, 0, true);
-        if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
-            return match;
-        }
-        return std::nullopt;
-    };
-
-    do {
-        auto header_start_pos = builder.pos();
-        auto content_start = builder.try_find_literal("<|message|>");
-        if (!content_start) {
-            throw common_chat_msg_partial_exception("incomplete header");
-        }
-
-        auto header = content_start->prelude;
-
-        if (auto match = regex_match(tool_call1_regex, header)) {
-            auto group = match->groups[1];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (auto match = regex_match(tool_call2_regex, header)) {
-            auto group = match->groups[2];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (regex_match(analysis_regex, header)) {
-            builder.move_to(header_start_pos);
-            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                builder.add_content(consume_end(true));
-            } else {
-                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
-            }
-            continue;
-        }
-
-        if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
-            builder.add_content(consume_end());
-            continue;
-        }
-
-        // Possibly a malformed message, attempt to recover by rolling
-        // back to pick up the next <|start|>
-        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
-        builder.move_to(header_start_pos);
-    } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
-    auto remaining = builder.consume_rest();
-    if (!remaining.empty()) {
-        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
-    }
-}
-
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start  = */ "",
-        /* form.tool_start   = */ "<tool_call>",
-        /* form.tool_sep     = */ "",
-        /* form.key_start    = */ "<arg_key>",
-        /* form.key_val_sep  = */ "</arg_key>",
-        /* form.val_end      = */ "</arg_value>",
-        /* form.tool_end     = */ "</tool_call>",
-        /* form.scope_end    = */ "",
-        /* form.key_val_sep2 = */ "<arg_value>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const common_regex prefix(regex_escape(" functools["));
-    parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
-
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
-    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
-    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
-    static const common_regex close_regex(R"(\s*)");
-
-    parse_json_tool_calls(
-        builder,
-        std::nullopt,
-        function_regex_start_only,
-        function_regex,
-        close_regex,
-        std::nullopt,
-        /* allow_raw_python= */ true,
-        /* get_function_name= */ [&](const auto & res) -> std::string {
-            auto at_start = res.groups[0].begin == 0;
-            auto name = builder.str(res.groups[1]);
-            if (!name.empty() && name.back() == '{') {
-                // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
-                builder.move_back(1);
-            }
-            auto idx = name.find_last_not_of("\n{");
-            name = name.substr(0, idx + 1);
-            if (at_start && name == "all") {
-                return "";
-            }
-            return name;
-        });
-}
-
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
-    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
-    static const common_regex function_regex(R"(<function=(\w+)>)");
-    static const common_regex close_regex(R"(</function>)");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        std::nullopt);
-
-    if (auto res = builder.try_find_regex(python_tag_regex)) {
-        auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-        builder.add_tool_call("python", "", arguments);
-        return;
-    }
-}
-
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex open_regex(
-        "(?:"
-            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
-            "("                          // match 2 (open_tag)
-                "<tool_call>"
-                "|<function_call>"
-                "|<tool>"
-                "|<tools>"
-                "|<response>"
-                "|<json>"
-                "|<xml>"
-                "|<JSON>"
-            ")?"
-            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
-        ")"
-        "|<function=([^>]+)>"            // match 4 (function name)
-        "|<function name=\"([^\"]+)\">"  // match 5 (function name again)
-    );
-
-    while (auto res = builder.try_find_regex(open_regex)) {
-        const auto & block_start = res->groups[1];
-        std::string block_end = block_start.empty() ? "" : "```";
-
-        const auto & open_tag = res->groups[2];
-        std::string close_tag;
-
-        if (!res->groups[3].empty()) {
-            builder.move_to(res->groups[3].begin);
-            close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
-
-            if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
-                if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            } else {
-                throw common_chat_msg_partial_exception("failed to parse tool call");
-            }
-        } else {
-            auto function_name = builder.str(res->groups[4]);
-            if (function_name.empty()) {
-                function_name = builder.str(res->groups[5]);
-            }
-            GGML_ASSERT(!function_name.empty());
-
-            close_tag = "</function>";
-
-            if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
-                if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            }
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    static const common_regex start_think_regex(regex_escape("<think>"));
-    static const common_regex end_think_regex(regex_escape("</think>"));
-    // Granite models output partial tokens such as "<" and "<think".
-    // By leveraging try_consume_regex()/try_find_regex() throwing
-    // common_chat_msg_partial_exception for these partial tokens,
-    // processing is interrupted and the tokens are not passed to add_content().
-    if (auto res = builder.try_consume_regex(start_think_regex)) {
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-        builder.try_find_regex(end_think_regex, std::string::npos, false);
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-    }
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    // Parse response tags
-    static const common_regex start_response_regex(regex_escape("<response>"));
-    static const common_regex end_response_regex(regex_escape("</response>"));
-    // Granite models output partial tokens such as "<" and "<response".
-    // Same hack as reasoning parsing.
-    if (builder.try_consume_regex(start_response_regex)) {
-        builder.try_find_regex(end_response_regex);
-    }
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
-            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            if (!builder.try_consume_literal("</TOOLCALL>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            builder.add_tool_calls(tool_calls_data.json);
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            builder.consume_spaces();
-            if (!builder.try_consume_literal("<|tools_suffix|>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            for (const auto & value : tool_calls_data.json) {
-                if (value.is_object()) {
-                    builder.add_tool_call_short_form(value);
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
-    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
-    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
-    // Loop through all tool calls
-    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(res->groups[0].end);
-
-        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
-        auto tool_calls_data = builder.consume_json();
-
-        // Consume end marker
-        builder.consume_spaces();
-        if (!builder.try_consume_regex(tool_call_end_regex)) {
-            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
-        }
-
-        // Process each tool call in the array
-        if (tool_calls_data.json.is_array()) {
-            for (const auto & tool_call : tool_calls_data.json) {
-                if (!tool_call.is_object()) {
-                    throw common_chat_msg_partial_exception("Tool call must be an object");
-                }
-
-                if (!tool_call.contains("name")) {
-                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
-                }
-
-                std::string function_name = tool_call.at("name");
-                std::string arguments = "{}";
-
-                if (tool_call.contains("arguments")) {
-                    if (tool_call.at("arguments").is_object()) {
-                        arguments = tool_call.at("arguments").dump();
-                    } else if (tool_call.at("arguments").is_string()) {
-                        arguments = tool_call.at("arguments");
-                    }
-                }
-
-                if (!builder.add_tool_call(function_name, "", arguments)) {
-                    throw common_chat_msg_partial_exception("Incomplete tool call");
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
-        }
-
-        // Consume any trailing whitespace after this tool call
-        builder.consume_spaces();
-    }
-
-    // Consume any remaining content after all tool calls
-    auto remaining = builder.consume_rest();
-    if (!string_strip(remaining).empty()) {
-        builder.add_content(remaining);
-    }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<seed:tool_call>",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</function>",
-        /* form.scope_end   = */ "</seed:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
-}
-
-static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|think|>", "<|end|><|begin|>assistant<|content|>");
-
-    // TODO: Tool calling
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
-    // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
-    // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
-    static const common_regex tool_call_open(R"(<tool_call[^>]*>)");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    // Find all <tool_call></tool_call> blocks
-    while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(first->groups[0].end);
-        builder.consume_spaces();
-
-        builder.try_consume_literal("```json");
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        // Consume JSON object
-        auto data = builder.consume_json();
-
-        builder.consume_spaces();
-        builder.try_consume_literal("```");
-        builder.consume_spaces();
-
-        if (!builder.try_consume_literal("</tool_call>")) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_spaces();
-
-        // Extract name and arguments
-        std::string name;
-        std::string id;
-        nlohmann::ordered_json arguments;
-
-        const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
-            if (!obj.contains("name") || !obj.contains("arguments")) {
-                return false;
-            }
-            name = obj.at("name").get<std::string>();
-            arguments = obj.at("arguments");
-            if (obj.contains("id") && obj.at("id").is_string()) {
-                id = obj.at("id").get<std::string>();
-            }
-            return true;
-        };
-
-        if (!extract_args(data.json)) {
-            if (data.json.contains("function") && data.json.at("function").is_object()) {
-                auto fn = data.json.at("function");
-                extract_args(fn);
-                if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
-                    id = data.json.at("id").get<std::string>();
-                }
-            }
-        }
-
-        // If name is empty, treat the JSON object as content
-        if (name.empty()) {
-            LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
-            builder.add_content(data.json.dump());
-            continue;
-        }
-
-        std::string args_str = arguments.dump();
-        if (!builder.add_tool_call(name, id, args_str)) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
-    LOG_DBG("%s: parsing exaone_moe\n", __func__);
-    // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        common_chat_parse_exaone_moe_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_exaone_moe_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            common_chat_parse_exaone_moe_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
-    switch (builder.syntax().format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
-            common_chat_parse_content_only(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GENERIC:
-            common_chat_parse_generic(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
-            common_chat_parse_mistral_nemo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MAGISTRAL:
-            common_chat_parse_magistral(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X:
-            common_chat_parse_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
-            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
-            common_chat_parse_deepseek_r1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
-            common_chat_parse_deepseek_v3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
-            common_chat_parse_functionary_v3_2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
-            common_chat_parse_functionary_v3_1_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
-            common_chat_parse_hermes_2_pro(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
-            common_chat_parse_firefunction_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_COMMAND_R7B:
-            common_chat_parse_command_r7b(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GRANITE:
-            common_chat_parse_granite(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GPT_OSS:
-            common_chat_parse_gpt_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SEED_OSS:
-            common_chat_parse_seed_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
-            common_chat_parse_nemotron_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APERTUS:
-            common_chat_parse_apertus(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
-            common_chat_parse_lfm2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MINIMAX_M2:
-            common_chat_parse_minimax_m2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GLM_4_5:
-            common_chat_parse_glm_4_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_KIMI_K2:
-            common_chat_parse_kimi_k2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APRIEL_1_5:
-            common_chat_parse_apriel_1_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
-            common_chat_parse_xiaomi_mimo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
-            common_chat_parse_solar_open(builder);
-            break;
-        case COMMON_CHAT_FORMAT_EXAONE_MOE:
-            common_chat_parse_exaone_moe(builder);
-            break;
-        default:
-            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
-    }
-    builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
-    }
-    common_chat_msg_parser builder(input, is_partial, syntax);
-    try {
-        common_chat_parse(builder);
-    } catch (const common_chat_msg_partial_exception & ex) {
-        LOG_DBG("Partial parse: %s\n", ex.what());
-        if (!is_partial) {
-            builder.clear_tools();
-            builder.move_to(0);
-            common_chat_parse_content_only(builder);
-        }
-    }
-    auto msg = builder.result();
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
-
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
-    if (parser.empty()) {
-        throw std::runtime_error("Failed to parse due to missing parser definition.");
-    }
-
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
-
-    common_peg_parse_context ctx(input, is_partial);
-    auto result = parser.parse(ctx);
-    if (result.fail()) {
-        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
-    }
-
-    common_chat_msg msg;
-    msg.role = "assistant";
-
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
-        auto mapper = common_chat_peg_native_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        auto mapper = common_chat_peg_constructed_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else {
-        // Generic mapper
-        auto mapper = common_chat_peg_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    }
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
diff --git a/common/chat-parser.h b/common/chat-parser.h

deleted file mode 100644 (file)

index 3ed9c30..0000000
--- a/common/chat-parser.h
+++ /dev/null
@@ -1,133 +0,0 @@
-#pragma once
-
-#include "chat.h"
-#include "chat-parser-xml-toolcall.h"
-#include "json-partial.h"
-#include "regex-partial.h"
-
-#include <nlohmann/json_fwd.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-class common_chat_msg_partial_exception : public std::runtime_error {
-  public:
-    common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-class common_chat_msg_parser {
-    std::string input_;
-    bool is_partial_;
-    common_chat_parser_params syntax_; // TODO: rename to params
-    std::string healing_marker_;
-
-    size_t pos_ = 0;
-    common_chat_msg result_;
-
-  public:
-    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-    const std::string & input() const { return input_; }
-    size_t pos() const { return pos_; }
-    const std::string & healing_marker() const { return healing_marker_; }
-    const bool & is_partial() const { return is_partial_; }
-    const common_chat_msg & result() const { return result_; }
-    const common_chat_parser_params & syntax() const { return syntax_; }
-
-    void move_to(size_t pos) {
-        if (pos > input_.size()) {
-            throw std::runtime_error("Invalid position!");
-        }
-        pos_ = pos;
-    }
-    void move_back(size_t n) {
-        if (pos_ < n) {
-            throw std::runtime_error("Can't move back that far!");
-        }
-        pos_ -= n;
-    }
-
-    // Get the substring of the input at the given range
-    std::string str(const common_string_range & rng) const;
-
-    // Appends to the result.content field
-    void add_content(const std::string & content);
-
-    // Appends to the result.reasoning_content field
-    void add_reasoning_content(const std::string & reasoning_content);
-
-    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
-    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
-
-    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
-    bool add_tool_call(const nlohmann::ordered_json & tool_call);
-
-    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
-    bool add_tool_calls(const nlohmann::ordered_json & arr);
-
-    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
-    bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);
-
-    void finish();
-
-    bool consume_spaces();
-
-    void consume_literal(const std::string & literal);
-
-    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
-
-    std::string consume_rest();
-
-    struct find_regex_result {
-        std::string prelude;
-        std::vector<common_string_range> groups;
-    };
-
-    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
-
-    bool try_consume_literal(const std::string & literal);
-
-    std::optional<find_regex_result> try_find_literal(const std::string & literal);
-
-    find_regex_result consume_regex(const common_regex & regex);
-
-    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
-
-    std::optional<common_json> try_consume_json();
-    common_json consume_json();
-
-    struct consume_json_result {
-        nlohmann::ordered_json value;
-        bool is_partial;
-    };
-
-    /*
-        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
-
-        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
-        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
-
-        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
-        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
-        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
-    */
-    consume_json_result consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-    std::optional<consume_json_result> try_consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-
-    /**
-     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
-     * form.scope_start, form.tool_sep and form.scope_end can be empty.
-     */
-    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
-
-    // Parse content uses reasoning and XML-Style tool call
-    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
-
-    void clear_tools();
-};
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp

index 1bcba9cd8666da0a08c4ca216c4991552db6e38a..ef9dec5935a3fe72dd2f2310a51d01d62fca8b3b 100644 (file)
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -1,13 +1,17 @@
  #include "chat-peg-parser.h"
  
+#include "chat-auto-parser.h"
+#include "ggml.h"
+#include "peg-parser.h"
+
  #include <nlohmann/json.hpp>
  
-using json = nlohmann::json;
+using json = nlohmann::ordered_json;
  
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
      int count = 0;
      while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
-        if (max != -1 && count <= max) {
+        if (max != -1 && count >= max) {
              break;
          }
          sv.remove_suffix(1);
@@ -16,109 +20,753 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
      return sv;
  }
  
-void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
-    arena.visit(result, [this](const common_peg_ast_node & node) {
-        map(node);
-    });
+// Strip whitespace from the front of `sv`. When `max` is -1 every leading
+// whitespace character is removed; otherwise removal stops once `max`
+// characters have been dropped.
+static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
+    const bool unlimited = max == -1;
+    for (int removed = 0; !sv.empty() && std::isspace(static_cast<unsigned char>(sv.front())); ++removed) {
+        if (!unlimited && removed >= max) {
+            break;
+        }
+        sv.remove_prefix(1);
+    }
+    return sv;
  }
  
-void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
-    bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
-    bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+// Drops at most one leading whitespace character, then all trailing whitespace.
+static std::string_view trim(std::string_view sv) {
+    const std::string_view without_lead = trim_leading_space(sv, 1);
+    return trim_trailing_space(without_lead);
+}
  
-    if (is_reasoning) {
-        result.reasoning_content = std::string(trim_trailing_space(node.text));
+// Returns the net '{' / '}' nesting depth of a JSON-like string, i.e. how many
+// opening braces are still unclosed (negative if there are surplus '}').
+// Braces inside double-quoted strings are ignored, and a backslash inside a
+// string hides the character that follows it.
+static int json_brace_depth(const std::string & s) {
+    int  open_braces = 0;
+    bool quoted      = false;
+    bool skip_next   = false;
+    for (const char ch : s) {
+        if (skip_next) {
+            // Character following a backslash inside a string: never structural
+            skip_next = false;
+        } else if (quoted && ch == '\\') {
+            skip_next = true;
+        } else if (ch == '"') {
+            quoted = !quoted;
+        } else if (!quoted && ch == '{') {
+            ++open_braces;
+        } else if (!quoted && ch == '}') {
+            --open_braces;
+        }
      }
+    return open_braces;
+}
  
-    if (is_content) {
-        result.content = std::string(trim_trailing_space(node.text));
+// Serializes `s` as a JSON string and returns the escaped text between the
+// surrounding double quotes. If the dump is not wrapped in quotes it is
+// returned unchanged.
+static std::string escape_json_string_inner(const std::string & s) {
+    std::string dumped    = json(s).dump();
+    const bool  is_quoted = dumped.size() >= 2 && dumped.front() == '"' && dumped.back() == '"';
+    if (!is_quoted) {
+        return dumped;
      }
+    return dumped.substr(1, dumped.size() - 2);
  }
  
-void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
+// Convert Python-style single-quoted strings to JSON double-quoted strings
+// Only converts outer string delimiters, properly handling escape sequences:
+// - {'key': 'value'} -> {"key": "value"}
+// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
+// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
+//
+// The input is scanned once, left to right, tracking whether the cursor is
+// inside a single-quoted or a double-quoted string. Text that is already
+// double-quoted is copied through unchanged. A backslash that is the very last
+// character of the input has no following character to pair with and is
+// copied as an ordinary character.
+// Returns the rewritten string; the input is not modified.
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);  // May need extra space for escaping
  
-    bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
-    bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
-    bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+    // Scanner state: at most one of these is true at any time
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
  
-    if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        // Handle escape sequences
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                // Inside a single-quoted string being converted to double quotes
+                if (next == '\'') {
+                    // \' -> ' (escaped single quote becomes unescaped in double-quoted string)
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    // \" stays as \" (already escaped, works in double-quoted string)
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                // Other escapes (\n, \\, etc.): pass through both characters
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                // Inside a double-quoted string - pass through escape sequences as-is
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            // Outside any string - just pass through the backslash
+            // (the following character is handled by the next loop iteration)
+            result += c;
+            continue;
+        }
+
+        // Handle quote characters
+        if (c == '"') {
+            if (in_single_quoted) {
+                // Unescaped double quote inside single-quoted string -> must escape for JSON
+                result += "\\\"";
+            } else {
+                // Double quote as string delimiter or outside strings
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                // Single quote inside double-quoted string -> pass through
+                result += c;
+            } else if (in_single_quoted) {
+                // Closing single quote -> convert to double quote
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                // Opening single quote -> convert to double quote
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            // Any other character is copied verbatim
+            result += c;
+        }
      }
  
-    if (is_tool_id && current_tool) {
-        current_tool->id = std::string(trim_trailing_space(node.text));
+    return result;
+}
+
+// Visits the AST for `result` and stores the text of every node that carries a
+// non-empty tag in `tags`, keyed by the tag. A node whose tag was already
+// recorded overwrites the previously stored text.
+void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+    const auto record_tagged = [this](const common_peg_ast_node & node) {
+        if (node.tag.empty()) {
+            return;
+        }
+        tags[node.tag] = std::string(node.text);
+    };
+    arena.visit(result, record_tagged);
+}
+
+// Parses `input` from its beginning with the stored arena and returns the parse
+// result together with the tag -> text map collected from the resulting AST.
+tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const {
+    common_peg_parse_context parse_ctx(input, is_partial);
+    auto outcome = arena.parse(parse_ctx);
+
+    tag_based_peg_mapper tag_collector;
+    tag_collector.from_ast(parse_ctx.ast, outcome);
+
+    return { std::move(outcome), std::move(tag_collector.tags) };
+}
+
+// Tries to parse `input` (non-partial) starting at each offset in turn and
+// returns the first successful attempt with its extracted tags. If no offset
+// succeeds, the result of the attempt at the last offset is returned. Empty
+// input is delegated to parse_and_extract().
+tagged_parse_result tagged_peg_parser::parse_anywhere_and_extract(const std::string & input) const {
+    if (input.empty()) {
+        return parse_and_extract(input, false);
+    }
+    const size_t last_start = input.size() - 1;
+    for (size_t start = 0; start <= last_start; ++start) {
+        // Each attempt gets a fresh context so earlier failures leave no AST behind
+        common_peg_parse_context attempt(input, false);
+        attempt.debug = debug;
+        auto outcome = arena.parse(attempt, start);
+        if (!outcome.success() && start != last_start) {
+            continue;
+        }
+        tag_based_peg_mapper tag_collector;
+        tag_collector.from_ast(attempt.ast, outcome);
+        return { std::move(outcome), std::move(tag_collector.tags) };
      }
+    GGML_ABORT("Should not happen");
+}
  
-    if (is_tool_name && current_tool) {
-        current_tool->name = std::string(trim_trailing_space(node.text));
+// Creates a fresh parser builder, lets `fn` construct the root parser with it,
+// and wraps the built arena in a tagged_peg_parser.
+tagged_peg_parser build_tagged_peg_parser(
+    const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn) {
+    common_peg_parser_builder grammar_builder;
+    auto root = fn(grammar_builder);
+    grammar_builder.set_root(std::move(root));
+    return { grammar_builder.build() };
+}
+
+// Builds a repetition that alternates between `p` and content chunks emitted
+// under the rule `tag_name`. With an empty `marker` a chunk is a single any();
+// otherwise a chunk must not start with `marker` and extends via until(marker).
+common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string &       tag_name,
+                                                                 const std::string &       marker,
+                                                                 const common_peg_parser & p) {
+    if (!marker.empty()) {
+        auto guarded_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
+        return zero_or_more(choice({ p, guarded_chunk }));
      }
+    return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
+}
  
-    if (is_tool_args && current_tool) {
-        current_tool->arguments = std::string(trim_trailing_space(node.text));
+// Selects where argument text is accumulated: the current tool's `arguments`
+// once that tool has a non-empty name, otherwise the temporary `args_buffer`.
+std::string & common_chat_peg_mapper::args_target() {
+    const bool tool_is_named = current_tool && !current_tool->name.empty();
+    if (tool_is_named) {
+        return current_tool->arguments;
+    }
+    return args_buffer;
+}
+
+// Feeds every node of the parse result through map(), then appends a tool call
+// that is still held in pending_tool_call to result.tool_calls - but only when
+// that call has a non-empty name. Its arguments are taken from args_buffer when
+// the buffer is non-empty, and an outstanding closing quote is appended to
+// non-empty arguments.
+// NOTE(review): map() moves a call out of pending_tool_call as soon as its name
+// node is handled, so this flush looks unreachable through map() alone - confirm.
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena &    arena,
+                                      const common_peg_parse_result & parse_result_arg) {
+    arena.visit(parse_result_arg, [this](const common_peg_ast_node & node) { map(node); });
+
+    const bool has_named_pending = pending_tool_call.has_value() && !pending_tool_call->name.empty();
+    if (has_named_pending) {
+        auto & pending_call = *pending_tool_call;
+        if (!args_buffer.empty()) {
+            pending_call.arguments = args_buffer;
+        }
+        const bool needs_quote = closing_quote_pending && !pending_call.arguments.empty();
+        if (needs_quote) {
+            pending_call.arguments += "\"";
+        }
+        result.tool_calls.push_back(pending_call);
+        pending_tool_call.reset();
      }
  }
  
-void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
+// Applies one AST node to the message being assembled in `result`, dispatching
+// on the node's tag.
+//
+// - REASONING / CONTENT text is appended to the matching field of `result`.
+// - TOOL_OPEN starts a new call in `pending_tool_call`; `current_tool` points at it.
+// - TOOL_NAME names the call, moves it into result.tool_calls and re-points
+//   `current_tool` at the stored element.
+// - Argument text is written through args_target(): into `args_buffer` until the
+//   call has a name, into the call's `arguments` afterwards.
+// - `closing_quote_pending` records that a JSON string value was opened and its
+//   closing quote is still owed (emitted on TOOL_ARG_CLOSE or TOOL_CLOSE).
+// - TOOL_CLOSE finalizes the arguments (pending quote, unclosed braces) and
+//   discards a call that never received a name.
+void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+    // Handle reasoning/content tags
+    bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+    bool is_content   = node.tag == common_chat_peg_builder::CONTENT;
  
-    bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
-    bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
-    bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
-    bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
-    bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
-    bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
-    bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+    if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
+        result.reasoning_content += std::string(node.text);
+    }
+
+    if (is_content) {
+        // Concatenate content from multiple content nodes (e.g., when reasoning markers
+        // are preserved before content markers in reasoning_format=NONE mode)
+        result.content += std::string(node.text);
+    }
+
+    // Handle tool-related tags (supporting both JSON and tagged formats)
+    bool is_tool_open  = node.tag == common_chat_peg_builder::TOOL_OPEN;
+    bool is_tool_close = node.tag == common_chat_peg_builder::TOOL_CLOSE;
+    bool is_tool_name  = node.tag == common_chat_peg_builder::TOOL_NAME;
+    bool is_tool_id    = node.tag == common_chat_peg_builder::TOOL_ID;
+    bool is_tool_args  = node.tag == common_chat_peg_builder::TOOL_ARGS;
+    bool is_arg_open   = node.tag == common_chat_peg_builder::TOOL_ARG_OPEN;
+    bool is_arg_close  = node.tag == common_chat_peg_builder::TOOL_ARG_CLOSE;
+    bool is_arg_name         = node.tag == common_chat_peg_builder::TOOL_ARG_NAME;
+    bool is_arg_value        = node.tag == common_chat_peg_builder::TOOL_ARG_VALUE;
+    bool is_arg_string_value = node.tag == common_chat_peg_builder::TOOL_ARG_STRING_VALUE;
  
      if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
-        arg_count = 0;
+        pending_tool_call     = common_chat_tool_call();
+        current_tool          = &pending_tool_call.value();
+        arg_count             = 0;
+        args_buffer.clear();
+        closing_quote_pending = false;
+    }
+
+    if (is_tool_id && current_tool) {
+        auto text = trim_trailing_space(node.text);
+        // Strip one pair of surrounding double quotes from the id, if present
+        if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
+            text = text.substr(1, text.size() - 2);
+        }
+        current_tool->id = std::string(text);
+    }
+
+    if (is_tool_name && current_tool) {
+        current_tool->name = std::string(trim_trailing_space(node.text));
+        // Now that we have the name, populate the arguments from the buffer
+        if (!args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        } else if (current_tool->arguments.empty()) {
+            current_tool->arguments = "{";
+        }
+        // Add the tool call to results so streaming can see it
+        if (pending_tool_call.has_value()) {
+            result.tool_calls.push_back(pending_tool_call.value());
+            pending_tool_call.reset();
+            current_tool = &result.tool_calls.back();
+        }
      }
  
-    if (is_tool_name) {
-        current_tool->name = std::string(node.text);
-        current_tool->arguments = "{";
+    if (is_tool_args && current_tool) {
+        // For JSON format: arguments come as a complete JSON object
+        // For tagged format: built up from individual arg_name/arg_value nodes
+        auto text = trim_trailing_space(node.text);
+        if (!text.empty() && text.front() == '{') {
+            args_target() = std::string(text);
+        }
      }
  
      if (is_arg_open) {
-        needs_closing_quote = false;
+        closing_quote_pending = false;
      }
  
      if (is_arg_name && current_tool) {
+        std::string arg_entry;
          if (arg_count > 0) {
-            current_tool->arguments += ",";
+            arg_entry = ",";
          }
-        current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+        arg_entry += json(trim(node.text)).dump() + ":";
          ++arg_count;
+
+        auto & target = args_target();
+        if (target.empty()) {
+            target = "{";
+        }
+        target += arg_entry;
      }
  
-    if (is_arg_string && current_tool) {
-        // Serialize to JSON, but exclude the end quote
-        std::string dumped = json(trim_trailing_space(node.text)).dump();
-        current_tool->arguments += dumped.substr(0, dumped.size() - 1);
-        needs_closing_quote = true;
+    if ((is_arg_value || is_arg_string_value) && current_tool) {
+        // Drop at most one whitespace character from each end of the raw value
+        std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+
+        std::string value_to_add;
+        if (value_content.empty() && is_arg_string_value) {
+            // Empty string value - arg_close will add the closing quote
+            value_to_add          = "\"";
+            closing_quote_pending = true;
+        } else if (!value_content.empty() && is_arg_string_value) {
+            // Schema declares this as string type - always treat as literal string value
+            if (!closing_quote_pending) {
+                value_to_add          = "\"";
+                closing_quote_pending = true;
+            }
+            value_to_add += escape_json_string_inner(value_content);
+        } else if (!value_content.empty()) {
+            // For potential containers, normalize Python-style single quotes to JSON double quotes
+            bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
+            if (is_potential_container) {
+                value_content = normalize_quotes_to_json(value_content);
+            }
+
+            // Try to parse as JSON value (number, bool, null, object, array)
+            try {
+                json parsed = json::parse(value_content);
+                if (parsed.is_string()) {
+                    // Don't add closing quote yet (added by arg_close) for monotonic streaming
+                    std::string escaped = parsed.dump();
+                    if (!escaped.empty() && escaped.back() == '"') {
+                        escaped.pop_back();
+                    }
+                    value_to_add          = escaped;
+                    closing_quote_pending = true;
+                } else {
+                    // Non-string values: use raw content to preserve whitespace for monotonicity
+                    value_to_add = value_content;
+                }
+            } catch (...) {
+                if (node.is_partial && is_potential_container) {
+                    // Partial container: pass through the already-normalized content
+                    value_to_add = value_content;
+                } else {
+                    // Not valid JSON - treat as string value
+                    if (!closing_quote_pending) {
+                        value_to_add          = "\"";
+                        closing_quote_pending = true;
+                    }
+                    value_to_add += escape_json_string_inner(value_content);
+                }
+            }
+        }
+
+        args_target() += value_to_add;
      }
  
      if (is_arg_close && current_tool) {
-        if (needs_closing_quote) {
+        if (closing_quote_pending) {
+            args_target() += "\"";
+            closing_quote_pending = false;
+        }
+    }
+
+    if (is_tool_close && current_tool) {
+        // Flush buffer to arguments if tool name was never seen
+        if (current_tool->name.empty() && !args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        }
+        // Close any pending string quote
+        if (closing_quote_pending) {
              current_tool->arguments += "\"";
-            needs_closing_quote = false;
+            closing_quote_pending = false;
+        }
+        // Close any unclosed braces (accounts for nested objects)
+        for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
+            current_tool->arguments += "}";
+        }
+        // Add tool call to results if named; otherwise discard
+        if (pending_tool_call.has_value()) {
+            if (!current_tool->name.empty()) {
+                result.tool_calls.push_back(pending_tool_call.value());
+            }
+            pending_tool_call.reset();
+            // current_tool pointed into the optional that was just destroyed;
+            // clear it so later nodes cannot write through a dangling pointer
+            current_tool = nullptr;
+        }
+    }
+}
+
+// Builds the parser for a tool-call section in the tagged format, e.g.
+//   <tool_call><function=name><param=key>value</param></function></tool_call>
+//
+// markers             - overrides for the literal delimiters; any key that is
+//                       absent falls back to the default shown below
+// tools               - array of tool definitions; entries without a "function"
+//                       key are skipped
+// parallel_tool_calls - when true, one or more calls are accepted inside one
+//                       section; otherwise exactly one
+// force_tool_calls    - when true the section is required, otherwise optional
+//
+// Returns eps() when `tools` is not an array or is empty.
+common_peg_parser common_chat_peg_builder::standard_constructed_tools(
+    const std::map<std::string, std::string> & markers,
+    const nlohmann::json &                     tools,
+    bool                                       parallel_tool_calls,
+    bool                                       force_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
+    }
+
+    // Extract markers with defaults
+    auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
+        auto it = markers.find(key);
+        return it != markers.end() ? it->second : default_val;
+    };
+
+    std::string section_start    = get_marker("tool_call_start_marker", "<tool_call>");
+    std::string section_end      = get_marker("tool_call_end_marker", "</tool_call>");
+    std::string func_opener      = get_marker("function_opener", "<function=");
+    std::string func_name_suffix = get_marker("function_name_suffix", ">");
+    std::string func_closer      = get_marker("function_closer", "</function>");
+    std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
+    std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
+    std::string param_closer     = get_marker("parameter_closer", "</param>");
+
+    // Build tool choices for tagged format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
          }
+        const auto &   function = tool_def.at("function");
+        // at("name") throws if the definition has no "name" field
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        // Build argument parsers
+        // (stays eps() when the tool declares no properties)
+        auto args = eps();
+        if (params.contains("properties") && !params["properties"].empty()) {
+            auto arg_choice = choice();
+            for (const auto & el : params["properties"].items()) {
+                const std::string & prop_name = el.key();
+
+                // Accept the property name bare, double-quoted or single-quoted
+                auto arg_name_parser =
+                    choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+                // One argument: <prefix>name<suffix>value<closer>
+                auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
+                                         literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
+                                         tool_arg_close(literal(param_closer)));
+                arg_choice |= arg_rule;
+            }
+            args = zero_or_more(arg_choice + space());
+        }
+
+        // Build function parser: <function=name>args</function>
+        auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
+                                space() + tool_args(args) + space() + tool_close(literal(func_closer)));
+
+        tool_choices |= rule("tool-" + name, tool_parser);
      }
  
-    if (is_arg_json && current_tool) {
-        current_tool->arguments += std::string(trim_trailing_space(node.text));
+    // Build the section with markers
+    auto section =
+        parallel_tool_calls ?
+            trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
+                                          literal(section_end)) :
+            trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+// Helper: Split a dot-notation key at its first '.' into (prefix, field name); the prefix is "" when the key has no dot
+static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
+    auto dot_pos = key.find('.');
+    if (dot_pos == std::string::npos) {
+        return {"", key};  // Top-level field: no prefix, the whole key is the field name
      }
+    return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};  // e.g. "function.name" -> {"function", "name"}
+}
  
-    if (is_tool_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+// Mode 1: function_is_key — parse {"function_name": {...}}; returns a choice holding one "tool-<name>" rule per tool that has a "function" entry
+common_peg_parser common_chat_peg_builder::build_json_tools_function_is_key(
+    const nlohmann::json & tools,
+    const std::string &    args_key,  // raw key: when empty, the arguments value is matched without a key in front of it
+    const std::string &    effective_args_key,  // key literal matched in front of the arguments when args_key is non-empty
+    const std::string &    call_id_key,  // when non-empty, an optional quoted-string ID field is accepted
+    const std::string &    gen_call_id_key) {  // when non-empty, an optional ID field (quoted string or JSON number) is accepted
+
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;  // entries without a "function" object produce no rule
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();  // no "parameters" -> empty object schema
+
+        // Build inner object fields (ID fields first, arguments last)
+        std::vector<common_peg_parser> inner_fields;
+
+        if (!call_id_key.empty()) {
+            auto id_parser = atomic(
+                literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                literal("\"") + tool_id(json_string_content()) + literal("\"")
+            );
+            inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));  // the field and its trailing comma may both be absent
+        }
+
+        if (!gen_call_id_key.empty()) {
+            auto gen_id_parser = atomic(
+                literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(json_string_content()) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+            inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));  // same optional shape as the call_id field
+        }
+
+        // Arguments — either wrapped in args_key or parsed directly
+        common_peg_parser args_parser = eps();
+        if (args_key.empty()) {
+            args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
+        } else {
+            args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
+                          tool_args(schema(json(), "tool-" + name + "-schema", params));
+        }
+        inner_fields.push_back(args_parser);
+
+        // Build inner object parser
+        common_peg_parser inner_object = eps();
+        if (args_key.empty() && inner_fields.size() == 1) {
+            inner_object = inner_fields[0];  // only the bare arguments value: no extra wrapping braces
+        } else {
+            inner_object = literal("{") + space();
+            for (size_t i = 0; i < inner_fields.size(); i++) {
+                inner_object = inner_object + inner_fields[i];
+                if (i < inner_fields.size() - 1) {
+                    inner_object = inner_object + space();  // commas are part of the ID field parsers, so only whitespace is added here
+                }
+            }
+            inner_object = inner_object + space() + literal("}");
+        }
+
+        auto tool_parser = tool(
+            tool_open(literal("{")) + space() +
+            literal("\"") + tool_name(literal(name)) + literal("\"") +
+            space() + literal(":") + space() +
+            inner_object +
+            space() + tool_close(literal("}"))
+        );
+
+        tool_choices |= rule("tool-" + name, tool_parser);
+    }
+
+    return tool_choices;
+}
+
+// Mode 2: Nested keys (dot notation like "function.name"); returns a choice holding one "tool-<name>" rule per tool that has a "function" entry
+common_peg_parser common_chat_peg_builder::build_json_tools_nested_keys(
+    const nlohmann::json & tools,
+    const std::string &    effective_name_key,  // name key, possibly "prefix.field"
+    const std::string &    effective_args_key,  // arguments key, possibly "prefix.field"
+    const std::string &    call_id_key,  // optional top-level string ID key; ignored here when it contains a dot
+    const std::string &    gen_call_id_key) {  // optional top-level ID key (string or number); ignored here when it contains a dot
+
+    auto tool_choices = choice();
+
+    auto name_spec = parse_key_spec(effective_name_key);
+    auto args_spec = parse_key_spec(effective_args_key);
+
+    std::string nested_prefix     = !name_spec.first.empty() ? name_spec.first  : args_spec.first;  // the name key's prefix wins; otherwise the args key's prefix
+    std::string nested_name_field = !name_spec.first.empty() ? name_spec.second  : effective_name_key;
+    std::string nested_args_field = !args_spec.first.empty() ? args_spec.second  : effective_args_key;
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;  // entries without a "function" object produce no rule
          }
-        current_tool->arguments += "}";
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();  // no "parameters" -> empty object schema
+
+        auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
+                          literal("\"") + tool_name(literal(name)) + literal("\"");
+        auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
+                          tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+        auto nested_object = literal("{") + space() +
+                            nested_name + space() + literal(",") + space() +
+                            nested_args +
+                            space() + literal("}");  // the name field always precedes the arguments field
+
+        // Format: { id?, "function": {...} }
+        auto tool_parser_body = tool_open(literal("{")) + space();
+
+        if (!call_id_key.empty()) {
+            auto id_spec = parse_key_spec(call_id_key);
+            if (id_spec.first.empty()) {
+                auto id_parser = atomic(
+                    literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                    literal("\"") + tool_id(json_string_content()) + literal("\"")
+                );
+                tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());  // the comma is inside the optional, so the ID may be omitted
+            }
+        }
+
+        if (!gen_call_id_key.empty()) {
+            auto gen_id_spec = parse_key_spec(gen_call_id_key);
+            if (gen_id_spec.first.empty()) {
+                auto gen_id_parser = atomic(
+                    literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                    choice({
+                        literal("\"") + tool_id(json_string_content()) + literal("\""),
+                        tool_id(json_number())
+                    })
+                );
+                tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());  // the comma is inside the optional, so the ID may be omitted
+            }
+        }
+
+        auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
+        tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
+
+        tool_choices |= rule("tool-" + name, tool(tool_parser_body));
+    }
+
+    return tool_choices;
+}
+
+// Mode 3: Flat keys with optional ID fields and parameter ordering
+common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
+    const nlohmann::json &           tools,
+    const std::string &              effective_name_key,
+    const std::string &              effective_args_key,
+    const std::string &              call_id_key,
+    const std::string &              gen_call_id_key,
+    const std::vector<std::string> & parameters_order) {
+
+    auto tool_choices    = choice();
+    auto name_key_parser = literal("\"" + effective_name_key + "\"");
+    auto args_key_parser = literal("\"" + effective_args_key + "\"");
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();
+
+        auto tool_name_ = name_key_parser + space() + literal(":") + space() +
+                         literal("\"") + tool_name(literal(name)) + literal("\"");
+        auto tool_args_ = args_key_parser + space() + literal(":") + space() +
+                         tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+        // Build ID parsers if keys are provided
+        common_peg_parser id_parser = eps();
+        if (!call_id_key.empty()) {
+            id_parser = atomic(
+                literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(json_string_content()) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+        }
+
+        common_peg_parser gen_id_parser = eps();
+        if (!gen_call_id_key.empty()) {
+            gen_id_parser = atomic(
+                literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(json_string_content()) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+        }
+
+        // Create (parser, key) pairs for all fields, then sort by parameters_order
+        std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
+        parser_pairs.emplace_back(tool_name_, effective_name_key);
+        parser_pairs.emplace_back(tool_args_, effective_args_key);
+        if (!call_id_key.empty()) {
+            parser_pairs.emplace_back(optional(id_parser), call_id_key);
+        }
+        if (!gen_call_id_key.empty()) {
+            parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
+        }
+
+        std::sort(parser_pairs.begin(), parser_pairs.end(),
+            [&parameters_order](const auto & a, const auto & b) {
+                auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+                auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+                size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+                size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+                return idx_a < idx_b;
+            });
+
+        auto ordered_body = tool_open(literal("{")) + space();
+        for (size_t i = 0; i < parser_pairs.size(); i++) {
+            ordered_body = ordered_body + parser_pairs[i].first;
+            if (i < parser_pairs.size() - 1) {
+                ordered_body = ordered_body + space() + literal(",") + space();
+            }
+        }
+        ordered_body = ordered_body + space() + tool_close(literal("}"));
+
+        tool_choices |= rule("tool-" + name, tool(ordered_body));
+    }
+
+    return tool_choices;
+}
+
+common_peg_parser common_chat_peg_builder::standard_json_tools(  // Builds the parser for a JSON tool-call section delimited by section_start / section_end
+                                                       const std::string &              section_start,  // literal matched before the tool calls
+                                                       const std::string &              section_end,  // literal matched after the tool calls
+                                                       const nlohmann::json &           tools,  // tool definitions; a non-array or empty value yields eps()
+                                                       bool                             parallel_tool_calls,  // true: further comma-separated tool calls may follow the first
+                                                       bool                             force_tool_calls,  // true: the section is mandatory; false: it is wrapped in optional()
+                                                       const std::string &              name_key,  // empty -> "name"
+                                                       const std::string &              args_key,  // empty -> "arguments"
+                                                       bool                             array_wrapped,  // true: the tool calls are enclosed in [ ... ]
+                                                       bool                             function_is_key,  // true: use the {"function_name": {...}} layout
+                                                       const std::string &              call_id_key,
+                                                       const std::string &              gen_call_id_key,
+                                                       const std::vector<std::string> & parameters_order) {  // only forwarded to the flat-keys layout
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
      }
+
+    std::string effective_name_key = name_key.empty() ? "name" : name_key;
+    std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
+
+    // Dispatch to the appropriate builder based on the JSON layout mode
+    common_peg_parser tool_choices = eps();
+    if (function_is_key) {
+        tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);  // the raw args_key is passed too, so the builder can tell whether it was empty
+    } else {
+        auto name_spec = parse_key_spec(effective_name_key);
+        auto args_spec = parse_key_spec(effective_args_key);
+        if (!name_spec.first.empty() || !args_spec.first.empty()) {  // either key uses dot notation -> nested layout
+            tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
+        } else {
+            tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
+        }
+    }
+
+    // Build the section with markers
+    auto tool_calls = tool_choices;
+    if (parallel_tool_calls) {
+        tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);  // additional calls are separated by commas
+    }
+
+    if (array_wrapped) {
+        tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
+    }
+
+    auto section =
+        trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
  }
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h

index b84cbed206902a487a657cb52a4ccec54008c07d..e130ceea5ff6e3a574c9ee3dc32ea22de0f783b5 100644 (file)
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -3,22 +3,9 @@
  #include "chat.h"
  #include "peg-parser.h"
  
-class common_chat_peg_builder : public common_peg_parser_builder {
-  public:
-    static constexpr const char * REASONING_BLOCK = "reasoning-block";
-    static constexpr const char * REASONING = "reasoning";
-    static constexpr const char * CONTENT = "content";
-
-    common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
-    common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
-    common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
-};
-
-inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
-    common_chat_peg_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+#include <map>
+#include <optional>
+#include <vector>
  
  class common_chat_peg_mapper {
    public:
@@ -26,80 +13,164 @@ class common_chat_peg_mapper {
  
      common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
  
+    virtual ~common_chat_peg_mapper() = default;
+
      virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
      virtual void map(const common_peg_ast_node & node);
+    private:
+      // Tool call handling state
+      std::optional<common_chat_tool_call> pending_tool_call;  // Tool call waiting for name
+      common_chat_tool_call *              current_tool          = nullptr;
+      int                                  arg_count             = 0;
+      bool                                 closing_quote_pending = false;
+      std::string                          args_buffer;  // Buffer to delay arguments until tool name is known
+
+      // Returns a reference to the active argument destination string.
+      // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
+      std::string & args_target();
  };
  
-class common_chat_peg_native_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_ID = "tool-id";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARGS = "tool-args";
-
-    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
-    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
-    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
-    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
-    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
-    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
-};
-
-class common_chat_peg_native_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
+struct content_structure;
+struct tool_call_structure;
  
+class common_chat_peg_builder : public common_peg_parser_builder {
    public:
-    common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    // Tag constants (from former common_chat_peg_base_builder)
+    static constexpr const char * REASONING_BLOCK = "reasoning-block";
+    static constexpr const char * REASONING       = "reasoning";
+    static constexpr const char * CONTENT         = "content";
+
+    // Tag constants for tool calls and their arguments
+    static constexpr const char * TOOL           = "tool";
+    static constexpr const char * TOOL_OPEN      = "tool-open";
+    static constexpr const char * TOOL_CLOSE     = "tool-close";
+    static constexpr const char * TOOL_ID        = "tool-id";
+    static constexpr const char * TOOL_NAME      = "tool-name";
+    static constexpr const char * TOOL_ARGS      = "tool-args";
+    static constexpr const char * TOOL_ARG       = "tool-arg";
+    static constexpr const char * TOOL_ARG_OPEN  = "tool-arg-open";
+    static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+    static constexpr const char * TOOL_ARG_NAME         = "tool-arg-name";
+    static constexpr const char * TOOL_ARG_VALUE        = "tool-arg-value";
+    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";  // For schema-declared string types
  
-    void map(const common_peg_ast_node & node) override;
-};
+    // Low-level tag methods (from former common_chat_peg_base_builder): each wraps p in tag() with the matching constant
+    common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
  
-inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
-    common_chat_peg_native_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+    common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
  
-class common_chat_peg_constructed_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARG = "tool-arg";
-    static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
-    static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
-    static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
-    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
-    static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+    common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+
+    common_peg_parser tag_with_safe_content(const std::string &       tag_name,
+                        const std::string &       marker,
+                        const common_peg_parser & p);  // NOTE(review): defined out of line; semantics are not visible from this header
  
+    // Low-level tag methods for tool calls; some of them additionally wrap the tagged parser in atomic()
      common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
      common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
      common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
      common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
      common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
      common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
      common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
      common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+    common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // Use for schema-declared string types - won't be treated as potential JSON container
      common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
-    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }  // same tag as tool_arg_value, but wrapped in atomic()
+
+    // Legacy-compatible helper for building standard JSON tool calls
+    // Used by tests and manual parsers
+    // name_key/args_key: JSON key names for function name and arguments
+    //   An empty value falls back to the default key ("name" / "arguments")
+    //   Supports dot notation for nested objects (e.g., "function.name")
+    // array_wrapped: if true, tool calls are wrapped in JSON array [...]
+    // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
+    // call_id_key: JSON key for the call ID (e.g., "id"); a quoted string, and in the flat layout also a JSON number
+    // gen_call_id_key: JSON key for the generated call ID (e.g., "tool_call_id"); a quoted string or a JSON number
+    // parameters_order: order in which JSON fields should be parsed (only used by the flat layout)
+    common_peg_parser standard_json_tools(const std::string &              section_start,
+                                          const std::string &              section_end,
+                                          const nlohmann::json &           tools,
+                                          bool                             parallel_tool_calls,
+                                          bool                             force_tool_calls,
+                                          const std::string &              name_key = "",
+                                          const std::string &              args_key = "",
+                                          bool                             array_wrapped = false,
+                                          bool                             function_is_key = false,
+                                          const std::string &              call_id_key = "",
+                                          const std::string &              gen_call_id_key = "",
+                                          const std::vector<std::string> & parameters_order = {});
+
+    // Legacy-compatible helper for building XML/tagged style tool calls
+    // Used by tests and manual parsers
+    common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,  // marker overrides such as "tool_call_start_marker" or "function_opener"; missing keys use built-in defaults
+                                                 const nlohmann::json &                     tools,
+                                                 bool                                       parallel_tool_calls,
+                                                 bool                                       force_tool_calls);
+
+  private:
+    // Implementation helpers for standard_json_tools — one per JSON tool call layout mode
+    common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
+                                                       const std::string &    args_key,
+                                                       const std::string &    effective_args_key,
+                                                       const std::string &    call_id_key,
+                                                       const std::string &    gen_call_id_key);
+
+    common_peg_parser build_json_tools_nested_keys(const nlohmann::json & tools,
+                                                   const std::string &    effective_name_key,
+                                                   const std::string &    effective_args_key,
+                                                   const std::string &    call_id_key,
+                                                   const std::string &    gen_call_id_key);
+
+    common_peg_parser build_json_tools_flat_keys(const nlohmann::json &           tools,
+                                                 const std::string &              effective_name_key,
+                                                 const std::string &              effective_args_key,
+                                                 const std::string &              call_id_key,
+                                                 const std::string &              gen_call_id_key,
+                                                 const std::vector<std::string> & parameters_order);
  };
  
-class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
-    int arg_count = 0;
-    bool needs_closing_quote = false;
+inline common_peg_arena build_chat_peg_parser(  // Convenience wrapper: runs fn on a fresh builder and returns the built arena
+  const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+  common_chat_peg_builder builder;
+  builder.set_root(fn(builder));  // the parser returned by fn becomes the root
+  return builder.build();
+}
  
+class tag_based_peg_mapper {
    public:
-    common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    std::map<std::string, std::string> tags;  // NOTE(review): presumably tag name -> matched text, filled by from_ast; confirm against the implementation
  
-    void map(const common_peg_ast_node & node) override;
+    void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);  // defined out of line
  };
  
-inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
-    common_chat_peg_constructed_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+struct tagged_parse_result {  // Return type of the tagged_peg_parser parse_*_and_extract methods
+    common_peg_parse_result              result;  // the underlying parse result
+    std::map<std::string, std::string> tags;  // NOTE(review): presumably the extracted tag name -> text pairs; confirm against the implementation
+};
+
+struct tagged_peg_parser {
+    common_peg_arena arena;
+    bool debug = false;  // off by default; toggled through withDebug() / withoutDebug()
+
+    tagged_peg_parser & withDebug() {  // sets debug and returns *this so calls can be chained
+      debug = true;
+      return *this;
+    }
+
+    tagged_peg_parser & withoutDebug() {  // clears debug and returns *this so calls can be chained
+      debug = false;
+      return *this;
+    }
+
+    tagged_parse_result parse_and_extract(const std::string & input, bool is_partial = false) const;  // defined out of line
+    tagged_parse_result parse_anywhere_and_extract(const std::string & input) const;  // defined out of line
+};
+
+tagged_peg_parser build_tagged_peg_parser(
+    const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
+
diff --git a/common/chat.cpp b/common/chat.cpp

index 52780c59ad1b8cf51634847f25e101e211b89b9d..81c23430af9282b80582de3da4ef2a9783f4f2f5 100644 (file)
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1,24 +1,25 @@
  #include "chat.h"
-#include "chat-parser.h"
+
+#include "chat-auto-parser.h"
  #include "chat-peg-parser.h"
  #include "common.h"
-#include "json-partial.h"
+#include "ggml.h"
  #include "json-schema-to-grammar.h"
  #include "log.h"
-#include "regex-partial.h"
  
-#include "jinja/parser.h"
  #include "jinja/value.h"
  #include "jinja/runtime.h"
  #include "jinja/caps.h"
+#include "peg-parser.h"
  
-#include <algorithm>
  #include <cstdio>
-#include <cctype>
+#include <cstdlib>
+#include <ctime>
  #include <exception>
  #include <functional>
-#include <iostream>
+
  #include <optional>
+#include <sstream>
  #include <stdexcept>
  #include <string>
  #include <vector>
@@ -26,14 +27,26 @@
  using json = nlohmann::ordered_json;
  
  static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {  // formats `now` as local time using a std::put_time format string
-    auto time = std::chrono::system_clock::to_time_t(now);
-    auto local_time = *std::localtime(&time);
+    auto               time       = std::chrono::system_clock::to_time_t(now);
+    auto               local_time = *std::localtime(&time);  // copy the struct: std::localtime returns a pointer to shared static storage
      std::ostringstream ss;
      ss << std::put_time(&local_time, format.c_str());
      auto res = ss.str();
      return res;
  }
  
+static json safe_args_parse(const std::string & to_parse) {
+    std::string stripped = to_parse;
+    if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') {
+        stripped = to_parse.substr(1, to_parse.length() - 1);
+    }
+    try {
+        return json::parse(stripped);
+    } catch (json::exception & e) {
+        return stripped;
+    }
+}
+
  static std::string string_diff(const std::string & last, const std::string & current) {
      if (last.empty()) {
          return current;
@@ -116,7 +129,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
                  {"type", "function"},
                  {"function", {
                      {"name", tool_call.name},
-                    {"arguments", tool_call.arguments},
+                    {"arguments", json::parse(tool_call.arguments)},
                  }},
              };
              if (!tool_call.id.empty()) {
@@ -133,7 +146,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
      return jmsg;
  }
  
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv,
+                                                                      const common_chat_msg & msg_new) {
      std::vector<common_chat_msg_diff> diffs;
      if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
          diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
@@ -143,38 +157,56 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  
      // TODO: these can become expensive for long messages - how to optimize?
      if (msg_prv.reasoning_content != msg_new.reasoning_content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff                  = diffs.emplace_back();
          diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
      }
      if (msg_prv.content != msg_new.content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff        = diffs.emplace_back();
          diff.content_delta = string_diff(msg_prv.content, msg_new.content);
      }
  
      if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
-        throw std::runtime_error("Invalid diff: now finding less tool calls!");
+        std::string err = "Invalid diff: now finding less tool calls!\n";
+        err += "  Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_prv.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_new.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current msg text content:\n" + msg_new.content + "\n";
+        throw std::runtime_error(err);
      }
  
      if (!msg_prv.tool_calls.empty()) {
-        const auto idx = msg_prv.tool_calls.size() - 1;
+        const auto   idx  = msg_prv.tool_calls.size() - 1;
          const auto & pref = msg_prv.tool_calls[idx];
          const auto & newf = msg_new.tool_calls[idx];
-        if (pref.name != newf.name) {
-            throw std::runtime_error("Invalid diff: tool call mismatch!");
+        // Allow tool name to change during incremental parsing:
+        // - empty -> non-empty (initial discovery)
+        // - prefix -> longer string (name grows as more input is parsed)
+        if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) {
+            // Require the previous name to be a prefix of the new one (between non-empty values, names may only grow)
+            bool is_prefix = (newf.name.rfind(pref.name, 0) == 0);
+            if (!is_prefix) {
+                LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str());
+                throw std::runtime_error("Invalid diff: tool call mismatch!");
+            }
          }
          const auto args_diff = string_diff(pref.arguments, newf.arguments);
-        if (!args_diff.empty() || pref.id != newf.id) {
-            auto & diff = diffs.emplace_back();
+        if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) {
+            auto & diff          = diffs.emplace_back();
              diff.tool_call_index = idx;
-            if (pref.id != newf.id) {
-                diff.tool_call_delta.id = newf.id;
+            if (pref.id != newf.id || pref.name != newf.name) {
+                diff.tool_call_delta.id   = newf.id;
                  diff.tool_call_delta.name = newf.name;
              }
              diff.tool_call_delta.arguments = args_diff;
          }
      }
      for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
-        auto & diff = diffs.emplace_back();
+        auto & diff          = diffs.emplace_back();
          diff.tool_call_index = idx;
          diff.tool_call_delta = msg_new.tool_calls[idx];
      }
@@ -184,94 +216,14 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
  
  using chat_template_caps = jinja::caps;
  
-struct common_chat_template {
-    jinja::program prog;
-    std::string bos_tok;
-    std::string eos_tok;
-    std::string src;
-    chat_template_caps caps;
-
-    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
-        jinja::lexer lexer;
-        auto lexer_res = lexer.tokenize(src);
-        this->prog = jinja::parse_from_tokens(lexer_res);
-
-        this->src = lexer_res.source;
-        this->bos_tok = bos_token;
-        this->eos_tok = eos_token;
-
-        this->caps = jinja::caps_get(prog);
-        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
-    }
-
-    const std::string & source() const { return src; }
-    const std::string & bos_token() const { return bos_tok; }
-    const std::string & eos_token() const { return eos_tok; }
-
-    // TODO: this is ugly, refactor it somehow
-    json add_system(const json & messages, const std::string & system_prompt) const {
-        GGML_ASSERT(messages.is_array());
-        auto msgs_copy = messages;
-        if (!caps.supports_system_role) {
-            if (msgs_copy.empty()) {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "user"},
-                    {"content", system_prompt}
-                });
-            } else {
-                auto & first_msg = msgs_copy[0];
-                if (!first_msg.contains("content")) {
-                    first_msg["content"] = "";
-                }
-                first_msg["content"] = system_prompt + "\n\n"
-                    + first_msg["content"].get<std::string>();
-            }
-        } else {
-            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "system"},
-                    {"content", system_prompt}
-                });
-            } else if (msgs_copy[0].at("role") == "system") {
-                msgs_copy[0]["content"] = system_prompt;
-            }
-        }
-        return msgs_copy;
-    }
-
-    chat_template_caps original_caps() const {
-        return caps;
-    }
-
-};
-
  struct common_chat_templates {
      bool add_bos;
      bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
-    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    bool has_explicit_template;  // Model had builtin template or template override was specified.
+    std::unique_ptr<common_chat_template> template_default;  // always set (defaults to chatml)
      std::unique_ptr<common_chat_template> template_tool_use;
  };
  
-struct templates_params {
-    json messages;
-    json tools;
-    common_chat_tool_choice tool_choice;
-    json json_schema;
-    bool parallel_tool_calls;
-    common_reasoning_format reasoning_format;
-    bool stream;
-    std::string grammar;
-    bool add_generation_prompt = true;
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    json extra_context;
-    bool add_bos;
-    bool add_eos;
-    bool is_inference = true;
-    bool mark_input = true; // whether to mark input strings in the jinja context
-};
-
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
      if (tool_choice == "auto") {
          return COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -286,23 +238,24 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
  }
  
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
-    common_chat_templates_inputs dummy_inputs;
+    common_chat_templates_inputs inputs;
+    inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
      common_chat_msg msg;
-    msg.role = "user";
+    msg.role    = "user";
      msg.content = "test";
-    dummy_inputs.messages = {msg};
-    dummy_inputs.enable_thinking = false;
-    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    dummy_inputs.enable_thinking = true;
-    const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+    inputs.messages = { msg };
+    inputs.enable_thinking = true;
+    inputs.add_generation_prompt = true;
+    inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+
+    auto params = common_chat_templates_apply(chat_templates, inputs);
+    return params.supports_thinking;
  }
  
  std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
      std::vector<common_chat_msg> msgs;
  
      try {
-
          if (!messages.is_array()) {
              throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
          }
@@ -318,7 +271,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
              }
              msg.role = message.at("role");
  
-            auto has_content = message.contains("content");
+            auto has_content    = message.contains("content");
              auto has_tool_calls = message.contains("tool_calls");
              if (has_content) {
                  const auto & content = message.at("content");
@@ -339,7 +292,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                          msg.content_parts.push_back(msg_part);
                      }
                  } else if (!content.is_null()) {
-                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " +
+                                                content.dump() +
+                                                " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                  }
              }
              if (has_tool_calls) {
@@ -359,8 +314,13 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                      if (!fc.contains("name")) {
                          throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                      }
-                    tc.name = fc.at("name");
-                    tc.arguments = fc.at("arguments");
+                    tc.name           = fc.at("name");
+                    const auto & args = fc.at("arguments");
+                    if (args.is_string()) {
+                        tc.arguments = args;
+                    } else {
+                        tc.arguments = args.dump();
+                    }
                      if (tool_call.contains("id")) {
                          tc.id = tool_call.at("id");
                      }
@@ -368,7 +328,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                  }
              }
              if (!has_content && !has_tool_calls) {
-                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+                throw std::invalid_argument(
+                    "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & "
+                    "https://github.com/ggml-org/llama.cpp/issues/12279)");
              }
              if (message.contains("reasoning_content")) {
                  msg.reasoning_content = message.at("reasoning_content");
@@ -474,12 +436,13 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
      auto result = json::array();
      for (const auto & tool : tools) {
          result.push_back({
-            {"type", "function"},
-            {"function", {
-                {"name", tool.name},
-                {"description", tool.description},
-                {"parameters", json::parse(tool.parameters)},
-            }},
+            { "type",     "function" },
+            { "function",
+             {
+                  { "name", tool.name },
+                  { "description", tool.description },
+                  { "parameters", json::parse(tool.parameters) },
+              }                      },
          });
      }
      return result;
@@ -497,16 +460,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
          json tool_call;
          tool_call["index"] = diff.tool_call_index;
          if (!diff.tool_call_delta.id.empty()) {
-            tool_call["id"] = diff.tool_call_delta.id;
+            tool_call["id"]   = diff.tool_call_delta.id;
              tool_call["type"] = "function";
          }
-        json function = json::object();
-        if (!diff.tool_call_delta.name.empty()) {
-            function["name"] = diff.tool_call_delta.name;
+        if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
+            json function = json::object();
+            if (!diff.tool_call_delta.name.empty()) {
+                function["name"] = diff.tool_call_delta.name;
+            }
+            if (!diff.tool_call_delta.arguments.empty()) {
+                function["arguments"] = diff.tool_call_delta.arguments;
+            }
+            tool_call["function"] = function;
          }
-        function["arguments"] = diff.tool_call_delta.arguments;
-        tool_call["function"] = function;
-        delta["tool_calls"] = json::array({tool_call});
+        delta["tool_calls"] = json::array({ tool_call });
      }
      return delta;
  }
@@ -515,13 +482,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
      if (use_jinja) {
          try {
              common_chat_msg msg;
-            msg.role = "user";
+            msg.role    = "user";
              msg.content = "test";
  
              auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
  
              common_chat_templates_inputs inputs;
-            inputs.messages = {msg};
+            inputs.messages = { msg };
  
              common_chat_templates_apply(tmpls.get(), inputs);
              return true;
@@ -530,28 +497,28 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
              return false;
          }
      }
-    llama_chat_message chat[] = {{"user", "test"}};
+    llama_chat_message chat[] = {
+        { "user", "test" }
+    };
      const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
      return res >= 0;
  }
  
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja) {
-
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja) {
      common_chat_templates_inputs inputs;
      inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.add_bos   = tmpls->add_bos;
+    inputs.add_eos   = tmpls->add_eos;
  
      std::string fmt_past_msg;
      if (!past_msg.empty()) {
-        inputs.messages = past_msg;
+        inputs.messages              = past_msg;
          inputs.add_generation_prompt = false;
-        fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+        fmt_past_msg                 = common_chat_templates_apply(tmpls, inputs).prompt;
      }
      std::ostringstream ss;
      // if the past_msg ends with a newline, we must preserve it in the formatted version
@@ -561,37 +528,39 @@ std::string common_chat_format_single(
      // format chat with new_msg
      inputs.messages.push_back(new_msg);
      inputs.add_generation_prompt = add_ass;
-    auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+    auto fmt_new_msg             = common_chat_templates_apply(tmpls, inputs).prompt;
      // get the diff part
      ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
      return ss.str();
  }
  
-std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs) {
      common_chat_templates_inputs inputs;
-    inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.use_jinja            = use_jinja;
+    inputs.add_bos              = tmpls->add_bos;
+    inputs.add_eos              = tmpls->add_eos;
      inputs.chat_template_kwargs = chat_template_kwargs;
-    auto add_simple_msg = [&](auto role, auto content) {
+    auto add_simple_msg         = [&](auto role, auto content) {
          common_chat_msg msg;
-        msg.role = role;
+        msg.role    = role;
          msg.content = content;
          inputs.messages.push_back(msg);
      };
-    add_simple_msg("system",    "You are a helpful assistant");
-    add_simple_msg("user",      "Hello");
+    add_simple_msg("system", "You are a helpful assistant");
+    add_simple_msg("user", "Hello");
      add_simple_msg("assistant", "Hi there");
-    add_simple_msg("user",      "How are you?");
+    add_simple_msg("user", "How are you?");
      return common_chat_templates_apply(tmpls, inputs).prompt;
  }
  
-#define CHATML_TEMPLATE_SRC \
-    "{%- for message in messages -%}\n" \
+#define CHATML_TEMPLATE_SRC                                                               \
+    "{%- for message in messages -%}\n"                                                   \
      "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
-    "{%- endfor -%}\n" \
-    "{%- if add_generation_prompt -%}\n" \
-    "  {{- '<|im_start|>assistant\n' -}}\n" \
+    "{%- endfor -%}\n"                                                                    \
+    "{%- if add_generation_prompt -%}\n"                                                  \
+    "  {{- '<|im_start|>assistant\n' -}}\n"                                               \
      "{%- endif -%}"
  
  void common_chat_templates_free(struct common_chat_templates * tmpls) {
@@ -609,19 +578,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
                  return tmpls->template_tool_use->source();
              }
              return "";
-        } else {
-            LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
          }
+        LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
      }
      return tmpls->template_default->source();
  }
  
-common_chat_templates_ptr common_chat_templates_init(
-    const struct llama_model * model,
-    const std::string & chat_template_override,
-    const std::string & bos_token_override,
-    const std::string & eos_token_override)
-{
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override,
+                                                     const std::string &        eos_token_override) {
      std::string default_template_src;
      std::string template_tool_use_src;
  
@@ -630,7 +596,7 @@ common_chat_templates_ptr common_chat_templates_init(
          GGML_ASSERT(model != nullptr);
          const auto * str = llama_model_chat_template(model, /* name */ nullptr);
          if (str) {
-            default_template_src = str;
+            default_template_src  = str;
              has_explicit_template = true;
          }
          str = llama_model_chat_template(model, /* name */ "tool_use");
@@ -652,34 +618,40 @@ common_chat_templates_ptr common_chat_templates_init(
      // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
      // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
      if (default_template_src.find("<|channel|>") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("in message.content or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("in message.content or") != std::string::npos) {
          string_replace_all(default_template_src,
-            "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
-            "{%- if false %}");
+                           "{%- if \"<|channel|>analysis<|message|>\" in message.content or "
+                           "\"<|channel|>final<|message|>\" in message.content %}",
+                           "{%- if false %}");
      }
  
      // TODO @aldehir : this is a temporary fix, pending Minja changes
      // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
      if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
          string_replace_all(default_template_src,
-            "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
-            "{%- if false %}");
+                           "{%- if (message['content'] is none or message['content'] == '' or "
+                           "message['content']|length == 0) and (message['tool_calls'] is not defined or "
+                           "message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+                           "{%- if false %}");
      }
  
      std::string token_bos = bos_token_override;
      std::string token_eos = eos_token_override;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool        add_bos   = false;
+    bool        add_eos   = false;
      if (model) {
-        const auto * vocab = llama_model_get_vocab(model);
-        const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
+        const auto * vocab     = llama_model_get_vocab(model);
+        const auto   get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
              if (token == LLAMA_TOKEN_NULL) {
-                if (default_template_src.find(jinja_variable_name) != std::string::npos
-                    || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
-                    LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
+                if (default_template_src.find(jinja_variable_name) != std::string::npos ||
+                    template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
+                    LOG_WRN(
+                        "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't "
+                          "work as intended.\n",
+                        name);
                  }
                  return std::string();
              }
@@ -687,13 +659,13 @@ common_chat_templates_ptr common_chat_templates_init(
          };
          token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
          token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
-        add_bos = llama_vocab_get_add_bos(vocab);
-        add_eos = llama_vocab_get_add_eos(vocab);
+        add_bos   = llama_vocab_get_add_bos(vocab);
+        add_eos   = llama_vocab_get_add_eos(vocab);
      }
      common_chat_templates_ptr tmpls(new common_chat_templates());
      tmpls->has_explicit_template = has_explicit_template;
-    tmpls->add_bos = add_bos;
-    tmpls->add_eos = add_eos;
+    tmpls->add_bos               = add_bos;
+    tmpls->add_eos               = add_eos;
      try {
          tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
      } catch (const std::exception & e) {
@@ -714,35 +686,12 @@ common_chat_templates_ptr common_chat_templates_init(
  
  const char * common_chat_format_name(common_chat_format format) {
      switch (format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
-        case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
-        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
-        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
-        case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
-        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
-        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
-        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
-        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
-        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
-        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
-        case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
-        case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
-        case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
-        case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
-        case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
-        case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
+        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+            return "Content-only";
+        case COMMON_CHAT_FORMAT_PEG_SIMPLE:
+            return "peg-simple";
+        case COMMON_CHAT_FORMAT_PEG_NATIVE:
+            return "peg-native";
          default:
              throw std::runtime_error("Unknown chat format");
      }
@@ -750,10 +699,14 @@ const char * common_chat_format_name(common_chat_format format) {
  
  const char * common_reasoning_format_name(common_reasoning_format format) {
      switch (format) {
-        case COMMON_REASONING_FORMAT_NONE:     return "none";
-        case COMMON_REASONING_FORMAT_AUTO:     return "auto";
-        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
-        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
+        case COMMON_REASONING_FORMAT_NONE:
+            return "none";
+        case COMMON_REASONING_FORMAT_AUTO:
+            return "auto";
+        case COMMON_REASONING_FORMAT_DEEPSEEK:
+            return "deepseek";
+        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY:
+            return "deepseek-legacy";
          default:
              throw std::runtime_error("Unknown reasoning format");
      }
@@ -762,11 +715,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
  common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
      if (format == "none") {
          return COMMON_REASONING_FORMAT_NONE;
-    } else if (format == "auto") {
+    }
+    if (format == "auto") {
          return COMMON_REASONING_FORMAT_AUTO;
-    } else if (format == "deepseek") {
+    }
+    if (format == "deepseek") {
          return COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else if (format == "deepseek-legacy") {
+    }
+    if (format == "deepseek-legacy") {
          return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
      }
      throw std::runtime_error("Unknown reasoning format: " + format);
@@ -782,7 +738,8 @@ static void foreach_function(const json & tools, const std::function<void(const
      }
  }
  
-static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+static void foreach_parameter(const json &                                                         function,
+                              const std::function<void(const std::string &, const json &, bool)> & fn) {
      if (!function.contains("parameters") || !function.at("parameters").is_object()) {
          return;
      }
@@ -790,7 +747,7 @@ static void foreach_parameter(const json & function, const std::function<void(co
      if (!params.contains("properties") || !params.at("properties").is_object()) {
          return;
      }
-    const auto & props = params.at("properties");
+    const auto &          props = params.at("properties");
      std::set<std::string> required;
      if (params.contains("required") && params.at("required").is_array()) {
          params.at("required").get_to(required);
@@ -801,19 +758,19 @@ static void foreach_parameter(const json & function, const std::function<void(co
      }
  }
  
-static std::string apply(
+std::string common_chat_template_direct_apply(
      const common_chat_template & tmpl,
-    const struct templates_params & inputs,
-    const std::optional<json> & messages_override = std::nullopt,
-    const std::optional<json> & tools_override = std::nullopt,
-    const std::optional<json> & additional_context = std::nullopt)
-{
+    const autoparser::templates_params & inputs,
+    const std::optional<json> & messages_override,
+    const std::optional<json> & tools_override,
+    const std::optional<json> & additional_context) {
      jinja::context ctx(tmpl.source());
  
      nlohmann::ordered_json inp = nlohmann::ordered_json{
          {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
          {"bos_token", tmpl.bos_token()},
          {"eos_token", tmpl.eos_token()},
+        {"enable_thinking", inputs.enable_thinking},
      };
      if (tools_override.has_value() || !inputs.tools.empty()) {
          inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
@@ -839,7 +796,7 @@ static std::string apply(
      // render
      jinja::runtime runtime(ctx);
      const jinja::value results = runtime.execute(tmpl.prog);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
  
      std::string result = parts->as_string().str();
  
@@ -853,265 +810,8 @@ static std::string apply(
      return result;
  }
  
-static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    auto tool_call_schemas = json::array();
-    foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool.at("function");
-        auto tool_schema = json {
-            {"type", "object"},
-            {"properties", {
-                {"name", {
-                    {"type", "string"},
-                    {"const", function.at("name")},
-                }},
-                {"arguments", function.at("parameters")},
-            }},
-            {"required", json::array({"name", "arguments"})},
-        };
-        if (function.contains("description")) {
-            tool_schema["description"] = function.at("description");
-        }
-        if (inputs.parallel_tool_calls) {
-            tool_schema.at("properties")["id"] = {
-                {"type", "string"},
-                {"minLength", 4},
-            };
-            tool_schema.at("required").push_back("id");
-        }
-        tool_call_schemas.emplace_back(tool_schema);
-    });
-    const auto tool_call =
-        inputs.parallel_tool_calls
-            ? json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_calls", {
-                        {"type", "array"},
-                        {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                            {"anyOf", tool_call_schemas},
-                        }},
-                        {"minItems", 1},
-                    }},
-                }},
-                {"required", json::array({"tool_calls"})},
-            }
-            : json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                        {"anyOf", tool_call_schemas},
-                    }},
-                }},
-                {"required", json::array({"tool_call"})},
-            };
-    const auto schema =
-        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
-            ? json {
-                {"anyOf", json::array({
-                    tool_call,
-                    {
-                        {"type", "object"},
-                        {"properties", {
-                            {"response", inputs.json_schema.is_null()
-                                ? json {{"type", "string"}}
-                                : inputs.json_schema
-                            },
-                        }},
-                        {"required", json::array({"response"})},
-                    },
-                })}
-            }
-            : tool_call;
-
-    data.grammar_lazy = false;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        builder.add_schema("root", schema);
-    });
-
-    auto tweaked_messages = tmpl.add_system(
-        inputs.messages,
-        "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
-
-    // ensure all messages has "content" field
-    for (auto & message : tweaked_messages) {
-        if (!message.contains("content") || message["content"].is_null()) {
-            message["content"] = "";
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    // Important note: the model is probably trained to take a JSON stringified arguments value.
-                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
-                    {"name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"arguments", function.at("parameters")},
-                    {"id", {
-                        {"type", "string"},
-                        // Nemo's template expects a 9-character alphanumeric ID.
-                        {"pattern", "^[a-zA-Z0-9]{9}$"},
-                    }},
-                }},
-                {"required", json::array({"name", "arguments", "id"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-    });
-    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-    data.preserved_tokens = {
-        "[TOOL_CALLS]",
-    };
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
-    return data;
-}
-
-
-// Case-insensitive find
-static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
-    auto it = std::search(
-        haystack.begin() + pos, haystack.end(),
-        needle.begin(), needle.end(),
-        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
-    );
-    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
-}
-
-static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    const auto is_json_schema_provided = !inputs.json_schema.is_null();
-    const auto is_grammar_provided = !inputs.grammar.empty();
-    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
-
-    // the logic requires potentially modifying the messages
-    auto tweaked_messages = inputs.messages;
-
-    auto replace_json_schema_marker = [](json & messages) -> bool {
-        static std::string marker1 = "force json schema.\n";
-        static std::string marker2 = "force json schema.";
-
-        if (messages.empty() || messages.at(0).at("role") != "system") {
-            return false;
-        }
-
-        std::string content = messages.at(0).at("content");
-
-        for (const auto & marker : {marker1, marker2}) {
-            const auto pos = ifind_string(content, marker);
-            if (pos != std::string::npos) {
-                content.replace(pos, marker.length(), "");
-                // inject modified content back into the messages
-                messages.at(0).at("content") = content;
-                return true;
-            }
-        }
-
-        return false;
-    };
-
-    // Lfm2 model does not natively work with json, but can generally understand the tools structure
-    //
-    // Example of the pytorch dialog structure:
-    //     <|startoftext|><|im_start|>system
-    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
-    //     <|im_start|>user
-    //     What is the current status of candidate ID 12345?<|im_end|>
-    //     <|im_start|>assistant
-    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
-    //     <|im_start|>tool
-    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
-    //     <|im_start|>assistant
-    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
-    //
-    // For the llama server compatibility with json tools semantic,
-    // the client can add "Follow json schema." line into the system message prompt to force the json output.
-    //
-    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
-        // server/utils.hpp prohibits that branch for the custom grammar anyways
-        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
-    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
-        LOG_INF("%s: Using tools to build a grammar\n", __func__);
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-
-            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
-        });
-        // model has no concept of tool selection mode choice,
-        // if the system prompt rendered correctly it will produce a tool call
-        // the grammar goes inside the tool call body
-        data.grammar_lazy = true;
-        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
-    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
-        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
-        // output those tokens
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-    } else if (is_json_schema_provided) {
-        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else if (is_grammar_provided) {
-        LOG_INF("%s: Using provided grammar\n", __func__);
-        data.grammar = inputs.grammar;
-    } else {
-        LOG_INF("%s: Using content relying on the template\n", __func__);
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template &    tmpl,
+                                                              const autoparser::templates_params & inputs) {
      common_chat_params data;
  
      // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@@ -1129,8 +829,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
          // If message contains `reasoning_content`, add it as a block of type `thinking`
          if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
              content.push_back({
-                {"type", "thinking"},
-                {"thinking", msg.at("reasoning_content").get<std::string>()},
+                { "type",     "thinking"                                     },
+                { "thinking", msg.at("reasoning_content").get<std::string>() },
              });
          }
  
@@ -1138,8 +838,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
          if (msg.contains("content")) {
              if (msg.at("content").is_string()) {
                  content.push_back({
-                    {"type", "text"},
-                    {"text", msg.at("content").get<std::string>()},
+                    { "type", "text"                               },
+                    { "text", msg.at("content").get<std::string>() },
                  });
              } else if (msg.at("content").is_array()) {
                  auto blocks = msg.at("content");
@@ -1147,32 +847,35 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
              }
          }
  
-        auto adjusted = msg;
+        auto adjusted       = msg;
          adjusted["content"] = content;
          adjusted.erase("reasoning_content");
          adjusted_messages.push_back(adjusted);
      }
  
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
      auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
+    auto include_grammar   = true;
  
-    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
+    data.supports_thinking = true;
+    data.prompt            = common_chat_template_direct_apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens  = {
          "[THINK]",
          "[/THINK]",
          "[TOOL_CALLS]",
          "[ARGS]",
      };
  
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto reasoning =
+            extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
  
          // Response format parser
          if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
              // Ministral wants to emit json surrounded by code fences
-            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema))
+                             << "```";
          }
  
          // Tool call parser
@@ -1180,17 +883,16 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
              auto tool_choice = p.choice();
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+                std::string  name     = function.at("name");
+                const auto & schema   = function.at("parameters");
  
-                tool_choice |= p.rule("tool-" + name,
-                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
-                );
+                tool_choice |=
+                    p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") +
+                                               p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
              });
  
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
              auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
  
              return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
@@ -1209,1722 +911,369 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
          data.grammar = build_grammar([&](const common_grammar_builder & builder) {
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                  builder.resolve_refs(schema);
              });
              parser.build_grammar(builder, data.grammar_lazy);
          });
  
          data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" }
          };
      }
  
      return data;
  }
  
-static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
-    data.preserved_tokens = {
-        "[THINK]",
-        "[/THINK]",
-    };
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                        {"id", {
-                            {"type", "string"},
-                            {"pattern", "^[a-zA-Z0-9]{9}$"},
-                        }},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-        data.preserved_tokens.push_back("[TOOL_CALLS]");
-    } else {
-        data.grammar_lazy = false;
-        if (!inputs.json_schema.is_null()) {
-            if (!inputs.grammar.empty()) {
-                throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-            }
-            data.grammar = json_schema_to_grammar(inputs.json_schema);
-        } else {
-            data.grammar = inputs.grammar;
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template &    tmpl,
+                                                          const autoparser::templates_params & inputs) {
      common_chat_params data;
  
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
      auto adjusted_messages = json::array();
      for (const auto & msg : inputs.messages) {
          auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+        auto has_tool_calls        = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
          if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["tool_plan"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
+            auto adjusted_message        = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
              adjusted_messages.push_back(adjusted_message);
          } else {
              adjusted_messages.push_back(msg);
          }
      }
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
-    if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|END_THINKING|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
-        data.prompt += "<|START_THINKING|><|END_THINKING|>";
-    }
-
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call_id", {
-                        {"type", "string"},
-                        // Command-R's template expects an integer string.
-                        {"pattern", "^[0-9]{1,10}$"},
-                    }},
-                    {"tool_name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"parameters", function.at("parameters")},
-                }},
-                {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root",
-            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
-            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
-    });
-    data.grammar_triggers.push_back({
-        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-        // If thinking_forced_open, then we capture the </think> tag in the grammar,
-        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-        std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
-            "(<\\|START_ACTION\\|>)[\\s\\S]*"
-    });
-    data.preserved_tokens = {
-        "<|START_ACTION|>",
-        "<|END_ACTION|>",
-        "<|START_RESPONSE|>",
-        "<|END_RESPONSE|>",
-        "<|START_THINKING|>",
-        "<|END_THINKING|>",
-    };
-    return data;
-}
-
-static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
-    if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
-        throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
-    }
-    const auto & parameters_properties = parameters.at("properties");
-    const auto & parameters_required = parameters.at("required");
-    for (const auto & prop : expected_properties) {
-        if (!parameters_properties.contains(prop)) {
-            throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
-        }
-        if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
-            throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
-        }
-    }
-    if (parameters_properties.size() != expected_properties.size()) {
-        throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
-    }
-}
-
-static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
-    auto builtin_tools = json::array();
-    common_chat_params data;
-    if (!inputs.tools.is_null()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-
-            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
-                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
-                    expect_tool_parameters(name, parameters, {"query"});
-                } else if (name == "python" || name == "code_interpreter") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
-                    expect_tool_parameters(name, parameters, {"code"});
-                } else {
-                    return false;
-                }
-
-                std::vector<std::string> kvs;
-                for (const auto & [key, value] : parameters.at("properties").items()) {
-                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
-                }
-
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
-                builtin_tools.push_back(name);
-
-                return true;
-            };
-
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
-                if (allow_python_tag_builtin_tools) {
-                    handle_builtin_tool(name, parameters);
-                }
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"{\" space "
-                        "( \"\\\"type\\\"\"       space \":\" space \"\\\"function\\\"\"     space \",\" space )? "
-                        "  \"\\\"name\\\"\"       space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
-                        "  \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
-                        "\"}\" space"));
-            });
-            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
-            });
-            if (!builtin_tools.empty()) {
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            // Allow a few empty lines on top of the usual constrained json schema space rule.
-            builder.add_rule("root", string_join(tool_rules, " | "));
-            data.additional_stops.push_back("<|eom_id|>");
-        });
-        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
-            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
-            : COMMON_CHAT_FORMAT_LLAMA_3_X;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
-        {"date_string", format_time(inputs.now, "%d %b %Y")},
-        {"tools_in_user_message", false},
-        {"builtin_tools", builtin_tools},
-    });
-    return data;
-}
-
-static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
  
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
+    auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
  
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+    // Check if we need to replace the return token with end token during
+    // inference and without generation prompt. For more details see:
+    // https://github.com/ggml-org/llama.cpp/issues/15417
+    if (inputs.is_inference && !inputs.add_generation_prompt) {
+        static constexpr std::string_view return_token = "<|return|>";
+        static constexpr std::string_view end_token    = "<|end|>";
+        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
+            prompt.replace(pos, return_token.length(), end_token);
          }
      }
  
-    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { "name",
-                            {
-                                { "type", "string" },
-                                { "const", function.at("name") },
-                            } },
-                            { "arguments", function.at("parameters") },
-                        }                                                                        },
-                    { "required",   json::array({ "name", "arguments" }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                                    "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
-                                    " \"</TOOLCALL>\"");
-        });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the </think> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(</think>\\s*)" :
-                            "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                "(<TOOLCALL>)[\\s\\S]*" });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_qwen3_coder(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
-
-    // Nemotron Nano 3 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
-    bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
+    data.prompt            = prompt;
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
  
+    // These special tokens are required to parse properly, so we include them
+    // even if parse_tool_calls is false.
      data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
+        "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
      };
  
-    if (supports_reasoning) {
-        data.preserved_tokens.insert(data.preserved_tokens.end(), {"<think>", "</think>"});
-    }
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
      auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
-
-    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
-        auto reasoning = p.eps();
-        if (supports_reasoning && inputs.enable_thinking && extract_reasoning) {
-            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
-            if (data.thinking_forced_open) {
-                reasoning = reasoning_content;
-            }
-        }
+    auto include_grammar   = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        const std::string END                = "<|end|>";
+        const std::string START              = "<|start|>";
+        const std::string MESSAGE            = "<|message|>";
+        const std::string CHANNEL            = "<|channel|>";
+        const std::string CONSTRAIN          = "<|constrain|>";
+        const std::string START_ASSISTANT    = START + "assistant";
+        const std::string CHANNEL_ANALYSIS   = CHANNEL + "analysis";
+        const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary";
+        const std::string CHANNEL_FINAL      = CHANNEL + "final";
+
+        auto the_end = END | p.end();
+
+        const std::string analysis_header  = CHANNEL_ANALYSIS + MESSAGE;
+        auto              segment_content  = p.until(END);
+        auto              analysis_segment = extract_reasoning ?
+                                                 p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end :
+                                                 p.content(analysis_header + p.until(END) + the_end);
+
+        auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE });
+        auto content_header         = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) });
+        auto content_segment        = p.rule("content-segment", content_header + channel_header_content + MESSAGE +
+                                                                    p.content(segment_content) + the_end);
  
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+        if (!inputs.json_schema.is_null()) {
+            auto final_header = p.literal(CHANNEL_FINAL);
+            auto constraint   = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content);
+            return p.optional(analysis_segment) + final_header + constraint + MESSAGE +
+                   p.content(p.schema(p.json(), "response-format", inputs.json_schema));
          }
  
+        auto segment  = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment });
+        auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end();
+
          // Tool call parser
          if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
              auto tool_choice = p.choice();
+
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-
-                auto schema_info = common_schema_info();
-                schema_info.resolve_refs(parameters);
-
-                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
-                auto tool_close = p.literal("</function>\n");
-                auto args = p.sequence();
-                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
-                    "\n</parameter>",
-                    "\n<parameter=",
-                    "\n</function>"
-                }));
-
-                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
-                    auto rule_name = "tool-" + name + "-arg-" + param_name;
-
-                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
-                    auto arg_close = p.literal("</parameter>\n");
-                    auto arg_value = p.eps();
-
-                    if (schema_info.resolves_to_string(param_schema)) {
-                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
-                    } else {
-                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
-                    }
+                std::string  name     = function.at("name");
+                const auto & params   = function.at("parameters");
  
-                    // Model may or my not close with </parameter>
-                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
-                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
-                });
+                // Tool call can appear as:
+                // 1. In role header: " to=functions.NAME<|channel|>..."
+                // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..."
+                auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name));
+
+                auto channel    = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS);
+                auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content);
+                auto args       = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
  
-                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
+                // Pattern 1: recipient in role header
+                // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS"
+                auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args);
+
+                // Pattern 2: recipient in channel header
+                // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS"
+                auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args);
+
+                tool_choice |= tool_in_role | tool_in_channel;
              });
  
              auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
              auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
-            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
  
-            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
+            auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT));
+            auto tool_call  = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end());
+
+            return p.choice({ p.trigger_rule("single-tool", tool_call), p.trigger_rule("tools", p.one_or_more(segment) + tool_call) });
          }
  
-        // Content only parser
-        include_grammar = false;
-        return reasoning << p.content(p.rest());
+        return contents;
      });
  
      data.parser = parser.save();
  
      if (include_grammar) {
          data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                  builder.resolve_refs(schema);
              });
              parser.build_grammar(builder, data.grammar_lazy);
          });
  
          data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)"               },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+             "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)"                }
          };
      }
  
      return data;
  }
  
-
-static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
+static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template &    tmpl,
+                                                                   const autoparser::templates_params & inputs) {
      common_chat_params data;
  
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_APERTUS;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = {
+        ">>>all",
+    };
  
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|inner_suffix|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
  
-    // When tools are present, build grammar for the <|tools_prefix|> format
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { function.at("name"), function.at("parameters") }
-                        }                                                                        },
-                    { "required",   json::array({ function.at("name") }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
-                                    "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
-                            });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
-                            "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
-                "(<\\|tools_prefix\\|>)[\\s\\S]*" });
-        data.preserved_tokens = {
-            "<|system_start|>",
-            "<|system_end|>",
-            "<|developer_start|>",
-            "<|developer_end|>",
-            "<|user_start|>",
-            "<|user_end|>",
-            "<|assistant_start|>",
-            "<|assistant_end|>",
-            "<|inner_prefix|>",
-            "<|inner_suffix|>",
-            "<|tools_prefix|>",
-            "<|tools_suffix|>",
-        };
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
-
-    // Hacks to fix the official (broken) prompt.
-    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
-    // until the official template is fixed.
-    if (tmpl.source().find("{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}") != std::string::npos) {
-        // Don't leave the chat dangling after tool results
-        if (string_ends_with(prompt, "<｜tool▁outputs▁end｜>")) {
-            prompt += "<｜end▁of▁sentence｜>";
-            if (inputs.add_generation_prompt) {
-                prompt += "<｜Assistant｜>";
-            }
-        }
-        // Fix up tool call delta example added by Minja
-        prompt = std::regex_replace(
-            prompt,
-            std::regex("(<｜tool▁call▁end｜>)[\\s\\r\\n]*(<｜tool▁outputs▁begin｜>|<｜User｜>)"),
-            "$1<｜tool▁calls▁end｜><｜end▁of▁sentence｜>$2");
-    }
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"function<｜tool▁sep｜>" + name + "\\n"
-                    "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"```<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Pass thinking context for DeepSeek V3.1 template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
-
-    auto prompt = apply(tmpl, inputs,
-                       /* messages_override= */ inputs.messages,
-                       /* tools_override= */ std::nullopt,
-                       additional_context);
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    if (string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"" + name + "<｜tool▁sep｜>"
-                    "\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜>",
-            };
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
-
-    // Handle thinking tags based on prompt ending
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            // Close the thinking tag immediately if thinking is disabled
-            data.prompt += "</think>\n\n";
-        } else {
-            // Mark thinking as forced open (template started with <think>)
-            data.thinking_forced_open = true;
-        }
-    }
-
-    // Preserve MiniMax-M2 special tokens
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<minimax:tool_call>",
-        "</minimax:tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>\n",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">\n",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>\n",
-        /* form.tool_end    = */ "</invoke>\n",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
-
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<|tool_calls_section_begin|>",
-        "<|tool_call_begin|>",
-        "<|tool_call_argument_begin|>",
-        "<|tool_call_end|>",
-        "<|tool_calls_section_end|>",
-        "<|im_end|>",
-        "<|im_system|>",
-        "<|im_middle|>",
-    };
-
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|im_end|>",
-        "<|im_middle|>"
-    });
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
-
-    data.preserved_tokens = {
-        "<thinking>",
-        "</thinking>",
-        "<tool_calls>",
-        "</tool_calls>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "\n";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-
-        if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["thinking"] = msg.at("reasoning_content");
-            adjusted_message.erase("content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Functionary v3.2 format:
+        // - Normal content: >>>all\n{content}
+        // - Tool calls: >>>function_name\n{json_args}
+        // Generation prompt ends with ">>>" so model outputs recipient immediately
  
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
+        // Build content parser for >>>all\n{content}
+        // When tools are present, content stops before the next ">>>" (tool call)
+        // When no tools, content goes until end
+        auto content_until_tool = p.literal(">>>all\n") + p.content(p.until(">>>"));
+        auto content_until_end  = p.literal(">>>all\n") + p.content(p.rest());
  
-    // Check if we need to replace the return token with end token during
-    // inference and without generation prompt. For more details see:
-    // https://github.com/ggml-org/llama.cpp/issues/15417
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|return|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
+        // If no tools or tool_choice is NONE, just parse content
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            // When no tools, just match the prefix and capture everything after
+            return content_until_end + p.end();
          }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GPT_OSS;
  
-    // These special tokens are required to parse properly, so we include them
-    // even if parse_tool_calls is false.
-    data.preserved_tokens = {
-        "<|channel|>",
-        "<|constrain|>",
-        "<|message|>",
-        "<|start|>",
-        "<|end|>",
-    };
+        // Build tool call parsers for each available function
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
  
-    if (!inputs.json_schema.is_null()) {
-        data.grammar_lazy = false;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schema = inputs.json_schema;
-            builder.resolve_refs(schema);
-
-            auto not_end = builder.add_rule("not-end",
-                "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-            auto analysis = builder.add_rule("analysis",
-                "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-            auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
-            auto final = builder.add_rule("final",
-                "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
-                builder.add_schema("response", schema)
+            // Tool format: >>>function_name\n{json_args}
+            auto tool_parser = p.tool(
+                p.tool_open(p.literal(">>>") + p.tool_name(p.literal(name)) + p.literal("\n")) +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
              );
  
-            builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
+            tool_choice |= p.rule("tool-" + name, tool_parser);
          });
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            // tool calls can appear in commentary or analysis channels
-            auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
  
-            std::vector<std::string> tool_rules_recipient_in_role;
-            std::vector<std::string> tool_rules_recipient_in_channel;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                tool_rules_recipient_in_role.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-
-                tool_rules_recipient_in_channel.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-            });
-
-            auto recipient_in_channel = builder.add_rule("recipient_in_channel",
-                channel + " \" to=functions.\" ( " +
-                string_join(tool_rules_recipient_in_channel, " | ") + " )"
-            );
+        auto content_only = content_until_end;
+        auto tools_only = p.trigger_rule("tools", p.one_or_more(tool_choice));
+        auto content_and_tools = content_until_tool + tools_only;
  
-            if (data.grammar_lazy) {
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
-                    string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
-            } else {
-                auto not_end = builder.add_rule("not-end",
-                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-                auto analysis = builder.add_rule("analysis",
-                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-                auto commentary = builder.add_rule("commentary",
-                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root",
-                    "( " + analysis + " \"<|start|>assistant\" )? " +
-                    "( " + commentary + " \"<|start|>assistant\" )? " +
-                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
-                );
+        if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+            if (inputs.parallel_tool_calls) {
+                return p.choice({ content_and_tools, tools_only }) + p.end();
              }
-
-            // Trigger on tool calls that appear in the commentary channel
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|channel\\|>(?:commentary|analysis) to"
-            });
-
-            // Trigger tool calls that appear in the role section, either at the
-            // start or in the middle.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "^ to"
-            });
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|start\\|>assistant to"
-            });
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    std::string prompt = apply(tmpl, inputs);
-
-    // match the existing trimming behavior
-    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
-        prompt.erase(0, tmpl.bos_token().size());
-    }
-    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
-        prompt.erase(prompt.size() - tmpl.eos_token().size());
-    }
-    if (string_ends_with(prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+            return p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
          }
-    }
-
-    // add GLM preserved tokens
-    data.preserved_tokens = {
-        "<|endoftext|>",
-        "[MASK]",
-        "[gMASK]",
-        "[sMASK]",
-        "<sop>",
-        "<eop>",
-        "<|system|>",
-        "<|user|>",
-        "<|assistant|>",
-        "<|observation|>",
-        "<|begin_of_image|>",
-        "<|end_of_image|>",
-        "<|begin_of_video|>",
-        "<|end_of_video|>",
-        "<|begin_of_audio|>",
-        "<|end_of_audio|>",
-        "<|begin_of_transcription|>",
-        "<|end_of_transcription|>",
-        "<|code_prefix|>",
-        "<|code_middle|>",
-        "<|code_suffix|>",
-        "/nothink",
-        "<think>",
-        "</think>",
-        "<tool_call>",
-        "</tool_call>",
-        "<arg_key>",
-        "</arg_key>",
-        "<arg_value>",
-        "</arg_value>"
-    };
-
-    // extra GLM 4.5 stop word
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|user|>",
-        "<|observation|>"
+        if (inputs.parallel_tool_calls) {
+            return p.choice({ content_and_tools, content_only, tools_only }) + p.end();
+        }
+        auto content_and_tool = content_until_tool + tool_choice;
+        return p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
      });
  
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "",
-        /* form.tool_start  = */ "\n<tool_call>",
-        /* form.tool_sep    = */ "\n",
-        /* form.key_start   = */ "<arg_key>",
-        /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
-        /* form.val_end     = */ "</arg_value>\n",
-        /* form.tool_end    = */ "</tool_call>\n",
-        /* form.scope_end   = */ "",
-    };
-    build_grammar_xml_tool_call(data, inputs.tools, form);
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    LOG_DBG("%s\n", __func__);
-    common_chat_params data;
-    const std::optional<json> additional_context = json {
-        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
-        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
-    };
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
-        data.preserved_tokens = {
-            " functools[",
-        };
-        data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
-    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
-    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> first_tool_rules;
-            std::vector<std::string> subsequent_tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                std::string args_pattern = "[\\s\\S]*";
-                auto args_rule = builder.add_schema(name + "-args", parameters);
-                if (name == "python") {
-                    args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
-                } else {
-                    args_pattern = "\\{" + args_pattern;
-                }
-                auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
-                first_tool_rules.push_back(call_rule);
-                if (inputs.parallel_tool_calls) {
-                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
-                }
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                    "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
-                });
-            });
-            data.preserved_tokens = {
-                "<|end_header_id|>",
-            };
-            auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
-            if (inputs.parallel_tool_calls) {
-                auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
-                builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
-            } else {
-                builder.add_rule("root", first_rule);
-            }
-
-        });
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
-    common_chat_params data;
+    data.parser = parser.save();
  
-    if (!inputs.tools.is_null()) {
-        std::string python_code_argument_name;
-        auto has_raw_python = false;
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
  
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
          data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                const auto & parameters = function.at("parameters");
-                std::string name = function.at("name");
-                if (name == "python" || name == "ipython") {
-                    if (!parameters.contains("type")) {
-                        throw std::runtime_error("Missing type in python tool");
-                    }
-                    has_raw_python = true;
-                    const auto & type = parameters.at("type");
-                    if (type == "object") {
-                        auto properties = parameters.at("properties");
-                        for (auto it = properties.begin(); it != properties.end(); ++it) {
-                            if (it.value().at("type") == "string") {
-                                if (!python_code_argument_name.empty()) {
-                                    throw std::runtime_error("Multiple string arguments found in python tool");
-                                }
-                                python_code_argument_name = it.key();
-                            }
-                        }
-                        if (python_code_argument_name.empty()) {
-                            throw std::runtime_error("No string argument found in python tool");
-                        }
-                    } else if (type != "string") {
-                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
-                    }
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
              });
-            if (has_raw_python) {
-                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
-            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
-            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
+            parser.build_grammar(builder, data.grammar_lazy);
          });
-        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-
-    data.prompt = apply(tmpl, inputs);
-    // TODO: if (has_raw_python)
-    return data;
-}
  
-static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    json extra_context = json {
-        {"enable_thinking", inputs.enable_thinking},
-    };
-    extra_context.update(inputs.extra_context);
-
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
-    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!extra_context["enable_thinking"]) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            std::vector<std::string> tool_call_alts;
-            std::vector<std::string> escaped_names;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_schema(name + "-call", {
-                    {"type", "object"},
-                    {"properties", json {
-                        {"name", json {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                }));
-                tool_call_alts.push_back(builder.add_rule(
-                    name + "-function-tag",
-                    "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
-                    builder.add_schema(name + "-args", parameters) + " "
-                    "\"</function>\" space"));
-
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                    "<function=" + name + ">",
-                });
-                auto escaped_name = regex_escape(name);
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                    "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
-                });
-                escaped_names.push_back(escaped_name);
-            });
-            auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
-            std::vector<std::string> alt_tags {
-                any_tool_call,
-                "\"<tool_call>\" space "     + any_tool_call + " \"</tool_call>\"",
-                // The rest is just to accommodate common "good bad" outputs.
-                "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
-                "\"<response>\"  space "     + any_tool_call + " \"</response>\"",
-                "\"<tools>\"     space "     + any_tool_call + " \"</tools>\"",
-                "\"<json>\"      space "     + any_tool_call + " \"</json>\"",
-                "\"<xml>\"      space "     + any_tool_call + " \"</xml>\"",
-                "\"<JSON>\"      space "     + any_tool_call + " \"</JSON>\"",
-            };
-            auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
-            tool_call_alts.push_back(wrappable_tool_call);
-            tool_call_alts.push_back(
-                "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-            // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
-                    "\\s*("
-                    "(?:<tool_call>"
-                    "|<function"
-                    "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
-                    "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
-                    ")"
-                    ")"
-                ),
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-                "<function",
-                "<tools>",
-                "</tools>",
-                "<response>",
-                "</response>",
-                "<function_call>",
-                "</function_call>",
-                "<json>",
-                "</json>",
-                "<JSON>",
-                "</JSON>",
-                "```",
-                "```json",
-                "```xml",
-            };
-        });
+        // Grammar trigger for when the model starts outputting a tool call,
+        // i.e. a ">>>" followed by a recipient other than "all" (the generation prompt already ends with the initial ">>>")
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, ">>>(?!all)" }
+        };
      }
  
      return data;
  }
  
-static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+// The ID contains both the function name and an incrementing counter
+static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template &    tmpl,
+                                                          const autoparser::templates_params & inputs) {
      common_chat_params data;
  
-    // Pass thinking context for Granite template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
+    data.prompt            = common_chat_template_direct_apply(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.preserved_tokens  = {
+        "<|tool_calls_section_begin|>",
+        "<|tool_calls_section_end|>",
+        "<|tool_call_begin|>",
+        "<|tool_call_argument_begin|>",
+        "<|tool_call_end|>",
+        "<think>",
+        "</think>",
      };
  
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
-    data.format = COMMON_CHAT_FORMAT_GRANITE;
-
-    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // Granite uses <|tool_call|> followed by JSON list
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
-"-args", {
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                })));
-            });
-
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
-
-            if (data.thinking_forced_open) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
-            } else {
-                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
-            }
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                "<|tool_call|>"
-            });
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Kimi K2 Thinking format:
+        // - Reasoning: <think>{reasoning}</think>
+        // - Content: text after reasoning
+        // - Tool calls section:
+        //   <|tool_calls_section_begin|>
+        //   <|tool_call_begin|>functions.<name>:<index><|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+        //   ...
+        //   <|tool_calls_section_end|>
+        // The ID format is: functions.<function_name>:<counter> where counter is 0, 1, 2, ...
+
+        // Tool call markers
+        const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
+        const std::string SECTION_END   = "<|tool_calls_section_end|>";
+        const std::string CALL_BEGIN    = "<|tool_call_begin|>";
+        const std::string ARGS_BEGIN    = "<|tool_call_argument_begin|>";
+        const std::string CALL_END      = "<|tool_call_end|>";
+
+        const std::string THINK_START   = "<think>";
+        const std::string THINK_END     = "</think>";
+
+        auto end = p.end();
+
+        // Note: this model frequently diverges from its expected tool-calling format.
+        // For example, it can call tools at the end of reasoning without emitting </think>, so reasoning capture also stops at the tool-call markers.
+        auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning(
+            p.until_one_of({ THINK_END, "<|tool_calls_section_begin|>", "<|tool_call_begin|>" })) +
+            p.optional(p.literal(THINK_END))) : p.eps();
+
+
+        // Content only parser (no tools)
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return reasoning + p.content(p.rest()) + end;
+        }
+
+        // Build tool call parsers for each available function
+        // The ID format is: functions.<name>:<index>
+        // We need to match: functions.<name>:<digits>
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
+
+            // Match: functions.<name>:<digits>
+            // Capture the full call id (functions.<name>:<digits>) using tool_id tag
+            auto tool_id = p.tool_id(p.literal("functions.") + p.tool_name(p.literal(name)) + p.literal(":") + p.chars("[0-9]", 1, -1));
+            auto tool_parser = p.tool(
+                p.tool_open(tool_id + p.literal(ARGS_BEGIN)) +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) +
+                p.tool_close(p.optional((p.literal(CALL_END))))
+            );
  
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-                "<|tool_call|>",
-            };
+            tool_choice |= p.rule("tool-" + name, tool_parser);
          });
-    } else {
-        // Handle thinking tags for non-tool responses
-        if (data.thinking_forced_open && inputs.enable_thinking) {
-            data.grammar_lazy = false;
-            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-            };
-        }
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy `reasoning_content` to `reasoning`
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
-            auto adjusted_message = msg;
-            adjusted_message["reasoning"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto include_grammar = true;
-
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
-    // Check if we need to replace the flush token with end token during inference and without generation prompt.
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|flush|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
-        "<|think|>",
-        "<|content|>",
-        "<|begin|>",
-        "<|end|>",
-        "<|tool_calls|>",
-        "<|tool_call:begin|>",
-        "<|tool_call:end|>",
-        "<|tool_call:name|>",
-        "<|tool_call:args|>",
-    };
-
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto lit_think = p.atomic(p.literal("<|think|>"));
-        auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
-        auto lit_content = p.atomic(p.literal("<|content|>"));
-        auto lit_end = p.atomic(p.literal("<|end|>"));
-        auto parser_until_end = p.until("<|end|>");
-
-        // reasoning <- "<|think|>" (!"<|end|>" .)*
-        auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));
-
-        // content <- "<|content|>" (!"<|end|>" .)*
-        auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));
-
-        // wrap_choice(items) <- item-choice wrapped*
-        // item-choice        <- items[0] / ... / items[n]
-        // wrapped            <- "<|end|><|begin|>assistant" item-choice
-        auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
-            auto choice = p.choice(items);
-            return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
-        };
-
-        // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
-        auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
-            auto seq = p.sequence();
-            for (auto i = 0u; i < items.size(); i++) {
-                if (i == 0) {
-                    seq += items[i];
-                    continue;
-                }
-                seq += lit_end + lit_assistant_begin + items[i];
-            }
-            return seq;
-        };
-
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
-            return p.choice({
-                wrap_seq({parser_reasoning, parser_response_format}),
-                wrap_seq({parser_response_format})
-            });
-        }
  
-        auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
-        auto lit_tool_call_name = p.literal("<|tool_call:name|>");
-        auto lit_tool_call_args = p.literal("<|tool_call:args|>");
-        auto lit_tool_call_end = p.literal("<|tool_call:end|>");
+        // Tool calls section: <|tool_calls_section_begin|> tool_calls <|tool_calls_section_end|>
+        auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+        auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
+        // Use trigger_rule so grammar generator knows where to start generating rules
+        auto tool_calls = p.rule("tool-calls",
+            p.optional(p.literal(SECTION_BEGIN)) +
+            p.trigger_rule("tool-call", p.repeat(CALL_BEGIN + tool_choice, min_calls, max_calls) +
+                p.optional(p.literal(SECTION_END)))
+        );
  
-        // Tool call parser
-        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
-            auto parser_tool_call = p.choice();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+        auto content_before_tools = p.content(p.until_one_of({ SECTION_BEGIN, CALL_BEGIN }));
  
-                // tool(name, schema) <- name "<|tool_call:args|>" schema
-                parser_tool_call |= p.rule("tool-" + name,
-                    p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
-            });
-
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-
-            // tool-calls  <- "<|tool_calls|>" tool-call+
-            // tool-call   <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
-            // call-id     <- [a-zA-Z0-9_-]+
-            // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
-            auto parser_tool_calls = p.trigger_rule("tool-calls",
-                p.atomic(p.literal("<|tool_calls|>"))
-                + p.repeat(
-                    p.tool_open(
-                        lit_tool_call_begin
-                        + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
-                        + lit_tool_call_name
-                        + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
-                    + parser_tool_call
-                    + p.tool_close(lit_tool_call_end),
-                /* min = */ 1,
-                /* max = */ max_calls));
-
-            if (min_calls == 1) {
-                // If required, then try any combination of the reasoning, content, and tool call
-                return p.choice({
-                    wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
-                    wrap_seq({parser_reasoning, parser_tool_calls}),
-                    wrap_seq({parser_content, parser_tool_calls}),
-                    wrap_seq({parser_tool_calls})
-                });
-            }
-
-            return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
-        }
-
-        // Content only parser
-        include_grammar = false;
-        return wrap_choice({parser_reasoning, parser_content});
+        return reasoning + content_before_tools + tool_calls + end;
      });
  
      data.parser = parser.save();
  
      if (include_grammar) {
-        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
              foreach_function(inputs.tools, [&](const json & tool) {
                  const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                  builder.resolve_refs(schema);
              });
              parser.build_grammar(builder, data.grammar_lazy);
          });
  
          data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call_begin|>" }
          };
      }
  
      return data;
  }
  
-static common_chat_params common_chat_params_init_exaone_moe(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_EXAONE_MOE;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>\n\n";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                // Expect: <tool_call>{"name": "<name>", "arguments": {...}}</tool_call>
-                tool_rules.push_back(builder.add_rule(
-                    name + "-call",
-                    "\"<tool_call>\" space " +
-                        builder.add_schema(name + "-obj", json{
-                            {"type", "object"},
-                            {"properties", {
-                                {"name",      json{{"const", name}}},
-                                {"arguments", parameters},
-                            }},
-                            {"required", json::array({"name", "arguments"})},
-                        }) +
-                    " space \"</tool_call>\" space"));
-            });
-
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)?" : "") +
-                    "(<tool_call>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-            };
-        });
-    }
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // This template does not support tools or reasoning
-    // we just need to transform the messages into the correct schema
-
-    templates_params inputs_new = inputs;
-    json & messages = inputs_new.messages;
-
-    // default to chat_template_kwargs, or en-GB if not specified
-    std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
-    std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
-
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("role") && message["role"].get<std::string>() != "user") {
-            continue;
-        }
-        if (!message.contains("content")) {
-            message["content"] = json::array();
-        }
-        if (message.contains("content") && !message["content"].is_array()) {
-            auto content_str = message["content"].get<std::string>();
-            // default to en-GB if not specified (to make common_chat_format_example works)
-            auto src_lang = message.contains("source_lang_code")
-                        ? message["source_lang_code"].get<std::string>() : default_src_lang;
-            auto tgt_lang = message.contains("target_lang_code")
-                        ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
-            message["content"] = json::array({
-                json{
-                    {"type", "text"},
-                    {"text", content_str},
-                    {"source_lang_code", src_lang},
-                    {"target_lang_code", tgt_lang},
-                }
-            });
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    data.grammar_lazy = false;
-    if (!inputs.json_schema.is_null()) {
-        if (!inputs.grammar.empty()) {
-            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-        }
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else {
-        data.grammar = inputs.grammar;
-    }
-    return data;
-}
-
-static common_chat_params common_chat_params_init_seed_oss(
-    const common_chat_template         & tmpl,
-    templates_params                   & params,
-    const common_chat_templates_inputs & inputs)
-{
-    common_chat_params data;
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
-    if (string_ends_with(data.prompt, "<seed:think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</seed:think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (params.tools.is_array() && !params.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(params.tools, [&](const json & tool) {
-                const auto & function   = tool.at("function");
-                std::string  name       = function.at("name");
-                auto         parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // Create rule for Seed-OSS function call format
-                std::string param_rules;
-                if (parameters.contains("properties")) {
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
-                                       "\"</parameter>\"";
-                    }
-                }
-
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                                                      "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
-                                                          param_rules +
-                                                          " \"</function>\" space \"</seed:tool_call>\""));
-            });
-
-            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });
-
-            data.preserved_tokens = {
-                "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
-                "<function=",   "</function>",   "<parameter=",      "</parameter>",
-            };
-
-            builder.add_rule("root", string_join(tool_rules, " | "));
-        });
-    }
-    return data;
-}
-
-// various workarounds for known issues with certain templates or model behaviors
-// TODO @ngxson : improve this (how?)
  namespace workaround {
  
  // if first message is system and template does not support it, merge it with next message
@@ -2944,6 +1293,15 @@ static void system_message_not_supported(json & messages) {
      }
  }
  
+// Workaround: tool-call messages get "content" = "" when the field is missing or JSON null.
+static void requires_non_null_content(json & messages) {
+    GGML_ASSERT(messages.is_array());
+    for (auto & message : messages) {
+        const bool lacks_content = !message.contains("content") || message.at("content").is_null();
+        if (message.contains("tool_calls") && lacks_content) { message["content"] = ""; }
+    }
+}
+
  static void func_args_not_string(json & messages) {
      GGML_ASSERT(messages.is_array());
      for (auto & message : messages) {
@@ -2964,71 +1322,21 @@ static void func_args_not_string(json & messages) {
      }
  }
  
-static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls")) {
-            auto tool_calls_new = json{
-                {"tool_calls", message.at("tool_calls")}
-            };
-            message.erase("tool_calls");
-            auto content = message.at("content");
-            std::string content_new = content.is_null() ? "" : content.get<std::string>();
-            message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
-        }
-    }
  }
  
-// TODO @ngxson : we may remove support for generic schema in the future
-static void use_generic_schema(json & messages) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
-            auto & tool_calls = message.at("tool_calls");
-            for (auto & tool_call : tool_calls) {
-                if (tool_call.contains("type") && tool_call.at("type") == "function" &&
-                    tool_call.contains("function") && tool_call.at("function").is_object()) {
-                    // Copy values before erasing to avoid use-after-free
-                    json name_value;
-                    json arguments_value;
-                    json id_value;
-                    const auto & function = tool_call.at("function");
-                    if (function.contains("name")) {
-                        name_value = function.at("name");
-                    }
-                    if (function.contains("arguments")) {
-                        arguments_value = function.at("arguments");
-                    }
-                    if (tool_call.contains("id")) {
-                        id_value = tool_call.at("id");
-                    }
-                    // Now safely erase and assign in the correct order
-                    tool_call.erase("type");
-                    tool_call.erase("function");
-                    tool_call.erase("id");
-                    // Reassign in desired order: name, arguments, id
-                    if (!name_value.is_null()) {
-                        tool_call["name"] = name_value;
-                    }
-                    if (!arguments_value.is_null()) {
-                        tool_call["arguments"] = arguments_value;
-                    }
-                    if (!id_value.is_null()) {
-                        tool_call["id"] = id_value;
-                    }
-                }
-            }
-        }
-    }
+// Base extra context for template rendering: the current date under "datetime" and "date_string".
+static json common_chat_extra_context() {
+    auto now   = std::chrono::system_clock::now();
+    json extra = json::object();
+    // month-first text goes under "datetime", day-first text under "date_string"
+    extra["datetime"]    = std::string(format_time(now, "%b %d %Y"));
+    extra["date_string"] = std::string(format_time(now, "%d %b %Y"));
+    return extra;
  }
  
-} // namespace workaround
-
-static common_chat_params common_chat_templates_apply_jinja(
-    const struct common_chat_templates        * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    templates_params params;
+static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates *        tmpls,
+                                                            const struct common_chat_templates_inputs & inputs) {
+    autoparser::templates_params params;
      params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
      const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
          ? *tmpls->template_tool_use
@@ -3049,7 +1357,14 @@ static common_chat_params common_chat_templates_apply_jinja(
          workaround::system_message_not_supported(params.messages);
      }
  
-    params.extra_context = json::object();
+    if (tmpl.original_caps().supports_tool_calls) {
+        // some templates will require the content field in tool call messages
+        // to still be non-null, this puts an empty string everywhere where the
+        // content field is null
+        workaround::requires_non_null_content(params.messages);
+    }
+
+    params.extra_context = common_chat_extra_context();
      for (auto el : inputs.chat_template_kwargs) {
          params.extra_context[el.first] = json::parse(el.second);
      }
@@ -3058,229 +1373,71 @@ static common_chat_params common_chat_templates_apply_jinja(
          params.json_schema = json::parse(inputs.json_schema);
      }
  
-    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
-        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
-        params.parallel_tool_calls = false;
-    } else {
-        params.parallel_tool_calls = inputs.parallel_tool_calls;
-    }
+    // if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
+    //     LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
+    //     params.parallel_tool_calls = false;
+    // } else {
+    params.parallel_tool_calls = inputs.parallel_tool_calls;
+    //}
  
      if (params.tools.is_array()) {
          if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
              throw std::runtime_error("Cannot specify grammar with tools");
          }
          if (caps.supports_tool_calls && !caps.supports_tools) {
-            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
-        }
-    }
-
-    // DeepSeek V3.1: detect based on specific patterns in the template
-    if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
-        params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_v3_1(tmpl, params);
-    }
-
-    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
-    if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_r1(tmpl, params);
-    }
-
-    // Command R7B: : use handler in all cases except json schema (thinking / tools).
-    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_command_r7b(tmpl, params);
-    }
-
-    // Granite (IBM) - detects thinking / tools support
-    if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        workaround::use_generic_schema(params.messages);
-        workaround::move_tool_calls_to_content(params.messages);
-        return common_chat_params_init_granite(tmpl, params);
-    }
-
-    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
-    if (src.find("[gMASK]<sop>") != std::string::npos &&
-        src.find("<arg_key>") != std::string::npos &&
-        src.find("<arg_value>") != std::string::npos &&
-        params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        if (!params.extra_context.contains("clear_thinking")) {
-            // by default, do not clear reasoning_content (added since GLM-4.7)
-            params.extra_context["clear_thinking"] = false;
+            LOG_WRN(
+                "Template supports tool calls but does not natively describe tools. The fallback behaviour used may "
+                "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
          }
-        return common_chat_params_init_glm_4_5(tmpl, params);
-    }
-
-    // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    // Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
-    // Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format.
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<function=") != std::string::npos &&
-        src.find("<parameter=") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_qwen3_coder(tmpl, params);
-    }
-
-    // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
-    if (src.find("<tools>") != std::string::npos &&
-        src.find("# Tools") != std::string::npos &&
-        src.find("</tools>") != std::string::npos &&
-        src.find("<tool_calls>") != std::string::npos &&
-        src.find("</tool_calls>") != std::string::npos &&
-        src.find("<tool_response>") != std::string::npos) {
-        return common_chat_params_init_xiaomi_mimo(tmpl, params);
      }
  
-    // EXAONE MoE format detection
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<tool_result>") != std::string::npos &&
-        src.find("<|tool_declare|>") != std::string::npos) {
-        return common_chat_params_init_exaone_moe(tmpl, params);
-    }
-
-    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_hermes_2_pro(tmpl, params);
+    // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
+    // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
+    if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
+        src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
+        LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
+        return common_chat_params_init_ministral_3(tmpl, params);
      }
  
-    // GPT-OSS
+    // GPT-OSS - has unique channel-based structure that needs dedicated handler
      if (src.find("<|channel|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: GPT-OSS\n");
          return common_chat_params_init_gpt_oss(tmpl, params);
      }
  
-    // Seed-OSS
-    if (src.find("<seed:think>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_seed_oss(tmpl, params, inputs);
-    }
-
-    // Nemotron v2
-    if (src.find("<SPECIAL_10>") != std::string::npos) {
-        return common_chat_params_init_nemotron_v2(tmpl, params);
-    }
-
-    // Apertus format detection
-    if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
-        return common_chat_params_init_apertus(tmpl, params);
-    }
-
-    // LFM2 (w/ tools)
-    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
-        src.find("]<|tool_list_end|>") != std::string::npos) {
-        return common_chat_params_init_lfm2(tmpl, params);
-    }
-
-    // MiniMax-M2 format detection
-    if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_minimax_m2(tmpl, params);
-    }
-
-    // Kimi K2 format detection
-    if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
-        src.find("<|tool_calls_section_begin|>") != std::string::npos &&
-        src.find("## Return of") != std::string::npos) {
-        return common_chat_params_init_kimi_k2(tmpl, params);
-    }
-
-    // Apriel 1.5 format detection
-    if (src.find("<thinking>") != std::string::npos &&
-        src.find("</thinking>") != std::string::npos &&
-        src.find("<available_tools>") != std::string::npos &&
-        src.find("<|assistant|>") != std::string::npos &&
-        src.find("<|tool_result|>") != std::string::npos &&
-        src.find("<tool_calls>[") != std::string::npos &&
-        src.find("]</tool_calls>") != std::string::npos) {
-        return common_chat_params_init_apriel_1_5(tmpl, params);
-    }
-
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
-    }
-
-    // Use generic handler when mixing tools + JSON schema.
-    // TODO: support that mix in handlers below.
-    if ((params.tools.is_array() && params.json_schema.is_object())) {
-        return common_chat_params_init_generic(tmpl, params);
-    }
-
-    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
-    if (src.find(">>>all") != std::string::npos) {
+    // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
+    // Detection: template has ">>>all" for content and ">>>${recipient}" for tool calls
+    if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
+        LOG_DBG("Using specialized template: Functionary v3.2\n");
          return common_chat_params_init_functionary_v3_2(tmpl, params);
      }
  
-    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
-    if (src.find(" functools[") != std::string::npos) {
-        return common_chat_params_init_firefunction_v2(tmpl, params);
-    }
-
-    // Functionary v3.1 (w/ tools)
-    if (src.find("<|start_header_id|>") != std::string::npos
-        && src.find("<function=") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
-    }
-
-    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
-    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
-        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
-    }
-
-    // Ministral/Mistral Large 3
-    if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
-        src.find("[TOOL_CALLS]") != std::string::npos &&
-        src.find("[ARGS]") != std::string::npos) {
-        return common_chat_params_init_ministral_3(tmpl, params);
-    }
-
-    if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
-        return common_chat_params_init_magistral(tmpl, params);
-    }
-
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
-    }
-
-    // TranslateGemma
-    if (src.find("[source_lang_code]") != std::string::npos &&
-        src.find("[target_lang_code]") != std::string::npos) {
-        return common_chat_params_init_translate_gemma(tmpl, params);
-    }
-
-    // Plain handler (no tools)
-    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-        return common_chat_params_init_without_tools(tmpl, params);
+    // Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+    // Detection: template contains both "<|tool_calls_section_begin|>" and "<|tool_call_begin|>"
+    if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
+        src.find("<|tool_call_begin|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: Kimi K2 Thinking\n");
+        return common_chat_params_init_kimi_k2(tmpl, params);
      }
  
-    // Mistral Nemo (w/ tools)
-    if (src.find("[TOOL_CALLS]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_mistral_nemo(tmpl, params);
+    try {
+        LOG_DBG("Using differential autoparser\n");
+        struct autoparser::autoparser autoparser;
+        autoparser.analyze_template(tmpl);
+        auto auto_params = autoparser::peg_generator::generate_parser(tmpl, params, autoparser);
+        auto_params.supports_thinking = autoparser.reasoning.mode != autoparser::reasoning_mode::NONE;
+        return auto_params;
+    } catch (const std::exception & e) {
+        throw std::invalid_argument(std::string("Unable to generate parser for this template. Automatic parser generation failed: ") + e.what());
      }
-
-    // Generic fallback
-    workaround::func_args_not_string(params.messages);
-    workaround::use_generic_schema(params.messages);
-    workaround::move_tool_calls_to_content(params.messages);
-    return common_chat_params_init_generic(tmpl, params);
  }
  
  // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
-static common_chat_params common_chat_templates_apply_legacy(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    size_t alloc_size = 0;
+static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates *        tmpls,
+                                                             const struct common_chat_templates_inputs & inputs) {
+    size_t                          alloc_size = 0;
      std::vector<llama_chat_message> chat;
-    std::vector<std::string> contents;
+    std::vector<std::string>        contents;
  
      for (const auto & msg : inputs.messages) {
          auto content = msg.content;
@@ -3290,25 +1447,27 @@ static common_chat_params common_chat_templates_apply_legacy(
                  continue;
              }
              if (!content.empty()) {
-                content += "\n";;
+                content += "\n";
+                ;
              }
              content += part.text;
          }
          contents.emplace_back(std::move(content));
      }
      for (size_t i = 0; i < contents.size(); ++i) {
-        const auto & msg = inputs.messages[i];
+        const auto & msg     = inputs.messages[i];
          const auto & content = contents[i];
-        chat.push_back({msg.role.c_str(), content.c_str()});
+        chat.push_back({ msg.role.c_str(), content.c_str() });
          size_t msg_size = msg.role.size() + content.size();
-        alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
+        alloc_size += msg_size + (msg_size / 4);  // == msg_size * 1.25 but avoiding float ops
      }
  
      std::vector<char> buf(alloc_size);
  
      // run the first time to get the total output length
      const auto & src = tmpls->template_default->source();
-    int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+    int32_t      res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt,
+                                                 buf.data(), buf.size());
  
      // error: chat template is not supported
      if (res < 0) {
@@ -3320,7 +1479,8 @@ static common_chat_params common_chat_templates_apply_legacy(
      // if it turns out that our buffer is too small, we resize it
      if ((size_t) res > buf.size()) {
          buf.resize(res);
-        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(),
+                                        buf.size());
      }
  
      // for safety, we check the result again
@@ -3338,14 +1498,72 @@ static common_chat_params common_chat_templates_apply_legacy(
      return params;
  }
  
-common_chat_params common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+// Public entry point for applying a chat template: dispatches to the Jinja route when
+// `inputs.use_jinja` is set and to the legacy route otherwise. `tmpls` must not be null.
+common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                               const struct common_chat_templates_inputs & inputs) {
      GGML_ASSERT(tmpls != nullptr);
-    return inputs.use_jinja
-        ? common_chat_templates_apply_jinja(tmpls, inputs)
-        : common_chat_templates_apply_legacy(tmpls, inputs);
+    if (inputs.use_jinja) {
+        return common_chat_templates_apply_jinja(tmpls, inputs);
+    }
+    return common_chat_templates_apply_legacy(tmpls, inputs);
+}
+
+// Parses `input` with the PEG parser carried in `params.parser`.
+// Thin forwarding wrapper around common_chat_peg_parse(); `is_partial` and `params` are passed through.
+common_chat_msg common_chat_parse(const std::string &               input,
+                                  bool                              is_partial,
+                                  const common_chat_parser_params & params) {
+    const common_peg_arena & arena = params.parser;
+    return common_chat_peg_parse(arena, input, is_partial, params);
+}
+
+// Parses `input` with a PEG parser and maps the resulting AST onto a message with role "assistant".
+//
+// src_parser: parser arena to use; when it is empty, a content-only fallback parser is built instead.
+// input:      raw text to parse.
+// is_partial: when true, a failed parse that still advanced past position 0 yields a best-effort
+//             message instead of an exception.
+// params:     supplies the format (used for logging only) and the `debug` flag that enables AST dumps.
+//
+// Throws std::runtime_error when parsing fails and no partial result may be returned.
+common_chat_msg common_chat_peg_parse(const common_peg_arena &          src_parser,
+                                      const std::string &               input,
+                                      bool                              is_partial,
+                                      const common_chat_parser_params & params) {
+    // Build the fallback only when it is needed and bind `parser` to an lvalue in both cases.
+    // Mixing a temporary and `src_parser` in a single conditional expression would produce a
+    // prvalue, i.e. copy a non-empty `src_parser` on every call.
+    const bool             use_fallback = src_parser.empty();
+    const common_peg_arena fallback     = use_fallback ?
+        build_chat_peg_parser([](common_chat_peg_builder & p) { return p.content(p.rest()) + p.end(); }) :
+        common_peg_arena{};
+    const common_peg_arena & parser = use_fallback ? fallback : src_parser;
+
+    if (use_fallback) {
+        LOG_WRN("No parser definition detected, assuming pure content parser.\n");
+    }
+
+    LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());
+
+    common_peg_parse_context ctx(input, is_partial);
+    ctx.debug   = params.debug;
+    auto result = parser.parse(ctx);
+
+    // During partial parsing a failure is tolerated as long as some input was consumed, so that
+    // streaming keeps working (e.g. for formats like FUNC_MARKDOWN_CODE_BLOCK).
+    const bool failed = result.fail();
+    if (failed && !(is_partial && result.end > 0)) {
+        // Guard the offset so that building the error message can never throw std::out_of_range itself.
+        const size_t      pos       = result.end;
+        const std::string remaining = pos < input.size() ? input.substr(pos) : std::string();
+        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
+                                 remaining);
+    }
+
+    common_chat_msg msg;
+    msg.role = "assistant";
+
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    if (ctx.debug) {
+        if (failed) {
+            fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
+        } else {
+            fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
+        }
+        fflush(stderr);
+    }
+
+    // Only complete messages are logged; a tolerated failure implies is_partial, so it is never logged here.
+    if (!is_partial) {
+        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str());
+    }
+    return msg;
  }
  
  std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
@@ -3353,3 +1571,4 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
      GGML_ASSERT(chat_templates->template_default != nullptr);
      return chat_templates->template_default->caps.to_map();
  }
+
diff --git a/common/chat.h b/common/chat.h

index 6f0b9409ec928d6201b202df5eb0269201f2df7d..005cc5c8b3fd63c3284fece71fa1b272a62321d3 100644 (file)
--- a/common/chat.h
+++ b/common/chat.h
@@ -3,17 +3,30 @@
  #pragma once
  
  #include "common.h"
+#include "jinja/parser.h"
+#include "nlohmann/json_fwd.hpp"
  #include "peg-parser.h"
-#include <functional>
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
+#include "nlohmann/json.hpp"
+
  #include <chrono>
+#include <functional>
+#include <map>
  #include <string>
  #include <vector>
-#include <map>
+
+using chat_template_caps = jinja::caps;
+using json = nlohmann::ordered_json;
  
  #include <nlohmann/json_fwd.hpp>
  
  struct common_chat_templates;
  
+namespace autoparser {
+struct templates_params;
+}  // namespace autoparser
+
  struct common_chat_tool_call {
      std::string name;
      std::string arguments;
@@ -38,21 +51,85 @@ struct common_chat_msg_content_part {
      }
  };
  
+// A parsed Jinja chat template together with its BOS/EOS token strings and the capabilities
+// detected for it.
+struct common_chat_template {
+    jinja::program prog;
+    std::string bos_tok;
+    std::string eos_tok;
+    std::string src;
+    chat_template_caps caps;
+
+    // Tokenizes and parses `src`, keeps the source text reported by the lexer plus the given
+    // BOS/EOS tokens, then runs capability detection on the parsed program.
+    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+        jinja::lexer lexer;
+        auto lexer_res = lexer.tokenize(src);
+        this->prog = jinja::parse_from_tokens(lexer_res);
+
+        this->src = lexer_res.source;
+        this->bos_tok = bos_token;
+        this->eos_tok = eos_token;
+
+        this->caps = jinja::caps_get(prog);
+        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+    }
+
+    const std::string & source() const { return src; }
+    const std::string & bos_token() const { return bos_tok; }
+    const std::string & eos_token() const { return eos_tok; }
+
+    // Returns a copy of `messages` (must be a JSON array) that carries `system_prompt`.
+    //  - caps.supports_system_role is false: the prompt is prepended to the first message's content,
+    //    separated by a blank line; with no messages, a single "user" message holding the prompt is created.
+    //  - caps.supports_system_role is true: a leading "system" message is inserted, or the content of an
+    //    already leading "system" message is replaced.
+    // TODO: this is ugly, refactor it somehow
+    json add_system(const json & messages, const std::string & system_prompt) const {
+        GGML_ASSERT(messages.is_array());
+        auto msgs_copy = messages;
+        if (!caps.supports_system_role) {
+            if (msgs_copy.empty()) {
+                msgs_copy.insert(msgs_copy.begin(), json{
+                    {"role", "user"},
+                    {"content", system_prompt}
+                });
+            } else {
+                auto & first_msg = msgs_copy[0];
+                // A missing or null "content" counts as empty text; calling get<std::string>() on a
+                // null value would throw. Other non-string content still throws, as before.
+                std::string existing;
+                if (first_msg.contains("content") && !first_msg.at("content").is_null()) {
+                    existing = first_msg.at("content").get<std::string>();
+                }
+                first_msg["content"] = system_prompt + "\n\n" + existing;
+            }
+        } else if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
+            msgs_copy.insert(msgs_copy.begin(), json{
+                {"role", "system"},
+                {"content", system_prompt}
+            });
+        } else {
+            // The first message already is a system message: overwrite its content.
+            msgs_copy[0]["content"] = system_prompt;
+        }
+        return msgs_copy;
+    }
+
+    // Returns a copy of the capabilities detected at construction time.
+    chat_template_caps original_caps() const {
+        return caps;
+    }
+
+};
+
  struct common_chat_msg {
-    std::string role;
-    std::string content;
+    std::string                               role;
+    std::string                               content;
      std::vector<common_chat_msg_content_part> content_parts;
-    std::vector<common_chat_tool_call> tool_calls;
-    std::string reasoning_content;
-    std::string tool_name;
-    std::string tool_call_id;
+    std::vector<common_chat_tool_call>        tool_calls;
+    std::string                               reasoning_content;
+    std::string                               tool_name;
+    std::string                               tool_call_id;
  
      nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
  
      bool empty() const {
-        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
+        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
+               tool_name.empty() && tool_call_id.empty();
      }
-    void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
+
+    void set_tool_call_ids(std::vector<std::string> &           ids_cache,
+                           const std::function<std::string()> & gen_tool_call_id) {
          for (auto i = 0u; i < tool_calls.size(); i++) {
              if (ids_cache.size() <= i) {
                  auto id = tool_calls[i].id;
@@ -64,32 +141,28 @@ struct common_chat_msg {
              tool_calls[i].id = ids_cache[i];
          }
      }
+
      bool operator==(const common_chat_msg & other) const {
-        return role == other.role
-            && content == other.content
-            && content_parts == other.content_parts
-            && tool_calls == other.tool_calls
-            && reasoning_content == other.reasoning_content
-            && tool_name == other.tool_name
-            && tool_call_id == other.tool_call_id;
-    }
-    bool operator!=(const common_chat_msg & other) const {
-        return !(*this == other);
+        return role == other.role && content == other.content && content_parts == other.content_parts &&
+               tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
+               tool_name == other.tool_name && tool_call_id == other.tool_call_id;
      }
+
+    bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
  };
  
  struct common_chat_msg_diff {
-    std::string reasoning_content_delta;
-    std::string content_delta;
-    size_t tool_call_index = std::string::npos;
+    std::string           reasoning_content_delta;
+    std::string           content_delta;
+    size_t                tool_call_index = std::string::npos;
      common_chat_tool_call tool_call_delta;
  
-    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
+    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
+                                                           const common_chat_msg & msg_new);
  
      bool operator==(const common_chat_msg_diff & other) const {
-        return content_delta == other.content_delta
-        && tool_call_index == other.tool_call_index
-        && tool_call_delta == other.tool_call_delta;
+        return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
+               tool_call_delta == other.tool_call_delta;
      }
  };
  
@@ -107,64 +180,39 @@ enum common_chat_tool_choice {
  
+// Chat format tag. Stored in common_chat_params::format and common_chat_parser_params::format,
+// both of which default to COMMON_CHAT_FORMAT_CONTENT_ONLY.
  enum common_chat_format {
      COMMON_CHAT_FORMAT_CONTENT_ONLY,
-    COMMON_CHAT_FORMAT_GENERIC,
-    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
-    COMMON_CHAT_FORMAT_MAGISTRAL,
-    COMMON_CHAT_FORMAT_LLAMA_3_X,
-    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-    COMMON_CHAT_FORMAT_HERMES_2_PRO,
-    COMMON_CHAT_FORMAT_COMMAND_R7B,
-    COMMON_CHAT_FORMAT_GRANITE,
-    COMMON_CHAT_FORMAT_GPT_OSS,
-    COMMON_CHAT_FORMAT_SEED_OSS,
-    COMMON_CHAT_FORMAT_NEMOTRON_V2,
-    COMMON_CHAT_FORMAT_APERTUS,
-    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
-    COMMON_CHAT_FORMAT_GLM_4_5,
-    COMMON_CHAT_FORMAT_MINIMAX_M2,
-    COMMON_CHAT_FORMAT_KIMI_K2,
-    COMMON_CHAT_FORMAT_APRIEL_1_5,
-    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
-    COMMON_CHAT_FORMAT_SOLAR_OPEN,
-    COMMON_CHAT_FORMAT_EXAONE_MOE,
  
      // These are intended to be parsed by the PEG parser
      COMMON_CHAT_FORMAT_PEG_SIMPLE,
      COMMON_CHAT_FORMAT_PEG_NATIVE,
-    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
  
-    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+    COMMON_CHAT_FORMAT_COUNT,  // Not a format, just the # formats
  };
  
+// Inputs for common_chat_templates_apply(): the conversation plus rendering options.
+// `use_jinja` selects between the Jinja route and the legacy route of that function.
  struct common_chat_templates_inputs {
-    std::vector<common_chat_msg> messages;
-    std::string grammar;
-    std::string json_schema;
-    bool add_generation_prompt = true;
-    bool use_jinja = true;
+    std::vector<common_chat_msg>          messages;
+    std::string                           grammar;
+    std::string                           json_schema;
+    bool                                  add_generation_prompt = true;
+    bool                                  use_jinja             = true;
      // Parameters below only supported when use_jinja is true
-    std::vector<common_chat_tool> tools;
-    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
-    bool parallel_tool_calls = false;
-    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    std::map<std::string, std::string> chat_template_kwargs;
-    bool add_bos = false;
-    bool add_eos = false;
+    std::vector<common_chat_tool>         tools;
+    common_chat_tool_choice               tool_choice         = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool                                  parallel_tool_calls = false;
+    common_reasoning_format               reasoning_format    = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
+    bool                                  enable_thinking     = true;
+    std::chrono::system_clock::time_point now                 = std::chrono::system_clock::now();
+    std::map<std::string, std::string>    chat_template_kwargs;
+    bool                                  add_bos = false;
+    bool                                  add_eos = false;
  };
  
  struct common_chat_params {
      common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
      std::string                         prompt;
      std::string                         grammar;
-    bool                                grammar_lazy = false;
+    bool                                grammar_lazy         = false;
      bool                                thinking_forced_open = false;
+    bool                                supports_thinking    = false;
      std::vector<common_grammar_trigger> grammar_triggers;
      std::vector<std::string>            preserved_tokens;
      std::vector<std::string>            additional_stops;
@@ -174,13 +222,14 @@ struct common_chat_params {
  // per-message parsing syntax
  // should be derived from common_chat_params
  struct common_chat_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
+    common_chat_format      format               = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    common_reasoning_format reasoning_format     = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
      // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-    common_peg_arena         parser                = {};
+    bool                    reasoning_in_content = false;
+    bool                    thinking_forced_open = false;
+    bool                    parse_tool_calls     = true;
+    bool                    debug                = false;  // Enable debug output for PEG parser
+    common_peg_arena        parser               = {};
      common_chat_parser_params() = default;
      common_chat_parser_params(const common_chat_params & chat_params) {
          format               = chat_params.format;
@@ -193,45 +242,42 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
  
  void common_chat_templates_free(struct common_chat_templates * tmpls);
  
-struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+// Deleter functor that releases a common_chat_templates through common_chat_templates_free();
+// used as the deleter type of common_chat_templates_ptr.
+struct common_chat_templates_deleter {
+    void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
+};
  
  typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
  
-common_chat_templates_ptr common_chat_templates_init(
-                                    const struct llama_model * model,
-                                           const std::string & chat_template_override,
-                                           const std::string & bos_token_override = "",
-                                           const std::string & eos_token_override = "");
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override = "",
+                                                     const std::string &        eos_token_override = "");
  
  bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
  std::string  common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
  
-
-struct common_chat_params      common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs);
+struct common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                                      const struct common_chat_templates_inputs & inputs);
  
  // Format single message, while taking into account the position of that message in chat history
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja);
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja);
  
  // Returns an example of formatted chat
-std::string common_chat_format_example(
-    const struct common_chat_templates * tmpls,
-    bool use_jinja,
-    const std::map<std::string, std::string> & chat_template_kwargs);
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs);
  
-const char*               common_chat_format_name(common_chat_format format);
-common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
-common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
+const char *            common_chat_format_name(common_chat_format format);
+common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
+common_chat_msg           common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);
  
  // used by arg and server
-const char *             common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format  common_reasoning_format_from_name(const std::string & format);
+const char *            common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
  
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
  
@@ -250,3 +296,10 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_
  
  // get template caps, useful for reporting to server /props endpoint
  std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
+
+// Applies `tmpl` to the autoparser template parameters in `inputs` and returns a string
+// (presumably the rendered prompt — confirm against the definition).
+// The three optional arguments default to std::nullopt; judging by their names they replace the
+// messages / tools taken from `inputs` and supply extra template context — TODO confirm.
+// NOTE(review): this declaration uses std::optional, but the include list visible in this header
+// has no <optional>; verify that it is not relying on a transitive include.
+std::string common_chat_template_direct_apply(
+    const common_chat_template & tmpl,
+    const autoparser::templates_params & inputs,
+    const std::optional<json> & messages_override = std::nullopt,
+    const std::optional<json> & tools_override = std::nullopt,
+    const std::optional<json> & additional_context = std::nullopt);
diff --git a/common/common.cpp b/common/common.cpp

index 53bddc4ef2f53f3983eabb2bc27ca96395ca06f3..cc423d3439fc7ace8d55b04fba0068f15376626f 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -676,7 +676,7 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
  
      size_t offset = 0;
      while (offset < filename.size()) {
-        utf8_parse_result result = parse_utf8_codepoint(filename, offset);
+        utf8_parse_result result = common_parse_utf8_codepoint(filename, offset);
  
          if (result.status != utf8_parse_result::SUCCESS) {
              return false;
diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp

index dbaaed500a87e302aa749b5e344b89921e91b7b0..1158d5e5d6d7ba681f0691fc88b66d474541db41 100644 (file)
--- a/common/jinja/caps.cpp
+++ b/common/jinja/caps.cpp
@@ -1,3 +1,4 @@
+#include "log.h"
  #include "value.h"
  #include "runtime.h"
  #include "caps.h"
@@ -36,12 +37,16 @@ static void caps_try_execute(jinja::program & prog,
      auto tools = ctx.get_val("tools");
  
      bool success = false;
+    std::string result;
      try {
          jinja::runtime runtime(ctx);
-        runtime.execute(prog);
+        auto results = runtime.execute(prog);
+        auto parts = jinja::runtime::gather_string_parts(results);
+        result = parts->as_string().str();
          success = true;
      } catch (const std::exception & e) {
          JJ_DEBUG("Exception during execution: %s", e.what());
+        result = "";
          // ignore exceptions during capability analysis
      }
  
@@ -90,6 +95,8 @@ caps caps_get(jinja::program & prog) {
          return v->stats.ops.find(op_name) != v->stats.ops.end();
      };
  
+    JJ_DEBUG("%s\n", ">>> Running capability check: typed content");
+
      // case: typed content support
      caps_try_execute(
          prog,
@@ -120,6 +127,7 @@ caps caps_get(jinja::program & prog) {
          }
      );
  
+    JJ_DEBUG("%s\n", ">>> Running capability check: system prompt");
  
      // case: system prompt support
      caps_try_execute(
@@ -150,7 +158,9 @@ caps caps_get(jinja::program & prog) {
          }
      );
  
-    // case: tools support
+    JJ_DEBUG("%s\n", ">>> Running capability check: single tool support");
+
+    // case: tools support: single call
      caps_try_execute(
          prog,
          [&]() {
@@ -162,10 +172,10 @@ caps caps_get(jinja::program & prog) {
                  },
                  {
                      {"role", "assistant"},
-                    {"content", "Assistant message"},
+                    {"content", ""}, // Some templates expect content to be empty with tool calls
                      {"tool_calls", json::array({
                          {
-                            {"id", "call1"},
+                            {"id", "call00001"},
                              {"type", "function"},
                              {"function", {
                                  {"name", "tool1"},
@@ -173,19 +183,18 @@ caps caps_get(jinja::program & prog) {
                                      {"arg", "value"}
                                  }}
                              }}
-                        },
-                        {
-                            {"id", "call2"},
-                            {"type", "function"},
-                            {"function", {
-                                {"name", "tool2"},
-                                {"arguments", {
-                                    {"arg", "value"}
-                                }}
-                            }}
                          }
                      })}
                  },
+                {
+                    {"role", "tool"},
+                    {"content", "Tool response"},
+                    {"tool_call_id", "call00001"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "The tool response was 'tool response'"}
+                },
                  {
                      {"role", "user"},
                      {"content", "User message"},
@@ -199,7 +208,7 @@ caps caps_get(jinja::program & prog) {
                      {"name", "tool"},
                      {"type", "function"},
                      {"function", {
-                        {"name", "tool"},
+                        {"name", "tool1"},
                          {"description", "Tool description"},
                          {"parameters", {
                              {"type", "object"},
@@ -224,6 +233,7 @@ caps caps_get(jinja::program & prog) {
  
              auto & tool_name = tools->at(0)->at("function")->at("name");
              caps_print_stats(tool_name, "tools[0].function.name");
+            caps_print_stats(tools, "tools");
              if (!tool_name->stats.used) {
                  result.supports_tools = false;
              }
@@ -233,6 +243,93 @@ caps caps_get(jinja::program & prog) {
              if (!tool_calls->stats.used) {
                  result.supports_tool_calls = false;
              }
+        }
+    );
+
+    JJ_DEBUG("%s\n", ">>> Running capability check: parallel tool support");
+
+    // case: tools support: parallel calls
+    caps_try_execute(
+        prog,
+        [&]() {
+            // messages
+            return json::array({
+                {
+                    {"role", "user"},
+                    {"content", "User message"},
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", ""}, // Some templates expect content to be empty with tool calls
+                    {"tool_calls", json::array({
+                        {
+                            {"id", "call00001"},
+                            {"type", "function"},
+                            {"function", {
+                                {"name", "tool1"},
+                                {"arguments", {
+                                    {"arg", "value"}
+                                }}
+                            }}
+                        },
+                        {
+                            {"id", "call00002"},
+                            {"type", "function"},
+                            {"function", {
+                                {"name", "tool1"},
+                                {"arguments", {
+                                    {"arg", "value"}
+                                }}
+                            }}
+                        }
+                    })}
+                },
+                {
+                    {"role", "tool"},
+                    {"content", "Tool response"},
+                    {"tool_call_id", "call00001"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "The tool response was 'tool response'"}
+                },
+                {
+                    {"role", "user"},
+                    {"content", "User message"},
+                },
+            });
+        },
+        [&]() {
+            // tools
+            return json::array({
+                {
+                    {"name", "tool"},
+                    {"type", "function"},
+                    {"function", {
+                        {"name", "tool1"},
+                        {"description", "Tool description"},
+                        {"parameters", {
+                            {"type", "object"},
+                            {"properties", {
+                                {"arg", {
+                                    {"type", "string"},
+                                    {"description", "Arg description"},
+                                }},
+                            }},
+                            {"required", json::array({ "arg" })},
+                        }},
+                    }},
+                },
+            });
+        },
+        [&](bool success, value & messages, value & /*tools*/) {
+            if (!success) {
+                result.supports_parallel_tool_calls = false;
+                return;
+            }
+
+            auto & tool_calls = messages->at(1)->at("tool_calls");;
+            caps_print_stats(tool_calls, "messages[1].tool_calls");
  
              // check for second tool call usage
              auto & tool_call_1 = tool_calls->at(1)->at("function");
@@ -243,6 +340,8 @@ caps caps_get(jinja::program & prog) {
          }
      );
  
+    JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning");
+
      // case: preserve reasoning content in chat history
      caps_try_execute(
          prog,
diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp

index 5757c76b7a1b7c6a906a68f4ca1d03728b6b519f..af2282c54692cb2a9ca0e9e9fc47fe6a8140a1ba 100644 (file)
--- a/common/jinja/runtime.cpp
+++ b/common/jinja/runtime.cpp
@@ -114,8 +114,10 @@ value binary_expression::execute_impl(context & ctx) {
  
      // Logical operators
      if (op.value == "and") {
+        JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str());
          return left_val->as_bool() ? right->execute(ctx) : std::move(left_val);
      } else if (op.value == "or") {
+        JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str());
          return left_val->as_bool() ? std::move(left_val) : right->execute(ctx);
      }
  
@@ -838,7 +840,7 @@ value call_expression::execute_impl(context & ctx) {
      for (auto & arg_stmt : this->args) {
          auto arg_val = arg_stmt->execute(ctx);
          JJ_DEBUG("  Argument type: %s", arg_val->type().c_str());
-        args.push_back(std::move(arg_val));
+        args.push_back(arg_val);
      }
      // execute callee
      value callee_val = callee->execute(ctx);
diff --git a/common/jinja/value.h b/common/jinja/value.h

index 07e447ff69626193b0f5f04eac70093c61bd3c4c..6cbedefd96ec8176f8d229de2247708b8d1e4c85 100644 (file)
--- a/common/jinja/value.h
+++ b/common/jinja/value.h
@@ -12,8 +12,8 @@
  #include <set>
  #include <sstream>
  #include <string>
-#include <unordered_map>
  #include <vector>
+#include <unordered_map>
  
  namespace jinja {
  
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp

index 2f67c74d796d4b0e8609abdf4114236450ce69f8..27f13f034ed56ea3b9dd61b1b93341fbd9102a3e 100644 (file)
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -27,11 +27,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items
      if (separator_rule.empty()) {
          if (min_items == 1 && !has_max) {
              return item_rule + "+";
-        } else if (min_items == 0 && !has_max) {
+        }
+        if (min_items == 0 && !has_max) {
              return item_rule + "*";
-        } else {
-            return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
          }
+        return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
      }
  
      auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
@@ -41,7 +41,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
      return result;
  }
  
-static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
      auto has_min = min_value != std::numeric_limits<int64_t>::min();
      auto has_max = max_value != std::numeric_limits<int64_t>::max();
  
@@ -128,14 +128,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
      if (has_min && has_max) {
          if (min_value < 0 && max_value < 0) {
              out << "\"-\" (";
-            _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
              out << ")";
              return;
          }
  
          if (min_value < 0) {
              out << "\"-\" (";
-            _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
              out << ") | ";
              min_value = 0;
          }
@@ -159,7 +159,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
      if (has_min) {
          if (min_value < 0) {
              out << "\"-\" (";
-            _build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+            build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
              out << ") | [0] | [1-9] ";
              more_digits(0, decimals_left - 1);
          } else if (min_value == 0) {
@@ -194,7 +194,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
              }
              digit_range(c, c);
              out << " (";
-            _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
+            build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
              out << ")";
              if (c < '9') {
                  out << " | ";
@@ -213,10 +213,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
                  more_digits(0, less_decimals);
                  out << " | ";
              }
-            _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
          } else {
              out << "\"-\" (";
-            _build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
+            build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
              out << ")";
          }
          return;
@@ -232,7 +232,7 @@ struct BuiltinRule {
      std::vector<std::string> deps;
  };
  
-std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
      {"boolean", {"(\"true\" | \"false\") space", {}}},
      {"decimal-part", {"[0-9]{1,16}", {}}},
      {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
@@ -247,7 +247,7 @@ std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
      {"null", {"\"null\" space", {}}},
  };
  
-std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
      {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
      {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
      {"date-time", {"date \"T\" time", {"date", "time"}}},
@@ -260,22 +260,26 @@ static bool is_reserved_name(const std::string & name) {
      static const std::unordered_set<std::string> RESERVED_NAMES = [] {
          std::unordered_set<std::string> s;
          s.insert("root");
-        for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
-        for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
+        for (const auto & p : PRIMITIVE_RULES) {
+            s.insert(p.first);
+        }
+        for (const auto & p : STRING_FORMAT_RULES) {
+            s.insert(p.first);
+        }
          return s;
      }();
      return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
  }
  
-std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
-std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
-std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
-std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
+static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
+static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
+static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
+static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
      {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
  };
  
-std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
-std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
  
  static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch  &)> & replacement) {
      std::smatch match;
@@ -322,19 +326,19 @@ private:
          if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
              _rules[esc_name] = rule;
              return esc_name;
-        } else {
-            int i = 0;
-            while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
-                i++;
-            }
-            std::string key = esc_name + std::to_string(i);
-            _rules[key] = rule;
-            return key;
          }
+        int i = 0;
+        while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
+            i++;
+        }
+        std::string key = esc_name + std::to_string(i);
+        _rules[key] = rule;
+        return key;
      }
  
      std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
          std::vector<std::string> rules;
+        rules.reserve(alt_schemas.size());
          for (size_t i = 0; i < alt_schemas.size(); i++) {
              rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
          }
@@ -398,6 +402,7 @@ private:
                  flush_literal();
  
                  std::vector<std::string> results;
+                results.reserve(ret.size());
                  for (const auto & item : ret) {
                      results.push_back(to_rule(item));
                  }
@@ -551,7 +556,7 @@ private:
              TrieNode() : is_end_of_string(false) {}
  
              void insert(const std::string & string) {
-                auto node = this;
+                auto *node = this;
                  for (char c : string) {
                      node = &node->children[c];
                  }
@@ -676,7 +681,7 @@ private:
                  if (ks.empty()) {
                      return res;
                  }
-                std::string k = ks[0];
+                const std::string& k = ks[0];
                  std::string kv_rule_name = prop_kv_rule_names[k];
                  std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
                  if (first_is_optional) {
@@ -779,7 +784,7 @@ public:
                          std::string pointer = ref.substr(ref.find('#') + 1);
                          std::vector<std::string> tokens = string_split(pointer, "/");
                          for (size_t i = 1; i < tokens.size(); ++i) {
-                            std::string sel = tokens[i];
+                            const std::string& sel = tokens[i];
                              if (target.is_object() && target.contains(sel)) {
                                  target = target[sel];
                              } else if (target.is_array()) {
@@ -802,7 +807,7 @@ public:
                          _refs[ref] = target;
                      }
                  } else {
-                    for (auto & kv : n.items()) {
+                    for (const auto & kv : n.items()) {
                          visit_refs(kv.value());
                      }
                  }
@@ -812,7 +817,7 @@ public:
          visit_refs(schema);
      }
  
-    std::string _generate_constant_rule(const json & value) {
+    static std::string _generate_constant_rule(const json & value) {  // uses no instance state: only value.dump() and format_literal()
          // Serialize the JSON constant with dump() and hand the text to format_literal().
          return format_literal(value.dump());
      }
  
@@ -823,10 +828,12 @@ public:
  
          if (schema.contains("$ref")) {
              return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
-        } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
+        }
+        if (schema.contains("oneOf") || schema.contains("anyOf")) {
              std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
              return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
-        } else if (schema_type.is_array()) {
+        }
+        if (schema_type.is_array()) {
              std::vector<json> schema_types;
              for (const auto & t : schema_type) {
                  json schema_copy(schema);
@@ -834,15 +841,18 @@ public:
                  schema_types.push_back(schema_copy);
              }
              return _add_rule(rule_name, _generate_union_rule(name, schema_types));
-        } else if (schema.contains("const")) {
+        }
+        if (schema.contains("const")) {
              return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
-        } else if (schema.contains("enum")) {
+        }
+        if (schema.contains("enum")) {
              std::vector<std::string> enum_values;
              for (const auto & v : schema["enum"]) {
                  enum_values.push_back(_generate_constant_rule(v));
              }
              return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
-        } else if ((schema_type.is_null() || schema_type == "object")
+        }
+        if ((schema_type.is_null() || schema_type == "object")
                  && (schema.contains("properties") ||
                      (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
              std::unordered_set<std::string> required;
@@ -863,11 +873,12 @@ public:
                  _build_object_rule(
                      properties, required, name,
                      schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
-        } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
+        }
+        if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
              std::unordered_set<std::string> required;
              std::vector<std::pair<std::string, json>> properties;
              std::map<std::string, size_t> enum_values;
-            std::string hybrid_name = name;
+            const std::string& hybrid_name = name;
              std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
                  if (comp_schema.contains("$ref")) {
                      add_component(_refs[comp_schema["$ref"]], is_required);
@@ -890,9 +901,9 @@ public:
                    // todo warning
                  }
              };
-            for (auto & t : schema["allOf"]) {
+            for (const auto & t : schema["allOf"]) {
                  if (t.contains("anyOf")) {
-                    for (auto & tt : t["anyOf"]) {
+                    for (const auto & tt : t["anyOf"]) {
                          add_component(tt, false);
                      }
                  } else {
@@ -911,7 +922,8 @@ public:
                  }
              }
              return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
-        } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
+        }
+        if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
              json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
              if (items.is_array()) {
                  std::string rule = "\"[\" space ";
@@ -923,27 +935,31 @@ public:
                  }
                  rule += " \"]\" space";
                  return _add_rule(rule_name, rule);
-            } else {
-                std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
-                int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
-                json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
-                int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
-
-                return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
              }
-        } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
+            std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
+            int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
+            json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
+            int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
+
+            return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
+        }
+        if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
              return _visit_pattern(schema["pattern"], rule_name);
-        } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
+        }
+        if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
              return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
-        } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
+        }
+        if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
              auto prim_name = schema_format + "-string";
              return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
-        } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
+        }
+        if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
              std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
              int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
              int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
              return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
-        } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
+        }
+        if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
              int64_t min_value = std::numeric_limits<int64_t>::min();
              int64_t max_value = std::numeric_limits<int64_t>::max();
              if (schema.contains("minimum")) {
@@ -958,19 +974,24 @@ public:
              }
              std::stringstream out;
              out << "(";
-            _build_min_max_int(min_value, max_value, out);
+            build_min_max_int(min_value, max_value, out);
              out << ") space";
              return _add_rule(rule_name, out.str());
-        } else if (schema.empty() || schema_type == "object") {
+        }
+        if (schema.empty() || schema_type == "object") {
              return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
-        } else {
-            if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
-                _errors.push_back("Unrecognized schema: " + schema.dump());
-                return "";
-            }
-            // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
-            return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
          }
+        if (schema_type.is_null() && schema.is_object()) {
+            // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+            // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+            return _add_rule(rule_name, _add_primitive("value", PRIMITIVE_RULES.at("value")));
+        }
+        if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
+            _errors.push_back("Unrecognized schema: " + schema.dump());
+            return "";
+        }
+        // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
+        return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
      }
  
      void check_errors() {
@@ -985,7 +1006,7 @@ public:
      // Renders every entry of _rules as "<name> ::= <body>", one entry per line,
      // and returns the concatenated text.
      std::string format_grammar() {
          std::stringstream ss;
          for (const auto & kv : _rules) {
-            ss << kv.first << " ::= " << kv.second << std::endl;
+            ss << kv.first << " ::= " << kv.second << '\n';
          }
          return ss.str();
      }
diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp

index f2fc84500f7044ce73e2a7cc81bd3f92a91c6b33..48379f1ec895f2c1defe465d6380cc0efcc535ea 100644 (file)
--- a/common/peg-parser.cpp
+++ b/common/peg-parser.cpp
@@ -1,14 +1,15 @@
-#include "common.h"
  #include "peg-parser.h"
+
+#include "common.h"
  #include "json-schema-to-grammar.h"
+#include "log.h"
  #include "unicode.h"
  
-#include <nlohmann/json.hpp>
-
  #include <algorithm>
  #include <initializer_list>
  #include <map>
  #include <memory>
+#include <nlohmann/json.hpp>
  #include <regex>
  #include <stdexcept>
  #include <unordered_set>
@@ -34,8 +35,7 @@ static bool is_hex_digit(const char c) {
  // This is used in common_peg_until_parser and to build a GBNF exclusion grammar
  struct trie {
      struct node {
-        size_t depth = 0;
-        std::map<unsigned char, size_t> children;
+        std::map<uint32_t, size_t> children;  // Use uint32_t to store Unicode codepoints
          bool is_word;
      };
  
@@ -55,15 +55,22 @@ struct trie {
          size_t current = 0; // Start at root
          size_t pos = start_pos;
  
+        // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str());
+
          while (pos < sv.size()) {
-            auto it = nodes[current].children.find(sv[pos]);
+            auto result = common_parse_utf8_codepoint(sv, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            auto it = nodes[current].children.find(result.codepoint);
              if (it == nodes[current].children.end()) {
                  // Can't continue matching
                  return match_result{match_result::NO_MATCH};
              }
  
              current = it->second;
-            pos++;
+            pos += result.bytes_consumed;
  
              // Check if we've matched a complete word
              if (nodes[current].is_word) {
@@ -82,22 +89,22 @@ struct trie {
      }
  
      // One entry produced by collect_prefix_and_next(); both fields hold trie edge keys (codepoints).
      struct prefix_and_next {
-        std::string prefix;
-        std::string next_chars;
+        std::vector<uint32_t> prefix;  // edge keys pushed while descending from node 0
+        std::vector<uint32_t> next_chars;  // presumably the child keys of the node reached by prefix - TODO confirm
      };
  
      // Public entry point: runs the recursive overload from node 0 with an empty prefix
      // and returns the entries it appended to `result`.
      std::vector<prefix_and_next> collect_prefix_and_next() {
-        std::string prefix;
+        std::vector<uint32_t>        prefix;
          std::vector<prefix_and_next> result;
          collect_prefix_and_next(0, prefix, result);
          return result;
      }
  
    private:
-    void collect_prefix_and_next(size_t index, std::string & prefix, std::vector<prefix_and_next> & out) {
+    void collect_prefix_and_next(size_t index, std::vector<uint32_t> & prefix, std::vector<prefix_and_next> & out) {
          if (!nodes[index].is_word) {
              if (!nodes[index].children.empty()) {
-                std::string chars;
+                std::vector<uint32_t> chars;
                  chars.reserve(nodes[index].children.size());
                  for (const auto & p : nodes[index].children) {
                      chars.push_back(p.first);
@@ -107,7 +114,7 @@ struct trie {
          }
  
          for (const auto & p : nodes[index].children) {
-            unsigned char ch = p.first;
+            uint32_t ch = p.first;
              auto child = p.second;
              prefix.push_back(ch);
              collect_prefix_and_next(child, prefix, out);
@@ -123,11 +130,19 @@ struct trie {
  
      void insert(const std::string & word) {
          size_t current = 0;
-        for (unsigned char ch : word) {
+        size_t pos     = 0;
+        while (pos < word.length()) {
+            auto result = common_parse_utf8_codepoint(word, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            uint32_t ch = result.codepoint;
+            pos += result.bytes_consumed;
+
              auto it = nodes[current].children.find(ch);
              if (it == nodes[current].children.end()) {
                  size_t child = create_node();
-                nodes[child].depth = nodes[current].depth + 1;
                  nodes[current].children[ch] = child;
                  current = child;
              } else {
@@ -286,6 +301,32 @@ struct parser_executor {
      parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start)
          : arena(arena), ctx(ctx), start_pos(start) {}
  
+    std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); }  // two spaces per ctx.parse_depth level; used as the leading %s of the trace lines
+
+    std::string debug_input_snippet(size_t pos, size_t len = 60) const {  // up to `len` chars of ctx.input starting at `pos`, made printable for trace output
+        if (pos >= ctx.input.size()) {
+            return "<EOF>";  // nothing at or after pos
+        }
+        auto        snippet = ctx.input.substr(pos, len);
+        // Replace newline, carriage return and tab with their two-character escapes; copy everything else as is
+        std::string result;
+        for (char c : snippet) {
+            if (c == '\n') {
+                result += "\\n";
+            } else if (c == '\r') {
+                result += "\\r";
+            } else if (c == '\t') {
+                result += "\\t";
+            } else {
+                result += c;
+            }
+        }
+        if (pos + len < ctx.input.size()) {  // input continues past the snippet
+            result += "...";
+        }
+        return result;
+    }
+
      // Epsilon parser: ignores its argument and returns a SUCCESS result built from start_pos alone.
      common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const {
          return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos);
      }
@@ -323,12 +364,39 @@ struct parser_executor {
      }
  
      common_peg_parse_result operator()(const common_peg_sequence_parser & p) {
+        if (ctx.debug) {
+            LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
          auto pos = start_pos;
          std::vector<common_peg_ast_id> nodes;
  
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
              auto result = arena.parse(child_id, ctx, pos);
+
+            if (ctx.debug) {
+                fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end);
+            }
+
              if (result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.is_partial && result.end >= ctx.input.size()) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%sSEQ -> NEED_MORE (child failed at end)\n", debug_indent().c_str());
+                    }
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end,
+                                                   std::move(nodes));
+                }
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str());
+                }
                  return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end);
              }
  
@@ -337,28 +405,65 @@ struct parser_executor {
              }
  
              if (result.need_more_input()) {
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str());
+                }
                  return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
              }
  
              pos = result.end;
          }
  
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos);
+        }
          return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
      }
  
      // Ordered choice: parses each child in turn at start_pos (pos is never advanced) and
      // returns the first result whose fail() is false. If every child fails, returns FAIL
      // at start_pos. When ctx.debug is set each step is traced to stderr; ctx.parse_depth is
      // incremented on entry and decremented before every return.
      common_peg_parse_result operator()(const common_peg_choice_parser & p) {
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;  // trace lines below are indented one level deeper via debug_indent()
+
          auto pos = start_pos;
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {  // index loop so the option number can be logged
+            const auto & child_id = p.children[i];
+            if (ctx.debug) {  // dump the alternative about to be tried
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
              auto result = arena.parse(child_id, ctx, pos);
+            if (ctx.debug) {  // report the result type of that alternative
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type));
+            }
              if (!result.fail()) {
+                ctx.parse_depth--;  // restore depth before logging the outcome
+                if (ctx.debug) {
+                    fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(),
+                            common_peg_parse_result_type_name(result.type), i);
+                }
                  return result;
              }
          }
  
+        ctx.parse_depth--;  // restore depth on the all-failed path as well
+        if (ctx.debug) {
+            fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str());
+        }
          return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
      }
  
      common_peg_parse_result operator()(const common_peg_repetition_parser & p) {
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count);
+        }
+        ctx.parse_depth++;
+
          auto pos = start_pos;
          int match_count = 0;
          std::vector<common_peg_ast_id> nodes;
@@ -366,14 +471,26 @@ struct parser_executor {
          // Try to match up to max_count times (or unlimited if max_count is -1)
          while (p.max_count == -1 || match_count < p.max_count) {
              if (pos >= ctx.input.size()) {
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count);
+                }
                  break;
              }
  
              auto result = arena.parse(p.child, ctx, pos);
  
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size());
+                fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str());
+            }
+
              if (result.success()) {
                  // Prevent infinite loop on empty matches
                  if (result.end == pos) {
+                    if (ctx.debug) {
+                        fprintf(stderr, "%s  REPEAT: empty match, stopping\n", debug_indent().c_str());
+                    }
                      break;
                  }
  
@@ -391,21 +508,43 @@ struct parser_executor {
                      nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end());
                  }
  
+                ctx.parse_depth--;
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(),
+                            match_count, nodes.size());
+                }
                  return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
              }
  
              // Child failed - stop trying
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str());
+            }
              break;
          }
  
          // Check if we got enough matches
          if (p.min_count > 0 && match_count < p.min_count) {
+            ctx.parse_depth--;
              if (pos >= ctx.input.size() && ctx.is_partial) {
+                if (ctx.debug) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(),
+                            match_count, p.min_count);
+                }
                  return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes));
              }
+            if (ctx.debug) {
+                fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count,
+                        p.min_count);
+            }
              return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
          }
  
+        ctx.parse_depth--;
+        if (ctx.debug) {
+            fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count,
+                    nodes.size());
+        }
          return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
      }
  
@@ -434,7 +573,7 @@ struct parser_executor {
  
      common_peg_parse_result operator()(const common_peg_any_parser & /* p */) const {
          // Parse a single UTF-8 codepoint (not just a single byte)
-        auto result = parse_utf8_codepoint(ctx.input, start_pos);
+        auto result = common_parse_utf8_codepoint(ctx.input, start_pos);
  
          if (result.status == utf8_parse_result::INCOMPLETE) {
              if (!ctx.is_partial) {
@@ -468,7 +607,7 @@ struct parser_executor {
  
          // Try to match up to max_count times (or unlimited if max_count is -1)
          while (p.max_count == -1 || match_count < p.max_count) {
-            auto result = parse_utf8_codepoint(ctx.input, pos);
+            auto result = common_parse_utf8_codepoint(ctx.input, pos);
  
              if (result.status == utf8_parse_result::INCOMPLETE) {
                  if (match_count >= p.min_count) {
@@ -537,6 +676,7 @@ struct parser_executor {
  
          switch (ctx.input[pos]) {
              case '"':
+            case '\'':
              case '\\':
              case '/':
              case 'b':
@@ -589,7 +729,49 @@ struct parser_executor {
                      return result;
                  }
              } else {
-                auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+                auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
+
+                if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
+                    if (!ctx.is_partial) {
+                        return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+                    }
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+                }
+
+                if (utf8_result.status == utf8_parse_result::INVALID) {
+                    return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
+                }
+
+                pos += utf8_result.bytes_consumed;
+            }
+        }
+
+        // Reached end without finding closing quote
+        if (!ctx.is_partial) {
+            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
+        }
+        return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
+    }
+
+    common_peg_parse_result operator()(const common_peg_python_dict_string_parser & /* p */) {
+        auto pos = start_pos;
+
+        // Parse string content (without quotes)
+        while (pos < ctx.input.size()) {
+            char c = ctx.input[pos];
+
+            if (c == '\'') {
+                // Found closing quote - success (don't consume it)
+                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
+            }
+
+            if (c == '\\') {
+                auto result = handle_escape_sequence(ctx, start_pos, pos);
+                if (!result.success()) {
+                    return result;
+                }
+            } else {
+                auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
  
                  if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
                      if (!ctx.is_partial) {
@@ -621,7 +803,7 @@ struct parser_executor {
          size_t last_valid_pos = start_pos;
  
          while (pos < ctx.input.size()) {
-            auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+            auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
  
              if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
                  // Incomplete UTF-8 sequence
@@ -694,6 +876,9 @@ struct parser_executor {
  
      common_peg_parse_result operator()(const common_peg_tag_parser & p) {
          // Parse the child
+        if (ctx.debug) {
+            fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str());
+        }
          auto result = arena.parse(p.child, ctx, start_pos);
  
          if (!result.fail()) {
@@ -755,6 +940,31 @@ common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) {
      return id;
  }
  
+// Writes one line describing `node` to `oss`, indented by two spaces per `indent` level, then
+// recurses into each child at `indent + 1`. Despite the name, children are visited depth-first
+// (pre-order): a node's whole subtree is printed before its next sibling.
+static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) {
+    for (int level = indent; level > 0; --level) {
+        oss << "  ";
+    }
+
+    // Header: id, then the optional rule and tag annotations, then the matched text.
+    oss << "NODE " << node.id;
+    const bool has_rule = !node.rule.empty();
+    if (has_rule) {
+        oss << " (rule " << node.rule << ")";
+    }
+    const bool has_tag = !node.tag.empty();
+    if (has_tag) {
+        oss << " (tag " << node.tag << ")";
+    }
+    oss << " ['" << node.text << "']\n";
+
+    const int child_indent = indent + 1;
+    for (const auto child_id : node.children) {
+        const auto & child_node = arena.get(child_id);
+        bfs_node(arena, oss, child_node, child_indent);
+    }
+}
+
+// Renders the AST as indented text, one "NODE ..." line per node.
+// NOTE(review): every element of nodes_ is printed as a root at indent 0, so a node that is also
+// another node's child is presumably printed more than once - confirm this is intended.
+std::string common_peg_ast_arena::dump() {
+    std::ostringstream out;
+    for (auto & root : nodes_) {
+        bfs_node(*this, out, root, /* indent = */ 0);
+    }
+    return out.str();
+}
+
  void common_peg_arena::resolve_refs() {
      // Walk through all parsers and replace refs with their corresponding rule IDs
      for (auto & parser : parsers_) {
@@ -786,6 +996,7 @@ void common_peg_arena::resolve_refs() {
                                   std::is_same_v<T, common_peg_until_parser> ||
                                   std::is_same_v<T, common_peg_literal_parser> ||
                                   std::is_same_v<T, common_peg_json_string_parser> ||
+                                 std::is_same_v<T, common_peg_python_dict_string_parser> ||
                                   std::is_same_v<T, common_peg_chars_parser> ||
                                   std::is_same_v<T, common_peg_any_parser> ||
                                   std::is_same_v<T, common_peg_space_parser>) {
@@ -803,9 +1014,21 @@ void common_peg_arena::resolve_refs() {
  }
  
  std::string common_peg_arena::dump(common_peg_parser_id id) const {
+    // Each top-level call starts with an empty visited set; dump_impl returns "[cycle]" for an id
+    // that is already in the set.
+    std::unordered_set<common_peg_parser_id> visited;
+    return dump_impl(id, visited);
+}
+
+std::string common_peg_arena::dump_impl(common_peg_parser_id                       id,
+                                        std::unordered_set<common_peg_parser_id> & visited) const {
+    // Check for cycles
+    if (visited.count(id)) {
+        return "[cycle]";
+    }
+    visited.insert(id);
+
      const auto & parser = parsers_.at(id);
  
-    return std::visit([this](const auto & p) -> std::string {
+    return std::visit([this, &visited](const auto & p) -> std::string {
          using T = std::decay_t<decltype(p)>;
  
          if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
@@ -819,24 +1042,27 @@ std::string common_peg_arena::dump(common_peg_parser_id id) const {
          } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
              std::vector<std::string> parts;
              for (const auto & child : p.children) {
-                parts.push_back(dump(child));
+                parts.push_back(dump_impl(child, visited));
              }
              return "Sequence(" + string_join(parts, ", ") + ")";
          } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
              std::vector<std::string> parts;
              for (const auto & child : p.children) {
-                parts.push_back(dump(child));
+                parts.push_back(dump_impl(child, visited));
              }
              return "Choice(" + string_join(parts, ", ") + ")";
          } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
              if (p.max_count == -1) {
-                return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)";
+                return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) +
+                        ", unbounded)";
              }
-            return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
+            return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
          } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return "And(" + dump(p.child) + ")";
+            return "And(" + dump_impl(p.child, visited) + ")";
          } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return "Not(" + dump(p.child) + ")";
+            return "Not(" + dump_impl(p.child, visited) + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+            return "Atomic(" + dump_impl(p.child, visited) + ")";
          } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
              return "Any";
          } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
@@ -848,14 +1074,20 @@ std::string common_peg_arena::dump(common_peg_parser_id id) const {
              return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
          } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
              return "JsonString()";
+        } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+            return "PythonDictString()";
          } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
              return "Until(" + string_join(p.delimiters, " | ") + ")";
          } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
+            return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
          } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return "Rule(" + p.name + ", " + dump(p.child) + ")";
+            return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")";
          } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
              return "Ref(" + p.name + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+            // Recurse through dump_impl (not dump) so the shared visited set keeps guarding against
+            // cycles; dump() would start over with an empty set and recurse forever on a recursive rule.
+            // (Atomic parsers are already handled by the earlier Atomic branch of this chain.)
+            return "Tag(" + p.tag + ", " + dump_impl(p.child, visited) + ")";
          } else {
              return "Unknown";
          }
@@ -1054,7 +1286,54 @@ common_peg_arena common_peg_parser_builder::build() {
      return std::move(arena_);
  }
  
+// String primitives
+
+// Content of a double-quoted string, without the surrounding quotes.
+common_peg_parser common_peg_parser_builder::json_string_content() {
+    return add(common_peg_json_string_parser{});
+}
+
+// Content of a single-quoted string, without the surrounding quotes.
+common_peg_parser common_peg_parser_builder::single_quoted_string_content() {
+    return add(common_peg_python_dict_string_parser{});
+}
+
+// Rule "dq-string": '"' json_string_content '"' followed by space().
+common_peg_parser common_peg_parser_builder::double_quoted_string() {
+    return rule("dq-string", [this]() {
+        // Pieces are created left to right, in the order they appear in the sequence.
+        auto opening  = literal("\"");
+        auto content  = json_string_content();
+        auto closing  = literal("\"");
+        auto trailing = space();
+        return sequence({ opening, content, closing, trailing });
+    });
+}
+
+// Rule "sq-string": "'" single_quoted_string_content "'" followed by space().
+common_peg_parser common_peg_parser_builder::single_quoted_string() {
+    return rule("sq-string", [this]() {
+        // Pieces are created left to right, in the order they appear in the sequence.
+        auto opening  = literal("'");
+        auto content  = single_quoted_string_content();
+        auto closing  = literal("'");
+        auto trailing = space();
+        return sequence({ opening, content, closing, trailing });
+    });
+}
+
+// Rule "flexible-string": a double-quoted string, or failing that a single-quoted one.
+common_peg_parser common_peg_parser_builder::flexible_string() {
+    return rule("flexible-string", [this]() {
+        auto dq = double_quoted_string();
+        auto sq = single_quoted_string();
+        return choice({ dq, sq });
+    });
+}
+
+// Generic helpers for object/array structure
+
+// Builds a rule `name` matching a brace-delimited, comma-separated list of key/value members:
+//   object -> "{" ws ( "}" | member ( ws "," ws member )* ws "}" )
+//   member -> string_parser ws ":" ws value_parser
+// Nothing is consumed after the closing "}" (no trailing whitespace).
+common_peg_parser common_peg_parser_builder::generic_object(const std::string &            name,
+                                                             const common_peg_parser & string_parser,
+                                                             const common_peg_parser & value_parser) {
+    return rule(name, [this, string_parser, value_parser]() {
+        auto ws      = space();
+        auto member  = sequence({ string_parser, ws, literal(":"), ws, value_parser });
+        auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+        // Either an empty object, or one-or-more members followed by optional whitespace and "}".
+        return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }) });
+    });
+}
+
+// Builds a rule `name` matching a bracket-delimited, comma-separated list of `value_parser` items:
+//   array -> "[" ws ( "]" | value ( ws "," ws value )* ws "]" )
+// Whitespace is accepted before each comma (as generic_object does between members): values built
+// by generic_object/generic_array end at their closing bracket without consuming trailing
+// whitespace, so an input such as "[{} , 1]" would otherwise be rejected.
+// Nothing is consumed after the closing "]".
+common_peg_parser common_peg_parser_builder::generic_array(const std::string &            name,
+                                                            const common_peg_parser & value_parser) {
+    return rule(name, [this, value_parser]() {
+        auto ws       = space();
+        auto elements = sequence({ value_parser, zero_or_more(sequence({ ws, literal(","), ws, value_parser })) });
+        // Either an empty array, or one-or-more elements followed by optional whitespace and "]".
+        return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }) });
+    });
+}
+
  // JSON parsers
+
  common_peg_parser common_peg_parser_builder::json_number() {
     return rule("json-number", [this]() {
          auto digit1_9 = chars("[1-9]", 1, 1);
@@ -1062,7 +1341,11 @@ common_peg_parser common_peg_parser_builder::json_number() {
          auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})});
          auto frac = sequence({literal("."), digits});
          auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits});
-        return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()});
+        // Negative lookahead: only commit the number when the next character can't extend it.
+        // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
+        // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
+        auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
+        return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
      });
  }
  
@@ -1085,36 +1368,11 @@ common_peg_parser common_peg_parser_builder::json_null() {
  }
  
  common_peg_parser common_peg_parser_builder::json_object() {
-    return rule("json-object", [this]() {
-        auto ws = space();
-        auto member = sequence({json_string(), ws, literal(":"), ws, json()});
-        auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))});
-        return sequence({
-            literal("{"),
-            ws,
-            choice({
-                literal("}"),
-                sequence({members, ws, literal("}")})
-            }),
-            ws
-        });
-    });
+    // Delegates to generic_object: keys are json_string(), member values are json().
+    // Unlike the removed inline rule, generic_object does not consume whitespace after the closing "}".
+    return generic_object("json-object", json_string(), json());
  }
  
  common_peg_parser common_peg_parser_builder::json_array() {
-    return rule("json-array", [this]() {
-        auto ws = space();
-        auto elements = sequence({json(), zero_or_more(sequence({literal(","), ws, json()}))});
-        return sequence({
-            literal("["),
-            ws,
-            choice({
-                literal("]"),
-                sequence({elements, ws, literal("]")})
-            }),
-            ws
-        });
-    });
+    // Delegates to generic_array with json() as the element parser.
+    // Unlike the removed inline rule, generic_array does not consume whitespace after the closing "]".
+    return generic_array("json-array", json());
  }
  
  common_peg_parser common_peg_parser_builder::json() {
@@ -1130,8 +1388,40 @@ common_peg_parser common_peg_parser_builder::json() {
      });
  }
  
-common_peg_parser common_peg_parser_builder::json_string_content() {
-    return wrap(arena_.add_parser(common_peg_json_string_parser{}));
+// Rule "python-string": a double-quoted string, or failing that a single-quoted one.
+common_peg_parser common_peg_parser_builder::python_string() {
+    return rule("python-string", [this]() {
+        auto dq = double_quoted_string();
+        auto sq = single_quoted_string();
+        return choice({ dq, sq });
+    });
+}
+
+// Python numbers reuse the JSON number parser unchanged.
+common_peg_parser common_peg_parser_builder::python_number() {
+    auto number = json_number();
+    return number;
+}
+
+// Rule "python-bool": the keyword True or False, followed by space().
+common_peg_parser common_peg_parser_builder::python_bool() {
+    return rule("python-bool", [this]() {
+        auto truthy      = literal("True");
+        auto falsy       = literal("False");
+        auto keyword     = choice({ truthy, falsy });
+        auto trailing_ws = space();
+        return sequence({ keyword, trailing_ws });
+    });
+}
+
+// Rule "python-none": the keyword None, followed by space().
+common_peg_parser common_peg_parser_builder::python_null() {
+    return rule("python-none", [this]() {
+        auto keyword     = literal("None");
+        auto trailing_ws = space();
+        return sequence({ keyword, trailing_ws });
+    });
+}
+
+// Rule "python-dict": delegates to generic_object with python_string() keys (single- or
+// double-quoted) and python_value() member values.
+common_peg_parser common_peg_parser_builder::python_dict() {
+    return generic_object("python-dict", python_string(), python_value());
+}
+
+// Rule "python-array": delegates to generic_array with python_value() as the element parser.
+common_peg_parser common_peg_parser_builder::python_array() {
+    const auto element = python_value();
+    return generic_array("python-array", element);
+}
+
+// Rule "python-value": any Python literal, tried in the order
+//   dict | array | string | number | True/False | None
+common_peg_parser common_peg_parser_builder::python_value() {
+    return rule("python-value", [this]() {
+        // Alternatives are created in the same order in which they are tried.
+        auto dict   = python_dict();
+        auto array  = python_array();
+        auto string = python_string();
+        auto number = python_number();
+        auto boolean = python_bool();
+        auto none   = python_null();
+        return choice({ dict, array, string, number, boolean, none });
+    });
+}
+
+// A marker: text delimited by "<" ... ">" or by "[" ... "]".
+// The angle-bracket form is tried first, then the square-bracket form.
+common_peg_parser common_peg_parser_builder::marker() {
+    auto sharp_bracket_parser = literal("<") + until(">") + literal(">");
+    auto square_bracket_parser = literal("[") + until("]") + literal("]");
+    return choice({ sharp_bracket_parser, square_bracket_parser });
  }
  
  common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) {
@@ -1145,17 +1435,54 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key
      });
  }
  
+// Returns the text that represents codepoint `c` inside a GBNF character class:
+//   - '-', ']', '[' and '\' are prefixed with a backslash
+//   - newline, tab and carriage return become \n, \t and \r
+//   - other printable ASCII (0x20..0x7E) is emitted verbatim
+//   - everything else becomes \xHH (<= 0xFF), \uHHHH (<= 0xFFFF) or \UHHHHHHHH, uppercase hex
+static std::string gbnf_escape_char_class(uint32_t c) {
+    if (c == '-' || c == ']' || c == '[' || c == '\\') {
+        return "\\" + std::string(1, (char) c);
+    }
+    // Escape whitespace control characters
+    if (c == '\n') {
+        return "\\n";
+    }
+    if (c == '\t') {
+        return "\\t";
+    }
+    if (c == '\r') {
+        return "\\r";
+    }
+
+    // Printable ASCII
+    if (c >= 0x20 && c <= 0x7E) {
+        return std::string(1, (char) c);
+    }
+
+    // Hex escape
+    // The longest form written below is "\U" + 8 hex digits + NUL = 11 bytes, which fits in buf.
+    char         buf[16];
+    const char * hex = "0123456789ABCDEF";
  
-static std::string gbnf_escape_char_class(char c) {
-    switch (c) {
-        case '\n': return "\\n";
-        case '\t': return "\\t";
-        case '\r': return "\\r";
-        case '\\': return "\\\\";
-        case ']':  return "\\]";
-        case '[':  return "\\[";
-        default:   return std::string(1, c);
+    if (c <= 0xFF) {
+        buf[0] = '\\';
+        buf[1] = 'x';
+        buf[2] = hex[(c >> 4) & 0xF];
+        buf[3] = hex[c & 0xF];
+        buf[4] = '\0';
+    } else if (c <= 0xFFFF) {
+        buf[0] = '\\';
+        buf[1] = 'u';
+        buf[2] = hex[(c >> 12) & 0xF];
+        buf[3] = hex[(c >> 8) & 0xF];
+        buf[4] = hex[(c >> 4) & 0xF];
+        buf[5] = hex[c & 0xF];
+        buf[6] = '\0';
+    } else {
+        buf[0] = '\\';
+        buf[1] = 'U';
+        // Most significant nibble first.
+        for (int i = 0; i < 8; i++) {
+            buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF];
+        }
+        buf[10] = '\0';
      }
+
+    return std::string(buf);
  }
  
  static std::string gbnf_excluding_pattern(const std::vector<std::string> & strings) {
@@ -1173,12 +1500,12 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
  
          std::string cls;
          cls.reserve(chars.size());
-        for (const auto & ch : chars) {
+        for (uint32_t ch : chars) {
              cls += gbnf_escape_char_class(ch);
          }
  
          if (!pre.empty()) {
-            pattern += gbnf_format_literal(pre) + " [^" + cls + "]";
+            pattern += gbnf_format_literal(common_unicode_cpts_to_utf8(pre)) + " [^" + cls + "]";
          } else {
              pattern += "[^" + cls + "]";
          }
@@ -1208,7 +1535,8 @@ static std::unordered_set<std::string> collect_reachable_rules(
                            std::is_same_v<T, common_peg_chars_parser> ||
                            std::is_same_v<T, common_peg_space_parser> ||
                            std::is_same_v<T, common_peg_any_parser> ||
-                          std::is_same_v<T, common_peg_json_string_parser>) {
+                          std::is_same_v<T, common_peg_json_string_parser> ||
+                          std::is_same_v<T, common_peg_python_dict_string_parser>) {
                  // These parsers do not have any children
              } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
                  for (auto child : p.children) {
@@ -1346,6 +1674,8 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
                  return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
              } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
                  return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+            } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+                // Single-quoted content ends at an unescaped ', so the plain-character class must
+                // exclude ' (not ") and \' must be an allowed escape, matching what the parser accepts.
+                return R"(( [^'\\] | "\\" ( ["'\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
              } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
                  if (p.delimiters.empty()) {
                      return ".*";
@@ -1477,6 +1807,8 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant &
              };
          } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
              return json{{"type", "json_string"}};
+        } else if constexpr (std::is_same_v<T, common_peg_python_dict_string_parser>) {
+            return json{{ "type", "python_dict_string" }};
          } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
              return json{{"type", "until"}, {"delimiters", p.delimiters}};
          } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
@@ -1606,6 +1938,9 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
      if (type == "json_string") {
          return common_peg_json_string_parser{};
      }
+    if (type == "python_dict_string") {
+        return common_peg_python_dict_string_parser{};
+    }
      if (type == "until") {
          if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
              throw std::runtime_error("until parser missing or invalid 'delimiters' field");
diff --git a/common/peg-parser.h b/common/peg-parser.h

index 1cd640365f2745d33f131d047ff6b26bf668eee5..57d4bcd8eaa10935dca2b1715ca112b3841cf42e 100644 (file)
--- a/common/peg-parser.h
+++ b/common/peg-parser.h
@@ -4,6 +4,7 @@
  
  #include <memory>
  #include <unordered_map>
+#include <unordered_set>
  #include <string>
  #include <string_view>
  #include <functional>
@@ -111,6 +112,8 @@ class common_peg_ast_arena {
  
      void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
      void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
+
+    std::string dump();
  };
  
  struct common_peg_parse_result {
@@ -139,6 +142,7 @@ struct common_peg_parse_result {
  struct common_peg_parse_context {
      std::string input;
      bool is_partial;
+    bool debug = false;  // Enable debug output for parser tracing
      common_peg_ast_arena ast;
  
      int parse_depth;
@@ -207,6 +211,7 @@ struct common_peg_chars_parser {
  };
  
  struct common_peg_json_string_parser {};
+struct common_peg_python_dict_string_parser {};
  
  struct common_peg_until_parser {
      std::vector<std::string> delimiters;
@@ -255,6 +260,7 @@ using common_peg_parser_variant = std::variant<
      common_peg_space_parser,
      common_peg_chars_parser,
      common_peg_json_string_parser,
+    common_peg_python_dict_string_parser,
      common_peg_until_parser,
      common_peg_schema_parser,
      common_peg_rule_parser,
@@ -299,6 +305,8 @@ class common_peg_arena {
      friend class common_peg_parser_builder;
  
    private:
+    std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
+
      common_peg_parser_id add_parser(common_peg_parser_variant parser);
      void add_rule(const std::string & name, common_peg_parser_id id);
  
@@ -311,6 +319,10 @@ class common_peg_parser_builder {
      common_peg_parser wrap(common_peg_parser_id id) { return common_peg_parser(id, *this); }
      common_peg_parser add(const common_peg_parser_variant & p) { return wrap(arena_.add_parser(p)); }
  
+    // Generic helpers for building object/array structures with configurable string/value parsers.
+    common_peg_parser generic_object(const std::string & name, const common_peg_parser & string_parser, const common_peg_parser & value_parser);
+    common_peg_parser generic_array(const std::string & name, const common_peg_parser & value_parser);
+
    public:
      common_peg_parser_builder();
  
@@ -404,6 +416,21 @@ class common_peg_parser_builder {
      //   S -> A{n}
      common_peg_parser repeat(const common_peg_parser & p, int n) { return repeat(p, n, n); }
  
+    // Matches a double-quoted string: '"' content '"' space
+    common_peg_parser double_quoted_string();
+
+    // Matches a single-quoted string: "'" content "'" space
+    common_peg_parser single_quoted_string();
+
+    // Matches a string that accepts both double-quoted and single-quoted styles.
+    common_peg_parser flexible_string();
+
+    // Matches double-quoted string content without the surrounding quotes.
+    common_peg_parser json_string_content();
+
+    // Matches single-quoted string content without the surrounding quotes.
+    common_peg_parser single_quoted_string_content();
+
      // Creates a complete JSON parser supporting objects, arrays, strings, numbers, booleans, and null.
      //   value -> object | array | string | number | true | false | null
      common_peg_parser json();
@@ -414,14 +441,24 @@ class common_peg_parser_builder {
      common_peg_parser json_bool();
      common_peg_parser json_null();
  
-    // Matches JSON string content without the surrounding quotes.
-    // Useful for extracting content within a JSON string.
-    common_peg_parser json_string_content();
-
      // Matches a JSON object member with a key and associated parser as the
      // value.
      common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
  
+    // Creates a complete Python format parser supporting dicts, arrays, strings, numbers, booleans, and None.
+    // Differs from JSON: uses True/False/None, accepts both single and double-quoted strings.
+    //   value -> dict | array | string | number | True | False | None
+    common_peg_parser python_value();
+    common_peg_parser python_dict();
+    common_peg_parser python_string();
+    common_peg_parser python_array();
+    common_peg_parser python_number();
+    common_peg_parser python_bool();
+    common_peg_parser python_null();
+
+    // A marker, i.e. text delimited by a pair of <> or []
+    common_peg_parser marker();
+
      // Wraps a parser with JSON schema metadata for grammar generation.
      // Used internally to convert JSON schemas to GBNF grammar rules.
      common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);
diff --git a/common/unicode.cpp b/common/unicode.cpp

index 56ab0f468e0385431fe309b5d6a422c6a3dd1dd7..c0ef6d02926df5113314574ee4108a2eec998795 100644 (file)
--- a/common/unicode.cpp
+++ b/common/unicode.cpp
@@ -1,14 +1,18 @@
  #include "unicode.h"
+#include <cassert>
+#include <stdexcept>
+#include <vector>
+#include <string>
  
  // implementation adopted from src/unicode.cpp
  
-size_t utf8_sequence_length(unsigned char first_byte) {
+size_t common_utf8_sequence_length(unsigned char first_byte) {
+    // The table is indexed by the high nibble of first_byte:
+    //   0x0..0xB -> 1, 0xC..0xD -> 2, 0xE -> 3, 0xF -> 4.
+    // Every entry is non-zero, so continuation bytes (high nibble 0x8..0xB) yield 1, not 0.
      const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
      uint8_t highbits = static_cast<uint8_t>(first_byte) >> 4;
      return lookup[highbits];
  }
  
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset) {
      if (offset >= input.size()) {
          return utf8_parse_result(utf8_parse_result::INCOMPLETE);
      }
@@ -62,3 +66,43 @@ utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
      // Invalid first byte
      return utf8_parse_result(utf8_parse_result::INVALID);
  }
+
+// Concatenates the UTF-8 encoding of every codepoint in `cps`, in order.
+// An exception thrown by common_unicode_cpt_to_utf8 propagates to the caller.
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
+    std::string utf8;
+    for (const uint32_t cpt : cps) {
+        utf8 += common_unicode_cpt_to_utf8(cpt);
+    }
+    return utf8;
+}
+
+// Encodes one codepoint as UTF-8:
+//   <= 0x7f -> 1 byte, <= 0x7ff -> 2 bytes, <= 0xffff -> 3 bytes, <= 0x10ffff -> 4 bytes.
+// Throws std::invalid_argument for values above 0x10ffff.
+std::string common_unicode_cpt_to_utf8(uint32_t cpt) {
+    // Narrows a computed byte value to char.
+    const auto as_char = [](uint32_t value) { return static_cast<char>(value); };
+
+    if (cpt <= 0x7f) {
+        return std::string(1, as_char(cpt));
+    }
+
+    if (cpt <= 0x7ff) {
+        return std::string{
+            as_char(0xc0 | ((cpt >> 6) & 0x1f)),
+            as_char(0x80 | (cpt & 0x3f)),
+        };
+    }
+
+    if (cpt <= 0xffff) {
+        return std::string{
+            as_char(0xe0 | ((cpt >> 12) & 0x0f)),
+            as_char(0x80 | ((cpt >> 6) & 0x3f)),
+            as_char(0x80 | (cpt & 0x3f)),
+        };
+    }
+
+    if (cpt <= 0x10ffff) {
+        return std::string{
+            as_char(0xf0 | ((cpt >> 18) & 0x07)),
+            as_char(0x80 | ((cpt >> 12) & 0x3f)),
+            as_char(0x80 | ((cpt >> 6) & 0x3f)),
+            as_char(0x80 | (cpt & 0x3f)),
+        };
+    }
+
+    throw std::invalid_argument("invalid codepoint");
+}
+
+
+
diff --git a/common/unicode.h b/common/unicode.h

index 9d9e8e1227aa97069eea7d9400f8bc7faab857d4..87bcc0ffcafe435cd7fdb4f941cf64e50d9438ea 100644 (file)
--- a/common/unicode.h
+++ b/common/unicode.h
@@ -2,6 +2,8 @@
  
  #include <cstdint>
  #include <string_view>
+#include <vector>
+#include <string>
  
  // UTF-8 parsing utilities for streaming-aware unicode support
  
@@ -16,7 +18,10 @@ struct utf8_parse_result {
  
  // Determine the expected length of a UTF-8 sequence from its first byte
  // Returns 0 for invalid first bytes
-size_t utf8_sequence_length(unsigned char first_byte);
+size_t common_utf8_sequence_length(unsigned char first_byte);
  
  // Parse a single UTF-8 codepoint from input
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset);
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset);
+
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps);
+std::string common_unicode_cpt_to_utf8(uint32_t cpt);
diff --git a/docs/autoparser.md b/docs/autoparser.md

new file mode 100644 (file)

index 0000000..686b2c2
--- /dev/null
+++ b/docs/autoparser.md
@@ -0,0 +1,525 @@
+# Auto-Parser Architecture
+
+The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
+
+## Overview
+
+The unified auto-parser uses a purely differential, compositional approach (inspired by the `git diff` algorithm) to analyze chat templates:
+
+**Core Philosophy**:
+
+- **Minimize Hardcoded Patterns**: All markers extracted through template comparison (the only heuristic is JSON detection to distinguish `JSON_NATIVE` from tag-based formats)
+- **Compositional Architecture**: Separate analyzer structs for reasoning, content, and tools — each responsible for its own analysis and parser construction
+
+**Analysis + Parser Building in Two Steps**:
+
+1. `autoparser::autoparser tmpl_analysis(tmpl)` — runs all differential comparisons and populates the analysis structs
+2. `autoparser::peg_generator::generate_parser(tmpl, params, tmpl_analysis)` — uses the analysis to build a PEG parser and optional GBNF grammar
+
+## Data Structures
+
+All structs are defined in [common/chat-auto-parser.h](common/chat-auto-parser.h).
+
+### Top-Level: `autoparser` (main analyzer and generator)
+
+[common/chat-auto-parser.h:367-388](common/chat-auto-parser.h#L367-L388) — top-level analysis result aggregating `jinja_caps`, `reasoning`, `content`, and `tools` sub-analyses, plus `preserved_tokens` (union of all non-empty markers).
+
+### `analyze_reasoning`
+
+[common/chat-auto-parser.h:254-274](common/chat-auto-parser.h#L254-L274) — reasoning analysis result: `mode` enum, `start` marker (e.g. `<think>`), and `end` marker (e.g. `</think>`).
+
+### `analyze_content`
+
+[common/chat-auto-parser.h:280-295](common/chat-auto-parser.h#L280-L295) — content analysis result: `mode` enum, `start`/`end` markers, and `requires_nonnull_content` flag.
+
+### `analyze_tools` and its sub-structs
+
+- [common/chat-auto-parser.h:176-194](common/chat-auto-parser.h#L176-L194) — `tool_format_analysis`: `mode` enum, `section_start/end`, `per_call_start/end`, JSON field names (`function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field`), and format flags (`fun_name_is_key`, `tools_array_wrapped`, `uses_python_dicts`)
+- [common/chat-auto-parser.h:196-200](common/chat-auto-parser.h#L196-L200) — `tool_function_analysis`: `name_prefix`, `name_suffix`, `close` markers around function names
+- [common/chat-auto-parser.h:202-210](common/chat-auto-parser.h#L202-L210) — `tool_arguments_analysis`: `start/end` container markers, `name_prefix/suffix`, `value_prefix/suffix`, `separator`
+- [common/chat-auto-parser.h:212-217](common/chat-auto-parser.h#L212-L217) — `tool_id_analysis`: `pos` enum, `prefix`/`suffix` markers around call ID values
+- [common/chat-auto-parser.h:301-361](common/chat-auto-parser.h#L301-L361) — `analyze_tools`: aggregates the four sub-structs above
+
+### Enums
+
+**`reasoning_mode`**: How the template handles reasoning/thinking blocks.
+
+| Value           | Description                                                                       |
+|-----------------|-----------------------------------------------------------------------------------|
+| `NONE`          | No reasoning markers detected                                                     |
+| `TAG_BASED`     | Standard tag-based: `<think>...</think>`                                          |
+| `DELIMITER`     | Delimiter-based: reasoning ends at a delimiter (e.g., `[BEGIN FINAL RESPONSE]`)   |
+| `FORCED_OPEN`   | Template ends with open reasoning tag when `enable_thinking=true`                 |
+| `FORCED_CLOSED` | `enable_thinking=false` emits both tags; `enable_thinking=true` emits only start  |
+| `TOOLS_ONLY`    | Reasoning only appears in tool call responses, not plain content                  |
+
+**`content_mode`**: How the template wraps assistant content.
+
+| Value                    | Description                                                    |
+|--------------------------|----------------------------------------------------------------|
+| `PLAIN`                  | No content markers                                             |
+| `ALWAYS_WRAPPED`         | Content always wrapped: `<response>...</response>`             |
+| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present                 |
+
+**`tool_format`**: Classification of tool call structure.
+
+| Value            | Description                                                      |
+|------------------|------------------------------------------------------------------|
+| `NONE`           | No tool support detected                                         |
+| `JSON_NATIVE`    | Pure JSON: `{"name": "X", "arguments": {...}}`                   |
+| `TAG_WITH_JSON`  | Tag-based with JSON args: `<function=X>{...}</function>`         |
+| `TAG_WITH_TAGGED`| Tag-based with tagged args: `<param=key>value</param>`           |
+
+**`call_id_position`**: Where call IDs appear in tag-based formats.
+
+| Value                    | Description                                  |
+|--------------------------|----------------------------------------------|
+| `NONE`                   | No call ID support detected                  |
+| `PRE_FUNC_NAME`          | Before function name                         |
+| `BETWEEN_FUNC_AND_ARGS`  | Between function name and arguments          |
+| `POST_ARGS`              | After arguments                              |
+
+## Tool Calling Formats
+
+### JSON_NATIVE
+
+**Structure**: The entire tool call (function name, arguments, values) is in JSON format. Optional enclosing tags around the section.
+
+**Detection**: Function name appears inside a JSON structure (quotes preceded by `{` or `:`).
+
+**Examples**:
+
+Standard OpenAI-style:
+
+```json
+<tool_call>
+{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
+</tool_call>
+```
+
+Mistral Nemo with array wrapper:
+
+```json
+[TOOL_CALLS]
+[{"name": "calculate", "arguments": {"expr": "2+2"}}]
+```
+
+Function name as JSON key (Apertus style):
+
+```json
+{"get_weather": {"location": "Paris"}}
+```
+
+---
+
+### TAG_WITH_JSON
+
+**Structure**: Function name is outside JSON, in tag attributes or XML-style tags. Arguments are a JSON object.
+
+**Detection**: Function name not in JSON, but argument names appear in JSON context.
+
+**Examples**:
+
+Functionary v3.1:
+
+```xml
+<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
+```
+
+MiniMax:
+
+```xml
+<minimax:tool_call>
+<tool_name>calculate</tool_name>
+<arguments>{"expr": "2+2"}</arguments>
+</minimax:tool_call>
+```
+
+---
+
+### TAG_WITH_TAGGED
+
+**Structure**: Both function name and argument names are in XML-style tags. String values are unquoted; non-string values are JSON-formatted.
+
+**Detection**: Neither function name nor argument names appear in a JSON context.
+
+**Examples**:
+
+Qwen/Hermes XML format:
+
+```xml
+<function=get_weather>
+<param=location>Paris</param>
+<param=unit>celsius</param>
+</function>
+```
+
+Mixed types:
+
+```xml
+<function=calculate>
+<param=expr>2+2</param>
+<param=precision>2</param>
+<param=options>{"round": true}</param>
+</function>
+```
+
+String values (`Paris`, `celsius`, `2+2`) are unquoted; `options` (object type) is JSON-formatted.
+
+---
+
+## Analysis Flow
+
+```text
+autoparser::autoparser(tmpl)
+    |
+    |-- Phase 1: analyze_reasoning(tmpl, jinja_caps.supports_tool_calls)
+    |     |-- R1: compare_reasoning_presence()   — with/without reasoning_content field
+    |     |-- R2: compare_thinking_enabled()     — enable_thinking=false vs true
+    |     '-- R3: compare_reasoning_scope()      — reasoning+content vs reasoning+tools
+    |           (only if supports_tool_calls)
+    |
+    |-- Phase 2: analyze_content(tmpl, reasoning)
+    |     '-- C1: compares content-only vs tools output and content-only vs reasoning output
+    |
+    |-- Phase 3: analyze_tools(tmpl, jinja_caps, reasoning)
+    |     (skipped entirely if !jinja_caps.supports_tool_calls)
+    |     |
+    |     |-- T1: analyze_tool_calls()           — no tools vs with tools; classifies format
+    |     |         |-- JSON path → analyze_tool_call_format_json_native()
+    |     |         '-- tag path → analyze_tool_call_format_non_json()
+    |     |
+    |     (if format != NONE and format != JSON_NATIVE:)
+    |     |
+    |     |-- T2: check_per_call_markers()       — 1 call vs 2 calls; moves section→per-call if needed
+    |     |         (only if supports_parallel_tool_calls)
+    |     |
+    |     |-- T3: extract_function_markers()     — func_alpha vs func_beta; extracts name prefix/suffix/close
+    |     |
+    |     |-- T4: analyze_arguments()            — (TAG_WITH_TAGGED only)
+    |     |         |-- A1: extract_argument_name_markers()   — arg_name_A vs arg_name_B
+    |     |         '-- A2: extract_argument_value_markers()  — value "XXXX" vs "YYYY"
+    |     |
+    |     |-- T5: extract_argument_separator()   — 1 arg vs 2 args; finds separator between args
+    |     |
+    |     |-- T6: extract_args_markers()         — 0 args vs 1 arg; finds args container markers
+    |     |
+    |     '-- T7: extract_call_id_markers()      — call_id "call00001" vs "call99999"
+    |
+    |-- collect_preserved_tokens()               — union of all non-empty markers
+    |
+    '-- apply workarounds()                      — post-hoc patches for edge-case templates
+    |
+    v
+autoparser (analysis result)
+    |
+    v
+autoparser::peg_generator::generate_parser(tmpl, inputs, analysis)
+    |-- analysis.build_parser(inputs)            — builds PEG parser arena
+    |     |-- reasoning.build_parser(ctx)        — reasoning parser (mode-dependent)
+    |     |-- content.build_parser(ctx)          — content parser (mode-dependent)
+    |     '-- tools.build_parser(ctx)            — tool parser (dispatches by tool_format)
+    |           |-- build_tool_parser_json_native()
+    |           |-- build_tool_parser_tag_json()
+    |           '-- build_tool_parser_tag_tagged()
+    |
+    |-- Build GBNF grammar (if tools present and trigger_marker non-empty)
+    '-- Set grammar_triggers from section_start or per_call_start
+    |
+    v
+common_chat_params (prompt, parser, grammar, triggers, preserved_tokens)
+```
+
+## Entry Point
+
+The auto-parser is invoked in [common/chat.cpp:1280-1310](common/chat.cpp#L1280-L1310) in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS with `<|channel|>`, Functionary v3.2 with `>>>all`), then the auto-parser handles everything else via `autoparser::autoparser` + `peg_generator::generate_parser`.
+
+## Algorithm Details
+
+### Core Mechanism: Differential Comparison
+
+All analysis phases use the same factorized comparison function declared in [common/chat-auto-parser-helpers.h:68](common/chat-auto-parser-helpers.h#L68):
+
+```cpp
+compare_variants(tmpl, params_A, params_modifier)
+```
+
+This creates variant B by applying a modifier lambda to a copy of `params_A`, renders both through the template, and computes a `diff_split` ([common/chat-auto-parser.h:28-37](common/chat-auto-parser.h#L28-L37)):
+
+- `prefix` — common prefix between A and B
+- `suffix` — common suffix between A and B
+- `left` — unique to variant A
+- `right` — unique to variant B
+
+The diff is computed via `calculate_diff_split()`, which finds the longest-common-prefix and longest-common-suffix, then iteratively moves incomplete `<...>` or `[...]` markers from the prefix/suffix into left/right until stable (tag boundary fixing).
+
+Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries.
+
+### Phase 1: Reasoning Analysis
+
+**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field.
+
+- Searches `diff.right` (output with reasoning) for the reasoning content needle
+- Uses PEG parsers to find surrounding markers:
+  - If both pre/post markers found in `diff.right` → `TAG_BASED` (both tags visible in diff = no forced close)
+  - If both are found but the post marker appears only in the full output B → `FORCED_CLOSED`
+  - If only post marker found → `DELIMITER`
+- Sets `reasoning.start` and `reasoning.end`
+
+**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true` with a generation prompt.
+
+- Detects `FORCED_OPEN`: `enable_thinking=true` adds a non-empty marker at the end of the prompt (where model will start generating) — sets `reasoning.start`, mode = `FORCED_OPEN`
+- Detects `FORCED_CLOSED`: `enable_thinking=false` produces both start+end markers; `enable_thinking=true` produces only start marker
+- Handles the reverse case: if both start and end are still empty, looks for a single-segment diff on each side to extract both markers
+
+**R3 — `compare_reasoning_scope()`**: Compares assistant message with reasoning+text-content vs reasoning+tool-calls.
+
+- Only runs if `jinja_caps.supports_tool_calls`
+- Detects `TOOLS_ONLY`: reasoning content present in B (with tools) but not in A (with text content)
+- Extracts reasoning markers from the tool call output using PEG parsers
+
+### Phase 2: Content Analysis
+
+**C1**: Two comparisons in the `analyze_content` constructor:
+
+- Comparison 1: content-only output vs tool-call output → `diff_tools`
+- Comparison 2: content-only output vs reasoning+empty-content output → `diff_reasoning`
+
+Classification logic:
+
+- `PLAIN`: `diff_tools.left` equals the response string (content is the entire diff, no wrapper)
+- `ALWAYS_WRAPPED`: markers found surrounding the content text in `pure_content` → extracts `start`/`end`
+
+### Phase 3: Tool Call Analysis
+
+**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output.
+
+- Extracts the tool call section as `diff.right`
+- Calls `analyze_tool_call_format()` which first strips reasoning markers from the haystack, then:
+  - Calls `in_json_haystack()` for both function name and argument name needles
+  - `in_json_haystack()` uses a PEG parser to check whether the needle appears in a JSON context (preceded by `{` or `:` with surrounding quotes)
+  - If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`
+  - If function name not in JSON, arg name is in JSON → `TAG_WITH_JSON`
+  - If neither in JSON → `TAG_WITH_TAGGED`
+  - `analyze_tool_call_format_json_native()`: parses the JSON object, matches field values to needles to populate `name_field`, `args_field`, `id_field`, `gen_id_field`; detects `tools_array_wrapped`; extracts `section_start`/`section_end`
+  - `analyze_tool_call_format_non_json()`: uses PEG parsers on the haystack to find up to two opening markers (section + per-call) then up to two closing markers
+
+**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls.
+
+- Computes a secondary diff of the second call portion vs the common suffix
+- If the second call content starts with `section_start` → the section marker is actually per-call → moves `section_start/end` to `per_call_start/end` and clears the section markers
+
+**T3 — `extract_function_markers()`**: Compares function name `FUN_FIRST` vs `FUN_SECOND` (two different named functions).
+
+- Finds where the function name appears in `diff.left`
+- Extracts `function.name_prefix` from the common prefix up to the function marker, and `function.name_suffix` from after the name up to the next marker
+- Extends `name_suffix` into `diff.suffix` (to the first marker for TAG_WITH_TAGGED; to the first `{` or `[` for TAG_WITH_JSON)
+- Extracts `function.close` from after the last argument value up to the per-call/section end marker
+
+**T4 — `analyze_arguments()`** (TAG_WITH_TAGGED only):
+
+- **A1 `extract_argument_name_markers()`**: Compares `arg_name_A` vs `arg_name_B` (two different argument names).
+  - Finds shared surrounding structure → `arguments.name_prefix`, `arguments.name_suffix`
+- **A2 `extract_argument_value_markers()`**: Compares argument value `"XXXX"` vs `"YYYY"` (same arg, different value).
+  - Finds markers surrounding the value → `arguments.value_prefix`, `arguments.value_suffix`
+
+**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments (same function).
+
+- Uses `until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND)` to find what separates the two argument blocks
+
+**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument.
+
+- Uses `until_common_prefix()` and `after_common_suffix()` with the empty and single-arg JSON strings as anchors to find container markers (`arguments.start`, `arguments.end`)
+
+**T7 — `extract_call_id_markers()`**: Compares call IDs `"call00001"` vs `"call99999"`.
+
+- Determines whether function name appears in `diff.prefix` or `diff.suffix` to classify position:
+  - Function name in prefix only → `BETWEEN_FUNC_AND_ARGS` or `POST_ARGS` (further distinguished by where `{` appears)
+  - Function name in suffix only → `PRE_FUNC_NAME`
+- Extracts `call_id.prefix` and `call_id.suffix` markers around the call ID value
+- Clears `per_call_end` if it incorrectly incorporated the call ID suffix
+
+### Workarounds
+
+A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches after analysis. Each workaround is a lambda that inspects the template source and overrides analysis results. Current workarounds:
+
+1. **Old Qwen/DeepSeek thinking templates** — source contains `content.split('</think>')`: sets `reasoning.mode = FORCED_OPEN` with `<think>`/`</think>` markers if no reasoning was detected
+2. **Granite 3.3** — source contains specific "Write your thoughts" text: forces `TAG_BASED` reasoning with `<think>`/`</think>` and `WRAPPED_WITH_REASONING` content with `<response>`/`</response>`
+3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
+4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
+5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
+
+### Parser Building
+
+Each analyzer struct (`analyze_reasoning`, `analyze_content`, `analyze_tools`) implements `build_parser(parser_build_context&)`. They share a `parser_build_context` that carries the PEG builder, inference inputs, the pre-built reasoning parser, and a pointer to the content analyzer.
+
+#### Reasoning Parser (`analyze_reasoning::build_parser`)
+
+| Mode                              | Parser                                                              |
+|-----------------------------------|---------------------------------------------------------------------|
+| Not extracting reasoning          | `eps()`                                                             |
+| `FORCED_OPEN` or `FORCED_CLOSED`  | `reasoning(until(end)) + end` — opening tag was in the prompt       |
+| `TAG_BASED` or `TOOLS_ONLY`       | `optional(start + reasoning(until(end)) + end)`                     |
+| `DELIMITER`                       | `optional(reasoning(until(end)) + end)` — no start marker           |
+
+#### Content Parser (`analyze_content::build_parser`)
+
+| Condition                              | Parser                                                                          |
+|----------------------------------------|---------------------------------------------------------------------------------|
+| `json_schema` present                  | `reasoning + space() + content(schema(json(), "response-format", ...)) + end()` |
+| Tools present                          | Dispatches to `analyze_tools::build_parser()`                                   |
+| `ALWAYS_WRAPPED` with reasoning        | `reasoning + start + content(until(end)) + end + end()`                         |
+| `ALWAYS_WRAPPED` without reasoning     | `content(until(start)) + start + content(until(end)) + end + end()`             |
+| Default (PLAIN)                        | `reasoning + content(rest()) + end()`                                           |
+
+#### Tool Parsers (`analyze_tools::build_parser`)
+
+Dispatches by `format.mode`:
+
+**`build_tool_parser_json_native()`**: Calls `p.standard_json_tools()` which internally dispatches to:
+
+- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {...}}`
+- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
+- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
+
+Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
+
+**`build_tool_parser_tag_json()`**: For each tool function:
+
+```text
+tool_open(name_prefix + tool_name(literal(name)) + name_suffix) +
+    call_id_section +
+    tool_args(schema(json(), tool_schema))
+  [+ function.close if non-empty]
+```
+
+Wrapped in per-call markers (with optional parallel call repetition) then optionally in section markers.
+
+**`build_tool_parser_tag_tagged()`**: For each tool function, builds one parser per argument:
+
+- String types: `tool_arg_string_value(schema(until(value_suffix), ...))`
+- JSON types: `tool_arg_json_value(schema(json(), ...))`
+- Required args are plain; optional args wrapped in `optional()`
+- Arguments joined with `space()` between consecutive parsers
+
+For closing: uses `function.close` if present; otherwise uses `peek(per_call_end)` to avoid premature close during partial streaming; falls back to `tool_close(space())` to trigger mapper callbacks.
+
+All three tool parsers return:
+
+```text
+reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
+```
+
+### Python Dict Format
+
+When `format.uses_python_dicts` is true (detected when single-quoted strings appear in JSON argument context), `build_parser()` pre-registers a `json-string` rule that accepts both single-quoted and double-quoted strings. This is done before any `p.json()` call so all JSON parsing inherits the flexible rule.
+
+## Mapper
+
+`common_chat_peg_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. Key design:
+
+- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once the name is set, the buffer is flushed to `current_tool->arguments`
+- **`args_target()`**: Returns a reference to whichever destination is currently active (buffer or tool args), eliminating branching
+- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized (for schema-declared string types in tagged format)
+- **Quote normalization**: Python-style quotes (`'key': 'value'`) are converted to JSON (`"key": "value"`)
+- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically
+
+## Files
+
+| File                                      | Purpose                                                              |
+|-------------------------------------------|----------------------------------------------------------------------|
+| `common/chat-auto-parser.h`               | All analysis structs, enums, `autoparser`, `peg_generator`, `templates_params` |
+| `common/chat-auto-parser-generator.cpp`   | Parser generator: `generate_parser()` and `build_parser()` methods   |
+| `common/chat-diff-analyzer.cpp`           | Differential analysis implementation and workarounds                 |
+| `common/chat-auto-parser-helpers.h/cpp`   | `calculate_diff_split()`, `segmentize_markers()`,                    |
+|                                           | `compare_variants()`, string helpers                                 |
+| `common/chat-peg-parser.h/cpp`            | `common_chat_peg_builder`, `common_chat_peg_mapper`, and helpers     |
+| `common/chat.cpp`                         | Entry point: `common_chat_templates_apply_jinja()`                   |
+| `tools/parser/debug-template-parser.cpp`  | Debug tool for template analysis                                     |
+| `tools/parser/template-analysis.cpp`      | Template analysis tool                                               |
+
+## Testing & Debugging
+
+### Debug Tools
+
+**Template Debugger**: `tools/parser/debug-template-parser.cpp`
+
+- Usage: `./bin/llama-debug-template-parser path/to/template.jinja`
+- Shows detected format, markers, generated parser, and GBNF grammar
+
+**Template Analysis**: `tools/parser/template-analysis.cpp`
+
+- Usage: `./bin/llama-template-analysis path/to/template.jinja`
+
+**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
+
+- Shows detailed analysis steps, pattern extraction results, and generated parser structure
+
+**PEG Test Builder**: Fluent API for creating test cases — see [tests/test-chat.cpp:947-1043](tests/test-chat.cpp#L947-L1043). Example usage:
+
+```cpp
+auto tst = peg_tester("models/templates/Template.jinja");
+tst.test("input text")
+   .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+   .tools({tool_json})
+   .parallel_tool_calls(true)
+   .enable_thinking(true)
+   .expect(expected_message)
+   .run();
+```
+
+### Tested Templates
+
+The following templates have active tests in `tests/test-chat.cpp`:
+
+| Template | Format | Notes |
+| -------- | ------ | ----- |
+| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags (specialized handler) |
+| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools |
+| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers |
+| Google Gemma 2 2B | Content only | No tool support |
+| Qwen-QwQ-32B | Reasoning | Forced-open thinking |
+| NousResearch Hermes 2 Pro | JSON_NATIVE | `<tool_call>` wrapper |
+| IBM Granite 3.3 | JSON_NATIVE | `<think></think>` + `<response></response>` |
+| ByteDance Seed-OSS | TAG_WITH_TAGGED | Custom `<seed:think>` and `<seed:tool_call>` tags |
+| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format |
+| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode |
+| GLM-4.6 | TAG_WITH_TAGGED | `<tool_call>name\n<arg_key>...<arg_value>...` format |
+| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format |
+| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools |
+| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key |
+| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args |
+| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `<TOOLCALL>` wrapper (nested) |
+| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format |
+| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field |
+| Functionary v3.1 | TAG_WITH_JSON | `<function=X>` format |
+| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) |
+| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format |
+| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking |
+| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking |
+| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers |
+| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format |
+| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) |
+| Apriel 1.5 | JSON_NATIVE | `<tool_calls>` wrapper with JSON array |
+| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start |
+| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID |
+| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID |
+| StepFun 3.5 Flash | TAG_WITH_TAGGED | `<function=X><parameter=Y>` format |
+
+## Adding Support for New Templates
+
+To support a new template format:
+
+1. **If it follows standard patterns** — The auto-parser should detect it automatically. Run `llama-debug-template-parser` to verify markers are correctly extracted.
+2. **If differential analysis extracts incorrect markers** — Add a workaround lambda to the `workarounds` vector in `common/chat-diff-analyzer.cpp`. Inspect the template source for a unique identifying substring.
+3. **If it needs fundamentally different handling** — Add a dedicated handler function in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral).
+
+## Edge Cases and Quirks
+
+1. **Forced Thinking**: When `enable_thinking=true` and the model prompt ends with an open reasoning tag (e.g., `<think>`), the parser enters forced thinking mode and immediately expects reasoning content without waiting for a start marker.
+2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start/end`); others wrap the entire section (`section_start/end`). T2 (`check_per_call_markers()`) disambiguates by checking if the second call in a two-call output starts with the section marker.
+3. **Python Dict Format**: The Seed template family uses single-quoted JSON (`'key': 'value'`). The `uses_python_dicts` flag causes the PEG builder to register a flexible `json-string` rule accepting both quote styles before any JSON rules are built.
+4. **Tag Boundary Fixing**: `calculate_diff_split()` iteratively adjusts prefix/suffix boundaries to avoid splitting `<tag>` or `[marker]` tokens, ensuring clean extraction.
+5. **Call ID Side Effects**: When a call ID is detected, `per_call_end` may have been incorrectly set to include the call ID suffix. T7 clears `per_call_end` in this case.
+6. **Tool Analysis Gating**: `analyze_tools` is only constructed (and all tool analysis phases run) when `jinja_caps.supports_tool_calls` is true. Within tool analysis, `check_per_call_markers()` (T2) only runs if `jinja_caps.supports_parallel_tool_calls`.
+7. **`analyze_arguments()` Gating**: Within tool analysis, A1 and A2 (argument name/value marker extraction) only run for `TAG_WITH_TAGGED` format. `extract_argument_separator()` and `extract_args_markers()` run for all non-`JSON_NATIVE` formats.
diff --git a/docs/development/parsing.md b/docs/development/parsing.md

index dbb989bf08e084f9d99bf0e060247e40f800b343..a41057db2b8a986c3ba9f9d9ed2f8e160ce4fd82 100644 (file)
--- a/docs/development/parsing.md
+++ b/docs/development/parsing.md
@@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse
  output from a model that emits arguments as JSON.
  
  ```cpp
-auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
      // Build a choice of all available tools
      auto tool_choice = p.choice();
      for (const auto & tool : tools) {
@@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result);
  
  ### Native
  
-The `common_chat_peg_native_builder` builds a `native` parser suitable for
+The `common_chat_peg_builder` builds a `native` parser suitable for
  models that emit tool arguments as a direct JSON object.
  
  - **`reasoning(p)`** - Tag node for `reasoning_content`
@@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object.
  - **`tool_args(p)`** - Tag the tool arguments
  
  ```cpp
-build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
      auto get_weather_tool = p.tool(p.sequence({
          p.tool_open(p.literal("{")),
          p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
@@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
  
  ### Constructed
  
-The `common_chat_peg_constructed_builder` builds a `constructed` parser
+The `common_chat_peg_builder` builds a `constructed` parser
  suitable for models that emit tool arguments as separate entities, such as XML
  tags.
  
@@ -264,7 +264,7 @@ tags.
  - **`tool_arg_json_value(p)`** - Tag JSON value for the argument
  
  ```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
      auto location_arg = p.tool_arg(
          p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
          p.tool_arg_string_value(p.until("</parameter>")),
diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py

index 9fc90a3c987f3c990bc006bc14a6712ccaa2010c..35f7d47f3c8be7404a0c5fb5ff363c41a76515f8 100755 (executable)
--- a/examples/json_schema_to_grammar.py
+++ b/examples/json_schema_to_grammar.py
@@ -689,6 +689,11 @@ class SchemaConverter:
          elif (schema_type == 'object') or (len(schema) == 0):
              return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
  
+        elif schema_type is None and isinstance(schema, dict):
+            # No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+            # Per JSON Schema semantics this is equivalent to {} and accepts any value.
+            return self._add_rule(rule_name, self._add_primitive('value', PRIMITIVE_RULES['value']))
+
          else:
              assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
              # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
diff --git a/models/templates/Apertus-8B-Instruct.jinja b/models/templates/Apertus-8B-Instruct.jinja

index 10826ff6901aec8d704e9e888c0e53cac3855f4d..432ae59a406c3ffabe72bd011f09dfa66e233eae 100644 (file)
--- a/models/templates/Apertus-8B-Instruct.jinja
+++ b/models/templates/Apertus-8B-Instruct.jinja
@@ -97,20 +97,20 @@
  
  {%- macro render_tools(tools) -%}
      {%- for tool in tools %}
-        {{- "// " + tool.description + "\n" }}
-        {{- "type "+ tool.name + " = " }}
-        {%- if tool.parameters and tool.parameters.properties %}
+        {{- "// " + tool.function.description + "\n" }}
+        {{- "type "+ tool.function.name + " = " }}
+        {%- if tool.function.parameters and tool.function.parameters.properties %}
              {{- "(_: {\n" }}
-            {%- for param_name, param_spec in tool.parameters.properties.items() %}
+            {%- for param_name, param_spec in tool.function.parameters.properties.items() %}
                  {%- if param_spec.description %}
                      {{- "// " + param_spec.description + "\n" }}
                  {%- endif %}
                  {{- param_name }}
-                {%- if param_name not in (tool.parameters.required or []) -%}
+                {%- if param_name not in (tool.function.parameters.required or []) -%}
                      {{- "?" }}
                  {%- endif -%}
                  {{- ": " }}
-                {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
+                {{- render_typescript_type(param_spec, tool.function.parameters.required or []) }}
                  {%- if param_spec.default is defined -%}
                      {%- if param_spec.enum %}
                          {{- ", // default: " + param_spec.default }}
@@ -294,7 +294,7 @@
              {%- for tool_call in message.tool_calls -%}
                  {%- if tool_call.type == 'function' -%}
                      {%- set function = tool_call.function -%}
-                    {{- '{"' + function.name + '": ' + function.arguments + '}' }}
+                    {{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }}
                      {%- if not loop.last -%}
                          {{- ", " }}
                      {%- endif -%}
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja

new file mode 100755 (executable)

index 0000000..a60a95f
--- /dev/null
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -0,0 +1,172 @@
+{# ---------------------------------------------------------------------- #}
+{# ƛƬ Default setup and flags                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- set messages = messages or [] -%}
+{%- set tools = tools or [] -%}
+{%- set add_generation_prompt = add_generation_prompt or false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%}            {# whether to include <thinking> reasoning blocks #}
+{%- set add_generation_prompt = true -%}      {# whether to emit reasoning starter before assistant response; set unconditionally to true here, overriding the caller-derived value assigned a few lines above #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token or '' -%}
+{%- set eos_token = eos_token or '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix                 #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+    You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+    Analyze each question carefully, present your reasoning step-by-step, then provide the final
+    response after the marker [BEGIN FINAL RESPONSE].
+{%- endset -%}
+{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
+{# ---------------------------------------------------------------------- #}
+{# Tool list and tool call output format                                  #}
+{# ---------------------------------------------------------------------- #}
+{%- if tools|length > 0 -%}
+    {%- set available_tool_string -%}
+        You are provided with function signatures within <available_tools></available_tools> XML tags.
+        You may call one or more functions to assist with the user query.
+        Don't make assumptions about the arguments. You should infer the argument values from previous
+        user responses and the system message.
+        Here are the available tools: 
+        <available_tools>
+        {% for tool in tools %}{{ tool|string }}{% endfor %}
+        
+        </available_tools>.
+
+        Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
+        Each JSON object should contain a function name and arguments as follows:
+        <tool_calls>[
+            {"name": <function-name-1>, "arguments": <args-dict-1>},
+            {"name": <function-name-2>, "arguments": <args-dict-2>},
+            ...
+        ]</tool_calls>
+    {%- endset -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Start system block if first message is not system                      #}
+{# ---------------------------------------------------------------------- #}
+{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
+    {%- if tools|length > 0 -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
+    {%- else -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
+    {%- endif -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Iterate through messages                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- for message in messages -%}
+
+    {# ---------------- USER MESSAGE ---------------- #}
+    {%- if message['role'] == 'user' -%}
+        {{ '<|begin_user|>\n' }}
+        {%- if message['content'] is not string -%}
+            {%- for chunk in message['content'] -%}
+                {%- if chunk['type'] == 'text' -%}
+                    {{ chunk['text'] }}
+                {%- elif chunk['type'] in ['image', 'image_url'] -%}
+                    {{ '[IMG]' }}
+                {%- else -%}
+                    {{ raise_exception('Unrecognized content type!') }}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- else -%}
+            {{ message['content'] }}
+        {%- endif -%}
+
+    {# ---------------- SYSTEM MESSAGE ---------------- #}
+    {%- elif message['role'] == 'system' -%}
+        {%- set sys_content = message.get('content', '') -%}
+        {%- if sys_content and sys_content|length > 0 -%}
+            {%- if sys_content is string -%}
+                {%- set system_message = sys_content -%}
+            {%- else -%}
+                {%- set system_message = sys_content[0]['text'] -%}
+            {%- endif -%}
+        {%- else -%}
+            {%- set system_message = '' -%}
+        {%- endif -%}
+
+        {%- if tools|length > 0 -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
+        {%- else -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
+        {%- endif -%}
+
+    {# ---------------- ASSISTANT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if loop.last -%}
+            {%- set add_tool_id = false -%}
+        {%- endif -%}
+
+        {{ '\n<|begin_assistant|>\n' }}
+
+        {%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
+            {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
+        {%- endif -%}
+
+        {%- set asst_content = message.get('content', '') -%}
+        {%- if asst_content and asst_content|length > 0 -%}
+            {%- if asst_content is not string -%}
+                {%- set asst_text = asst_content[0]['text'] -%}
+            {%- else -%}
+                {%- set asst_text = asst_content -%}
+            {%- endif -%}
+            {# For historical turns (not the last), strip reasoning and keep only final response #}
+            {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
+                {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
+            {%- else -%}
+                {{- asst_text -}}
+            {%- endif -%}
+        {%- elif message.get('chosen') and message['chosen']|length > 0 -%}
+            {{ message['chosen'][0] }}
+        {%- endif -%}
+
+        {# Tool call output #}
+        {%- set tool_calls = message.get('tool_calls', []) -%}
+        {%- if tool_calls and tool_calls|length > 0 -%}
+            {{ '\n<tool_calls>[' }}
+            {%- for tool_call in tool_calls -%}
+                {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }}
+                {%- if add_tool_id == true and 'id' in tool_call -%}
+                    {{ ', "id": "' + tool_call['id'] + '"' }}
+                {%- endif -%}
+                {{ '}' }}
+                {%- if not loop.last -%}{{ ', ' }}{%- endif -%}
+            {%- endfor -%}
+            {{ ']</tool_calls>' }}
+        {%- endif -%}
+
+        {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
+        {%- if not loop.last or training_prompt -%}
+            {{ '\n<|end|>\n' }}
+        {%- endif -%}
+
+    {# ---------------- TOOL RESULT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'tool' -%}
+        {%- set tool_content = message.get('content', '') -%}
+        {%- if tool_content is string -%}
+            {%- set tool_message = tool_content -%}
+        {%- else -%}
+            {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
+        {%- endif -%}
+        {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
+
+    {# ---------------- CONTENT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'content' -%}
+        {%- set msg_content = message.get('content', '') -%}
+        {%- if msg_content is not string -%}
+            {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
+        {%- else -%}
+            {{ '<|begin_content|>\n' + msg_content + '\n' }}
+        {%- endif -%}
+    {%- endif -%}
+
+    {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
+    {%- endif -%} 
+
+{%- endfor -%}
diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja

new file mode 100644 (file)

index 0000000..40ef500
--- /dev/null
+++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja
@@ -0,0 +1,77 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] %}
+    {%- set loop_start_index = 1 %}
+{%- else %}
+    {%- set system_message = "" %}
+    {%- set loop_start_index = 0 %}
+{%- endif %}
+
+{%- if system_message or tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if system_message %}
+        {{- system_message }}
+    {%- endif %}
+    {%- if tools %}
+        {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
+        {{- '[' }}
+        {%- for tool in tools %}
+            {{- tool | tojson }}
+            {%- if not loop.last %}
+                {{- ',\n' }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
+    {%- endif %}
+    {%- if enable_thinking %}
+        {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
+        {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
+        {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
+        {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
+        {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- endif %}
+
+{%- for message in messages[loop_start_index:] %}{# render each message after the optional leading system message as an <|im_start|>role ... <|im_end|> turn #}
+    {%- if message['role'] == 'user' %}
+        {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
+    {%- elif message['role'] == 'assistant' %}
+        {{- '<|im_start|>assistant\n' }}
+        {%- set content = message.content | default('') %}
+        {%- set reasoning_content = message.reasoning_content | default('') %}
+        {%- if not reasoning_content and '<think>' in content and '</think>' in content %}
+            {%- set reasoning_parts = content.split('</think>') %}
+            {%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
+            {%- set content = reasoning_parts[1:] | join('</think>') %}
+        {%- endif %}
+        {%- if reasoning_content %}
+            {{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
+        {%- endif %}
+        {{- content.lstrip() }}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message['role'] == 'tool' %}{# consecutive tool results are grouped into a single user turn; neighbours are looked up in the full messages list, so the slice offset loop_start_index must be added to loop.index0 #}
+        {%- if loop.index0 == 0 or messages[loop_start_index + loop.index0 - 1]['role'] != 'tool' %}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<|function_output|>' + message['content'] }}
+        {%- if loop.last or messages[loop_start_index + loop.index0 + 1]['role'] != 'tool' %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
+\ No newline at end of file
diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja

index 078e9f5458ed5ec9a3f61508d499e21589a401f2..fcf1259d33cbfbecfafd562d3fcc6fdbe2df0bca 100644 (file)
--- a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
@@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea
      {%- elif message.role|lower == 'user' %}
  <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
      {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
      {% for tc in message.tool_calls %}
      {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
  
@@ -153,4 +153,4 @@ The following instructions take precedence over instructions in the default prea
  
  ]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
      {%- endif %}
-{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
-\ No newline at end of file
+{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{%- if not enable_thinking -%}<|START_THINKING|><|END_THINKING|>{%- endif %}
+\ No newline at end of file
diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja

new file mode 100644 (file)

index 0000000..2ab98ef
--- /dev/null
+++ b/models/templates/GLM-4.7-Flash.jinja
@@ -0,0 +1,86 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
+{{ '<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '</think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{- '<tool_call>' + tc.name -}}
+{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '<tool_response>' }}
+{{- m.content }}
+{{- '</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
+{%- endif -%}
+\ No newline at end of file
diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja

new file mode 100644 (file)

index 0000000..3738b3d
--- /dev/null
+++ b/models/templates/LFM2-8B-A1B.jinja
@@ -0,0 +1,47 @@
+{{- bos_token -}}
+{%- set system_prompt = "" -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+       {%- set ns.system_prompt = messages[0]["content"] -%}
+       {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+       {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "You can use the following tools: <|tool_list_start|>[" -%}
+       {%- for tool in tools -%}
+               {%- if tool is not string -%}
+                       {%- set tool = tool | tojson -%}
+               {%- endif -%}
+               {%- set ns.system_prompt = ns.system_prompt + tool -%}
+               {%- if not loop.last -%}
+                       {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+               {%- endif -%}
+       {%- endfor -%}
+       {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
+       {{- '**IMPORTANT**: The syntax for calling the tools is: <|tool_call_start|>JSON tool call goes here<|tool_call_end|>. Please only call tools in the specified manner.' -}}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+       {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+       {{- "<|im_start|>" + message["role"] + "\n" -}}
+       {%- set content = message["content"] -%}
+       {%- if content is not string -%}
+               {%- set content = content | tojson -%}
+       {%- endif -%}
+       {%- if message["role"] == "tool" -%}
+               {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
+       {%- elif message["role"] == "assistant" -%}
+               {%- if message.tool_calls %}
+                       {%- for tool_call in message.tool_calls %}
+                               {%- if tool_call.function %}
+                                       {%- set tool_call = tool_call.function %}
+                               {%- endif %}
+                               {{- '\n<|tool_call_start|>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n<|tool_call_end|>\n' }}
+                       {%- endfor %}
+               {%- endif %}
+       {%- endif -%}
+       {{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+       {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
diff --git a/models/templates/Qwen-QwQ-32B.jinja b/models/templates/Qwen-QwQ-32B.jinja

index d475f7068730e9c4fc2f38f655d9c6f54cd84835..ce314a039f688fcafed91a550fe7af82b9c84387 100644 (file)
--- a/models/templates/Qwen-QwQ-32B.jinja
+++ b/models/templates/Qwen-QwQ-32B.jinja
@@ -59,4 +59,5 @@
  {%- endfor %}
  {%- if add_generation_prompt %}
      {{- '<|im_start|>assistant\n<think>\n' }}
+    {%- if not enable_thinking -%}{{- '</think>' -}}{%- endif -%}
  {%- endif %}
diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja

index 49b0e8d0ee7e655434942233b4b0ab2104d5247e..cde8c0e43db214322881e24a01c90aa8c77860c7 100644 (file)
--- a/models/templates/Qwen3-Coder.jinja
+++ b/models/templates/Qwen3-Coder.jinja
@@ -29,7 +29,7 @@
      {%- endif %}
  {%- endif %}
  {%- if tools is iterable and tools | length > 0 %}
-    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
      {{- "<tools>" }}
      {%- for tool in tools %}
          {%- if tool.function is defined %}
@@ -63,7 +63,7 @@
          {{- '\n</function>' }}
      {%- endfor %}
      {{- "\n</tools>" }}
-    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
  {%- endif %}
  {%- if system_message is defined %}
      {{- '<|im_end|>\n' }}
diff --git a/models/templates/StepFun3.5-Flash.jinja b/models/templates/StepFun3.5-Flash.jinja

new file mode 100644 (file)

index 0000000..c09ea49
--- /dev/null
+++ b/models/templates/StepFun3.5-Flash.jinja
@@ -0,0 +1,80 @@
+{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
+{{bos_token}}{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- render_content(messages[0].content) + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson(ensure_ascii=False) }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content) %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
+        {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = render_content(message.reasoning_content) %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- else %}
+                {%- set reasoning_content = '' %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- if tool_call.arguments is defined %}
+                    {%- set arguments = tool_call.arguments %}
+                    {%- for args_name, args_value in arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>tool_response\n' }}
+        {%- endif %}
+        {{- '<tool_response>' }}
+        {{- content }}
+        {{- '</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n<think>\n' }}
+{%- endif %}
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja

index c2066bd7391c270626e39c9d7124f00360126412..299f7a7ff1244da2e8e7d6006efa40c6e77849d6 100644 (file)
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
@@ -1 +1,44 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% 
endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
-\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is none -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
+\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja

index c2066bd7391c270626e39c9d7124f00360126412..9e6ec845d3901690b3a714cdd9ab1e56be01ef79 100644 (file)
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
@@ -1 +1,47 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% 
endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
-\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}
+        {{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+      {%- else -%}
+        {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+    {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}{% if not enable_thinking %}{{- '</think>' -}}{% endif %}
+{%- endif %}
+\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja

index e5656196a3f0f379499b2a3aee8805332cd297ea..2fd1c415b88496b633b649bbfe3fdc645a18d3bd 100644 (file)
--- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
@@ -1,3 +1,71 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+  {%- if enable_thinking is defined -%}
+    {%- set thinking = enable_thinking -%}
+    {%- else -%}
+    {%- set thinking = false -%}
+  {%- endif -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- if ns.is_first_sp -%}
+      {%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
+      {%- set ns.is_first_sp = false -%}
+      {%- else -%}
+      {%- set ns.system_prompt = ns.system_prompt + '
  
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<｜Assistant｜></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- else %}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- endfor %}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<｜Assistant｜>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}  {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true 
-%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<｜Assistant｜>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
-\ No newline at end of file
+' + message['content'] -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜></think>'}}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls'] -%}
+      {%- if not ns.is_first -%}
+        {%- if not message['content'] -%}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+          {%- else -%}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+        {%- endif -%}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜>'}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
+        {%- else -%}{{'</think>'}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- if ns.is_tool -%}{{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>', 1)[1] -%}
+      {%- endif -%}{{content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+  {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<｜Assistant｜>'}}
+  {%- if not thinking -%}{{'</think>'}}
+    {%- else -%}{{'<think>'}}
+  {%- endif -%}
+{%- endif %}
+\ No newline at end of file
diff --git a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja

index 9b8136df73b4d52917afe5cb435d46b71ada8162..b94cfd4d9bd40378b5d111d564efc8c9675bf0fc 100644 (file)
--- a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
+++ b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
@@ -46,7 +46,7 @@ Available functions as JSON spec:
      {%- if 'tool_calls' in message and message['tool_calls'] -%}
        {%- set tool = namespace(calls=[]) -%}
        {%- for call in message['tool_calls'] -%}
-        {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%}
+        {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%}
        {%- endfor -%}
        {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%}
      {%- endif -%}
diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja

index ecb49a210852c024b89f03b4949708841ecb7f60..e286d8a7b5bdfc63579557255fe4ea5c28499233 100644 (file)
--- a/models/templates/moonshotai-Kimi-K2.jinja
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -1,43 +1,43 @@
-{%- if tools -%}\r
-  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>\r
-{%- endif -%}\r
-{%- for message in messages -%}\r
-  {%- if loop.first and messages[0]['role'] != 'system' -%}\r
-    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>\r
-  {%- endif -%}\r
-  {%- if message['role'] == 'system' -%}\r
-    <|im_system|>system<|im_middle|>\r
-  {%- elif message['role'] == 'user' -%}\r
-    <|im_user|>user<|im_middle|>\r
-  {%- elif message['role'] == 'assistant' -%}\r
-    <|im_assistant|>assistant<|im_middle|>\r
-  {%- elif message['role'] == 'tool' -%}\r
-    <|im_system|>tool<|im_middle|>\r
-  {%- endif -%}\r
-  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}\r
-    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}\r
-    <|tool_calls_section_begin|>\r
-    {%- for tool_call in message['tool_calls'] -%}\r
-      {%- set func_name = tool_call['function']['name'] -%}\r
-      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}\r
-      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>\r
-    {%- endfor -%}\r
-    <|tool_calls_section_end|>\r
-  {%- elif message['role'] == 'tool' -%}\r
-    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}\r
-  {%- elif message['content'] is string -%}\r
-    {{ message['content'] }}\r
-  {%- elif message['content'] is not none -%}\r
-    {% for content in message['content'] -%}\r
-      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\r
-        <|media_start|>image<|media_content|><|media_pad|><|media_end|>\r
-      {% else -%}\r
-        {{ content['text'] }}\r
-      {%- endif -%}\r
-    {%- endfor -%}\r
-  {%- endif -%}\r
-  <|im_end|>\r
-{%- endfor -%}\r
-{%- if add_generation_prompt -%}\r
-  <|im_assistant|>assistant<|im_middle|>\r
-{%- endif -%}\r
+{%- if tools -%}
+  <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+  {%- if loop.first and messages[0]['role'] != 'system' -%}
+    <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+  {%- endif -%}
+  {%- if message['role'] == 'system' -%}
+    <|im_system|>system<|im_middle|>
+  {%- elif message['role'] == 'user' -%}
+    <|im_user|>user<|im_middle|>
+  {%- elif message['role'] == 'assistant' -%}
+    <|im_assistant|>assistant<|im_middle|>
+  {%- elif message['role'] == 'tool' -%}
+    <|im_system|>tool<|im_middle|>
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+    {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+    <|tool_calls_section_begin|>
+    {%- for tool_call in message['tool_calls'] -%}
+      {%- set func_name = tool_call['function']['name'] -%}
+      {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+      <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+    {%- endfor -%}
+    <|tool_calls_section_end|>
+  {%- elif message['role'] == 'tool' -%}
+    ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+  {%- elif message['content'] is string -%}
+    {{ message['content'] }}
+  {%- elif message['content'] is not none -%}
+    {% for content in message['content'] -%}
+      {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+        <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+      {% else -%}
+        {{ content['text'] }}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  <|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+  <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja

index 29e582fbf6355cccd87a693ab1cc90124bdcfe56..1639b639015575fc3fc0459071ca8fc5b32b0fa5 100644 (file)
--- a/models/templates/unsloth-Apriel-1.5.jinja
+++ b/models/templates/unsloth-Apriel-1.5.jinja
@@ -86,22 +86,22 @@ Prior to generating the function calls, you should generate the reasoning for wh
              {%- set add_tool_id = false -%}
          {%- endif -%}
          {{- '<|assistant|>\n' -}}
-        {%- if message['content'] is not none and message['content']|length > 0 -%}
+        {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
              {%- if message['content'] is not string and message['content'][0]['text'] is not none %}
                  {{- message['content'][0]['text'] }}
              {%- else %}
                  {{- message['content'] -}}
              {%- endif -%}
-        {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
+        {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
              {{- message['chosen'][0] -}}
          {%- endif -%}
          {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
              {{- '<thinking>' + message['thought'] + '</thinking>' -}}
          {%- endif -%}
-        {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
+        {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
              {{- '\n<tool_calls>[' -}}
              {%- for tool_call in message["tool_calls"] -%}
-                {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
+                {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}}
                  {%- if add_tool_id == true -%}
                      {{- ', "id": "' + tool_call['id'] + '"' -}}
                  {%- endif -%}
diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py

new file mode 100644 (file)

index 0000000..9049d80
--- /dev/null
+++ b/scripts/server-test-model.py
@@ -0,0 +1,202 @@
+import argparse
+import json
+import requests
+import logging
+import sys
+
+# Log straight to stdout with an empty record terminator, so that chunks logged
+# one after another are printed back-to-back. As a consequence, every message
+# that should end a line has to carry its own "\n".
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = ""   # nothing (no newline) is appended after each record
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def _merge_tool_call_delta(tool_calls, tc):
+    """Fold one streamed tool-call fragment into the accumulated list.
+
+    Args:
+        tool_calls: list of tool-call dicts built so far; extended and mutated in place.
+        tc: a single entry of ``delta["tool_calls"]`` taken from a streamed chunk.
+    """
+    index = tc.get("index")
+    if index is None:
+        return
+    while len(tool_calls) <= index:
+        # Using "function" as type default but could be flexible
+        tool_calls.append(
+            {
+                "id": "",
+                "type": "function",
+                "function": {"name": "", "arguments": ""},
+            }
+        )
+
+    call = tool_calls[index]
+    # A fragment may omit a field or carry it as null; both mean "nothing to append".
+    call["id"] += tc.get("id") or ""
+    function = tc.get("function") or {}
+    call["function"]["name"] += function.get("name") or ""
+    call["function"]["arguments"] += function.get("arguments") or ""
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+    """POST a chat-completion request and collect the reply.
+
+    Args:
+        url: full URL of the chat completions endpoint.
+        messages: list of chat messages sent as the "messages" field.
+        tools: optional tool definitions; only sent when truthy.
+        stream: when True, read the reply as "data: ..." lines and log chunks as they arrive.
+        tool_choice: optional "tool_choice" value; only sent when truthy.
+
+    Returns:
+        A dict with "content" (str), "reasoning_content" (str) and "tool_calls" (list),
+        or None when the request failed (the error is logged).
+    """
+    payload = {
+        "messages": messages,
+        "stream": stream,
+        "max_tokens": 5000,
+    }
+    if tools:
+        payload["tools"] = tools
+    if tool_choice:
+        payload["tool_choice"] = tool_choice
+
+    try:
+        response = requests.post(url, json=payload, stream=stream)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        if e.response is not None:
+            logger.info(f"Response error: {e} for {e.response.content}\n")
+        else:
+            logger.info(f"Error connecting to server: {e}\n")
+        return None
+
+    full_content = ""
+    reasoning_content = ""
+    tool_calls = []
+
+    if stream:
+        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+        try:
+            for line in response.iter_lines():
+                if not line:
+                    continue
+                decoded_line = line.decode("utf-8")
+                if not decoded_line.startswith("data: "):
+                    continue
+                data_str = decoded_line[6:]
+                if data_str == "[DONE]":
+                    break
+                try:
+                    data = json.loads(data_str)
+                except json.JSONDecodeError:
+                    logger.info(f"Failed to decode JSON: {data_str}\n")
+                    continue
+
+                if "choices" not in data or len(data["choices"]) == 0:
+                    continue
+                delta = data["choices"][0].get("delta") or {}
+
+                # Content
+                content_chunk = delta.get("content", "")
+                if content_chunk:
+                    full_content += content_chunk
+                    logger.info(content_chunk)
+
+                # Reasoning (logged in italics via ANSI escape codes)
+                reasoning_chunk = delta.get("reasoning_content", "")
+                if reasoning_chunk:
+                    reasoning_content += reasoning_chunk
+                    logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+                # Tool calls (the field may be missing or null)
+                for tc in delta.get("tool_calls") or []:
+                    _merge_tool_call_delta(tool_calls, tc)
+        finally:
+            # The loop can exit before the body is fully consumed (break on "[DONE]"
+            # or an exception), so release the connection explicitly.
+            response.close()
+        logger.info("\n--- End of Stream ---\n")
+    else:
+        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+        data = response.json()
+        if "choices" in data and len(data["choices"]) > 0:
+            message = data["choices"][0].get("message") or {}
+            # Fields can be present but null (e.g. "content" on a pure tool-call
+            # reply); normalise them so callers always get str / list values.
+            full_content = message.get("content") or ""
+            reasoning_content = message.get("reasoning_content") or ""
+            tool_calls = message.get("tool_calls") or []
+            logger.info(full_content)
+        # Leading newline: the handler adds no terminator, so without it the
+        # marker would be glued to the content (matches the streaming marker).
+        logger.info("\n--- End of Response ---\n")
+
+    return {
+        "content": full_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls,
+    }
+
+
+def test_chat(url, stream):
+    """Send a plain chat question and log whether content and reasoning came back.
+
+    Args:
+        url: chat completions endpoint passed through to run_query.
+        stream: whether to request a streamed reply.
+    """
+    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+    prompt = [{"role": "user", "content": "What is the capital of France?"}]
+    outcome = run_query(url, prompt, stream=stream)
+
+    # Guard clause: run_query yields nothing usable when the request failed.
+    if not outcome:
+        logger.info("FAIL: No result.\n")
+        return
+
+    content_verdict = (
+        "PASS: Output received.\n"
+        if outcome["content"]
+        else "WARN: No content received (valid if strict tool call, but unexpected here).\n"
+    )
+    logger.info(content_verdict)
+
+    reasoning = outcome.get("reasoning_content")
+    reasoning_verdict = (
+        f"INFO: Reasoning content detected ({len(reasoning)} chars).\n"
+        if reasoning
+        else "INFO: No reasoning content detected (Standard model behavior).\n"
+    )
+    logger.info(reasoning_verdict)
+
+
+def test_tool_call(url, stream):
+    """Ask a question that should trigger the get_weather tool and log the outcome.
+
+    Args:
+        url: chat completions endpoint passed through to run_query.
+        stream: whether to request a streamed reply.
+    """
+    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the weather in London? Please use the get_weather tool.",
+        }
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+
+    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+    if result:
+        tcs = result.get("tool_calls")
+        if tcs and len(tcs) > 0:
+            # The log handler appends no terminator, so the newline must be part
+            # of the message or the first "Tool:" line runs onto this one.
+            logger.info("PASS: Tool calls detected.\n")
+            for tc in tcs:
+                func = tc.get("function", {})
+                logger.info(f"  Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+        else:
+            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+        if result.get("reasoning_content"):
+            logger.info(
+                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+            )
+    else:
+        logger.info("FAIL: Query failed.\n")
+
+
+def main():
+    """Parse --host/--port and run the chat and tool-call checks in both modes."""
+    cli = argparse.ArgumentParser(description="Test llama-server functionality.")
+    cli.add_argument("--host", default="localhost", help="Server host")
+    cli.add_argument("--port", default=8080, type=int, help="Server port")
+    opts = cli.parse_args()
+
+    endpoint = f"http://{opts.host}:{opts.port}/v1/chat/completions"
+    logger.info(f"Testing server at {endpoint}\n")
+
+    # Non-streaming pass first, then the streaming pass; each pass runs the
+    # plain chat check followed by the tool-call check.
+    for streaming in (False, True):
+        test_chat(endpoint, stream=streaming)
+        test_tool_call(endpoint, stream=streaming)
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt

index 7e0b17a7c1fdedd4f9ade750e629db8946b192de..46ab7a0cef0615aace32ad723dc52ec03fb3b7e0 100644 (file)
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -187,11 +187,11 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
      # llama_build_and_test(test-double-float.cpp) # SLOW
  endif()
  
-llama_build_and_test(test-chat-parser.cpp)
  llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
-llama_build_and_test(test-chat-template.cpp)
  llama_build_and_test(test-jinja.cpp)
  llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
+llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+llama_build_and_test(test-chat-template.cpp)
  llama_build_and_test(test-json-partial.cpp)
  llama_build_and_test(test-log.cpp)
  llama_build_and_test(
@@ -201,6 +201,7 @@ llama_build_and_test(
      peg-parser/test-gbnf-generation.cpp
      peg-parser/test-json-parser.cpp
      peg-parser/test-json-serialization.cpp
+    peg-parser/test-python-dict-parser.cpp
      peg-parser/test-unicode.cpp
      peg-parser/tests.h
  )
@@ -279,3 +280,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama)
  
  llama_build_and_test(test-alloc.cpp)
  target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
+
+
diff --git a/tests/peg-parser/test-basic.cpp b/tests/peg-parser/test-basic.cpp

index 1bda6f2e6906df952ab3999bf00a790bdd25fd39..872f16a78df26921a528e4e2a232e05bffe4c460 100644 (file)
--- a/tests/peg-parser/test-basic.cpp
+++ b/tests/peg-parser/test-basic.cpp
@@ -1,3 +1,4 @@
+#include "peg-parser.h"
  #include "tests.h"
  
  void test_basic(testing & t) {
@@ -450,5 +451,21 @@ void test_basic(testing & t) {
  
              t.assert_equal("result_is_fail", true, result.fail());
          });
+
+        // Test markers
+        t.test("marker", [](testing &t) {
+            auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) {
+                return p.marker();
+            });
+
+            common_peg_parse_context ctx_square("[marker]", false);
+            common_peg_parse_context ctx_sharp("<marker>", false);
+
+            auto result_square = bracket_parser.parse(ctx_square);
+            auto result_sharp = bracket_parser.parse(ctx_sharp);
+
+            t.assert_true("result_square_is_success", result_square.success());
+            t.assert_true("result_sharp_is_success", result_sharp.success());
+        });
      });
  }
diff --git a/tests/peg-parser/test-python-dict-parser.cpp b/tests/peg-parser/test-python-dict-parser.cpp

new file mode 100644 (file)

index 0000000..d9946a4
--- /dev/null
+++ b/tests/peg-parser/test-python-dict-parser.cpp
@@ -0,0 +1,318 @@
+#include "tests.h"
+
+void test_python_dict_parser(testing &t) {
+    // Test parsing a simple Python dict object with single quotes
+    t.test("simple Python dict object parsing", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test', 'value': 42, 'flag': True}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing a Python array with mixed types
+    t.test("Python array with mixed types", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "[1, 'hello', True, None, 3.14]";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing nested Python dict with objects and arrays
+    t.test("nested Python dict with objects and arrays", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string input =
+            "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with escaped single quotes
+    t.test("Python dict with escaped single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'message': 'It\\'s working!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with double quotes inside single quotes
+    t.test("Python dict with double quotes inside single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'quote': 'He said \"Hello\"'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
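+    // Test the example from the requirements: nested dict with padded delimiters, double quotes inside a key, and an escaped single quote in a value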
+    t.test("complex Python dict example from requirements", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test need_more_input() parsing - incomplete object
+    t.test("need_more_input() parsing - incomplete object", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test', 'value': ";
+        common_peg_parse_context ctx(input, true);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test need_more_input() parsing - incomplete single-quoted string
+    t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test";
+        common_peg_parse_context ctx(input, true);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test unicode in Python dict strings
+    t.test("unicode in Python dict strings", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'message': 'Hello, 世界!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python dict with unicode escapes
+    t.test("Python dict with unicode escapes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'unicode': 'Hello\\u0041'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test that Python parser accepts double-quoted strings too
+    t.test("Python parser accepts double-quoted strings", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{\"name\": \"test\"}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python parser with mixed quote styles
+    t.test("Python parser with mixed quote styles", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{\"name\": 'test', 'value': \"hello\"}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python True/False/None
+    t.test("Python True/False/None", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        t.test("True", [&](testing &t) {
+            std::string input = "True";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("False", [&](testing &t) {
+            std::string input = "False";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("None", [&](testing &t) {
+            std::string input = "None";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("rejects JSON-style true/false/null", [&](testing &t) {
+            for (const auto & kw : {"true", "false", "null"}) {
+                std::string input = kw;
+                common_peg_parse_context ctx(input);
+                auto result = parser.parse(ctx);
+                t.assert_true(std::string("rejects ") + kw, result.fail());
+            }
+        });
+    });
+
+    // Test single-quoted string content parser directly
+    t.test("single-quoted string content parser", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.sequence({ p.literal("'"), p.single_quoted_string_content(), p.literal("'"), p.space() });
+        });
+
+        t.test("simple string", [&](testing &t) {
+            std::string input = "'hello'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with escaped single quote", [&](testing &t) {
+            std::string input = "'it\\'s'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with double quotes", [&](testing &t) {
+            std::string input = "'say \"hello\"'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("incomplete string", [&](testing &t) {
+            std::string input = "'hello";
+            common_peg_parse_context ctx(input, true);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("need_more_input", result.need_more_input());
+        });
+    });
+
+    // Test json() with pre-registered flexible json-string rule (python dict support)
+    t.test("json() parser with flexible json-string rule", [](testing &t) {
+        t.test("json() rejects single quotes by default", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("fail", result.fail());
+        });
+
+        t.test("json() accepts single quotes with pre-registered flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                // Pre-register json-string rule with both quote styles
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() still accepts double quotes with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{\"name\": \"test\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() accepts mixed quote styles with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{\"name\": 'test', 'value': \"hello\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("complex nested structure with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+    });
+}
diff --git a/tests/peg-parser/tests.h b/tests/peg-parser/tests.h

index 4d3f4e9eaf5309966fdf0e8c5d9c5e8d0f67c8b0..debd4286c50afe9d7d7969c84ab4d10b02c9152b 100644 (file)
--- a/tests/peg-parser/tests.h
+++ b/tests/peg-parser/tests.h
@@ -22,3 +22,4 @@ void test_json_parser(testing &t);
  void test_gbnf_generation(testing &t);
  void test_unicode(testing &t);
  void test_json_serialization(testing &t);
+void test_python_dict_parser(testing &t);
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp

index c4b9540f4f40da49e135eadc876ddc8ed6b2c469..faa771e0869d85e025172d5c7fdddb52c1d332a8 100644 (file)
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -7820,6 +7820,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
          test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1,  1}, {1, 1}));
      }
  
+    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 6, 4096, 5120, {1, 1}, {1, 1}));
+
  #if 0
      // test the mat-mat path for Metal
      for (int k = 1; k < 512; ++k) {
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp

new file mode 100644 (file)

index 0000000..f236486
--- /dev/null
+++ b/tests/test-chat-auto-parser.cpp
@@ -0,0 +1,1889 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "peg-parser.h"
+#include "testing.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+using namespace autoparser;
+
+static void test_calculate_diff_split_basic(testing & t);
+static void test_calculate_diff_split_identical(testing & t);
+static void test_calculate_diff_split_common_prefix(testing & t);
+static void test_calculate_diff_split_common_suffix(testing & t);
+static void test_calculate_diff_split_common_both(testing & t);
+static void test_calculate_diff_split_empty_cases(testing & t);
+static void test_calculate_diff_split_no_common(testing & t);
+static void test_calculate_diff_split_single_char(testing & t);
+static void test_calculate_diff_split_overlaps(testing & t);
+static void test_calculate_diff_split_tag_boundaries(testing & t);
+static void test_calculate_diff_split(testing & t);
+
+static void test_until_common_prefix_basic(testing & t);
+static void test_until_common_prefix(testing & t);
+
+static void test_after_common_suffix_basic(testing & t);
+static void test_after_common_suffix(testing & t);
+
+static void test_analyze_tool_call_pure_json(testing & t);
+static void test_analyze_tool_call_function_name_markers(testing & t);
+static void test_analyze_tool_call_full_markers(testing & t);
+static void test_analyze_tool_call_edge_cases(testing & t);
+
+static void test_compare_variants_basic(testing & t);
+static void test_compare_variants_messages_modifier(testing & t);
+static void test_compare_variants_tools_modifier(testing & t);
+static void test_compare_variants_both_modifiers(testing & t);
+static void test_compare_variants_template_failure(testing & t);
+static void test_compare_variants_identity(testing & t);
+static void test_compare_variants(testing & t);
+
+// Seed-OSS template tool calling analysis tests
+static void test_seed_oss_tool_analysis(testing & t);
+static void test_seed_oss_tool_presence(testing & t);
+static void test_seed_oss_call_count(testing & t);
+static void test_seed_oss_function_names(testing & t);
+static void test_seed_oss_argument_count(testing & t);
+static void test_seed_oss_args_presence(testing & t);
+static void test_seed_oss_tool_with_reasoning(testing & t);
+
+// Nemotron template analysis tests
+static void test_nemotron_analysis(testing & t);
+static void test_nemotron_reasoning_detection(testing & t);
+static void test_nemotron_tool_format(testing & t);
+
+// CohereForAI template analysis tests
+static void test_cohere_reasoning_detection(testing & t);
+static void test_cohere_analysis(testing & t);
+
+// Marker separation
+static void test_marker_separation(testing & t);
+
+// standard_json_tools format tests
+static void test_standard_json_tools_formats(testing & t);
+static void test_standard_json_tools_openai(testing & t);
+static void test_standard_json_tools_cohere(testing & t);
+static void test_standard_json_tools_function_key(testing & t);
+
+// normalize_quotes_to_json tests
+static void test_normalize_quotes_to_json(testing & t);
+static void test_normalize_quotes_with_embedded_quotes(testing & t);
+
+// TAG_WITH_TAGGED argument parsing tests
+static void test_tagged_args_with_embedded_quotes(testing & t);
+
+int main(int argc, char * argv[]) {
+    testing t(std::cout);
+    t.verbose = true;
+
+    // usage: test-chat-auto-parser [filter_regex]
+
+    if (argc > 1) {
+        t.set_filter(argv[1]);
+    }
+
+    t.test("diff_split", test_calculate_diff_split);
+    t.test("common_prefix", test_until_common_prefix);
+    t.test("common_suffix", test_after_common_suffix);
+    t.test("compare_variants", test_compare_variants);
+    t.test("segments", test_marker_separation);
+    t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
+    t.test("cohere", test_cohere_analysis);
+    t.test("nemotron", test_nemotron_analysis);
+    t.test("standard_json_tools", test_standard_json_tools_formats);
+    t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
+    t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
+
+    return t.summary();
+}
+
+static void test_marker_separation(testing & t) {
+    auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker");
+    auto single_diag_marker = segmentize_markers("pre_marker<marker>post_marker");
+    auto paired_markers = segmentize_markers("<hello>world</hello>");
+    auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
+    auto in_between = segmentize_markers("im<blue>daba<dee>da[hey]");
+
+    t.test("single_square_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
+        t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
+    });
+
+    t.test("single_diagonal_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
+        t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
+    });
+
+    t.test("paired_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
+        t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
+        t.assert_equal("second is 'world'", "world", paired_markers[1].value);
+        t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
+    });
+
+    t.test("double_different_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
+        t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
+        t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
+        t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
+    });
+
+    t.test("in_between", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
+        t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
+        t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
+        t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);
+
+        t.assert_equal("first is 'im'", "im", in_between[0].value);
+        t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
+        t.assert_equal("third is 'daba'", "daba", in_between[2].value);
+        t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
+        t.assert_equal("fifth is 'da'", "da", in_between[4].value);
+        t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
+    });
+}
+
+static void test_calculate_diff_split(testing & t) {
+    t.test("calculate_diff_split basic", test_calculate_diff_split_basic);
+    t.test("calculate_diff_split identical", test_calculate_diff_split_identical);
+    t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix);
+    t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix);
+    t.test("calculate_diff_split common both", test_calculate_diff_split_common_both);
+    t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases);
+    t.test("calculate_diff_split no common", test_calculate_diff_split_no_common);
+    t.test("calculate_diff_split single char", test_calculate_diff_split_single_char);
+    t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps);
+    t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries);
+}
+
+static void test_calculate_diff_split_basic(testing & t) {
+    diff_split result = calculate_diff_split("hello world", "hello test");
+    t.assert_equal("prefix should be 'hello '", "hello ", result.prefix);
+    t.assert_equal("left should be 'world'", "world", result.left);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("prefixA suffix", "prefixB suffix");
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_identical(testing & t) {
+    diff_split result = calculate_diff_split("hello", "hello");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("<row><row><row><your><boat><gently>", "<row><row><row><your><boat><gently>");
+    t.assert_equal("prefix should be '<row><row><row><your><boat><gently>'", "<row><row><row><your><boat><gently>", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_prefix(testing & t) {
+    diff_split result = calculate_diff_split("abcdef", "abcxyz");
+    t.assert_equal("prefix should be 'abc'", "abc", result.prefix);
+    t.assert_equal("left should be 'def'", "def", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("same", "sameagain");
+    t.assert_equal("prefix should be 'same'", "same", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'again'", "again", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_suffix(testing & t) {
+    diff_split result = calculate_diff_split("123end", "456end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    result = calculate_diff_split("start", "end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'start'", "start", result.left);
+    t.assert_equal("right should be 'end'", "end", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abcsuffix", "xyzsuffix");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_common_both(testing & t) {  // shared head and tail: only the differing middle goes to left/right
+    diff_split result = calculate_diff_split("helloXworld", "helloYworld");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'world'", "world", result.suffix);
+
+    result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
+    t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix);
+    t.assert_equal("left should be 'middle'", "middle", result.left);
+    t.assert_equal("right should be 'different'", "different", result.right);
+    t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix);
+
+    result = calculate_diff_split("startAend", "startBend");
+    t.assert_equal("prefix should be 'start'", "start", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    // Edge case: two-char inputs sharing the first char only; the last chars differ, so no suffix is expected
+    result = calculate_diff_split("aa", "ab");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_empty_cases(testing & t) {  // an empty operand shares nothing: prefix and suffix must stay empty
+    // Empty left, non-empty right
+    diff_split result = calculate_diff_split("", "hello");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'hello'", "hello", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Non-empty left, empty right
+    result = calculate_diff_split("hello", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'hello'", "hello", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Both empty: all four parts are expected to be empty
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Left single char, empty right
+    result = calculate_diff_split("a", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Empty left, right single char
+    result = calculate_diff_split("", "a");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'a'", "a", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_no_common(testing & t) {  // inputs with no shared head; the middle case still shares one trailing char
+    diff_split result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("left", "right");
+    // Both words end with 't', so a one-character common suffix is expected here
+    // even though the words are otherwise unrelated (the suffix match is maximal, not semantic)
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'lef'", "lef", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be 't'", "t", result.suffix);
+
+    result = calculate_diff_split("123", "456");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_single_char(testing & t) {  // one- and two-char inputs: distinct, identical, and one extending the other
+    diff_split result = calculate_diff_split("a", "b");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");  // identical inputs: the shared text is expected in prefix, not suffix
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "ab");  // right extends left by one char
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("ab", "a");  // mirrored: left extends right by one char
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'b'", "b", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_overlaps(testing & t) {  // inputs that largely overlap: extension, one-char differences, reversal
+    // One string is a prefix of the other
+    diff_split result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("testing", "test");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'ing'", "ing", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Strings that differ only in their first char
+    result = calculate_diff_split("Xtest", "Ytest");
+    // Both strings end with "test", so the whole shared tail is expected as the suffix
+    // and the prefix stays empty because the first characters already differ
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'test'", "test", result.suffix);
+
+    // Strings that differ only in their last char
+    result = calculate_diff_split("testX", "testY");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Strings that are reverses of each other: first and last chars both differ, so nothing is shared
+    result = calculate_diff_split("abc", "cba");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'cba'", "cba", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_tag_boundaries(testing & t) {  // splits must not cut through tag-like tokens (<...>, [...], <|...|>)
+    // Test with an unclosed '<' on one side and a stray '>' on the other
+    diff_split result = calculate_diff_split("test<tag", "test>content");
+    // Only loose properties are checked here: the prefix begins with "test" and something differs
+    t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0);
+    t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with unclosed brackets
+    result = calculate_diff_split("test[", "test]value");
+    t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with an opening tag on one side and a closing tag on the other
+    result = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
+    // The inputs share "prefix<", but the '<' is expected to stay with the tags in left/right
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be '<tag>'", "<tag>", result.left);
+    t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with complex nested tags
+    result = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
+    // Algorithm finds "ent</div>" as a common suffix because both strings end with it
+    // This is the actual algorithm behavior, though not semantically ideal
+    t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", result.prefix);
+    t.assert_equal("left should be 'cont'", "cont", result.left);
+    t.assert_equal("right should be 'differ'", "differ", result.right);
+    t.assert_equal("suffix should be 'ent</div>'", "ent</div>", result.suffix);
+
+    // Test with a complete angle-bracket tag on one side and plain text on the other
+    result = calculate_diff_split("Hello <world>", "Hello test");
+    t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix);
+    t.assert_true("left should contain '<world>'", result.left.find("<world>") != std::string::npos);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with a complete square-bracket token on one side and plain text on the other
+    result = calculate_diff_split("test [array]", "test other");
+    t.assert_equal("prefix should be 'test '", "test ", result.prefix);
+    t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos);
+    t.assert_equal("right should be 'other'", "other", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test whole tags as the shared prefix and suffix, with only the enclosed text differing
+    result = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
+    t.assert_equal("prefix should be '<tag>'", "<tag>", result.prefix);
+    t.assert_equal("left should be 'left'", "left", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be '</tag>'", "</tag>", result.suffix);
+
+    {
+        // real case from template tests, simplified: the inputs share "PREFIX<", yet '<' must not end up in the prefix
+        std::string left  = "PREFIX</think>Sure";
+        std::string right = "PREFIX<think>Lemme think</think>Sure";
+        result            = calculate_diff_split(left, right);
+        t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix);
+        t.assert_equal("suffix should be </think>Sure", "</think>Sure", result.suffix);
+        t.assert_equal("left should be empty", "", result.left);
+        t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", result.right);
+    }
+
+    {
+        // Real case: special tokens with |> boundary issue
+        // The raw common suffix starts with |>, which should be moved to complete <|END_RESPONSE and <|END_ACTION
+        std::string prefix    = "SOME_PREFIX";
+        std::string suffix    = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
+        std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
+        std::string right_diff =
+            "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
+            "    {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
+            "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
+            "]<|END_ACTION";
+
+        std::string left  = prefix + left_diff + suffix;
+        std::string right = prefix + right_diff + suffix;
+        result            = calculate_diff_split(left, right);
+
+        t.assert_equal("special token prefix", prefix, result.prefix);
+        // The |> should be moved from suffix to complete the tokens (the right side is only checked for containment)
+        t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left);
+        t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos);
+        t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+                       result.suffix);
+    }
+}
+
+static void test_until_common_prefix(testing & t) {  // registers the until_common_prefix sub-tests on t
+    t.test("until_common_prefix basic", test_until_common_prefix_basic);
+}
+
+static void test_until_common_prefix_basic(testing & t) {
+    // Canonical example: expect the part of the first argument that precedes the prefix shared by the other two
+    std::string result = until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
+    t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", result);
+
+    // Additional test cases to ensure robustness
+    // Test with different common prefix lengths
+    result = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
+    t.assert_equal("should return 'prefix'", "prefix", result);
+
+    // Test when common prefix is at the start
+    result = until_common_prefix("<common>rest", "<common>left", "<common>right");
+    t.assert_equal("should return empty string when common prefix at start", "", result);
+
+    // Test when there's no common prefix ("left" and "right" differ in their first char)
+    result = until_common_prefix("something", "left", "right");
+    t.assert_equal("should return empty string when no common prefix", "", result);
+
+    // Test with an empty left string
+    result = until_common_prefix("test", "", "right");
+    t.assert_equal("should return empty string when left is empty", "", result);
+
+    // Test with longer common prefix
+    result = until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
+    t.assert_equal("should return 'abcXYZ'", "abcXYZ", result);
+}
+
+static void test_after_common_suffix(testing & t) {  // registers the after_common_suffix sub-tests on t
+    t.test("after_common_suffix basic", test_after_common_suffix_basic);
+}
+
+static void test_after_common_suffix_basic(testing & t) {
+    // Canonical example: expect the part of the first argument that follows the suffix shared by the other two
+    std::string result = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
+                                            "<arg name=bar>100</arg>",
+                                            "<arg name=baz>535</arg>");
+    t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", result);
+
+    // Test when common suffix is at the end
+    result = after_common_suffix("rest<common>", "left<common>", "right<common>");
+    t.assert_equal("should return empty string when common suffix at end", "", result);
+
+    // Test with an empty right string
+    result = after_common_suffix("test", "left", "");
+    t.assert_equal("should return empty string when right is empty", "", result);
+
+    // Test case with XML-like structure similar to the main example
+    result = after_common_suffix("<outer><inner>value</inner></outer>",
+                                "<inner>value</inner>",
+                                "<inner>different</inner>");
+    t.assert_equal("should return '</outer>'", "</outer>", result);
+
+    // Test with longer common suffix appearing at the end of full
+    result = after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
+    t.assert_equal("should return '' when common suffix is at end of full", "", result);
+
+    // Test with common suffix appearing in middle but not at end
+    result = after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
+    t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", result);
+
+    // Test with multi-character common suffix at the very end of full
+    result = after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
+    t.assert_equal("should return '' when common suffix </middle> is at end of full", "", result);
+}
+
+static void test_compare_variants(testing & t) {  // registers the compare_variants sub-tests on t
+    t.test("compare_variants basic", test_compare_variants_basic);
+    t.test("compare_variants messages modifier", test_compare_variants_messages_modifier);
+    t.test("compare_variants tools modifier", test_compare_variants_tools_modifier);
+    t.test("compare_variants both modifiers", test_compare_variants_both_modifiers);
+    t.test("compare_variants template failure", test_compare_variants_template_failure);
+    t.test("compare_variants identity", test_compare_variants_identity);
+}
+
+static void test_compare_variants_basic(testing & t) {
+    // Create a simple template that just echoes the first message's content
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    // Deliberately loose check: only requires that some rendered text landed in prefix or left,
+    // without pinning the exact split
+    t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
+}
+
+static void test_compare_variants_messages_modifier(testing & t) {
+    // Modifier changes only the message content ('A' -> 'B'); the diff should isolate exactly that
+    common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+    };
+
+    std::optional<compare_variants_result> result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'A'", "A", result->diff.left);
+    t.assert_equal("right should be 'B'", "B", result->diff.right);
+}
+
+static void test_compare_variants_tools_modifier(testing & t) {
+    // Modifier renames the single tool ('foo' -> 'bar'); the template renders tool names only
+    common_chat_template tmpl(
+        "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.tools = json::array({
+        json {{"name", "foo"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.tools[0]["name"] = "bar";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'foo'", "foo", result->diff.left);
+    t.assert_equal("right should be 'bar'", "bar", result->diff.right);
+}
+
+static void test_compare_variants_both_modifiers(testing & t) {
+    // Test a modifier that changes two message fields at once (role and content); tools are not touched here
+    common_chat_template tmpl(
+        "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+        p.messages[0]["role"] = "newuser";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);  // whole "role:content" text differs
+    t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
+}
+
+static void test_compare_variants_template_failure(testing & t) {
+    // Test with template that causes failure during application (not construction)
+    // We use a valid template syntax but one that will fail during application
+    common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");  // field is absent from the message below
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should be nullopt on template failure", !result.has_value());
+}
+
+static void test_compare_variants_identity(testing & t) {
+    // Test with identity modifier (no change)
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    // Null modifier: both variants are expected to render the same text, so all of it lands in prefix
+    auto result = ::compare_variants(tmpl, params, nullptr);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
+    t.assert_equal("left should be empty", "", result->diff.left);
+    t.assert_equal("right should be empty", "", result->diff.right);
+    t.assert_equal("suffix should be empty", "", result->diff.suffix);
+}
+
+// ============================================================================
+// Seed-OSS Template Tool Calling Analysis Tests
+// ============================================================================
+
+static void test_seed_oss_tool_analysis(testing & t) {  // registers the Seed-OSS template sub-tests on t
+    t.test("Seed-OSS tool presence", test_seed_oss_tool_presence);
+    t.test("Seed-OSS call count", test_seed_oss_call_count);
+    t.test("Seed-OSS function names", test_seed_oss_function_names);
+    t.test("Seed-OSS argument count", test_seed_oss_argument_count);
+    t.test("Seed-OSS args presence", test_seed_oss_args_presence);
+    t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning);
+}
+
+// Helper to load the Seed-OSS template; the path is relative, so it resolves against the current working directory
+static common_chat_template load_seed_oss_template(testing & t) {
+    std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja";
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();  // stays empty when the file could not be opened
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0);  // load failure is only reported here, after construction
+    return tmpl;
+}
+
+// Helper to build one tool-call JSON object: {id, type: "function", function: {name, arguments}}; id defaults to "call_001"
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
+    return json{
+        {"id", id},
+        {"type", "function"},
+        {"function", json{
+            {"name", name},
+            {"arguments", args}
+        }}
+    };
+}
+
+// Helper to build a one-element tools array describing "test_function_name" with required string params param1/param2
+static json build_tools_definition() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        {"type", "string"},
+        {"description", "First parameter"}
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        {"type", "string"},
+        {"description", "Second parameter"}
+    });
+    parameters_schema["required"] = json::array({"param1", "param2"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_function_name"},
+                {"description", "A test function for debugging"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// T1: Compare an assistant text reply against an assistant tool call (same user message, same tools)
+static void test_seed_oss_tool_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_no_tools = json{
+        {"role", "assistant"},
+        {"content", "Let me help you."}
+    };
+
+    json assistant_with_tools = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_no_tools;
+    params_no_tools.messages = json::array({user_msg, assistant_no_tools});
+    params_no_tools.tools = build_tools_definition();
+    params_no_tools.add_generation_prompt = false;
+    params_no_tools.enable_thinking = true;
+
+    template_params params_with_tools;  // only its .messages is consumed, by the modifier lambda below
+    params_with_tools.messages = json::array({user_msg, assistant_with_tools});
+    params_with_tools.tools = build_tools_definition();
+    params_with_tools.add_generation_prompt = false;
+    params_with_tools.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_no_tools,
+        [&](template_params & p) {
+            p.messages = params_with_tools.messages;
+        });
+
+    if (!t.assert_true("T1 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+    t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos);
+    t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos);
+    t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Left should be exactly the assistant content of the no-tool variant
+    t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);
+
+    // Right should contain the tool call markers
+    t.assert_true("T1 right should contain tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T1 right should contain function tag", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param1", diff.right.find("<parameter=param1>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param2", diff.right.find("<parameter=param2>") != std::string::npos);
+    t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos);
+    t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos);
+    t.assert_true("T1 right should contain tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should be the eos token
+    t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
+}
+
+// T2: Compare one vs two tool calls
+static void test_seed_oss_call_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_one_call = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_two_calls = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_call});
+    params_one.tools = build_tools_definition();
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_calls});
+        });
+
+    if (!t.assert_true("T2 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    // Prefix should include the first tool call
+    t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 prefix should contain first function", diff.prefix.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos);
+    t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos);
+    t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("</seed:tool_call>") != std::string::npos);
+
+    // Left should be empty (no second tool call in variant A)
+    t.assert_equal("T2 left should be empty", "", diff.left);
+
+    // Right should contain the second tool call
+    t.assert_true("T2 right should contain second tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 right should contain second function", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos);
+    t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos);
+    t.assert_true("T2 right should contain second tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should end with the eos token
+    t.assert_equal("T2 suffix should end with '<seed:eos>'", "<seed:eos>", diff.suffix.substr(diff.suffix.length() - 10, 10));
+}
+
+// T3: Compare different function names (same arguments, only the called function changes)
+static void test_seed_oss_function_names(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with two different function names
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    parameters_schema["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_alpha"},
+                {"description", "First function"},
+                {"parameters", parameters_schema}
+            }}
+        },
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_beta"},
+                {"description", "Second function"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+
+    json assistant_func_alpha = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json assistant_func_beta = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_alpha;
+    params_alpha.messages = json::array({user_msg, assistant_func_alpha});
+    params_alpha.tools = tools;
+    params_alpha.add_generation_prompt = false;
+    params_alpha.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_alpha,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_func_beta});
+        });
+
+    if (!t.assert_true("T3 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos;
+    bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
+    bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos;
+    bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos;
+    bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos;
+
+    // Left should contain func_alpha (or be in prefix): the names share "func_" and a final 'a', so the split may not isolate them
+    t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);
+
+    // Right should contain func_beta (prefix or suffix is also accepted by this check)
+    t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);
+
+    // Both should have the same parameter value (in common parts, not in diffs)
+    // Since both have same args, test_value will be in prefix/suffix
+    t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
+        diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
+}
+
+// T4: Compare different argument counts (zero, one, two parameters)
+static void test_seed_oss_argument_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with 0, 1, or 2 required parameters
+    json params_2_required = json::object();
+    params_2_required["type"] = "object";
+    params_2_required["properties"] = json::object();
+    params_2_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_2_required["properties"]["arg2"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 2"}
+    });
+    params_2_required["required"] = json::array({"arg1", "arg2"});
+
+    json params_1_required = json::object();
+    params_1_required["type"] = "object";
+    params_1_required["properties"] = json::object();
+    params_1_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_1_required["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_func"},
+                {"description", "Test function"},
+                {"parameters", params_2_required}
+            }}
+        }
+    });
+
+    // Test: zero args vs one arg
+    json assistant_zero_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object())
+        })}
+    };
+
+    json assistant_one_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}}))
+        })}
+    };
+
+    json assistant_two_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    // Test zero vs one
+    template_params params_zero;
+    params_zero.messages = json::array({user_msg, assistant_zero_args});
+    params_zero.tools = tools;
+    params_zero.add_generation_prompt = false;
+    params_zero.enable_thinking = true;
+
+    auto result_zero_one = ::compare_variants(tmpl, params_zero,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_one_arg});
+        });
+
+    if (!t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value())) {
+        return;
+    }
+    t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == "");
+    t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos);
+
+    // Test one vs two
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_arg});
+    params_one.tools = tools;
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result_one_two = ::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_args});
+        });
+
+    if (!t.assert_true("T4 one vs two result should have value", result_one_two.has_value())) {
+        return;
+    }
+
+    const auto & diff4 = result_one_two->diff;
+    t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
+        diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
+        diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
+        diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
+}
+
+// T5: Compare different argument values
+static void test_seed_oss_args_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_same_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
+        })}
+    };
+
+    json assistant_other_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_both_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_same;
+    params_same.messages = json::array({user_msg, assistant_same_arg});
+    params_same.tools = build_tools_definition();
+    params_same.add_generation_prompt = false;
+    params_same.enable_thinking = true;
+
+    // Test same arg vs other arg
+    auto result_same_other = ::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_other_arg});
+        });
+
+    if (!t.assert_true("T5 same vs other result should have value", result_same_other.has_value())) {
+        return;
+    }
+    const auto & diff5a = result_same_other->diff;
+    t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
+        diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)",
+        diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
+        diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
+        diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
+
+    // Test same arg vs both args
+    auto result_same_both = ::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_both_args});
+        });
+
+    if (!t.assert_true("T5 same vs both result should have value", result_same_both.has_value())) {
+        return;
+    }
+    const auto & diff5b = result_same_both->diff;
+    t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
+        diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
+        diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
+        diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos);
+}
+
+// T6: Tool call with vs without reasoning_content
+static void test_seed_oss_tool_with_reasoning(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_tool_only = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_tool_with_reasoning = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })},
+        {"reasoning_content", "I need to call the tool first."}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_tool_only;
+    params_tool_only.messages = json::array({user_msg, assistant_tool_only});
+    params_tool_only.tools = build_tools_definition();
+    params_tool_only.add_generation_prompt = false;
+    params_tool_only.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_tool_only,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_tool_with_reasoning});
+        });
+
+    if (!t.assert_true("T6 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    // Left should be empty (no reasoning in variant A)
+    t.assert_equal("T6 left should be empty", "", diff.left);
+
+    // Right should contain the thinking token with reasoning content
+    t.assert_true("T6 right should contain think begin", diff.right.find("<seed:think>") != std::string::npos);
+    t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos);
+    t.assert_true("T6 right should contain think end", diff.right.find("</seed:think>") != std::string::npos);
+
+    // Prefix should contain the assistant role
+    t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Suffix should contain the tool call
+    t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos);
+    t.assert_true("T6 suffix should contain eos", diff.suffix.find("<seed:eos>") != std::string::npos);
+}
+
+static common_chat_template load_template(testing & t, const std::string & template_path) {
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Nemotron template loaded successfully", template_source.length() > 0);
+    return tmpl;
+}
+
+// ============================================================================
+// Nemotron Template Analysis Tests
+// ============================================================================
+// Loads the Nemotron chat template used by the Nemotron analysis tests.
+static common_chat_template load_nemotron_template(testing & t) {
+    const std::string nemotron_path = "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja";
+    return load_template(t, nemotron_path);
+}
+
+// Registers the Nemotron template-analysis subtests on `t`, in declaration order.
+static void test_nemotron_analysis(testing & t) {
+    struct subtest {
+        const char * label;
+        void (*run)(testing &);
+    };
+    const subtest subtests[] = {
+        { "Nemotron reasoning detection", test_nemotron_reasoning_detection },
+        { "Nemotron tool format",         test_nemotron_tool_format },
+    };
+    for (const auto & st : subtests) {
+        t.test(st.label, st.run);
+    }
+}
+
+static void test_nemotron_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Test the comparison manually to see what's happening
+    json user_msg = json{ { "role", "user" }, { "content", "Hello" } };
+    json assistant_no_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." }
+    };
+    json assistant_with_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." },
+        { "reasoning_content", "Let me think about this." }
+    };
+
+    template_params params;
+    params.messages = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking = true;
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check reasoning markers
+    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
+    t.assert_equal("reasoning_end should be '</think>\\n'", "</think>\n", analysis.reasoning.end);
+
+    // Check reasoning mode detection
+    // Nemotron uses forced closed reasoning with add_generation_prompt
+    t.assert_equal("reasoning should be FORCED_CLOSED", reasoning_mode::FORCED_CLOSED, analysis.reasoning.mode);
+
+    // Make sure reasoning markers don't spill over to content markers
+    t.assert_equal("content start should be empty", "", analysis.content.start);
+    t.assert_equal("content end should be empty", "", analysis.content.end);
+
+    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode);
+}
+
+static void test_nemotron_tool_format(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
+    t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start);
+    t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.tools.format.section_end);
+    t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", analysis.tools.format.per_call_start);
+    t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", analysis.tools.format.per_call_end);
+    t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+
+    // Check function markers
+    t.assert_equal("func_name_prefix should be '<function='", "<function=", analysis.tools.function.name_prefix);
+    t.assert_equal("func_name_suffix should be '>\\n'", ">\n", analysis.tools.function.name_suffix);
+    t.assert_equal("func_close should be '</function>\\n'", "</function>\n", analysis.tools.function.close);
+
+    // Check argument markers (note: markers retain trailing newlines for proper parsing)
+    t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
+    t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
+    t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
+
+    // Check format classification
+    t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
+
+    // Verify tool support
+    t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+}
+
+// Loads the Cohere Command-R7B tool_use chat template used by the Cohere analysis tests.
+static common_chat_template load_cohere_template(testing & t) {
+    const std::string cohere_path = "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja";
+    return load_template(t, cohere_path);
+}
+
+// Registers the Cohere template-analysis subtests on `t`.
+// NOTE(review): only the reasoning-detection subtest is registered here; confirm that
+// test_tool_format_cohere is registered somewhere else in this file.
+static void test_cohere_analysis(testing & t) {
+    const char * subtest_label = "Cohere reasoning detection";
+    t.test(subtest_label, test_cohere_reasoning_detection);
+}
+
+static void test_cohere_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check reasoning markers - Cohere uses special token format
+    t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start);
+    t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.reasoning.end);
+
+    // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY)
+    t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning.mode);
+
+    // Check content markers - Cohere wraps all content with START/END_RESPONSE
+    t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.content.start);
+    t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.content.end);
+
+    // Content is always wrapped (both with and without tools)
+    t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content.mode);
+}
+
+static void test_tool_format_cohere(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check tool section markers - Cohere uses ACTION markers
+    t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start);
+    t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.tools.format.section_end);
+
+    // JSON_NATIVE format has no per-call markers
+    t.assert_equal("per_call_start should be empty", "", analysis.tools.format.per_call_start);
+    t.assert_equal("per_call_end should be empty", "", analysis.tools.format.per_call_end);
+
+    // JSON_NATIVE format has empty function markers (no XML-style markers)
+    t.assert_equal("func_name_prefix should be empty", "", analysis.tools.function.name_prefix);
+    t.assert_equal("func_name_suffix should be empty", "", analysis.tools.function.name_suffix);
+    t.assert_equal("func_close should be empty", "", analysis.tools.function.close);
+
+    // JSON_NATIVE format has empty args markers
+    t.assert_equal("args_start should be empty", "", analysis.tools.arguments.start);
+    t.assert_equal("args_end should be empty", "", analysis.tools.arguments.end);
+
+    // JSON_NATIVE format has empty argument markers
+    t.assert_equal("arg_name_prefix should be empty", "", analysis.tools.arguments.name_prefix);
+    t.assert_equal("arg_name_suffix should be empty", "", analysis.tools.arguments.name_suffix);
+    t.assert_equal("arg_value_prefix should be empty", "", analysis.tools.arguments.value_prefix);
+    t.assert_equal("arg_value_suffix should be empty", "", analysis.tools.arguments.value_suffix);
+    t.assert_equal("arg_separator should be empty", "", analysis.tools.arguments.separator);
+
+    // Check JSON field names - Cohere uses non-standard names
+    t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.tools.format.name_field);
+    t.assert_equal("args_field should be 'parameters'", "parameters", analysis.tools.format.args_field);
+    // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format
+    t.assert_equal("id_field should be 'tool_call_id'", "", analysis.tools.format.id_field);
+
+    // Check format classification
+    t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools.format.mode);
+
+    // Check flags
+    t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+    t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+    t.assert_true("should not require nonnull content", !analysis.content.requires_nonnull_content);
+    t.assert_true("tools_array_wrapped should be true", analysis.tools.format.tools_array_wrapped);
+}
+
+// ============================================================================
+// standard_json_tools Format Tests
+// ============================================================================
+
+// Helper to build tools definition for tests
+static json build_test_tools() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["location"] = json::object({
+        {"type", "string"},
+        {"description", "The city and state"}
+    });
+    parameters_schema["properties"]["unit"] = json::object({
+        {"type", "string"},
+        {"description", "Temperature unit"},
+        {"enum", json::array({"celsius", "fahrenheit"})}
+    });
+    parameters_schema["required"] = json::array({"location"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "get_current_weather"},
+                {"description", "Get the current weather in a given location"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// Registers one subtest per JSON tool-call layout handled by standard_json_tools.
+static void test_standard_json_tools_formats(testing & t) {
+    struct subtest {
+        const char * label;
+        void (*run)(testing &);
+    };
+    const subtest subtests[] = {
+        { "OpenAI format",          test_standard_json_tools_openai },
+        { "Cohere format",          test_standard_json_tools_cohere },
+        { "function-as-key format", test_standard_json_tools_function_key },
+    };
+    for (const auto & st : subtests) {
+        t.test(st.label, st.run);
+    }
+}
+
+// Test 1: OpenAI Standard Format
+// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
+static void test_standard_json_tools_openai(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_call>", "</tool_call>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "function.name",
+            /* args_key */ "function.arguments",
+            /* array_wrapped */ false,
+            /* function_is_key */ false,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_call>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me check the weather."
+        "<tool_call>"
+        R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})"
+        "</tool_call>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos);
+}
+
+// Test 2: Cohere Format
+// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
+static void test_standard_json_tools_cohere(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<|START_ACTION|>[", "]<|END_ACTION|>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "tool_name",
+            /* args_key */ "parameters",
+            /* array_wrapped */ false,  // Brackets are part of section markers
+            /* function_is_key */ false,
+            /* call_id_key */ "",
+            /* gen_call_id_key */ "tool_call_id",
+            /* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
+        );
+        return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me search for that."
+        "<|START_ACTION|>["
+        R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})"
+        "]<|END_ACTION|>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "0", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me search") != std::string::npos);
+}
+
+// Test 3: Function-as-Key Format
+// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
+static void test_standard_json_tools_function_key(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_calls>[", "]</tool_calls>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "",  // Name is the key itself
+            /* args_key */ "args",
+            /* array_wrapped */ false,
+            /* function_is_key */ true,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_calls>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "I'll call the weather function."
+        "<tool_calls>["
+        R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
+        "]</tool_calls>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos);
+}
+
+// ============================================================================
+// normalize_quotes_to_json Tests
+// ============================================================================
+
+// Rewrites single-quoted strings in `input` as JSON double-quoted strings and returns the result. Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16); // presumably headroom for the backslashes added when escaping - TODO confirm
+    // Quoting state of the scan: at most one of these is true at any time
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+    // Single pass over the input; the escape branches consume two characters at once
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+        // Backslash with a following character: treat the pair according to the quoting state
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+            // Inside a single-quoted string
+            if (in_single_quoted) {
+                if (next == '\'') { // \' becomes a bare ' (the output string is double-quoted)
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') { // \" is emitted as \" so the quote stays escaped
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                result += c; // any other escape pair is copied through unchanged
+                result += next;
+                ++i;
+                continue;
+            }
+            // Inside a double-quoted string the escape pair is copied unchanged
+            if (in_double_quoted) {
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+            // Outside any string: copy only the backslash; the next character is handled by the next iteration
+            result += c;
+            continue;
+        }
+        // Quote handling; a backslash that is the last input character also lands here and is copied by the final else
+        if (c == '"') {
+            if (in_single_quoted) {
+                result += "\\\""; // unescaped " inside a single-quoted string gets escaped
+            } else {
+                in_double_quoted = !in_double_quoted; // opens or closes a double-quoted string
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                result += c; // ' inside a double-quoted string is ordinary text
+            } else if (in_single_quoted) {
+                in_single_quoted = false; // closing ' is emitted as "
+                result += '"';
+            } else {
+                in_single_quoted = true; // opening ' is emitted as "
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
+static void test_normalize_quotes_to_json(testing & t) {
+    t.test("basic single to double quotes", [](testing & t) {
+        std::string input = "{'key': 'value'}";
+        std::string expected = "{\"key\": \"value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("basic conversion", expected, result);
+    });
+
+    t.test("escaped single quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'code': 'print(\\'hello\\')'}";
+        std::string expected = "{\"code\": \"print('hello')\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("escaped single quote", expected, result);
+    });
+
+    t.test("double quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'msg': 'He said \"hi\"'}";
+        std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("double quote escaping", expected, result);
+    });
+
+    t.test("nested backslash escapes", [](testing & t) {
+        std::string input = "{'path': 'C:\\\\Users\\\\test'}";
+        std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("backslash escaping", expected, result);
+    });
+
+    t.test("newline escapes", [](testing & t) {
+        std::string input = "{'text': 'line1\\nline2'}";
+        std::string expected = "{\"text\": \"line1\\nline2\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("newline escaping", expected, result);
+    });
+
+    t.test("mixed quotes", [](testing & t) {
+        std::string input = "{\"already_double\": 'single_value'}";
+        std::string expected = "{\"already_double\": \"single_value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("mixed quotes", expected, result);
+    });
+
+    t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
+}
+
+// Verifies that normalize_quotes_to_json() escapes double quotes embedded in single-quoted values (mirrors the failing Seed-OSS scenario)
+static void test_normalize_quotes_with_embedded_quotes(testing & t) {
+    // The input is similar to the Seed-OSS template test case: keys and values are delimited by single quotes
+    // The string values contain embedded double quotes like "14" and "bar", plus backslash-n escape sequences
+    std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n    return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n    pass\\n'}";
+
+    // Expected: single-quote delimiters become JSON double quotes, embedded double quotes gain a backslash, backslash-n escapes are kept as-is
+    std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}";
+
+    std::string result = normalize_quotes_to_json(input);
+
+    t.assert_equal("normalize quotes with embedded double quotes", expected, result);
+
+    // Also verify the result parses as JSON and that the embedded quotes are present in the decoded values
+    try {
+        json parsed = json::parse(result);
+        t.assert_true("result is valid JSON", true);  // reached only if json::parse did not throw
+        t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
+        t.assert_true("oldString contains embedded quotes",
+            parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
+        t.assert_true("newString contains embedded quotes",
+            parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
+    } catch (const std::exception & e) {  // report a std::exception from parsing or field access as a failed assertion
+        t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
+    }
+}
+
+// ============================================================================
+// TAG_WITH_TAGGED Argument Parsing Tests
+// ============================================================================
+
+// Build the tools definition used by the tagged-argument test: a JSON array holding one "edit" function tool with three required string parameters
+static json build_edit_tool() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["filename"] = json::object({
+        {"type", "string"},
+        {"description", "Path of file to edit"}
+    });
+    parameters_schema["properties"]["oldString"] = json::object({
+        {"type", "string"},
+        {"description", "String to replace"}
+    });
+    parameters_schema["properties"]["newString"] = json::object({
+        {"type", "string"},
+        {"description", "New (replacement) value"}
+    });
+    parameters_schema["required"] = json::array({"filename", "oldString", "newString"});  // every declared property is listed as required
+
+    return json::array({  // the function tool entry is wrapped in an array
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "edit"},
+                {"description", "Edit a file"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// Parses a Seed-OSS style <seed:tool_call> whose tagged string arguments contain embedded double quotes and checks the mapped arguments are valid JSON
+static void test_tagged_args_with_embedded_quotes(testing & t) {
+    json tools = build_edit_tool();
+
+    // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron: <function=NAME> wrapping <parameter=NAME>...</parameter> entries
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Alternation of per-tool rules; one alternative is appended with |= for each usable tool below
+        auto tool_choice = p.choice();
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) { continue; }  // skip entries that carry no function definition
+            const auto & function = tool_def.at("function");
+            std::string name = function.at("name");
+            const auto & params = function.at("parameters");
+
+            if (!params.contains("properties") || !params.at("properties").is_object()) { continue; }  // skip tools whose "properties" entry is missing or not an object
+
+            const auto & properties = params.at("properties");
+
+            // Build argument parsers: one <parameter=NAME> value </parameter> parser per declared property
+            std::vector<common_peg_parser> arg_parsers;
+            for (const auto & [param_name, param_schema] : properties.items()) {
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">")) +
+                    p.space() +
+                    p.tool_arg_string_value(p.until("</parameter>")) +
+                    p.space() +
+                    p.tool_arg_close(p.literal("</parameter>"))
+                );
+                arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));  // each argument rule is wrapped in optional()
+            }
+
+            // Build arg sequence with space() between consecutive argument parsers
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < arg_parsers.size(); i++) {
+                if (i > 0) {  // no separator before the first argument
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + arg_parsers[i];
+            }
+
+            auto func_parser =
+                p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
+                p.space() + args_seq + p.space() +
+                p.tool_close(p.literal("</function>"));
+
+            tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
+        }
+
+        auto tool_section =
+            p.literal("<seed:tool_call>") + p.space() +
+            tool_choice +
+            p.space() + p.literal("</seed:tool_call>");
+
+        return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();  // content before the tool call tag, then an optional tool call section, then end of input
+    });
+
+    // The exact input from the failing test; the oldString/newString values start directly after their opening tags and embed double quotes
+    std::string input =
+        "<seed:tool_call>\n"
+        "<function=edit>\n"
+        "<parameter=filename>\n"
+        "foo.cpp\n"
+        "</parameter>\n"
+        "<parameter=oldString>"
+        "def foo(arg = \"14\"):\n"
+        "    return arg + \"bar\"\n"
+        "\n"
+        "</parameter>\n"
+        "<parameter=newString>"
+        "def foo(arg = \"15\"):\n"
+        "    pass\n"
+        "\n"
+        "</parameter>\n"
+        "</function>\n"
+        "</seed:tool_call>";
+
+    common_peg_parse_context ctx(input, false);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {  // stop here: the remaining checks need a successful parse
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);  // msg is inspected below, so from_ast() is expected to populate it
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+
+    if (!msg.tool_calls.empty()) {  // guard the [0] accesses below
+        t.assert_equal("tool name", "edit", msg.tool_calls[0].name);
+
+        // Parse the arguments string as JSON to verify it is well-formed
+        std::string args = msg.tool_calls[0].arguments;
+
+        try {
+            json parsed = json::parse(args);
+            t.assert_true("arguments is valid JSON", true);  // reached only if json::parse did not throw
+
+            // Verify each field: filename must match exactly; oldString/newString must still contain the quoted substrings
+            t.assert_equal("filename", "foo.cpp", parsed.value("filename", ""));  // value() yields the "" default when the key is absent
+
+            std::string oldString = parsed.value("oldString", "");
+            t.assert_true("oldString contains embedded quotes",
+                oldString.find("\"14\"") != std::string::npos);
+            t.assert_true("oldString contains bar with quotes",
+                oldString.find("\"bar\"") != std::string::npos);
+
+            std::string newString = parsed.value("newString", "");
+            t.assert_true("newString contains embedded quotes",
+                newString.find("\"15\"") != std::string::npos);
+
+        } catch (const std::exception & e) {  // report the exception as a failed assertion
+            t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
+        }
+    }
+}
+
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp

deleted file mode 100644 (file)

index 6f44a2b..0000000
--- a/tests/test-chat-parser.cpp
+++ /dev/null
@@ -1,617 +0,0 @@
-//  Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
-//
-//  Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
-//  e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
-//
-//    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
-//
-#include <exception>
-#include <iostream>
-#include <string>
-
-#include "chat-parser.h"
-#include "common.h"
-#include "log.h"
-#include "regex-partial.h"
-
-template <class T>
-static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
-    if (expected != actual) {
-        std::cerr << label << std::endl;
-        std::cerr << "Expected: " << expected << std::endl;
-        std::cerr << "Actual: " << actual << std::endl;
-        std::cerr << std::flush;
-        throw std::runtime_error("Test failed");
-    }
-}
-
-template <class T>
-static void assert_equals(const T & expected, const T & actual) {
-    assert_equals("", expected, actual);
-}
-static void assert_equals(const char * expected, const std::string & actual) {
-  return assert_equals<std::string>(expected, actual);
-}
-
-static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
-    try {
-        fn();
-    } catch (const std::exception & e) {
-      if (expected_exception_pattern.empty()) {
-          return;
-        }
-        std::regex expected_exception_regex(expected_exception_pattern);
-        std::string actual_message = e.what();
-        if (std::regex_search(actual_message, expected_exception_regex)) {
-            return;
-        }
-        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
-        throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
-    }
-    throw std::runtime_error("Exception was expected but not thrown");
-}
-
-static void test_reasoning() {
-  //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = true;
-    params.thinking_forced_open = true;
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, params);
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<think>Cogito</think>", builder.result().content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    const std::string variant("content_only_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Pense</think>Bonjour";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
-    assert_equals(variant, std::string("Bonjour"), msg.content);
-  }
-  {
-    const std::string variant("llama_3_inline_think");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_LLAMA_3_X;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = false;
-    const std::string input = "<think>Plan</think>Réponse";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
-    assert_equals(variant, std::string("Réponse"), msg.content);
-  }
-  // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
-    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, params);
-    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
-    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
-    assert_equals(variant, std::string("ok"), builder.consume_rest());
-  }
-  // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = true;
-    params.parse_tool_calls = true;
-    const std::string variant("deepseek_v3_1_reasoning_format_none");
-    const std::string input = "REASONING</think>ok";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-  }
-}
-
-static void test_regex() {
-  auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
-    common_chat_msg_parser builder(input, /* is_partial= */ false, {});
-    assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
-  };
-
-  test_throws("Hello, world!", "abc", "^abc$");
-  test_throws("Hello, world!", "e", "^e$");
-
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    builder.consume_regex(common_regex("Hello"));
-    assert_equals(", world!", builder.consume_rest());
-  }
-
-  {
-    // When in non partial mode, we can say whether the regex was consumed or not.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
-  }
-  {
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
-    assert_equals(true, res.has_value());
-    // Verify captures
-    assert_equals<size_t>(2, res->groups.size());
-    assert_equals("Hell", builder.str(res->groups[0]));
-    assert_equals("el", builder.str(res->groups[1]));
-    // Verify position is after the match
-    assert_equals<size_t>(4, builder.pos());
-    assert_equals("o,", builder.consume_rest());
-  }
-  {
-    // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
-    assert_throws([&]() {
-      builder.try_consume_regex(common_regex("Hello, world!"));
-    }, "^Hello, world!$");
-  }
-
-  // Now regardless of the mode, we can tell these aren't a match.
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
-  }
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_literal("Oh"));
-  }
-}
-
-const std::vector<std::string> barely_healable_jsons = {
-  "{",
-  "{\"",
-  "{\"\\",
-  "{\"n",
-  "{\"name\"",
-  "{\"name\":",
-  "{\"name\":\"",
-  "{\"name\":\"\\",
-  "{\"name\":\"python",
-  "{\"name\":\"python\\",
-  "{\",",
-  "{\":",
-  "{\"[",
-  "{\"]",
-  "{\"{",
-  "{\"}",
-  "{\"1",
-  "{\"name\":\",",
-  "{\"name\":\":",
-  "{\"name\":\"[",
-  "{\"name\":\"]",
-  "{\"name\":\"{",
-  "{\"name\":\"}",
-  "{\"name\":\"1",
-};
-
-static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
-  common_chat_msg_parser builder(input, is_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
-}
-
-static void test_deepseek_v3_1_tool_calls() {
-    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-    // variant: happy path for when it works as the model card says it should
-    const std::string variant("simple");
-    common_chat_parser_params params;
-    params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-    params.reasoning_in_content = false;
-    params.thinking_forced_open = false;
-    params.parse_tool_calls = true;
-    const std::string input = "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-    auto msg = common_chat_parse(input, false, params);
-    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
-    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
-    // JSON arguments are dumped without spaces
-    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
-    assert_equals(variant, std::string(""), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-
-    // variant: simple + thinking open
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_thinking");
-        const std::string in = "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: simple + multiple tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("simple_multiple_tool_calls");
-        const std::string in = "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-
-
-    // variant: thinking forced open + tool call in reasoning content
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING"), m.reasoning_content);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-    //          add the reasoning content as regular content and parse the tool calls.
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals(variant, std::string("REASONING"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, /* is_partial= */ true, params);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>"), m.reasoning_content);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-    }
-
-    // variant: thinking not forced open + reasoning + regular content + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = true;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
-        const std::string in = "REASONING</think>CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: thinking not forced open + missing reasoning + no tool calls
-    {
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-        params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        params.parse_tool_calls = true;
-        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
-        const std::string in = "CONTENT";
-        auto m = common_chat_parse(in, false, params);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-}
-
-static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
-  common_chat_msg_parser builder(input, parse_as_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, js->value.dump());
-}
-
-static void test_json_with_dumped_args_no_args() {
-  // Normal JSON, nothing to heal, nothing to dump
-  test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
-  // Full json is args
-  test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
-
-  // If the arguments are further down, don't heal partial content.
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{"arguments"}}, {}, "{}");
-  }
-  // But heal content that isn't partial.
-  test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
-}
-
-static void test_json_with_dumped_args() {
-
-  // Partial content.
-  test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
-  test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
-  test("{\"content\": ", true, {}, {{"content"}}, "{}");
-
-  // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
-  test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{}}, {}, src);
-  }
-
-  // Full JSON w/ args
-  for (auto parse_as_partial : {true, false}) {
-    test_with_args(
-      R"({"name": "python", "args": {"arg1": 1}})",
-      R"({"name":"python","args":"{\"arg1\":1}"})",
-      parse_as_partial,
-      /* is_partial= */ false
-    );
-  }
-
-  // Partial JSON w/ partial args
-  test_with_args(
-    R"({"foo": "bar", "args": {")",
-    R"({"foo":"bar","args":"{\""})"
-  );
-  // Partial args broken in object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"ar)",
-    R"({"foo":"bar","args":"{\"ar"})"
-  );
-  // Partial args broken after object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1")",
-    R"({"foo":"bar","args":"{\"arg1\""})"
-  );
-  // Partial args broken before object value
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1":)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken before object value (space)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": )",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that may not be complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1 )",
-    R"({"foo":"bar","args":"{\"arg1\":1"})"
-  );
-  // Partial args broken in object value that is incomplete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": ")",
-    R"({"foo":"bar","args":"{\"arg1\":\""})"
-  );
-  // Partial args broken in object value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "1")",
-    R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
-  );
-  // Partial args broken on array opening
-  test_with_args(
-    R"({"foo": "bar", "args": [)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is incomplete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1 )",
-    R"({"foo":"bar","args":"[1"})"
-  );
-  // Partial args broken on array value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": ["1")",
-    R"({"foo":"bar","args":"[\"1\""})"
-  );
-  // Partial args broken after array value
-  test_with_args(
-    R"({"foo": "bar", "args": [1,)",
-    R"({"foo":"bar","args":"[1,"})"
-  );
-  // Partial args broken on nested array
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": [)",
-    R"({"foo":"bar","args":"{\"arg1\":["})"
-  );
-
-  // Unicode tests
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud8)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud80)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
-  );
-}
-
-static void test_positions() {
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_to(100); });
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_back(1); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(8);
-    assert_equals<size_t>(8, builder.pos());
-    builder.move_back(1);
-    assert_equals<size_t>(7, builder.pos());
-    assert_equals("world!", builder.consume_rest());
-
-    builder.move_to(0);
-    assert_equals<size_t>(0, builder.pos());
-
-    assert_throws([&]() { builder.finish(); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(builder.input().size());
-    builder.finish();
-  }
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
-
-    builder.move_to(builder.input().size());
-    assert_equals<size_t>(builder.input().size(), builder.pos());
-    builder.finish();
-  }
-}
-
-int main() {
-    test_positions();
-    test_json_with_dumped_args_no_args();
-    test_json_with_dumped_args();
-    test_reasoning();
-    test_regex();
-    test_deepseek_v3_1_tool_calls();
-    std::cout << "All tests passed!\n";
-    return 0;
-}
diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp

index f767c73c27a2440caa545a5b2ce669616106473f..7626ca12dbd7fafb0019ef57a6f00d5e11365e48 100644 (file)
--- a/tests/test-chat-peg-parser.cpp
+++ b/tests/test-chat-peg-parser.cpp
@@ -1,8 +1,3 @@
-#include <string>
-#include <iostream>
-#include <numeric>
-
-#include "chat-parser.h"
  #include "chat-peg-parser.h"
  #include "chat.h"
  #include "common.h"
@@ -10,6 +5,11 @@
  #include "peg-parser.h"
  #include "testing.h"
  #include "peg-parser/simple-tokenize.h"
+
+#include <iostream>
+#include <numeric>
+#include <string>
+
  #include "nlohmann/json.hpp"
  
  using json = nlohmann::ordered_json;
@@ -17,9 +17,12 @@ using json = nlohmann::ordered_json;
  static json create_tools();
  static void test_example_native(testing & t);
  static void test_example_qwen3_coder(testing & t);
+static void test_example_qwen3_non_coder(testing & t);
  static void test_command7_parser_compare(testing & t);
+static void test_prefix_tool_names(testing & t);
+static void test_tagged_peg_parser(testing & t);
  
-int main(int argc, char *argv[]) {
+int main(int argc, char * argv[]) {
      testing t(std::cout);
      if (argc >= 2) {
          t.set_filter(argv[1]);
@@ -32,7 +35,10 @@ int main(int argc, char *argv[]) {
  
      t.test("native", test_example_native);
      t.test("qwen3 coder", test_example_qwen3_coder);
+    t.test("qwen3 non-coder", test_example_qwen3_non_coder);
      t.test("comparison", test_command7_parser_compare);
+    t.test("prefix tool names", test_prefix_tool_names);
+    t.test("tagged peg parser", test_tagged_peg_parser);
  
      return t.summary();
  }
@@ -41,87 +47,75 @@ static json create_tools() {
      json tools = json::array();
  
      json tool_weather = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_current_weather"},
-            {"description", "Get the current weather in a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_current_weather" },
+              { "description", "Get the current weather in a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description",
+                              "The temperature unit to use. Infer this from the users location." } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
      };
      tools.push_back(tool_weather);
  
      json tool_forecast = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_forecast"},
-            {"description", "Get the weather forecast for a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }},
-                    {"days", {
-                        {"type", "integer"},
-                        {"description", "Number of days to forecast (1-10)"},
-                        {"minimum", 1},
-                        {"maximum", 10}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_forecast" },
+              { "description", "Get the weather forecast for a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description", "The temperature unit to use. Infer this from the users location." } } },
+                        { "days",
+                          { { "type", "integer" },
+                            { "description", "Number of days to forecast (1-10)" },
+                            { "minimum", 1 },
+                            { "maximum", 10 } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
      };
      tools.push_back(tool_forecast);
  
      json tool_search = {
-        {"type", "function"},
-        {"function", {
-            {"name", "search_knowledge_base"},
-            {"description", "Search the internal technical documentation knowledge base."},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"query", {
-                        {"type", "string"},
-                        {"description", "The search query string."}
-                    }},
-                    {"max_results", {
-                        {"type", "integer"},
-                        {"description", "The maximum number of results to return."},
-                        {"default", 5}
-                    }},
-                    {"category", {
-                        {"type", "string"},
-                        {"enum", {"api", "troubleshooting", "billing", "general"}},
-                        {"description", "Filter search by specific category."}
-                    }}
-                }},
-                {"required", {"query", "category"}},
-                {"additionalProperties", false}
-            }},
-            {"strict", true}
-        }}
+        { "type",     "function" },
+        { "function",
+         { { "name", "search_knowledge_base" },
+            { "description", "Search the internal technical documentation knowledge base." },
+            { "parameters",
+              { { "type", "object" },
+                { "properties",
+                  { { "query", { { "type", "string" }, { "description", "The search query string." } } },
+                    { "max_results",
+                      { { "type", "integer" },
+                        { "description", "The maximum number of results to return." },
+                        { "default", 5 } } },
+                    { "category",
+                      { { "type", "string" },
+                        { "enum", { "api", "troubleshooting", "billing", "general" } },
+                        { "description", "Filter search by specific category." } } } } },
+                { "required", { "query", "category" } },
+                { "additionalProperties", false } } },
+            { "strict", true } } }
      };
      tools.push_back(tool_search);
  
@@ -131,39 +125,39 @@ static json create_tools() {
  struct tool_argument {
      std::string name;
      std::string type;
-    bool is_required;
-    json schema;
+    bool        is_required;
+    json        schema;
  };
  
  struct tool_definition {
-    std::string name;
+    std::string                name;
      std::vector<tool_argument> arguments;
-    json schema;
+    json                       schema;
  };
  
  // Test fictitious model output that emits arguments as JSON.
  static void test_example_native(testing & t) {
      struct test_case {
          // Parameters
-        std::string name;
-        json tools;
+        std::string             name;
+        json                    tools;
          common_chat_tool_choice tool_choice;
          common_reasoning_format reasoning_format;
-        json json_schema;
-        bool parallel_tool_calls;
-        bool thinking_forced_open;
-        std::string input;
+        json                    json_schema;
+        bool                    parallel_tool_calls;
+        bool                    thinking_forced_open;
+        std::string             input;
  
          // Expect
-        std::string expect_reasoning;
-        std::string expect_content;
+        std::string                        expect_reasoning;
+        std::string                        expect_content;
          std::vector<common_chat_tool_call> expect_tool_calls;
      };
  
      auto build_parser = [](const test_case & tc) {
-        return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+        return build_chat_peg_parser([&](common_chat_peg_builder & p) {
              auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
-            auto reasoning = p.eps();
+            auto reasoning            = p.eps();
              if (tc.thinking_forced_open) {
                  // If thinking is forced open, expect a closing tag
                  reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
@@ -174,231 +168,188 @@ static void test_example_native(testing & t) {
  
              // tool calling parser
              if (tc.tools.is_array() && !tc.tools.empty()) {
-                auto tools = p.choice();
-                for (const auto & tool : tc.tools) {
-                    const auto & function = tool.at("function");
-                    std::string name = function.at("name");
-                    const auto & schema = function.at("parameters");
-
-                    auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
-                    auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
+                auto tool_call =
+                    p.standard_json_tools("<tool_call>[", "]</tool_call>", tc.tools, tc.parallel_tool_calls,
+                                          tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
  
-                    tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
-                };
-
-                auto parallel_calls = p.eps();
-                if (tc.parallel_tool_calls) {
-                    parallel_calls = p.zero_or_more("," << tools);
-                }
-
-                auto tool_call = p.trigger_rule("tool-call",
-                    p.sequence({
-                        p.literal("<tool_call>["),
-                        tools,
-                        parallel_calls,
-                        p.literal("]</tool_call>")
-                    })
-                );
-
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.until("<tool_call>")),
-                    p.optional(p.space() + tool_call),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("<tool_call>")),
+                                    p.optional(p.space() + tool_call), p.space(), p.end() });
              }
  
              // response_format parser
              if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.schema(p.json(), "response-output", tc.json_schema)),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning),
+                                    p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(),
+                                    p.end() });
              }
  
              // Content-only parser
-            return p.sequence({
-                (reasoning_in_content ? p.eps() : reasoning),
-                p.content(p.rest()),
-                p.end()
-            });
+            return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() });
          });
      };
  
      std::vector<test_case> test_cases = std::vector<test_case>{
          {
-            /* .name =                 */ "content with thinking_forced_open = false",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
          {
-            /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "Hello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ false,
+         /* .input =                */ ("Hello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
          {
-            /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
          {
-            /* .name =                 */ "content with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
          {
-            /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
          {
-            /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York</think>\n"
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York",
-            /* .expect_content =       */ "",
-            /* .expect_tool_calls =    */ {{
+         /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York</think>\n"
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */ "I must get the weather in New York",
+         /* .expect_content =       */ "",
+         /* .expect_tool_calls =    */
+            { {
                  /* .name =      */ "get_current_weather",
                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                  /* .id =        */ "",
-            }},
-        },
+            } },
+         },
          {
-            /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ true,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
-            /* .expect_content =       */ "Let me search that for you.",
-            /* .expect_tool_calls =    */ {{
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }},
-        },
+         /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ true,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
+             "search that for you."
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */
+            "I must get the weather in New York and San Francisco and a 3 day forecast of each.",                                                                     /* .expect_content =       */ "Let me search that for you.",
+         /* .expect_tool_calls =    */
+            { {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              } },
+         },
          {
-            /* .name =                 */ "response_format with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {
-                {"type", "object"},
-                {"properties", {
-                    {"invoice_number", {{"type", "string"}}},
-                    {"amount", {{"type", "number"}}},
-                    {"due_date", {{"type", "string"}}}
-                }},
-                {"required", {"invoice_number", "amount", "due_date"}}
-            },
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must produce the invoice in the requested format</think>\n"
-                R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
-            ),
-            /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
-            /* .expect_content =       */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "response_format with thinking_forced_open = true",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */
+            { { "type", "object" },
+              { "properties",
+                { { "invoice_number", { { "type", "string" } } },
+                  { "amount", { { "type", "number" } } },
+                  { "due_date", { { "type", "string" } } } } },
+              { "required", { "invoice_number", "amount", "due_date" } } },
+         /* .parallel_tool_calls =  */ false,
+         /* .thinking_forced_open = */ true,
+         /* .input =                */
+            ("I must produce the invoice in the requested format</think>\n"
+             R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
+         /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
+         /* .expect_content =       */
+            R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls =    */ {},
+         },
      };
  
      for (const auto & tc : test_cases) {
          t.test(tc.name, [&](testing & t) {
-            auto parser = build_parser(tc);
-            auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            auto parser  = build_parser(tc);
+            auto lazy    = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
              auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-                for (auto const & def : tc.tools) {
-                    auto function = def.at("function");
+                for (const auto & def : tc.tools) {
+                    auto function   = def.at("function");
                      auto parameters = function.at("parameters");
                      builder.resolve_refs(parameters);
                  };
@@ -406,17 +357,17 @@ static void test_example_native(testing & t) {
              });
  
              t.log("Grammar:");
-            for (auto const & line : string_split(grammar, "\n")) {
+            for (const auto & line : string_split(grammar, "\n")) {
                  t.log(line);
              }
  
              common_peg_parse_context ctx(tc.input, false);
-            auto result = parser.parse(ctx);
+            auto                     result = parser.parse(ctx);
  
              t.assert_true("success", result.success());
  
              common_chat_msg msg;
-            auto mapper = common_chat_peg_native_mapper(msg);
+            auto            mapper = common_chat_peg_mapper(msg);
              mapper.from_ast(ctx.ast, result);
  
              t.assert_equal("content equal", tc.expect_content, msg.content);
@@ -431,16 +382,16 @@ static void test_example_native(testing & t) {
  }
  
  static void test_example_qwen3_coder(testing & t) {
-    auto tools = create_tools();
-    auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
          auto content = p.rule("content", p.content(p.until("<tool_call>")));
  
          std::vector<common_peg_parser> tool_parsers;
-        for (auto const & def : tools) {
-            auto function = def.at("function");
-            std::string name = function.at("name");
-            auto parameters = function.at("parameters");
-            auto properties = parameters.at("properties");
+        for (const auto & def : tools) {
+            auto        function   = def.at("function");
+            std::string name       = function.at("name");
+            auto        parameters = function.at("parameters");
+            auto        properties = parameters.at("properties");
  
              std::set<std::string> required_properties;
              if (function.contains("required")) {
@@ -450,59 +401,36 @@ static void test_example_qwen3_coder(testing & t) {
              std::vector<common_peg_parser> arg_parsers;
              for (const auto & [param_name, param_schema] : properties.items()) {
                  bool is_required = required_properties.find(param_name) != required_properties.end();
-                auto type = param_schema.value("type", "object");
-
-                auto arg = p.tool_arg(p.sequence({
-                    p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
-                    (type == "string" ?
-                        p.tool_arg_string_value(
-                            p.schema(
-                                p.until_one_of({
-                                    "</parameter>\n<parameter=",
-                                    "</parameter>\n</function>"
-                                }),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema,
-                                true
-                            )
-                        ) : p.tool_arg_json_value(
-                            p.schema(
-                                p.json(),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema
-                            )
-                        )
-                    ),
-                    p.tool_arg_close(
-                        "</parameter>\n" +
-                        p.peek(p.literal("<parameter=") | p.literal("</function>"))
-                    )
-                }));
-
-                arg_parsers.push_back(is_required ?
-                    p.rule("tool-" + name + "-arg-" + param_name, arg) :
-                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+                auto type        = param_schema.value("type", "object");
+
+                auto arg = p.tool_arg(
+                    p.sequence({ p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
+                                 (type == "string" ?
+                                      p.tool_arg_string_value(p.schema(
+                                          p.until_one_of({ "</parameter>\n<parameter=", "</parameter>\n</function>" }),
+                                          "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                                      p.tool_arg_json_value(p.schema(
+                                          p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))),
+                                 p.tool_arg_close("</parameter>\n" +
+                                                  p.peek(p.literal("<parameter=") | p.literal("</function>"))) }));
+
+                arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) :
+                                                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
              }
  
-            tool_parsers.push_back(p.rule("tool-" + name,
-                p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
-                << p.sequence(arg_parsers)
-                << p.tool_close(p.literal("</function>"))
-            ));
+            tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
+                                                              << p.sequence(arg_parsers)
+                                                              << p.tool_close(p.literal("</function>"))));
          };
  
-        auto tool_call = p.trigger_rule("tool-call",
-            "<tool_call>"
-            << p.choice(tool_parsers)
-            << "</tool_call>"
-        );
+        auto tool_call = p.trigger_rule("tool-call", "<tool_call>" << p.choice(tool_parsers) << "</tool_call>");
  
          return content + p.zero_or_more(p.space() + tool_call) + p.end();
      });
  
      auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-        for (auto const & def : tools) {
-            auto function = def.at("function");
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
              auto parameters = function.at("parameters");
              builder.resolve_refs(parameters);
          };
@@ -510,11 +438,11 @@ static void test_example_qwen3_coder(testing & t) {
      });
  
      t.log("Grammar:");
-    for (auto const & line : string_split(grammar, "\n")) {
+    for (const auto & line : string_split(grammar, "\n")) {
          t.log(line);
      }
  
-    t.test("incremental parsing", [&](testing &t) {
+    t.test("incremental parsing", [&](testing & t) {
          std::string input =
              "Let me search the knowledge base for cat pictures."
              "<tool_call>\n"
@@ -538,7 +466,105 @@ static void test_example_qwen3_coder(testing & t) {
              }
  
              common_chat_msg msg;
-            auto mapper = common_chat_peg_constructed_mapper(msg);
+            auto            mapper = common_chat_peg_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            //t.log("Input: " + input);
+            t.log("===========================================");
+            t.log("Iteration " + std::to_string(in.size()));
+            t.log("Reasoning: " + msg.reasoning_content);
+            t.log("Content  : " + msg.content);
+            for (const auto & tc : msg.tool_calls) {
+                t.log("Tool name: " + tc.name);
+                t.log("Tool args: " + tc.arguments);
+            }
+
+            try {
+                // This shouldn't emit any runtime errors
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("failed with ") + e.what(), false);
+            }
+
+            prev = msg;
+        }
+    });
+}
+
+static void test_example_qwen3_non_coder(testing & t) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // tool calling parser using standard JSON format
+        auto tool_call = p.standard_json_tools("<tool_call>", "</tool_call>", tools, true, false);
+
+        return p.sequence({ p.content(p.until("<tool_call>")), p.optional(p.space() + tool_call), p.end() });
+    });
+
+    auto grammar = build_grammar([&](const common_grammar_builder & builder) {
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
+            auto parameters = function.at("parameters");
+            builder.resolve_refs(parameters);
+        };
+        parser.build_grammar(builder);
+    });
+
+    t.log("Grammar:");
+    for (const auto & line : string_split(grammar, "\n")) {
+        t.log(line);
+    }
+
+    t.test("tool call parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "I need to get the weather.\n", msg.content);
+        t.assert_equal("reasoning", "", msg.reasoning_content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+            t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}",
+                           msg.tool_calls[0].arguments);
+        }
+    });
+
+    t.test("incremental parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+
+            auto result = parser.parse(ctx);
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_mapper(msg);
              mapper.from_ast(ctx.ast, result);
  
              //t.log("Input: " + input);
@@ -554,7 +580,7 @@ static void test_example_qwen3_coder(testing & t) {
              try {
                  // This shouldn't emit any runtime errors
                  auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
-            } catch(const std::exception & e) {
+            } catch (const std::exception & e) {
                  t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
                  t.assert_true(std::string("failed with ") + e.what(), false);
              }
@@ -565,38 +591,37 @@ static void test_example_qwen3_coder(testing & t) {
  }
  
  void test_command7_parser_compare(testing & t) {
-    auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
-        auto thinking = p.reasoning_block(
-            "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
+    auto parser = build_chat_peg_parser([](common_chat_peg_builder & p) {
+        auto thinking =
+            p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
  
          auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
  
          auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
-        auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
+        auto tool_call_name =
+            p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
          auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
  
          auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
-        auto tool_call = p.rule("tool-call", p.tool(
-            p.tool_open(p.literal("{"))
-            << tool_call_fields
-            << p.zero_or_more( p.literal(",") << tool_call_fields)
-            << p.tool_close(p.literal("}"))
-        ));
-
-        auto tool_calls = p.rule("tool-calls",
-            "<|START_ACTION|>"
-            << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
-            << "<|END_ACTION|>");
+        auto tool_call =
+            p.rule("tool-call", p.tool(p.tool_open(p.literal("{"))
+                                       << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields)
+                                       << p.tool_close(p.literal("}"))));
+
+        auto tool_calls = p.rule(
+            "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
+                                             << "<|END_ACTION|>");
  
          return p.optional(thinking) << (tool_calls | response) + p.end();
      });
  
-    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
+    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial,
+                            bool print_results) {
          common_peg_parse_context ctx(input, is_partial);
-        auto result = p.parse(ctx);
+        auto                     result = p.parse(ctx);
  
          common_chat_msg msg;
-        auto mapper = common_chat_peg_native_mapper(msg);
+        auto            mapper = common_chat_peg_mapper(msg);
          mapper.from_ast(ctx.ast, result);
  
          if (print_results) {
@@ -614,79 +639,19 @@ void test_command7_parser_compare(testing & t) {
          }
      };
  
-    auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
-        // Original common_chat_combinator_parser taken from chat.cpp
-        common_chat_parser_params params;
-        params.format = COMMON_CHAT_FORMAT_GENERIC;
-        params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-        params.reasoning_in_content = false;
-        params.thinking_forced_open = false;
-        common_chat_msg_parser builder(
-            input,
-            /* .is_partial = */ need_more_input,
-            params
-        );
-
-        builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-        static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-        static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-        static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-        static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-        if (auto res = builder.try_find_regex(start_action_regex)) {
-            // If we didn't extract thoughts, prelude includes them.
-            auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
-            for (const auto & tool_call : tool_calls.value) {
-                std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-                std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-                std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-                if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            }
-            if (tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-            builder.consume_regex(end_action_regex);
-        } else if (auto res = builder.try_find_regex(start_response_regex)) {
-            if (!builder.try_find_regex(end_response_regex)) {
-                builder.add_content(builder.consume_rest());
-                throw common_chat_msg_partial_exception(end_response_regex.str());
-            }
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-
-        if (print_results) {
-            std::cout << "== Parsed (legacy) ==\n";
-            std::cout << "=== Reasoning ===\n";
-            std::cout << builder.result().reasoning_content << "\n";
-            std::cout << "\n\n=== Content ===\n";
-            std::cout << builder.result().content << "\n";
-            std::cout << "\n\n=== Tool Calls ===\n";
-            for (const auto & tc : builder.result().tool_calls) {
-                std::cout << "id: " << tc.id << "\n";
-                std::cout << "name: " << tc.name << "\n";
-                std::cout << "args: " << tc.arguments << "\n";
-            }
-        }
-    };
-
-    std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
-            "budget of $4000 for a two-week stay, we need to:\n\n"
-            "1. Identify key historical sites and modern attractions in Japan.\n"
-            "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
-            "3. Determine the best modes of transportation for getting around Japan.\n"
-            "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
-            "overspending.\n"
-            "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
-            "to attractions.";
-
-    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
-        "call_0",
-        "plan_trip",
-        nlohmann::json::parse(R"({
+    std::string reasoning =
+        "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
+        "budget of $4000 for a two-week stay, we need to:\n\n"
+        "1. Identify key historical sites and modern attractions in Japan.\n"
+        "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
+        "3. Determine the best modes of transportation for getting around Japan.\n"
+        "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
+        "overspending.\n"
+        "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
+        "to attractions.";
+
+    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {
+        { "call_0", "plan_trip", nlohmann::json::parse(R"({
              "destination": "Japan",
              "duration": 14,
              "budget": 4000,
@@ -694,8 +659,8 @@ void test_command7_parser_compare(testing & t) {
              "accommodation_preferences": "affordable",
              "transportation_preferences": "efficient",
              "meal_preferences": "local cuisine"
-        })")
-    }};
+        })") }
+    };
  
      std::vector<std::string> tokens;
  
@@ -712,10 +677,10 @@ void test_command7_parser_compare(testing & t) {
  
          auto json = nlohmann::json::array();
          for (const auto & tc : tool_calls) {
-            auto tc_json = nlohmann::json::object();
+            auto tc_json            = nlohmann::json::object();
              tc_json["tool_call_id"] = std::get<0>(tc);
-            tc_json["tool_name"] = std::get<1>(tc);
-            tc_json["parameters"] = std::get<2>(tc);
+            tc_json["tool_name"]    = std::get<1>(tc);
+            tc_json["parameters"]   = std::get<2>(tc);
              json.push_back(tc_json);
          }
  
@@ -727,42 +692,284 @@ void test_command7_parser_compare(testing & t) {
  
      std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
  
-    // Run tests
-    t.test("legacy_parse", [&](testing & /* t */) {
-        test_legacy(input, false, false);
-    });
+    t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); });
+    t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100);
+    t.bench(
+        "current_parse_benchmark incremental",
+        [&]() {
+            std::string in;
+            for (auto i = 0u; i < tokens.size(); i++) {
+                in += tokens[i];
+                test_current(parser, in, i + 1 < tokens.size(), false);
+            }
+        },
+        20);
+}
+
+// Regression test: a tool name that is a proper prefix of another tool name must not be
+// matched prematurely, either in a complete parse or while parsing token by token.
+// For example, "special_function" must not be reported when parsing "special_function_with_opt".
+static void test_prefix_tool_names(testing & t) {
+    // Two tool definitions; "special_function" is a proper prefix of "special_function_with_opt"
+    json tools = json::array();
  
-    t.test("current_parse", [&](testing & /* t */) {
-        test_current(parser, input, false, false);
+    json tool_short = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function" },
+              { "description", "A special function" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_short);
+
+    json tool_long = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function_with_opt" },
+              { "description", "A special function with optional params" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                          { "arg2", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_long);
+
+    // Build the grammar via standard_constructed_tools, the helper that had the prefix-matching bug
+    std::map<std::string, std::string> markers = {
+        { "tool_call_start_marker", "<tool_call>" },
+        { "tool_call_end_marker", "</tool_call>" },
+        { "function_opener", "<function=" },
+        { "function_closer", "</function>" },
+        { "function_name_suffix", ">" },
+        { "parameter_key_prefix", "<param=" },
+        { "parameter_key_suffix", ">" },
+        { "parameter_closer", "</param>" },
+    };
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto content   = p.rule("content", p.content(p.until("<tool_call>")));
+        auto tool_call = p.standard_constructed_tools(markers, tools, false, false);
+        return content + p.zero_or_more(p.space() + tool_call) + p.end();
      });
  
-    // Run benchmarks
-    t.bench("legacy_parse_benchmark complete", [&]() {
-        test_legacy(input, false, false);
+    // Complete (non-partial) parse of the long tool name - the short tool name must NOT be matched
+    t.test("parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name);
+        }
      });
  
-    t.bench("legacy_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
+    // Incremental parsing - the key test case.
+    // The input is re-parsed after each added token; while "special_function_with_opt" is still
+    // arriving, no intermediate message may contain a tool call named "special_function".
+    t.test("incremental parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());  // tokens up to and including *it, joined
+
+            common_peg_parse_context ctx(in, it + 1 < tokens.end());  // flagged as partial unless *it is the last token
+            auto                     result = parser.parse(ctx);
+
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                return;
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            // The critical check: no intermediate result may expose the short name
+            // "special_function" as a tool name while "special_function_with_opt" is being parsed.
+            for (const auto & tc : msg.tool_calls) {
+                if (!t.assert_equal("tool name should not be short prefix", false,
+                                    tc.name == "special_function")) {
+                    t.log("Premature tool name match at input: " + in);
+                    return;
+                }
+            }
  
              try {
-                test_legacy(in, i + 1 < tokens.size(), false);
-            } catch (common_chat_msg_partial_exception & /* e */) {
-                // Do nothing, this is expected
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);  // an exception here is reported as a test failure
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("diff failed with ") + e.what(), false);
+                return;
              }
+
+            prev = msg;  // becomes the baseline for the next iteration's diff
          }
-    }, 20);
  
-    t.bench("current_parse_benchmark complete", [&]() {
-        test_current(parser, input, false, false);
-    }, 100);
+        // Final check: after the last token, exactly one tool call with the long name is expected
+        t.assert_equal("final tool calls count", 1u, prev.tool_calls.size());
+        if (!prev.tool_calls.empty()) {
+            t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name);
+        }
+    });
  
-    t.bench("current_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
-            test_current(parser, in, i + 1 < tokens.size(), false);
+    // The short tool name on its own must still parse as a tool call
+    t.test("parse short tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input, false);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function", msg.tool_calls[0].name);
          }
-    }, 20);
+    });
+}
+
+static void test_tagged_peg_parser(testing & t) {
+    t.test("basic tag extraction", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("greeting", p.until(" ")) + " " + p.tag("name", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Hello World");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("greeting tag", "Hello", result.tags.at("greeting"));
+        t.assert_equal("name tag", "World", result.tags.at("name"));
+    });
+
+    t.test("duplicate tags overwrite", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("item", p.until(",")) + "," + p.tag("item", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("first,second");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("item tag", "second", result.tags.at("item"));
+    });
+
+    t.test("no tags extracted", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.rest() + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Hello");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("empty tags", 0u, result.tags.size());
+    });
+
+    t.test("structured extraction", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            auto header = p.tag("header", p.until("\n"));
+            auto body = p.tag("body", p.rest());
+            return header + "\n" + body + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Title\nBody content here");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("header", "Title", result.tags.at("header"));
+        t.assert_equal("body", "Body content here", result.tags.at("body"));
+    });
+
+    t.test("partial parse", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("prefix", p.until(":")) + ":" + p.tag("value", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("key:val", true);
+        t.assert_true("not fail", !result.result.fail());
+        t.assert_equal("prefix tag", "key", result.tags.at("prefix"));
+        t.assert_equal("value tag", "val", result.tags.at("value"));
+    });
+
+    t.test("find in the middle", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+        });
+
+        std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call>{ \"fun_name\" : { \"arg\" : 1 }</tool_call>";
+        auto result = parser.parse_anywhere_and_extract(tpl);
+        t.assert_true("success", result.result.success());
+    });
+
+    t.test("fail find in the middle", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+        });
+
+        std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call><fun=fun_name><arg name=arg>1</arg></tool_call>";
+        auto result = parser.parse_anywhere_and_extract(tpl);
+        t.assert_true("failure", result.result.fail());
+    });
+
+    t.test("find function tag with name", [&](testing &t) {
+        std::string haystack = "\n<tool_call>\n<function=foofoo>\n<parameter=first>\nXXXX\n</parameter>\n<parameter=second>\nYYYY\n</parameter>\n</function>\n</tool_call>\n";
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            std::string needle = "foofoo";
+            return p.tag("fun_marker", p.choice({
+            p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", needle })) + p.literal(needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+            p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", needle })) + p.literal(needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+        });
+        auto result = parser.parse_anywhere_and_extract(haystack);
+        t.assert_true("success", result.result.success());
+        t.assert_equal("fun_pre should be '<function='", "<function=", result.tags["fun_pre"]);
+        t.assert_equal("fun_post should be '>'", ">", result.tags["fun_post"]);
+    });
  }
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp

index 27b537a03696ecb5f74101777177ffd5037ad880..6cc132131c111be31e054890bc095d1f97b4893e 100644 (file)
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -1,4 +1,5 @@
  #include <string>
+#include <utility>
  #include <vector>
  #include <sstream>
  #include <regex>
@@ -21,17 +22,16 @@
  
  using json = nlohmann::ordered_json;
  
-int main_automated_tests(void);
+static int main_automated_tests(void);
  
-void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
-void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
+static void run_multiple(const std::string& dir_path, bool stop_on_first_failure, const json& input, bool use_common = false);
+static void run_single(const std::string& contents, json input, bool use_common = false, const std::string & output_path = "");
  
-
-
-std::string HELP = R"(
+static std::string HELP = R"(
  Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
  Options:
    -h, --help               Show this help message and exit.
+  --with-tools             Add a tool and a tool call to the default JSON input
    --json <path>            Path to the JSON input file.
    --stop-on-first-fail     Stop testing on the first failure (default: false).
    --no-common              Use direct Jinja engine instead of common chat templates (default: use common).
@@ -41,7 +41,23 @@ If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
  If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
  )";
  
-std::string DEFAULT_JSON = R"({
+static std::string DEFAULT_JSON = R"({
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hello, how are you?"
+        },
+        {
+            "role": "assistant",
+            "content": "I am fine, thank you!"
+        }
+    ],
+    "bos_token": "<s>",
+    "eos_token": "</s>",
+    "add_generation_prompt": true
+})";
+
+static std::string DEFAULT_JSON_WITH_TOOLS = R"({
      "messages": [
          {
              "role": "user",
@@ -50,6 +66,41 @@ std::string DEFAULT_JSON = R"({
          {
              "role": "assistant",
              "content": "I am fine, thank you!"
+        },
+        {
+            "role": "user",
+            "content": "Call a tool!"
+        },
+        {
+            "role": "assistant",
+            "tool_calls": [
+                {
+                    "id": "call00001",
+                    "type": "function",
+                    "function": {
+                        "name": "test",
+                        "arguments": { "arg": "hello" }
+                    }
+                }
+            ]
+        }
+    ],
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "test",
+                "description": "Test",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "arg": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": ["arg"]
+            }
          }
      ],
      "bos_token": "<s>",
@@ -57,12 +108,14 @@ std::string DEFAULT_JSON = R"({
      "add_generation_prompt": true
  })";
  
+
  int main(int argc, char ** argv) {
      std::vector<std::string> args(argv, argv + argc);
  
      std::string tmpl_path;
      std::string json_path;
      std::string output_path;
+    std::string & json_to_use = DEFAULT_JSON;
      bool stop_on_first_fail = false;
      bool use_common = true;
  
@@ -70,9 +123,12 @@ int main(int argc, char ** argv) {
          if (args[i] == "--help" || args[i] == "-h") {
              std::cout << HELP << "\n";
              return 0;
-        } else if (args[i] == "--json" && i + 1 < args.size()) {
+        }
+        if (args[i] == "--json" && i + 1 < args.size()) {
              json_path = args[i + 1];
              i++;
+        } else if (args[i] == "--with-tools") {
+            json_to_use = DEFAULT_JSON_WITH_TOOLS;
          } else if (args[i] == "--stop-on-first-fail") {
              stop_on_first_fail = true;
          } else if (args[i] == "--output" && i + 1 < args.size()) {
@@ -105,7 +161,7 @@ int main(int argc, char ** argv) {
              std::istreambuf_iterator<char>());
          input_json = json::parse(content);
      } else {
-        input_json = json::parse(DEFAULT_JSON);
+        input_json = json::parse(json_to_use);
      }
  
      std::filesystem::path p(tmpl_path);
@@ -125,7 +181,7 @@ int main(int argc, char ** argv) {
      return 0;
  }
  
-void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
+void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) {
      std::vector<std::string> failed_tests;
  
      // list all files in models/templates/ and run each
@@ -180,7 +236,7 @@ static std::string format_using_common(
      common_chat_templates_inputs inputs;
      inputs.use_jinja = true;
      inputs.messages = messages;
-    inputs.tools = tools;
+    inputs.tools = std::move(tools);
      inputs.add_generation_prompt = true;
      auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
      output = normalize_newlines(output);
@@ -209,7 +265,7 @@ static jinja::value_string format_using_direct_engine(
  
      jinja::runtime runtime(ctx);
      const jinja::value results = runtime.execute(ast);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
  
      std::cout << "\n=== RESULTS ===\n";
      for (const auto & part : parts->as_string().parts) {
@@ -220,7 +276,7 @@ static jinja::value_string format_using_direct_engine(
  }
  
  
-void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
+void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) {
      jinja::enable_debug(true);
  
      jinja::value_string output_parts;
@@ -560,7 +616,7 @@ int main_automated_tests(void) {
      supported_tmpl.resize(res);
      res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
      std::cout << "Built-in chat templates:\n";
-    for (auto tmpl : supported_tmpl) {
+    for (const auto *tmpl : supported_tmpl) {
          std::cout << "  " << tmpl << "\n";
      }
  
@@ -592,6 +648,7 @@ int main_automated_tests(void) {
      }
  
      std::vector<common_chat_msg> messages;
+    messages.reserve(conversation.size());
      for (const auto & msg : conversation) {
          messages.push_back(simple_msg(msg.role, msg.content));
      }
@@ -622,58 +679,6 @@ int main_automated_tests(void) {
          }
      }
  
-    // TODO: llama_chat_format_single will be deprecated, remove these tests later
-
-    // test llama_chat_format_single for system message
-    std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
-    std::vector<common_chat_msg> chat2;
-    auto sys_msg = simple_msg("system", "You are a helpful assistant");
-
-    auto fmt_sys = [&](std::string tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
-        auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
-        std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
-    assert(fmt_sys("mistral-v1") == " [INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3") == "[INST] You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v3-tekken") == "[INST]You are a helpful assistant\n\n");
-    assert(fmt_sys("mistral-v7") == "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT]");
-    assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
-    assert(fmt_sys("llama2-sys") == "[INST] <<SYS>>\nYou are a helpful assistant\n<</SYS>>\n\n");
-    assert(fmt_sys("mistral") == "[INST] You are a helpful assistant\n"); // for old pre-v1 templates
-    assert(fmt_sys("gemma")  == ""); // for gemma, system message is merged with user message
-    assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
-    assert(fmt_sys("gigachat") == "<s>You are a helpful assistant<|message_sep|>");
-
-
-    // test llama_chat_format_single for user message
-    std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
-    chat2.push_back(simple_msg("system", "You are a helpful assistant"));
-    chat2.push_back(simple_msg("user", "Hello"));
-    chat2.push_back(simple_msg("assistant", "I am assistant"));
-    auto new_msg = simple_msg("user", "How are you");
-
-    auto fmt_single = [&](const std::string & tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str.c_str());
-        auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
-        std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
-    assert(fmt_single("mistral-v1") == " [INST] How are you [/INST]");
-    assert(fmt_single("mistral-v3") == "[INST] How are you[/INST]");
-    assert(fmt_single("mistral-v3-tekken") == "[INST]How are you[/INST]");
-    assert(fmt_single("mistral-v7") == "[INST] How are you[/INST]");
-    assert(fmt_single("llama2") == "[INST] How are you [/INST]");
-    assert(fmt_single("mistral") == "[INST] How are you [/INST]"); // for old pre-v1 templates
-    assert(fmt_single("gemma")  == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
-    assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
-    // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
-
      std::cout << "\nOK: All tests passed successfully.\n";
  
      return 0;
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp

index 46aec8395fa66e6d33f72e1be976778b82ff111f..fab7a3780c88fe41574d42c8b2220b1dbbe9924a 100644 (file)
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -5,18 +5,22 @@
  //
  //    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
  //
+#include "../src/llama-grammar.h"
+#include "../src/unicode.h"
+#include "chat-auto-parser.h"
  #include "chat.h"
-
+#include "common.h"
+#include "ggml.h"
  #include "log.h"
  
-#include "../src/unicode.h"
-#include "../src/llama-grammar.h"
-
-#include <nlohmann/json.hpp>
-
+#include <algorithm>
+#include <exception>
  #include <fstream>
-#include <iostream>
  #include <functional>
+#include <iostream>
+#include <nlohmann/json.hpp>
+#include <set>
+#include <stdexcept>
  #include <string>
  
  using json = nlohmann::ordered_json;
@@ -33,6 +37,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff &
      os << "}";
      return os;
  }
+
  // operator<< for vector<common_chat_msg_diff>:
  static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
      os << "[\n";
@@ -42,6 +47,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector<common_cha
      os << "]";
      return os;
  }
+
  static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
      os << "{ role: " << msg.role << "; ";
      os << "content: " << msg.content << "; ";
@@ -53,7 +59,8 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg)
      os << "reasoning_content: " << msg.reasoning_content << "; ";
      os << "tool_calls: [\n";
      for (const auto & tool_call : msg.tool_calls) {
-        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
+        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id
+           << " },\n";
      }
      os << "]";
      os << "}";
@@ -70,29 +77,29 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
          try {
              tool_call.arguments = json::parse(tool_call.arguments).dump();
          } catch (const std::exception &) {
-            // Do nothing
          }
      }
      return normalized;
  }
  
-
-template <>
-bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
      return normalize(expected) == normalize(actual);
  }
  
  template <class T> static void assert_equals(const T & expected, const T & actual) {
      if (!equals(expected, actual)) {
-        std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
-        std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
-        std::cerr << std::flush;
+        std::ostringstream oss_expected;
+        oss_expected << expected;
+        std::ostringstream oss_actual;
+        oss_actual << actual;
+        LOG_ERR("Expected: %s\n", oss_expected.str().c_str());
+        LOG_ERR("Actual: %s\n", oss_actual.str().c_str());
+        common_log_flush(common_log_main());
          throw std::runtime_error("Test failed");
      }
  }
  
  static std::string read_file(const std::string & path) {
-    std::cerr << "# Reading: " << path << '\n' << std::flush;
      std::ifstream fs(path, std::ios_base::binary);
      if (!fs.is_open()) {
          fs = std::ifstream("../" + path, std::ios_base::binary);
@@ -118,6 +125,207 @@ static std::unique_ptr<llama_grammar> build_grammar(const std::string & grammar_
          llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0));
  }
  
+// Helper to format a code point as a readable string
+static std::string format_codepoint(uint32_t cp) {
+    if (cp >= 32 && cp < 127) {
+        return std::string("'") + static_cast<char>(cp) + "'";
+    } else if (cp == '\n') {
+        return "'\\n'";
+    } else if (cp == '\r') {
+        return "'\\r'";
+    } else if (cp == '\t') {
+        return "'\\t'";
+    } else {
+        return "U+" + std::to_string(cp);
+    }
+}
+
+// Helper to format expected element from grammar stack
+static std::string format_expected_element(const llama_grammar_rules & /* rules*/, const llama_grammar_element * elem) {
+    if (!elem) {
+        return "<end>";
+    }
+
+    switch (elem->type) {
+        case LLAMA_GRETYPE_END:
+            return "<end of rule>";
+        case LLAMA_GRETYPE_ALT:
+            return "<alternative>";
+        case LLAMA_GRETYPE_RULE_REF:
+            {
+                // Find rule name - just show rule ID for now
+                return "<rule-" + std::to_string(elem->value) + ">";
+            }
+        case LLAMA_GRETYPE_CHAR:
+            {
+                std::string                   result;
+                const llama_grammar_element * pos   = elem;
+                bool                          first = true;
+
+                do {
+                    if (!first) {
+                        result += " | ";
+                    }
+                    first = false;
+
+                    if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+                        // Range like [a-z]
+                        result += "[" + format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value) + "]";
+                        pos += 2;
+                    } else {
+                        result += format_codepoint(pos->value);
+                        pos += 1;
+                    }
+                } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+                return result;
+            }
+        case LLAMA_GRETYPE_CHAR_NOT:
+            {
+                std::string                   result = "[^";
+                const llama_grammar_element * pos    = elem;
+                bool                          first  = true;
+
+                do {
+                    if (!first) {
+                        result += " ";
+                    }
+                    first = false;
+
+                    if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+                        result += format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value);
+                        pos += 2;
+                    } else {
+                        result += format_codepoint(pos->value);
+                        pos += 1;
+                    }
+                } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+                return result + "]";
+            }
+        case LLAMA_GRETYPE_CHAR_ANY:
+            return "<any char>";
+        case LLAMA_GRETYPE_TOKEN:
+            return "<token-" + std::to_string(elem->value) + ">";
+        case LLAMA_GRETYPE_TOKEN_NOT:
+            return "<not-token-" + std::to_string(elem->value) + ">";
+        default:
+            return "<unknown>";
+    }
+}
+
+// Get description of what the grammar expects at current position
+static std::string get_expected_description(const llama_grammar_rules & rules, const llama_grammar_stacks & stacks) {
+    if (stacks.empty()) {
+        return "<no valid continuations>";
+    }
+
+    std::string           result;
+    std::set<std::string> seen;
+
+    for (const auto & stack : stacks) {
+        if (stack.empty()) {
+            if (seen.insert("<end>").second) {
+                if (!result.empty()) {
+                    result += " OR ";
+                }
+                result += "<end>";
+            }
+            continue;
+        }
+
+        const llama_grammar_element * elem = stack.back();
+        std::string                   desc = format_expected_element(rules, elem);
+        if (seen.insert(desc).second) {
+            if (!result.empty()) {
+                result += " OR ";
+            }
+            result += desc;
+        }
+    }
+
+    return result;
+}
+
+// Result of a detailed grammar match attempt (filled in by match_string_detailed)
+struct grammar_match_result {
+    bool        success            = false;  // Did the string fully match the grammar?
+    size_t      matched_bytes      = 0;      // Bytes accepted before the failure (all of them if none occurred)
+    size_t      matched_codepoints = 0;      // Codepoints accepted before the failure (all of them if none occurred)
+    size_t      total_bytes        = 0;      // Total bytes in input
+    size_t      total_codepoints   = 0;      // Total codepoints in input
+    std::string matched_prefix;              // The portion of the input that was successfully matched
+    std::string failing_char;                // Formatted code point that was rejected (empty if none was)
+    std::string expected_description;        // What the grammar expected at the failure point or at end of input
+    bool        incomplete = false;          // True if matched all input but grammar expects more
+};
+
+// Detailed version of match_string that returns failure information
+static grammar_match_result match_string_detailed(const std::string & input, llama_grammar * grammar) {
+    grammar_match_result result;
+    result.total_bytes = input.size();
+
+    const auto cpts         = unicode_cpts_from_utf8(input);
+    result.total_codepoints = cpts.size();
+
+    auto &       stacks_cur = llama_grammar_get_stacks(grammar);
+    const auto & rules      = llama_grammar_get_rules(grammar);
+
+    size_t byte_pos = 0;
+
+    for (size_t i = 0; i < cpts.size(); i++) {
+        const auto & cpt = cpts[i];
+
+        // Get expected before accepting (for error reporting)
+        std::string expected_before = get_expected_description(rules, stacks_cur);
+
+        llama_grammar_accept(grammar, cpt);
+
+        // Calculate byte position for this codepoint
+        size_t cpt_bytes = 0;
+        if (cpt < 0x80) {
+            cpt_bytes = 1;
+        } else if (cpt < 0x800) {
+            cpt_bytes = 2;
+        } else if (cpt < 0x10000) {
+            cpt_bytes = 3;
+        } else {
+            cpt_bytes = 4;
+        }
+
+        if (stacks_cur.empty()) {
+            // Grammar failed to match at this point
+            result.matched_bytes        = byte_pos;
+            result.matched_codepoints   = i;
+            result.matched_prefix       = input.substr(0, byte_pos);
+            result.failing_char         = format_codepoint(cpt);
+            result.expected_description = expected_before;
+            result.incomplete           = false;
+            return result;
+        }
+
+        byte_pos += cpt_bytes;
+    }
+
+    // All input matched - check if grammar is complete
+    result.matched_bytes      = input.size();
+    result.matched_codepoints = cpts.size();
+    result.matched_prefix     = input;
+
+    if (std::any_of(stacks_cur.begin(), stacks_cur.end(), [](const auto & stack) { return stack.empty(); })) {
+        // An empty stack means that the grammar has been completed
+        result.success    = true;
+        result.incomplete = false;
+    } else {
+        // Grammar expects more input
+        result.success              = false;
+        result.incomplete           = true;
+        result.expected_description = get_expected_description(rules, stacks_cur);
+    }
+
+    return result;
+}
+
  // TODO: extract to common helper (copied from test-grammar-integration.cpp)
  static bool match_string(const std::string & input, llama_grammar * grammar) {
      const auto cpts = unicode_cpts_from_utf8(input);
@@ -146,11 +354,13 @@ static std::string renormalize_json(const std::string & json_str) {
          auto json_obj = json::parse(json_str);
          return json_obj.dump();
      } catch (const std::exception & e) {
-        std::cerr << "Failed to parse JSON: " << e.what() << '\n';
-        return json_str;
+        return "";  // ignore partial JSON contents for comparison purposes
      }
  }
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
+
+static void assert_msg_equals(const common_chat_msg & expected,
+                              const common_chat_msg & actual,
+                              bool                    ignore_whitespace_differences = false) {
      assert_equals(expected.role, actual.role);
      if (ignore_whitespace_differences) {
          assert_equals(string_strip(expected.content), string_strip(actual.content));
@@ -183,7 +393,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha
      }
  }
  
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool{
      /* .name = */ "special_function",
      /* .description = */ "I'm special",
      /* .parameters = */ R"({
@@ -197,7 +407,7 @@ common_chat_tool special_function_tool {
          "required": ["arg1"]
      })",
  };
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param{
      /* .name = */ "special_function_with_opt",
      /* .description = */ "I'm special but have optional stuff",
      /* .parameters = */ R"({
@@ -215,7 +425,15 @@ common_chat_tool special_function_tool_with_optional_param {
          "required": ["arg1"]
      })",
  };
-common_chat_tool python_tool {
+static common_chat_tool empty_args_tool{
+    /* .name = */ "empty_args",
+    /* .description = */ "A tool that takes no arguments",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {}
+    })",
+};
+static common_chat_tool python_tool{
      /* .name = */ "python",
      /* .description = */ "an ipython interpreter",
      /* .parameters = */ R"({
@@ -229,7 +447,53 @@ common_chat_tool python_tool {
          "required": ["code"]
      })",
  };
-common_chat_tool todo_list_tool {
+
+static common_chat_tool html_tool{
+    /* .name = */ "html",
+    /* .description = */ "an html validator",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "markup": {
+                "type": "string",
+                "description": "HTML markup to validate."
+            }
+        },
+        "required": ["markup"]
+    })",
+};
+
+static common_chat_tool get_time_tool{
+    /* .name = */ "get_time",
+    /* .description = */ "Get the current time in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool get_weather_tool{
+    /* .name = */ "get_weather",
+    /* .description = */ "Get the current weather in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool todo_list{
      /* .name = */ "todo_list",
      /* .description = */ "Create or update the todo list",
      /* .parameters = */ R"({
@@ -243,44 +507,275 @@ common_chat_tool todo_list_tool {
          "required": ["todos"]
      })",
  };
-common_chat_tool code_interpreter_tool {
-    /* .name = */ "code_interpreter",
-    /* .description = */ "an ipython interpreter",
+
+static common_chat_tool edit_tool{
+    /* .name = */ "edit",
+    /* .description = */ "Edit file",
      /* .parameters = */ R"({
          "type": "object",
          "properties": {
-            "code": {
+            "filename": {
                  "type": "string",
-                "description": "Python code to execute."
+                "description": "Path of file to edit"
+            },
+            "oldString": {
+                "type": "string",
+                "description": "String to replace"
+            },
+            "newString": {
+                "type": "string",
+                "description": "New (replacement) value"
              }
          },
-        "required": ["code"]
+        "required": ["filename", "oldString", "newString"]
      })",
  };
-std::vector<common_chat_tool> tools           { special_function_tool, special_function_tool_with_optional_param, python_tool };
-std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
  
-struct delta_data {
-    std::string        delta;
-    common_chat_params params;
+static common_chat_tool magic_tool{
+    /* .name = */ "magic",
+    /* .description = */ "Magic tool that takes a hash",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": "string"
+            },
+            "ref": {
+                "type": "string"
+            }
+        },
+        "required": ["name", "ref"]
+    })",
+};
+
+static common_chat_tool magic_int_tool{
+    /* .name = */ "magic_int",
+    /* .description = */ "Magic tool that takes a hash",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "ref": {
+                "type": "integer"
+            },
+            "name": {
+                "type": "string"
+            }
+        },
+        "required": ["ref"]
+    })",
+};
+
+static common_chat_tool amount_tool{
+    /* .name = */ "amount",
+    /* .description = */ "Amount converter",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "orig": {
+                "type": "number"
+            }
+        },
+        "required": ["orig"]
+    })",
+};
+
+static common_chat_tool imaginary_number_tool{
+    /* .name = */ "imaginary_number",
+    /* .description = */ "Imaginary number converter",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "number": {
+                "type": "object",
+                "properties": {
+                    "real": {
+                        "type": "number"
+                    },
+                    "imaginary": {
+                        "type": "number"
+                    }
+                },
+                "required": ["real", "imaginary"]
+            }
+        },
+        "required": ["number"]
+    })",
+};
+
+static common_chat_tool string_param_tool{
+    /* .name = */ "string_param",
+    /* .description = */ "Tool with string parameter for testing",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "text": {
+                "type": "string",
+                "description": "A text parameter"
+            }
+        },
+        "required": []
+    })",
+};
+
+static common_chat_tool quoted_unquoted_tool{
+    /* .name = */ "quoted_unquoted",
+    /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "quoted": {
+                "type": "string",
+                "description": "Quoted value"
+            },
+            "unquoted": {
+                "type": "string",
+                "description": "Unquoted value"
+            }
+        },
+        "required": ["quoted", "unquoted"]
+    })",
+};
+
+
+static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
+                                            python_tool, html_tool, todo_list };
+
+const common_chat_msg message_user{
+    "user",
+    "Hey there!",
+    /* .content_parts = */ {},
+    /* .tool_calls = */ {},
+    /* .reasoning_content = */ "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
+};
+
+const common_chat_msg message_user_parts{
+    "user",
+    /* .content = */ "",
+    /* .content_parts = */
+    {
+     { "text", "Hey" },
+     { "text", "there" },
+     },
+    /* .tool_calls = */
+    {                 },
+    /* .reasoning_content = */
+    "",
+    /* .tool_name = */ "",
+    /* .tool_call_id = */ "",
  };
  
-static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
+static common_chat_msg simple_assist_msg(const std::string & content,
+                                         const std::string & reasoning_content = "",
+                                         const std::string & tool_name         = "",
+                                         const std::string & arguments         = "",
+                                         const std::string & id                = "") {
      common_chat_msg msg;
-    msg.role = "assistant";
-    msg.content = content;
+    msg.role              = "assistant";
+    msg.content           = content;
      msg.reasoning_content = reasoning_content;
-    if (!tool_name.empty()) {
+    if (!tool_name.empty() || !id.empty()) {  // an id on its own is enough to attach a tool call
          msg.tool_calls.push_back({ tool_name, arguments, id });  // at most one tool call per message
      }
      return msg;
  }
  
-static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                             const common_chat_msg & user_message,
-                             const common_chat_msg & delta_message,
+// Assistant message with empty content and reasoning plus the given tool call
+// (simple_assist_msg leaves the call out when tool_name is empty).
+static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) {
+    const std::string no_text;
+    return simple_assist_msg(/* content = */ no_text, /* reasoning_content = */ no_text, tool_name, arguments);
+}
+
+// Assistant message with reasoning plus a tool call and no content.
+// Same result as message_with_reasoning_and_tool_call; only the parameter order differs.
+static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name,
+                                                             const std::string & arguments,
+                                                             const std::string & reasoning) {
+    const std::string no_content;
+    return simple_assist_msg(no_content, reasoning, tool_name, arguments);
+}
+
+// Assistant message carrying reasoning, content and one tool call per (name, arguments) pair, appended in
+// the order given. Every tool call gets an empty id.
+static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls(
+    const std::string &                                      reasoning,
+    const std::string &                                      content,
+    const std::vector<std::pair<std::string, std::string>> & tool_calls) {
+    common_chat_msg result;
+    result.role              = "assistant";
+    result.reasoning_content = reasoning;
+    result.content           = content;
+    for (const auto & call : tool_calls) {
+        // call.first is the tool name, call.second the argument string
+        result.tool_calls.push_back({ call.first, call.second, "" });
+    }
+    return result;
+}
+
+// Assistant message with visible content plus a tool call; reasoning is left empty.
+static common_chat_msg message_with_content_and_tool_call(const std::string & content,
+                                                          const std::string & tool_name,
+                                                          const std::string & arguments) {
+    const std::string no_reasoning;
+    return simple_assist_msg(content, no_reasoning, tool_name, arguments);
+}
+
+// Assistant message with reasoning plus a tool call and no content.
+// Same result as message_with_tool_calls_and_reasoning; only the parameter order differs.
+static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning,
+                                                            const std::string & tool_name,
+                                                            const std::string & arguments) {
+    const std::string no_content;
+    return simple_assist_msg(no_content, reasoning, tool_name, arguments);
+}
+
+// Canned assistant-message fixtures, all built with simple_assist_msg
+// (content, reasoning_content, tool_name, arguments, id).
+
+// Content-only replies.
+const common_chat_msg message_assist       = simple_assist_msg("Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_empty = simple_assist_msg("");
+// "unparsed" variants: the reasoning markup stays inline in the content and reasoning_content is empty.
+const common_chat_msg message_assist_thoughts_unparsed_deepseek =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_md =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
+const common_chat_msg message_assist_thoughts_unparsed_md_partial =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
+
+const common_chat_msg message_assist_thoughts_unparsed_r7b =
+    simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_magistral =
+    simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
+// Reasoning supplied separately through reasoning_content.
+const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
+// Closing </think> tag without an opening one, left inline in the content.
+const common_chat_msg message_assist_thoughts_unopened_unparsed =
+    simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
+// Tool-call fixtures. The argument strings are kept verbatim; note that some use "{\"arg1\": 1}" and others
+// "{\"arg1\":1}" (no space after the colon).
+const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
+const common_chat_msg message_assist_call_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
+// Tool call with an empty argument string, and one whose JSON arguments are cut off mid-key.
+const common_chat_msg message_assist_call_empty_args  = simple_assist_msg("", "", "special_function");
+const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
+const common_chat_msg message_assist_call_thoughts =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_thoughts_unparsed =
+    simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_thoughts_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
+// Tool calls that also carry an explicit id.
+const common_chat_msg message_assist_call_id =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
+const common_chat_msg message_assist_call_idx =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
+const common_chat_msg message_assist_thoughts_call_idx =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
+// Tool call with a name and id but an empty argument string.
+const common_chat_msg message_assist_thoughts_partial_call =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0");
+// Calls to the "python" tool; the "unclosed" variant ends before the JSON string and object are closed.
+const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
+const common_chat_msg message_assist_call_python_lines =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
+const common_chat_msg message_assist_call_python_lines_unclosed =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
+// Content that is itself a JSON object rendered as text (no tool call).
+const common_chat_msg message_assist_json_content =
+    simple_assist_msg("{\n  \"response\": \"Hello, world!\\nWhat's up?\"\n}");
+
+// Result of init_delta as consumed by test_templates.
+struct delta_data {
+    std::string        delta;   // text handed to common_chat_parse and matched against the grammar
+    common_chat_params params;  // supplies format, grammar, grammar_lazy and grammar_triggers for those checks
+};
+
+static delta_data init_delta(const struct common_chat_templates *  tmpls,
+                             const std::vector<std::string> &      end_tokens,
+                             const common_chat_msg &               user_message,
+                             const common_chat_msg &               delta_message,
                               const std::vector<common_chat_tool> & tools,
-                             const common_chat_tool_choice & tool_choice) {
+                             const common_chat_tool_choice &       tool_choice) {
      common_chat_templates_inputs inputs;
      inputs.parallel_tool_calls = true;
      inputs.messages.push_back(user_message);
@@ -331,20 +826,27 @@ static delta_data init_delta(const struct common_chat_templates * tmpls, const s
    gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
    the parsed message is the same as the test_message
  */
-static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                          const common_chat_msg & test_message,
-                          const std::vector<common_chat_tool> & tools = {},
-                          const std::string & expected_delta = "",
-                          bool expect_grammar_triggered = true,
-                          bool test_grammar_if_triggered = true,
-                          common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
-                          bool ignore_whitespace_differences = false
-                        ) {
+static void test_templates(const struct common_chat_templates *  tmpls,
+                           const std::vector<std::string> &      end_tokens,
+                           const common_chat_msg &               test_message,
+                           const std::vector<common_chat_tool> & tools                     = {},
+                           const std::string &                   expected_delta            = "",
+                           bool                                  expect_grammar_triggered  = true,
+                           bool                                  test_grammar_if_triggered = true,
+                           common_reasoning_format               reasoning_format = COMMON_REASONING_FORMAT_NONE,
+                           bool                                  ignore_whitespace_differences = false) {
      common_chat_msg user_message;
-    user_message.role = "user";
+    user_message.role    = "user";
      user_message.content = "Hello, world!";
  
-    for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
+    common_chat_templates_inputs inputs_tools;
+    inputs_tools.messages = { message_user };
+    inputs_tools.tools    = { special_function_tool };
+
+    common_chat_params params = common_chat_templates_apply(tmpls, inputs_tools);
+
+    for (const auto & tool_choice :
+         std::vector<common_chat_tool_choice>{ COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED }) {
          auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
          if (!expected_delta.empty()) {
              if (ignore_whitespace_differences) {
@@ -356,10 +858,14 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
  
          if (expect_grammar_triggered) {
              // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
-            common_chat_parser_params params;
-            params.format = data.params.format;
-            params.reasoning_format = reasoning_format;
-            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
+            common_chat_parser_params parser_params;
+            parser_params.format           = data.params.format;
+            parser_params.reasoning_format = reasoning_format;
+            if (!params.parser.empty()) {  // parser_params is brand new; check the template's parser instead
+                parser_params.parser = common_peg_arena();
+                parser_params.parser.load(params.parser);
+            }
+            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, parser_params);
              assert_msg_equals(test_message, msg, ignore_whitespace_differences);
          }
  
@@ -372,43 +878,43 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
                  throw std::runtime_error("Failed to build grammar");
              }
              auto earliest_trigger_pos = std::string::npos;
-            auto constrained = data.delta;
+            auto constrained          = data.delta;
              for (const auto & trigger : data.params.grammar_triggers) {
-                size_t pos = std::string::npos;
+                size_t      pos = std::string::npos;
                  std::smatch match;
                  switch (trigger.type) {
                      case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
-                    {
-                        const auto & word = trigger.value;
-                        pos = constrained.find(word);
-                        break;
-                    }
+                        {
+                            const auto & word = trigger.value;
+                            pos               = constrained.find(word);
+                            break;
+                        }
                      case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_search(constrained, match, std::regex(pattern))) {
-                            pos = match.position(1);
+                        {
+                            const auto & pattern = std::regex(trigger.value);
+                            if (std::regex_search(constrained, match, pattern)) {
+                                pos = match.position(pattern.mark_count());
+                            }
+                            break;
                          }
-                        break;
-                    }
                      case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_match(constrained, match, std::regex(pattern))) {
-                            auto mpos = std::string::npos;
-                            for (size_t i = 1; i < match.size(); ++i) {
-                                if (match[i].length() > 0) {
-                                    mpos = match.position(i);
-                                    break;
+                        {
+                            const auto & pattern = trigger.value;
+                            if (std::regex_match(constrained, match, std::regex(pattern))) {
+                                auto mpos = std::string::npos;
+                                for (size_t i = 1; i < match.size(); ++i) {
+                                    if (match[i].length() > 0) {
+                                        mpos = match.position(i);
+                                        break;
+                                    }
                                  }
+                                if (mpos == std::string::npos) {
+                                    mpos = match.position(0);
+                                }
+                                pos = mpos;
                              }
-                            if (mpos == std::string::npos) {
-                                mpos = match.position(0);
-                            }
-                            pos = mpos;
+                            break;
                          }
-                        break;
-                    }
                      default:
                          throw std::runtime_error("Unknown trigger type");
                  }
@@ -421,7 +927,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
              }
              auto grammar_triggered = false;
              if (earliest_trigger_pos != std::string::npos) {
-                constrained = constrained.substr(earliest_trigger_pos);
+                constrained       = constrained.substr(earliest_trigger_pos);
                  grammar_triggered = true;
              }
              if (data.params.grammar_lazy) {
@@ -430,8 +936,7 @@ static void test_templates(const struct common_chat_templates * tmpls, const std
  
              if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
                  throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
-                    "\n\nConstrained: " + constrained +
-                    "\n\nGrammar: " + data.params.grammar);
+                                         "\n\nConstrained: " + constrained + "\n\nGrammar: " + data.params.grammar);
              }
          }
      }
@@ -445,24 +950,31 @@ template <typename T>
  static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
      constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
          auto len = s.size();
-        if (len == 0) return 0;
+        if (len == 0) {
+            return 0;
+        }
          auto i = len;
          for (size_t back = 0; back < 4 && i > 0; ++back) {
              --i;
              unsigned char c = s[i];
              if ((c & 0x80) == 0) {
                  return len;
-            } else if ((c & 0xC0) == 0xC0) {
+            }
+            if ((c & 0xC0) == 0xC0) {
                  size_t expected_len = 0;
-                if ((c & 0xE0) == 0xC0) expected_len = 2;
-                else if ((c & 0xF0) == 0xE0) expected_len = 3;
-                else if ((c & 0xF8) == 0xF0) expected_len = 4;
-                else return i;
-                if (len - i >= expected_len) {
-                    return len;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
                  } else {
                      return i;
                  }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
              }
          }
          return len - std::min(len, size_t(3));
@@ -471,13 +983,15 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
          return s.substr(0, utf8_truncate_safe_len(s));
      };
  
-    auto merged = simple_assist_msg("");
+    auto merged   = simple_assist_msg("");
      auto last_msg = parse_msg("");
      for (size_t i = 1; i <= raw_message.size(); ++i) {
          auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
-        if (curr_msg == simple_assist_msg("")) continue;
-        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
-        for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
+        if (curr_msg == simple_assist_msg("")) {
+            continue;
+        }
+        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({ curr_msg }).dump().c_str());
+        for (auto diff : common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
              LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
              if (!diff.reasoning_content_delta.empty()) {
                  merged.reasoning_content += diff.reasoning_content_delta;
@@ -487,14 +1001,14 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
              }
              if (diff.tool_call_index != std::string::npos) {
                  if (!diff.tool_call_delta.name.empty()) {
-                    merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
+                    merged.tool_calls.push_back({ diff.tool_call_delta.name, "", "" });
                  }
                  if (!diff.tool_call_delta.arguments.empty()) {
                      GGML_ASSERT(!merged.tool_calls.empty());
                      merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
                  }
              }
-            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
+            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({ merged }).dump().c_str());
          }
          assert_msg_equals(curr_msg, merged, true);
          last_msg = curr_msg;
@@ -503,99 +1017,95 @@ static void test_parser_with_streaming(const common_chat_msg & expected, const s
      assert_msg_equals(expected, merged, true);
  }
  
-const common_chat_msg message_user {
-    "user",
-    "Hey there!",
-    /* .content_parts = */ {},
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_user_parts {
-    "user",
-    /* .content = */ "",
-    /* .content_parts = */ {
-        { "text", "Hey" },
-        { "text", "there" },
-    },
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
-    /* .tool_name = */ "",
-    /* .tool_call_id = */ "",
-};
-
-const common_chat_msg message_assist                              = simple_assist_msg("Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_empty                        = simple_assist_msg("");
-const common_chat_msg message_assist_thoughts_unparsed_deepseek   = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_md         = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
-const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
-
-const common_chat_msg message_assist_thoughts_unparsed_r7b       = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
-const common_chat_msg message_assist_thoughts                    = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
-const common_chat_msg message_assist_thoughts_unopened_unparsed  = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_no_content         = simple_assist_msg("", "I'm\nthinking");
-const common_chat_msg message_assist_call                        = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_noopt                  = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_withopt                = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
-const common_chat_msg message_assist_call_content                = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_empty_args             = simple_assist_msg("", "", "special_function");
-const common_chat_msg message_assist_call_cutoff_args            = simple_assist_msg("", "", "special_function", "{\"arg");
-const common_chat_msg message_assist_call_thoughts               = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_thoughts_unparsed      = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_thoughts_content       = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_id                     = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
-const common_chat_msg message_assist_call_idx                    = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
-const common_chat_msg message_assist_thoughts_call_idx           = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
-const common_chat_msg message_assist_call_python                 = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
-const common_chat_msg message_assist_call_python_lines           = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
-const common_chat_msg message_assist_call_python_lines_unclosed  = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
-const common_chat_msg message_assist_call_code_interpreter       = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
-
-// Use for PEG parser implementations
-struct peg_test_case {
-    common_chat_templates_inputs params;
-    std::string input;
-    common_chat_msg expect;
+// Use for PEG parser implementations: one test case, filled in by the init callback passed to test_peg_parser
+struct peg_test_case {
+    common_chat_templates_inputs params;              // template inputs; messages default to { message_user }
+    std::string                  input;               // raw text to parse, fed to the parser prefix by prefix
+    common_chat_msg              expect;              // expected message; an empty role becomes "assistant"
+    bool                         is_partial = false;  // true: full input is parsed as partial, final check skipped
  };
  
+// Test helper: applies the chat templates to the given inputs, loads the resulting params_.parser into a
+// common_peg_arena and parses text with it.
  struct make_peg_parser {
      common_chat_params params_;
-    common_peg_arena arena_;
-
-    make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
-        params_ = common_chat_templates_apply(tmpls, inputs);
+    common_peg_arena   arena_;
+    bool               detailed_debug_;  // forwarded to parser_params.debug in parse()
+
+    // Renders the templates with common_chat_templates_apply and loads params_.parser into arena_.
+    make_peg_parser(common_chat_templates *              tmpls,
+                    const common_chat_templates_inputs & inputs,
+                    bool                                 detailed_debug = false) {
+        detailed_debug_ = detailed_debug;
+        params_         = common_chat_templates_apply(tmpls, inputs);
          arena_.load(params_.parser);
      }
  
-    common_chat_msg parse(const std::string & msg, bool is_partial) {
+    // Parses msg with the loaded arena, using the format chosen by the templates; is_partial is passed
+    // through to common_chat_peg_parse unchanged.
+    common_chat_msg parse(const std::string & msg, bool is_partial) const {
          common_chat_parser_params parser_params;
          parser_params.format = params_.format;
+        parser_params.debug = detailed_debug_;
          return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
      }
  };
  
-static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
+static void test_peg_parser(common_chat_templates *                      tmpls,
+                            const std::function<void(peg_test_case &)> & init,
+                            bool                                         detailed_debug) {
+    // UTF-8-safe truncation helper (same as in test_parser_with_streaming)
+    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
+        auto len = s.size();
+        if (len == 0) {
+            return 0;
+        }
+        auto i = len;
+        for (size_t back = 0; back < 4 && i > 0; ++back) {
+            --i;
+            unsigned char c = s[i];
+            if ((c & 0x80) == 0) {
+                return len;
+            }
+            if ((c & 0xC0) == 0xC0) {
+                size_t expected_len = 0;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
+                } else {
+                    return i;
+                }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
+            }
+        }
+        return len - std::min(len, size_t(3));
+    };
+
      peg_test_case tc;
      init(tc);
      if (tc.params.messages.empty()) {
-        tc.params.messages = {message_user};
+        tc.params.messages = { message_user };
      }
      if (tc.expect.role.empty()) {
          tc.expect.role = "assistant";
      }
  
-    auto parser = make_peg_parser(tmpls, tc.params);
+    auto parser = make_peg_parser(tmpls, tc.params, detailed_debug);
+    if (detailed_debug) {
+        LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str());
+    }
  
      common_chat_msg msg_accum;
      common_chat_msg msg_prev;
      msg_accum.role = msg_prev.role = "assistant";
  
      for (size_t i = 1; i <= tc.input.size(); ++i) {
-        auto is_partial = i < tc.input.size();
-        common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
+        auto            is_partial  = i < tc.input.size() || tc.is_partial;
+        // Use UTF-8 safe truncation to avoid corrupting multi-byte characters
+        size_t          safe_len    = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i));
+        std::string     prefix      = tc.input.substr(0, safe_len);
+        common_chat_msg msg_current = parser.parse(prefix, is_partial);
  
          for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
              if (!diff.reasoning_content_delta.empty()) {
@@ -605,24 +1115,245 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function<v
                  msg_accum.content += diff.content_delta;
              }
              if (diff.tool_call_index != std::string::npos) {
+                // During partial parsing, a new tool call may appear with empty name initially
+                // The name gets filled in as more input is parsed
+                while (msg_accum.tool_calls.size() <= diff.tool_call_index) {
+                    msg_accum.tool_calls.push_back({ "", "", "" });
+                }
+                // Always update name and id from diff (may change during incremental parsing), but only if the delta
+                // actually contains them
                  if (!diff.tool_call_delta.name.empty()) {
-                    msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
+                    msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name;
+                }
+                if (!diff.tool_call_delta.id.empty()) {
+                    msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id;
                  }
                  if (!diff.tool_call_delta.arguments.empty()) {
-                    msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
+                    msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments;
                  }
              }
          }
-        assert_msg_equals(msg_current, msg_accum, true);
+        try {
+            assert_msg_equals(msg_current, msg_accum, true);
+        } catch (std::exception & e) {
+            throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str());
+        }
+
          msg_prev = msg_current;
      }
  
-    assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    if (!tc.is_partial) {
+        assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    }
      assert_msg_equals(tc.expect, msg_accum, true);
+
+    // Test grammar if present in params
+    if (!parser.params_.grammar.empty()) {
+        auto grammar = build_grammar(parser.params_.grammar);
+        if (!grammar) {
+            throw std::runtime_error("Failed to build grammar: " + parser.params_.grammar);
+        }
+
+        // Find the earliest trigger position to determine the constrained portion
+        auto earliest_trigger_pos = std::string::npos;
+        for (const auto & trigger : parser.params_.grammar_triggers) {
+            size_t      pos = std::string::npos;
+            std::smatch match;
+            switch (trigger.type) {
+                case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
+                    {
+                        const auto & word = trigger.value;
+                        pos               = tc.input.find(word);
+                        break;
+                    }
+                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
+                    {
+                        const auto & pattern = std::regex(trigger.value);
+                        if (std::regex_search(tc.input, match, pattern)) {
+                            pos = match.position(pattern.mark_count());
+                        }
+                        break;
+                    }
+                case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
+                    {
+                        const auto & pattern = trigger.value;
+                        if (std::regex_match(tc.input, match, std::regex(pattern))) {
+                            auto mpos = std::string::npos;
+                            for (size_t i = 1; i < match.size(); ++i) {
+                                if (match[i].length() > 0) {
+                                    mpos = match.position(i);
+                                    break;
+                                }
+                            }
+                            if (mpos == std::string::npos) {
+                                mpos = match.position(0);
+                            }
+                            pos = mpos;
+                        }
+                        break;
+                    }
+                default:
+                    throw std::runtime_error("Unknown trigger type");
+            }
+            if (pos != std::string::npos) {
+                if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
+                    earliest_trigger_pos = pos;
+                }
+            }
+        }
+
+        // Determine the constrained portion of input to test against grammar
+        std::string constrained = tc.input;
+        bool grammar_triggered = false;
+        if (earliest_trigger_pos != std::string::npos) {
+            constrained = tc.input.substr(earliest_trigger_pos);
+            grammar_triggered = true;
+        } else if (!parser.params_.grammar_lazy) {
+            // For non-lazy grammars, the entire input should match
+            grammar_triggered = true;
+        }
+
+        // Test the constrained portion against the grammar
+        if (grammar_triggered && !tc.is_partial) {
+            auto result = match_string_detailed(constrained, grammar.get());
+            if (!result.success) {
+                std::string error_msg;
+                if (result.incomplete) {
+                    error_msg =
+                        "Grammar matched all input but expects more:\n\n"
+                        ">>> Input: " + tc.input +
+                        "\n\n>>> Constrained: " + constrained +
+                        "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+                        std::to_string(result.matched_codepoints) + " codepoints): " +
+                        (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+                        "\n\n>>> Expected next: " + result.expected_description +
+                        "\n\n>>> Grammar: " + parser.params_.grammar;
+                } else {
+                    error_msg =
+                        "Grammar match failed:\n\n"
+                        ">>> Input: " + tc.input +
+                        "\n\n>>> Constrained: " + constrained +
+                        "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+                        std::to_string(result.matched_codepoints) + " codepoints): " +
+                        (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+                        "\n\n>>> Failing character: " + result.failing_char +
+                        "\n\n>>> Expected: " + result.expected_description +
+                        "\n\n>>> Grammar: " + parser.params_.grammar;
+                }
+                throw std::runtime_error(error_msg);
+            }
+        }
+    }
+}
+
+// Global template filter for --template flag
+// When non-empty, peg_test_builder::run() skips every test whose template path does not
+// contain this string (compared case-insensitively); when empty, no test is filtered out.
+// NOTE(review): presumably assigned while parsing the command line - the assignment is not
+// visible in this part of the file, confirm against main().
+static std::string g_template_filter;
+
+// Fluent builder for PEG parser tests
+// Forward-declared because peg_tester befriends it and returns it from test() before the
+// builder's full definition appears.
+class peg_test_builder;
+
+// Test harness bound to a single chat template file.
+// The templates are loaded once in the constructor (via read_templates) and individual test
+// cases are created through test(), which returns a peg_test_builder. The builder is a friend
+// so that it can read the loaded templates and the debug flag when a test is run.
+class peg_tester {
+    // Templates loaded from the path given to the constructor.
+    common_chat_templates_ptr tmpls_;
+    // Path exactly as passed to the constructor; exposed through template_path().
+    std::string               template_path_;
+    // Debug flag handed to test_peg_parser() by peg_test_builder::run().
+    bool                      detailed_debug_;
+    friend class peg_test_builder;
+
+  public:
+    // template_path:  template file passed to read_templates().
+    // detailed_debug: stored and later forwarded to test_peg_parser(); defaults to false.
+    explicit peg_tester(const std::string & template_path, const bool detailed_debug = false) :
+        tmpls_(read_templates(template_path)),
+        template_path_(template_path),
+        detailed_debug_(detailed_debug) {}
+
+    // Returns the template path this tester was constructed with.
+    const std::string & template_path() const { return template_path_; }
+
+    // Creates a builder for a test case that parses `input` (defined after peg_test_builder).
+    peg_test_builder test(const std::string & input);
+};
+
+// Fluent builder for a single PEG parser test case.
+//
+// Obtained from peg_tester::test(input). Every setter stores one value in the pending
+// peg_test_case and returns *this so that calls can be chained; run() executes the case.
+// The builder only holds a reference to its peg_tester, so it must not outlive it.
+class peg_test_builder {
+    peg_tester &  tester_;
+    peg_test_case tc_;
+
+    // Returns a lowercase copy of `s`.
+    // Each byte is converted to unsigned char before it reaches ::tolower: calling tolower
+    // with a negative char value (any non-ASCII byte where char is signed) is undefined
+    // behavior, and template paths / filters are not guaranteed to be pure ASCII.
+    static std::string to_lower(std::string s) {
+        std::transform(s.begin(), s.end(), s.begin(),
+                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+        return s;
+    }
+
+  public:
+    // Binds the builder to `tester` and records `input` as the text to be parsed.
+    peg_test_builder(peg_tester & tester, const std::string & input) : tester_(tester) { tc_.input = input; }
+
+    // Parameter setters
+
+    // Sets params.reasoning_format of the test case.
+    peg_test_builder & reasoning_format(common_reasoning_format fmt) {
+        tc_.params.reasoning_format = fmt;
+        return *this;
+    }
+
+    // Sets params.tools of the test case (the vector is taken by value and moved in).
+    peg_test_builder & tools(std::vector<common_chat_tool> tools) {
+        tc_.params.tools = std::move(tools);
+        return *this;
+    }
+
+    // Sets params.enable_thinking of the test case.
+    peg_test_builder & enable_thinking(bool val) {
+        tc_.params.enable_thinking = val;
+        return *this;
+    }
+
+    // Sets params.parallel_tool_calls of the test case.
+    peg_test_builder & parallel_tool_calls(bool val) {
+        tc_.params.parallel_tool_calls = val;
+        return *this;
+    }
+
+    // Sets params.json_schema of the test case to the given schema text.
+    peg_test_builder & json_schema(const std::string & schema) {
+        tc_.params.json_schema = schema;
+        return *this;
+    }
+
+    // Marks the whole input as partial (stored in the test case's is_partial flag).
+    peg_test_builder & is_partial(bool val) {
+        tc_.is_partial = val;
+        return *this;
+    }
+
+    // Expect setters
+
+    // Replaces the entire expected message; this overwrites anything set earlier through
+    // expect_content / expect_reasoning / expect_tool_calls.
+    peg_test_builder & expect(const common_chat_msg & msg) {
+        tc_.expect = msg;
+        return *this;
+    }
+
+    // Sets only the expected content, leaving the other expected fields untouched.
+    peg_test_builder & expect_content(const std::string & content) {
+        tc_.expect.content = content;
+        return *this;
+    }
+
+    // Sets only the expected reasoning content.
+    peg_test_builder & expect_reasoning(const std::string & reasoning) {
+        tc_.expect.reasoning_content = reasoning;
+        return *this;
+    }
+
+    // Sets only the expected tool calls (the vector is taken by value and moved in).
+    peg_test_builder & expect_tool_calls(std::vector<common_chat_tool_call> calls) {
+        tc_.expect.tool_calls = std::move(calls);
+        return *this;
+    }
+
+    // Execute the test.
+    // If g_template_filter is non-empty, the test is silently skipped unless the tester's
+    // template path contains the filter as a case-insensitive substring. Otherwise the case
+    // is logged and handed to test_peg_parser(), which receives a copy of the built test case.
+    void run() {
+        // Check template filter
+        if (!g_template_filter.empty()) {
+            // Case-insensitive substring match
+            const std::string template_path_lower = to_lower(tester_.template_path());
+            const std::string filter_lower        = to_lower(g_template_filter);
+            if (template_path_lower.find(filter_lower) == std::string::npos) {
+                // Skip this test
+                return;
+            }
+        }
+        LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str());
+        test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_);
+    }
+};
+
+// Starts a new fluent test case for `input`, bound to this tester.
+// Defined out of line because peg_test_builder is only forward-declared (an incomplete type)
+// at the point where peg_tester itself is defined.
+peg_test_builder peg_tester::test(const std::string & input) {
+    return peg_test_builder(*this, input);
  }
  
  static void test_msgs_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
      std::vector<common_chat_msg> msgs{
          message_user,
          message_user_parts,
@@ -633,54 +1364,50 @@ static void test_msgs_oaicompat_json_conversion() {
          message_assist_call_id,
          message_assist_call_idx,
          message_assist_call_python,
-        message_assist_call_code_interpreter,
      };
      for (const auto & msg : msgs) {
-        auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
-        auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_msgs_to_json_oaicompat({ msg });
+        auto msgs2    = common_chat_msgs_parse_oaicompat(oai_json);
          assert_equals((size_t) 1, msgs2.size());
-        auto msg2 = msgs2[0];
+        const auto & msg2 = msgs2[0];
          assert_msg_equals(msg, msg2);
      }
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"user\",\n"
-            "    \"content\": [\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"Hey\"\n"
-            "      },\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"there\"\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));
-
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"assistant\",\n"
-            "    \"content\": \"\",\n"
-            "    \"tool_calls\": [\n"
-            "      {\n"
-            "        \"type\": \"function\",\n"
-            "        \"function\": {\n"
-            "          \"name\": \"python\",\n"
-            "          \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
-            "        }\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"user\",\n"
+                              "    \"content\": [\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"Hey\"\n"
+                              "      },\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"there\"\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2));
+
+    // Note: content is "" instead of null due to workaround for templates that render null as "None"
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"assistant\",\n"
+                              "    \"content\": \"\",\n"
+                              "    \"tool_calls\": [\n"
+                              "      {\n"
+                              "        \"type\": \"function\",\n"
+                              "        \"function\": {\n"
+                              "          \"name\": \"python\",\n"
+                              "          \"arguments\": {\n"
+                              "            \"code\": \"print('hey')\"\n"
+                              "          }\n"
+                              "        }\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2));
  
      auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
      assert_equals<size_t>(1, res.size());
@@ -699,16 +1426,15 @@ static void test_msgs_oaicompat_json_conversion() {
  }
  
  static void test_tools_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
      std::vector<common_chat_tool> tools{
          special_function_tool,
          python_tool,
-        code_interpreter_tool,
      };
  
      for (const auto & tool : tools) {
-        auto oai_json = common_chat_tools_to_json_oaicompat({tool});
-        auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_tools_to_json_oaicompat({ tool });
+        auto tools2   = common_chat_tools_parse_oaicompat(oai_json);
          assert_equals((size_t) 1, tools2.size());
          auto tool2 = tools2[0];
          assert_equals(tool.name, tool2.name);
@@ -716,3040 +1442,1428 @@ static void test_tools_oaicompat_json_conversion() {
          assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
      }
  
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"type\": \"function\",\n"
-            "    \"function\": {\n"
-            "      \"name\": \"special_function\",\n"
-            "      \"description\": \"I'm special\",\n"
-            "      \"parameters\": {\n"
-            "        \"type\": \"object\",\n"
-            "        \"properties\": {\n"
-            "          \"arg1\": {\n"
-            "            \"type\": \"integer\",\n"
-            "            \"description\": \"The arg.\"\n"
-            "          }\n"
-            "        },\n"
-            "        \"required\": [\n"
-            "          \"arg1\"\n"
-            "        ]\n"
-            "      }\n"
-            "    }\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"type\": \"function\",\n"
+                              "    \"function\": {\n"
+                              "      \"name\": \"special_function\",\n"
+                              "      \"description\": \"I'm special\",\n"
+                              "      \"parameters\": {\n"
+                              "        \"type\": \"object\",\n"
+                              "        \"properties\": {\n"
+                              "          \"arg1\": {\n"
+                              "            \"type\": \"integer\",\n"
+                              "            \"description\": \"The arg.\"\n"
+                              "          }\n"
+                              "        },\n"
+                              "        \"required\": [\n"
+                              "          \"arg1\"\n"
+                              "        ]\n"
+                              "      }\n"
+                              "    }\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2));
+}
+
+static void test_template_output_peg_parsers(bool detailed_debug) {
+    LOG_DBG("%s\n", __func__);
+
+    // JSON schemas
+    const char * invoice_schema = R"({
+        "type": "object",
+        "properties": {
+            "amount": {"type": "number"},
+            "date": {"type": "string"}
+        }
+    })";
  
      {
-        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
-        assert_equals((size_t) 1, tools_no_params.size());
-        assert_equals(std::string("test_func"), tools_no_params[0].name);
-        assert_equals(std::string("A test"), tools_no_params[0].description);
-        assert_equals(std::string("{}"), tools_no_params[0].parameters);
+        // Ministral-3-14B-Reasoning-2512
+        auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "[THINK]I'm\nthinking[/THINK]"
+               R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
+                 R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "[THINK]I need to output the invoice details in JSON[/THINK]"
+               "```json\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})"
+               "\n```")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
      }
+
      {
-        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
-        assert_equals((size_t) 1, tools_no_desc.size());
-        assert_equals(std::string("test_func"), tools_no_desc[0].name);
-        assert_equals(std::string(""), tools_no_desc[0].description);
+        // NVIDIA Nemotron-3 Nano
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").enable_thinking(false).expect(message_assist).run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "I'm\nthinking\n</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "<parameter=arg2>\n2\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        tst.test(
+               "I need to output the invoice details in JSON\n"
+               "</think>\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
      }
+
      {
-        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func"}}])"));
-        assert_equals((size_t) 1, tools_minimal.size());
-        assert_equals(std::string("test_func"), tools_minimal[0].name);
-        assert_equals(std::string(""), tools_minimal[0].description);
-        assert_equals(std::string("{}"), tools_minimal[0].parameters);
+        // CohereForAI Command-R 7B (2024-tool_use)
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .expect(message_assist_thoughts_unparsed_r7b)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", ")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(message_assist_thoughts_partial_call)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|><|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_idx)
+            .run();
      }
-}
-
-// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961
-struct test_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-};
  
-static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) {
-    common_chat_parser_params params;
-    params.format               = syntax.format;
-    params.reasoning_format     = syntax.reasoning_format;
-    params.reasoning_in_content = syntax.reasoning_in_content;
-    params.thinking_forced_open = syntax.thinking_forced_open;
-    params.parse_tool_calls     = syntax.parse_tool_calls;
-    return common_chat_parse(input, is_partial, params);
-}
+    {
+        // Google Gemma 2 2B - does not support tool calling
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja");
  
-static void test_template_output_parsers() {
-    printf("[%s]\n", __func__);
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
  
-    common_chat_templates_inputs inputs_no_tools;
-    inputs_no_tools.messages                = {message_user};
+        tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).run();
+    }
  
-    common_chat_templates_inputs inputs_tools;
-    inputs_tools.messages                   = {message_user};
-    inputs_tools.tools                      = {special_function_tool};
+    {
+        // Qwen-QwQ-32B (reasoning model)
+        auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja");
  
-    common_chat_templates_inputs inputs_tools_builtin;
-    inputs_tools_builtin.messages           = {message_user};
-    inputs_tools_builtin.tools              = {python_tool};
+        // QwQ always has thinking forced open - input starts after the <think>\n in the prompt
+        tst.test("Let me think about this...\n</think>\nThe answer is 42.")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(simple_assist_msg("The answer is 42.", "Let me think about this..."))
+            .run();
  
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
+    }
      {
-        // Not supported yet
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
+        auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug);
+
+        tst.test(
+               "<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "Hello, world!\nWhat's up?<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
+
+        // Note: Hermes template doesn't support thinking/reasoning natively
+        // Note: We only support one tool calling format per template, no alternate formats
      }
      {
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
-        std::vector<std::string>   end_tokens{ "<|END_OF_TURN_TOKEN|>" };
+        // Test simple content-only template
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug);
  
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format);
-            assert_equals(false, params.thinking_forced_open);
-        }
-
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_r7b,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_call_idx,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                "]<|END_ACTION|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
-                      "<|START_THINKING|><|END_THINKING|>"
-                      "<|START_ACTION|>[\n"
-                      "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                      "]<|END_ACTION|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      COMMON_REASONING_FORMAT_DEEPSEEK);
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<|START_RESPONSE|>Hello, world!\n"
-                      "What's up?<|END_RESPONSE|>",
-                      /* expect_grammar_triggered= */ false);
-    }
-    // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future
-    {
-        auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
-        std::vector<std::string>   end_tokens{ "<end_of_turn>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
-                          inputs_tools)
-                          .format);
-
-        // Generic tool calls doesn't generate / parse content-only messages symmetrically.
-
-        assert_equals(
-            simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","),
-            test_chat_parse(
-                R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-        assert_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "{\n"
-                "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
-                "}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-#if 0
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}");
-#endif
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
      }
      {
-        auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
-        std::vector<std::string>   end_tokens{ "</s>" };
+        // IBM Granite (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
  
-        assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
  
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(
-            tmpls.get(), end_tokens, message_assist_call_id, tools,
-            "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
+        // TODO: pending support for WRAPPED_WITH_REASONING
+        // tst.test("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>")
+        //     .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+        //     .expect(message_assist_thoughts)
+        //     .run();
      }
+
      {
-        assert_msg_equals(
-            simple_assist_msg("Réponse", "raisonnement"),
-            test_chat_parse(
-                message_assist_thoughts_unparsed_magistral.content,
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
+        // ByteDance-Seed-OSS (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("<seed:think>I'm thinking about the answer</seed:think>\nHello, world!")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>\n"
+               "<seed:tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "<parameter=arg2>2</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
+
+        // Tool call whose parameter values contain embedded double quotes
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=edit>\n"
+               "<parameter=filename>\n"
+               "foo.cpp\n"
+               "</parameter>\n"
+               "<parameter=oldString>"
+               "def foo(arg = \"14\"):\n"
+               "    return arg + \"bar\"\n"
+               "\n"
+               "</parameter>\n"
+               "<parameter=newString>"
+               "def foo(arg = \"15\"):\n"
+               "    pass\n"
+               "\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                edit_tool
+        })
+            .expect_tool_calls({
+                { "edit", "{\"filename\": \"foo.cpp\", "
+                    "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", "
+                    "\"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}", {}
+                }
+            })
+            .run();
      }
-    {
-        auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
  
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-    }
      {
-        auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
-                inputs_tools)
-                .format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
-                inputs_tools)
-                .format);
-
-        // Test parsing
-        assert_msg_equals(
-            simple_assist_msg("", "", "python", ""),
-            test_chat_parse(
-                "```json\n"
-                "<function_call> { \"name\" : \"python\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name\"",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                // QwQ-32B's template adds a trailing <think> if add_generation_prompt
-                "I'm\nthinking</think>\n"
-                "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function name=\"special_function\">\n"
-                "{\"arg1\": 1}\n"
-                "</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tools>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tools>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<response>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "<response>\n"
-                "    {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "\n"
-                "                    <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
-                "                    </function_call> \n"
-                "``` ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<json>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</json>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<xml>\n"
-                "  {\n"
-                "    \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
-                "  }\n"
-                "</xml>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<JSON>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</JSON>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\n  \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        // Test multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "";
-        message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>\n"
-                "<function=python>{\"code\":\"print('hello')\"}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "This is not a tool call:",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "This is not a tool call:\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-        //     test_chat_parse(
-        //         "I'm\nthinking</think>Hello, world!\nWhat's up?",
-        //         COMMON_CHAT_FORMAT_HERMES_2_PRO));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>");
-
-        // Test multiple tool calls with template
-        common_chat_msg message_assist_multiple_calls_template;
-        message_assist_multiple_calls_template.role = "assistant";
-        message_assist_multiple_calls_template.content = "";
-        message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
-
-        test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>\n"
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
-                      "</tool_call>");
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
-                      "</tool_call>");
-        assert_msg_equals(
-            simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
-            test_chat_parse(
-                "<think><tool_call>nah uhg</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        // Qwen3-Coder (tool calling with XML-style format)
+        auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "<parameter=arg2>\n"
+               "2\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        // Test with code content (multiline)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        // Test with code content (CJK Unicode character)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "格\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"格\"}", {} },
+            })
+            .run();
+
+        // Test with HTML tag content
+        tst.test(
+               "<tool_call>\n"
+               "<function=html>\n"
+               "<parameter=markup>\n"
+               "<html>\n"
+               " <head>\n"
+               "  <title>Hello!</title>\n"
+               " </head>\n"
+               "</html>\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                html_tool
+        })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"<html>\\n <head>\\n  <title>Hello!</title>\\n </head>\\n</html>\"}", {} },
+            })
+            .run();
+
+        // Test with TODO list (array of objects)
+        tst.test(
+               "<tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
      }
      {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
-                          inputs_tools_builtin)
-                          .format);
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LLAMA_3_X}));
-
-        // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
-                      "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                      "<|python_tag|>python.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"XYZCITY\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
      }
-    {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
  
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-    }
      {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-            common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                        common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        for (auto is_partial : { false, true }) {
-            assert_equals(
-                message_assist_call,
-                test_chat_parse(
-                    "<function=special_function>{\"arg1\": 1}</function>",
-                    is_partial,
-                    {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-        }
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}<",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<function=special_function>{\"arg1\": 1}</function>");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING"))
+            .run();
      }
+
      {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "Hello, world!\nnono\nWhat's up?",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\n"
-                "nono\n"
-                "What's up?>>>special_function\n"
-                "{\"arg1\": 1}\n",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines_unclosed,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "special_function\n"
-                "{\"arg1\": 1} \n                    ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, {},
-                      "all\n"
-                      "Hello, world!\n"
-                      "What's up?",
-                      /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "special_function\n"
-                      "{\"arg1\": 1}");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think>CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({
+                get_time_tool, get_weather_tool
+        })
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .parallel_tool_calls(true)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "REASONING", "CONTENT",
+                { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } }))
+            .run();
      }
-    {
-        auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
  
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
-    }
      {
-        // Original DeepSeek R1 template. Leaves <｜tool▁calls▁begin｜> and others unclosed. Our logic fixes the prompt.
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with"),
-            test_chat_parse(
-                "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-        //               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-        //               "```json\n"
-        //               "{\"arg1\": 1}\n"
-        //               // Look what's not here: <｜tool▁calls▁end｜> (also missing the <｜end▁of▁sentence｜>, but that is removed lazily by the test's delta logic)
-        //               "```<｜tool▁call▁end｜>",
-        //               /* expect_grammar_triggered= */ true,
-        //               /* test_grammar_if_triggered= */ false);
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("REASONING</think>\nCONTENT")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("CONTENT", "REASONING\n"))
+            .run();
      }
+
      {
-        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
-        auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        assert_msg_equals(message_assist_call_thoughts_unparsed,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<｜tool▁calls｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("CONTENT").expect(simple_assist_msg("CONTENT", "")).run();
      }
+
+    // GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
      {
-        auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|end_of_text|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<think>I'm\nthinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(
-            message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-    // TODO @ngxson : generic tool call should be removed in the future
-#if 0
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}",
-                      /* expect_grammar_triggered= */ false
-        );
-#endif
-    }
+        auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug);
+        tst.test(
+               "<tool_call>special_function\n"
+               "<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // GLM-4.7-Flash tests - format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
+    // Note: Template uses forced-open thinking mode (prompt ends with <think>)
      {
-        auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
-        std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthink",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-
-        // Test parse_tool_calls == false
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-
-        // Test reasoning formats
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                }));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ true,
-                }));
-
-        // Test tool calling in role header
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-    }
+        auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug);
+
+        // Pure content (no reasoning)
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .expect(message_assist)
+            .run();
+
+        // Reasoning with content (forced-open mode - input starts after <think>)
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Tool call without reasoning
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning (forced-open mode)
+        tst.test(
+               "I'm\nthinking</think>"
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>"
+               "<tool_call>special_function_with_opt"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "<arg_key>arg2</arg_key><arg_value>2</arg_value>"
+               "</tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+    }
+
+    // Kimi-K2-Thinking tests - custom parser
+    // Unique feature: tool call ID embeds function name as functions.<name>:<counter>
      {
-        // Seed-OSS format tests
-        auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
-        std::vector<std::string> end_tokens{ "<seed:eos>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-
-        // Test simple reasoning content
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
-            test_chat_parse(
-                "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test budget reflection tags
-        common_chat_msg msg_budget_reflect;
-        msg_budget_reflect.role = "assistant";
-        msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
-        msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
-        assert_msg_equals(
-            msg_budget_reflect,
-            test_chat_parse(
-                "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
-                "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
-                "I need to calculate this step by step.",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test tool calls with Seed-OSS format
-        common_chat_msg msg_tool_call;
-        msg_tool_call.role = "assistant";
-        msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_tool_call,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test reasoning + tool call combination
-        common_chat_msg msg_reasoning_tool;
-        msg_reasoning_tool.role = "assistant";
-        msg_reasoning_tool.content = "";
-        msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
-        msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_reasoning_tool,
-            test_chat_parse(
-                "<seed:think>I need to calculate the sum of these numbers</seed:think>"
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test deltas: the number of tool calls in partial parses should never decrease
-        std::string tool_msg = "<seed:tool_call>\n"
-            "<function=fun>\n"
-            "<parameter=smth>[1, 2, 3]</parameter>\n"
-            "</function>";
-        std::size_t previousToolCalls = 0;
-        for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
-            auto partial = tool_msg.substr(0, i);
-            auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
-            if (partial_res.tool_calls.size() < previousToolCalls) {
-                throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
-            }
-            previousToolCalls = partial_res.tool_calls.size();
-        }
-
-        // Test multiple parameters in tool call
-        common_chat_msg msg_multi_param;
-        msg_multi_param.role = "assistant";
-        msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
-        assert_msg_equals(
-            msg_multi_param,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=process_data>\n"
-                "<parameter=input>test</parameter>\n"
-                "<parameter=format>json</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
-        assert_msg_equals(
-            simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1,\n",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test incomplete reasoning tag
-        assert_msg_equals(
-            simple_assist_msg("", "I was thinking"),
-            test_chat_parse(
-                "<seed:think>I was thinking",
-                /* is_partial= */ true,
+        auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug);
+
+        // Basic content only
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Single tool call
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Single tool call with reasoning
+        tst.test(
+               "<think>I'm thinking about this</think>"
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "I'm thinking about this", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Tool call with content
+        tst.test(
+               "Hello, world!\nWhat's up?"
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Multiple tool calls (parallel) - tests the indexing behavior
+        tst.test(
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.special_function_with_opt:1<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", "functions.special_function_with_opt:1" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning
+        tst.test(
+               "<think>I need to call two functions</think>"
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Python tool with multiline code
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.python:0<|tool_call_argument_begin|>"
+               "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ python_tool })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", "functions.python:0" },
+            })
+            .run();
+
+        // Tool call with empty arguments
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.empty_args:0<|tool_call_argument_begin|>"
+               "{}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}", "functions.empty_args:0"))
+            .run();
+
+        // Partial tool call (streaming)
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": ")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ", "functions.special_function:0"))
+            .run();
+
+        // Three tool calls to verify counter continues incrementing
+        tst.test(
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print(1)\"}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.html:2<|tool_call_argument_begin|>{\"markup\": \"<p>test</p>\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool, html_tool
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print(1)\"}", "functions.python:1" },
+                { "html", "{\"markup\": \"<p>test</p>\"}", "functions.html:2" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning, call *inside thinking block*
+        tst.test(
+               "<think>I need to call two functions"
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning, call *inside thinking block* and *without section markers or end markers*
+        tst.test(
+               "<think>I need to call two functions"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Real life test - execute_command
+        tst.test("<|tool_call_begin|>functions.execute_command:0<|tool_call_argument_begin|>{\"command\": \"ls -lah\""
+            ", \"cwd\": \"/home/jarvis/development/exllamav3\", \"timeout\": 10}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                {
+                    /* .name = */ "execute_command",
+                    /* .description = */ "Execute shell command",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "Shell command to execute"
+                            },
+                            "cwd": {
+                                "type": "string",
+                                "description": "Working directory"
+                            },
+                            "timeout": {
+                                "type": "integer",
+                                "description": "The timeout in seconds"
+                            }
+                        },
+                        "required": ["command"]
+                    })"
+                }
+            }).
+            expect_tool_calls({
                  {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test content without reasoning
-        assert_msg_equals(
-            simple_assist_msg("This is a simple response without reasoning."),
-            test_chat_parse(
-                "This is a simple response without reasoning.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+                    "execute_command",
+                    R"({"command": "ls -lah", "cwd": "/home/jarvis/development/exllamav3", "timeout": 10})",
+                    "functions.execute_command:0"
+                }
+            })
+            .run();
      }
-    {
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
-        std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
  
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?\n",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                      /* expect_grammar_triggered= */ true
-        );
-    }
      {
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // variant: thinking forced open, reasoning_format none
-        assert_msg_equals(
-            simple_assist_msg("REASONING</think>ok", ""),
-            test_chat_parse(
-                "REASONING</think>ok",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: happy path for when it works as the model card says it should
-        assert_msg_equals(
-            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + thinking open
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "CONTENT";
-        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-        //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-        //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-        //          add the reasoning content as regular content and parse the tool calls.
-        assert_msg_equals(
-            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>", "", ""),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking not forced open + missing reasoning + no tool calls
-        assert_msg_equals(
-            simple_assist_msg("CONTENT", ""),
-            test_chat_parse(
-                "CONTENT",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-    }
+        auto kimi_id_special_func_tool_call =
+            simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0");
+
+        // Kimi-K2 old template
+        auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(kimi_id_special_func_tool_call)
+            .run();
+
+        // Kimi-K2-Instruct
+        auto tst2 = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug);
+        tst2.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst2.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(kimi_id_special_func_tool_call)
+            .run();
+    }
+
+    // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
+    // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
      {
-        auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|assistant_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                      /* expect_grammar_triggered= */ true
-        );
-
-        // TODO @ngxson : not sure why this fails, but not very important for now
-        // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+        auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug);
+        tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
      }
-    {
-        // LFM2 format tests
-        auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
-            common_chat_templates_inputs inputs;
-            inputs.messages = {
-                std::invoke([&]() -> common_chat_msg {
-                    common_chat_msg msg;
-                    msg.role = "system";
-                    msg.content = "force json schema.\n";
-                    return msg;
-                }),
-                message_user,
-            };
-            inputs.tools = {special_function_tool};
-            return inputs;
-        });
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(true, params.grammar.empty());
-        }
-
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
-            assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
-            assert_equals(true, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(false, params.grammar.empty());
-        }
  
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test single tool call with JSON format
-        common_chat_msg msg_single_tool_call;
-        msg_single_tool_call.role = "assistant";
-        msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
-        assert_msg_equals(
-            msg_single_tool_call,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with string argument
-        common_chat_msg msg_tool_call_string;
-        msg_tool_call_string.role = "assistant";
-        msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_string,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with multiple arguments
-        common_chat_msg msg_multi_args;
-        msg_multi_args.role = "assistant";
-        msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
-        assert_msg_equals(
-            msg_multi_args,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test multiple tool calls in single array
-        common_chat_msg msg_multiple_tools;
-        msg_multiple_tools.role = "assistant";
-        msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
-        assert_msg_equals(
-            msg_multiple_tools,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content before
-        common_chat_msg msg_content_before_tool;
-        msg_content_before_tool.role = "assistant";
-        msg_content_before_tool.content = "Let me check the weather for you.";
-        msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_before_tool,
-            test_chat_parse(
-                "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content after
-        common_chat_msg msg_content_after_tool;
-        msg_content_after_tool.role = "assistant";
-        msg_content_after_tool.content = "Here's the result.";
-        msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_after_tool,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with newlines (common in LLM output)
-        common_chat_msg msg_tool_call_newlines;
-        msg_tool_call_newlines.role = "assistant";
-        msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_newlines,
-            test_chat_parse(
-                "<|tool_call_start|>[{\n    \"name\": \"get_current_time\",\n    \"arguments\": {\n        \"location\": \"Paris\"\n    }\n}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
-        // Unlike other formats, LFM2 template does not render tool calls in conversation history,
-        // so we don't use test_templates() for tool call generation. Instead, the parsing tests
-        // above verify edge cases and format variations for the tool call output format.
+    // MiniMax-M2 tests - XML invoke format with parameter tags
+    // Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
+    {
+        auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
+        tst.test(
+               "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
+               "name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
      }
  
+    // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format
+    // Format: <TOOLCALL>[{"name": "func", "arguments": {...}}]</TOOLCALL>
      {
-        auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
-        std::vector<std::string> end_tokens{ "[e~[" };
-
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug);
+        tst.test("<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL><SPECIAL_12>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
      }
  
+    // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION)
      {
-        auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
-        std::vector<std::string>   end_tokens{ "<|assistant|>", "<|observation|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }), true);
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}), true);
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}
-            ), true);
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-            test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<tool_call>complex_function\n"
-                "<arg_key>name</arg_key>\n"
-                "<arg_value>John Doe</arg_value>\n"
-                "<arg_key>age</arg_key>\n"
-                "<arg_value>30</arg_value>\n"
-                "<arg_key>active</arg_key>\n"
-                "<arg_value>true</arg_value>\n"
-                "<arg_key>score</arg_key>\n"
-                "<arg_value>95.5</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<tool_call>web_search\n"
-                "<arg_key>query</arg_key>\n"
-                "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
-                "<arg_key>limit</arg_key>\n"
-                "<arg_value>3</arg_value>\n"
-                "<arg_key>type</arg_key>\n"
-                "<arg_value>text</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-
-        // Test interleaved thinking
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "\n<think></think>\nHello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+    }
+    // CohereForAI-c4ai-command-r-plus (uses markdown code block format)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug);
+        tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run();
+        // Tool calls: Action: followed by JSON code block
+        tst.test(
+               "Action:\n"
+               "```json\n"
+               "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n"
+               "```")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // mistralai-Mistral-Nemo-Instruct-2407.jinja
+    {
+        auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
      }
-
      {
-        auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg(
-                        "Let me start by examining the relevant files to understand the current implementation.", "",
-                        "read_file",
-                        "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
-                "Let me start by examining the relevant files to understand the current implementation."
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
-        multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
-        multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
-        multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
-        multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" });
-        test_parser_with_streaming(multi_tool_msg,
-                "<think>I'm thinking.</think>Let me call multiple tools."
-                "<|tool_calls_section_begin|>"
-                "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
-                "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}"
-                "<|tool_call_end|>"
-                "<|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-
-        // Test template rendering
-        common_chat_templates_inputs conversation_with_tools = inputs_tools;
-        conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 1",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "complex_function",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 2",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "web_search",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 3",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "read_file",
-            /* .tool_call_id = */ "",
-        });
-        assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<think></think>Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<function=special_function>{\"arg1\": 1}</function>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+    // Functionary v3.2 - recipient-based format: >>>recipient\n{content}
+    {
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
+        tst.test(">>>all\nHello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(">>>special_function\n{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
      }
  
+    // FireFunction
      {
-        // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
-        // but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
-        // schema-aware parameter parsing.
-        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Grammar and PEG parser should be generated with thinking_forced_open
-        {
-            common_chat_templates_inputs inputs;
-            inputs.messages = { message_user };
-            inputs.tools = { special_function_tool };
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
-            assert_equals(true, params.thinking_forced_open);
-            assert_equals(false, params.grammar.empty());
-            assert_equals(false, params.parser.empty());
-            auto grammar = build_grammar(params.grammar);
-            GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar");
-        }
+        auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
      }
-}
  
-static void test_template_output_peg_parsers() {
-    printf("[%s]\n", __func__);
+    // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(true)  // Forced open
+            .expect(message_assist)
+            .run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+    }
+    // llama-cpp DeepSeek R1 template (always forced-open thinking)
+    {
+        auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .parallel_tool_calls(true)
+            .expect(message_assist_call)
+            .run();
+    }
+    // DeepSeek R1 Distill Qwen 32B - reasoning and tool call tests (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").enable_thinking(true).expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // MiMo-VL / Hermes 3 / Qwen 2.5 (Common <tool_call> JSON format)
+    for (const auto & path :
+         { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+           "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) {
+        auto tst = peg_tester(path, detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
+
+    // Apriel 1.5
+    {
+        auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_calls>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</tool_calls>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
  
-    // JSON schemas
-    const char * invoice_schema = R"({
-        "type": "object",
-        "properties": {
-            "amount": {"type": "number"},
-            "date": {"type": "string"}
-        }
-    })";
+    // Apriel 1.6 Thinker (forced-open reasoning, with and without tool calls)
+    {
+        auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Implicit reasoning start (forced open)
+        tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Reasoning + Tool calls
+        tst.test(
+               "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
+               "{\"arg1\": 1}}]</tool_calls>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+    }
+
+    // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...}
+    {
+        auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .run();
+    }
+    // Devstral
+    {
+        auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+        tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
+    }
  
      {
-        // Ministral-3-14B-Reasoning-2512
-        auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]"
-                      R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
-                      R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
-                      "```json\n"
-                      R"({"amount": 123.45, "date": "2025-12-03"})"
-                      "\n```";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        // Llama 3.1
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
      }
  
      {
-        // Qwen3-Coder
-        auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with JSON parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=todo_list>\n"
-                "<parameter=todos>\n"
-                "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {todo_list_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "todo_list",
-                /* .arguments = */ "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        // Llama 3.2
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).run();
      }
  
      {
-        // NVIDIA Nemotron-3 Nano
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.enable_thinking = true;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        // Llama 3.3
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).run();
      }
  
+    // GPT-OSS format tests
      {
-        // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open)
-        // Unlike Nemotron, Step-3.5-Flash always emits <think> regardless of enable_thinking,
-        // so all inputs must include a </think> delimiter.
-        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-
-        // Test basic message with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "</think>\nHello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist;
-        });
-
-        // Test tool call without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with thinking
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls with thinking
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test parallel tool calls without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with code string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format (JSON schema with thinking)
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug);
+
+        // Basic content only - final channel
+        tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Basic content only - commentary channel
+        tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Analysis channel (reasoning) with final channel (content)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Analysis channel only (partial) - still works when reasoning format is set
+        tst.test("<|channel|>analysis<|message|>I'm\nthinking")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .is_partial(true)
+            .expect_reasoning("I'm\nthinking")
+            .run();
+
+        // Reasoning format none - reasoning stays in content
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
+            .run();
+
+        // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON"
+        tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call in commentary channel (channel header variant)
+        tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning (analysis first, then tool call)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n"
+               "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Tool call in the commentary channel, preceded by an analysis (reasoning) message
+        tst.test(
+                "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary"
+                " to=functions.special_function <|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Reasoning after final channel
+        // (the final channel message is empty, so the expected content is "")
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..."
+        )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("Thinking about edit...")
+            .expect_content("")
+            .run();
+
+        // Tool calling after final channel
+        tst.test(
+            "<|channel|>final<|message|><|end|>"
+            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>"
+            "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
+            "<|message|>{\"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
+            )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                {
+                    /* .name = */ "edit",
+                    /* .description = */ "Edit a file",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "oldString": {
+                                "type": "string",
+                                "description": "Old string to replace."
+                            },
+                            "newString": {
+                                "type": "string",
+                                "description": "New replacement string."
+                            },
+                            "replaceAll": {
+                                "type": "boolean",
+                                "description": "Whether to replace all occurences."
+                            }
+                        },
+                        "required": ["oldString", "newString"]
+                    })",
+                }
+            })
+            .expect_reasoning("Thinking about edit...")
+            .expect_tool_calls({
+                { "edit", R"({"oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} }
+            })
+            .run();
+
+        // Parallel tool calls
+        tst.test(
+               " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n"
+               "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, "
+               "\"arg2\": 2}")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
      }
  
      {
-        // Solar-Open-100B
-        auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message and reasoning_effort = low
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>123456789"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call_id;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call with reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call without reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist_call_idx;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>"
-                      "<|tool_call:begin|>1"
-                      "<|tool_call:name|>special_function_with_opt"
-                      "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
-                      "<|tool_call:end|>";
-
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        "0",
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        "1",
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
-                      "<|begin|>assistant<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-
-        // Test response format no reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
+        tst.test("I was thinking</think>\nNow I'm not.").
+            enable_thinking(true).
+            reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
+            expect_reasoning("I was thinking").
+            expect_content("Now I'm not.")
+        .run();
+
+        // Test that numeric-looking string values are coerced to strings per the schema
+        tst.test(
+               "Let me call the magic tool\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic>\n"
+               "<parameter=name>\nfooBar\n</parameter>\n"
+               "<parameter=ref>\n5123123\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_tool })
+            .expect_reasoning("Let me call the magic tool")
+            .expect_tool_calls({
+                { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} },
+            })
+            .run();
+
+        // Test that numeric values are correctly interpreted as numbers when schema calls for number
+        tst.test(
+               "Let me call the special function\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n42555916\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect_reasoning("Let me call the special function")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 42555916})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Let me call the special function with opt\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n42555916\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool_with_optional_param })
+            .expect_reasoning("Let me call the special function with opt")
+            .expect_tool_calls({
+                { "special_function_with_opt", R"({"arg1": 42555916})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Let me call the magic_int function\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic_int>\n"
+               "<parameter=ref>\n42555916\n</parameter>\n"
+               "<parameter=name>\nbaz\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Let me call the magic_int function")
+            .expect_tool_calls({
+                { "magic_int", R"({"ref": 42555916, "name": "baz"})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Call string_param with empty text\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=string_param>\n"
+               "<parameter=text>\n\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ string_param_tool })
+            .expect_reasoning("Call string_param with empty text")
+            .expect_tool_calls({
+                { "string_param", R"({"text": ""})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Test simple quoted unquoted\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=quoted_unquoted>\n"
+               "<parameter=quoted>\n\"foo\"\n</parameter>\n"
+               "<parameter=unquoted>\nfoo\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ quoted_unquoted_tool })
+            .expect_reasoning("Test simple quoted unquoted")
+            .expect_tool_calls({
+                { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Test complex quoted unquoted\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=quoted_unquoted>\n"
+               "<parameter=quoted>\n\"printf(\\\"foo\\\");\"\n</parameter>\n"
+               "<parameter=unquoted>\nprintf(\"foo\");\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ quoted_unquoted_tool })
+            .expect_reasoning("Test complex quoted unquoted")
+            .expect_tool_calls({
+                { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test negative number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic_int>\n"
+               "<parameter=ref>\n-14\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Test negative number")
+            .expect_tool_calls({
+                { "magic_int", R"({ "ref" : -14 })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test decimal number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=amount>\n"
+               "<parameter=orig>\n3.14\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ amount_tool })
+            .expect_reasoning("Test decimal number")
+            .expect_tool_calls({
+                { "amount", R"({ "orig" : 3.14 })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test imaginary number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=imaginary_number>\n"
+               "<parameter=number>\n"
+               "{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ imaginary_number_tool })
+            .expect_reasoning("Test imaginary number")
+            .expect_tool_calls({
+                { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
+            })
+            .run();
+
      }
  }
  
  static void test_msg_diffs_compute() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
      {
          common_chat_msg msg1;
  
@@ -3759,9 +2873,7 @@ static void test_msg_diffs_compute() {
          common_chat_msg_diff diff;
          diff.content_delta = "Hello, world!";
  
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
      }
      {
          common_chat_msg msg1;
@@ -3773,37 +2885,35 @@ static void test_msg_diffs_compute() {
          common_chat_msg_diff diff;
          diff.content_delta = " world!";
  
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
      }
      {
          common_chat_msg msg0;
  
          common_chat_msg msg1;
-        msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
+        msg1.tool_calls = {
+            { "special_function", "{\"ar", /* .id = */ "123" }
+        };
  
          common_chat_msg msg2;
-        msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
+        msg2.tool_calls = {
+            { "special_function", "{\"arg1\": 1}", /* .id = */ "123" }
+        };
  
          common_chat_msg_diff diff01;
-        diff01.tool_call_index = 0;
-        diff01.tool_call_delta.name = "special_function";
-        diff01.tool_call_delta.id = "123";
+        diff01.tool_call_index           = 0;
+        diff01.tool_call_delta.name      = "special_function";
+        diff01.tool_call_delta.id        = "123";
          diff01.tool_call_delta.arguments = "{\"ar";
  
-        assert_equals(
-            {diff01},
-            common_chat_msg_diff::compute_diffs(msg0, msg1));
+        assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1));
  
          common_chat_msg_diff diff12;
-        diff12.tool_call_index = 0;
+        diff12.tool_call_index           = 0;
          // Note: neither id nor name change here.
          diff12.tool_call_delta.arguments = "g1\": 1}";
  
-        assert_equals(
-            {diff12},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2));
      }
      {
          common_chat_msg msg0;
@@ -3815,68 +2925,81 @@ static void test_msg_diffs_compute() {
          };
  
          common_chat_msg_diff diff1;
-        diff1.tool_call_index = 0;
-        diff1.tool_call_delta.name = "f1";
-        diff1.tool_call_delta.id = "123";
+        diff1.tool_call_index           = 0;
+        diff1.tool_call_delta.name      = "f1";
+        diff1.tool_call_delta.id        = "123";
          diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
  
          common_chat_msg_diff diff2;
-        diff2.tool_call_index = 1;
-        diff2.tool_call_delta.name = "f2";
-        diff2.tool_call_delta.id = "222";
+        diff2.tool_call_index           = 1;
+        diff2.tool_call_delta.name      = "f2";
+        diff2.tool_call_delta.id        = "222";
          diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
  
-        assert_equals(
-            {diff1, diff2},
-            common_chat_msg_diff::compute_diffs(msg0, msg2));
+        assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2));
      }
  }
  
  int main(int argc, char ** argv) {
-    common_log_set_verbosity_thold(999);
+    bool detailed_debug    = false;
+    bool only_run_filtered = false;
+
+    // Scan the arguments for the --template <filter> and --detailed flags
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "--template" && i + 1 < argc) {
+            g_template_filter = argv[++i];
+            // Only run PEG parser tests with the filter
+            only_run_filtered = true;
+        }
+        if (arg == "--detailed") {
+            detailed_debug = true;
+            common_log_set_verbosity_thold(999);
+        }
+    }
+
+    if (only_run_filtered) {
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All template tests passed!" << '\n';
+        return 0;
+    }
  
-    // try {
  #ifndef _WIN32
-        if (argc > 1) {
-            common_chat_templates_inputs inputs;
-            common_chat_msg msg;
-            msg.role = "user";
-            msg.content = "Hey";
-            inputs.messages = {msg};
-            inputs.tools = { special_function_tool };
-
-            std::cout << "| Template | Format |\n";
-            std::cout << "|----------|--------|\n";
-
-            for (int i = 1; i < argc; i++) {
-                try {
-                    std::string path = argv[i];
-                    if (path.rfind(".jinja") != path.size() - 6) {
-                        std::cerr << "Skipping non-jinja file: " << path << '\n';
-                        continue;
-                    }
-                    auto tmpls = read_templates(path);
-                    auto parts  = string_split(path, "/");
-                    auto name   = parts[parts.size() - 1];
-                    auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
-                    std::cout << "| " << name << " | " << format << " |\n";
-                } catch (const std::exception & e) {
-                    std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
+    if (argc > 1) {  // NOTE(review): also taken when only --detailed is passed, so the tests below are skipped - confirm intended
+        common_chat_templates_inputs inputs;
+        common_chat_msg              msg;
+        msg.role        = "user";
+        msg.content     = "Hey";
+        inputs.messages = { msg };
+        inputs.tools    = { special_function_tool };
+
+        std::cout << "| Template | Format |\n";
+        std::cout << "|----------|--------|\n";
+
+        for (int i = 1; i < argc; i++) {
+            try {
+                std::string path = argv[i];
+                if (path.rfind(".jinja") != path.size() - 6) {  // ".jinja" suffix check; NOTE(review): size() - 6 wraps for paths shorter than 6 chars
+                    std::cerr << "Skipping non-jinja file: " << path << '\n';
+                    continue;
                  }
+                auto         tmpls  = read_templates(path);
+                auto         parts  = string_split(path, "/");
+                const auto & name   = parts[parts.size() - 1];
+                const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
+                std::cout << "| " << name << " | " << format << " |\n";
+            } catch (const std::exception & e) {
+                std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
              }
-        } else
-#endif
-        {
-            test_msg_diffs_compute();
-            test_msgs_oaicompat_json_conversion();
-            test_tools_oaicompat_json_conversion();
-            test_template_output_parsers();
-            test_template_output_peg_parsers();
-            std::cout << "\n[chat] All tests passed!" << '\n';
          }
-        return 0;
-    // } catch (const std::exception & e) {
-    //     std::cerr << "Error: " << e.what() << '\n';
-    //     return 1;
-    // }
+    } else
+#endif
+    {
+        test_msg_diffs_compute();
+        test_msgs_oaicompat_json_conversion();
+        test_tools_oaicompat_json_conversion();
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All tests passed!" << '\n';
+    }
+    return 0;
  }
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp

index a8e9ff33a43b2b937c19b50f3e4f1f7e092c88b8..eb33804c9a768bb9e65927c1186ca3eff3351823 100755 (executable)
--- a/tests/test-json-schema-to-grammar.cpp
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -1340,6 +1340,26 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
          )"""
      });
  
+    test({
+        SUCCESS,
+        "description only (no type) treated as unconstrained",
+        R"""({"description": "The 0-based index of the last line to be retrieved (inclusive). If None, read until the end of the file."})""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= value
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
      test({
          SUCCESS,
          "literal string with escapes",
diff --git a/tests/test-peg-parser.cpp b/tests/test-peg-parser.cpp

index 220745d029342faea2252975be0507d80c4a8022..7d22d776120143830aeaf74b9f843c40527c9437 100644 (file)
--- a/tests/test-peg-parser.cpp
+++ b/tests/test-peg-parser.cpp
@@ -20,6 +20,7 @@ int main(int argc, char *argv[]) {
      t.test("json", test_json_parser);
      t.test("gbnf", test_gbnf_generation);
      t.test("serialization", test_json_serialization);
+    t.test("python-dict", test_python_dict_parser);
  
      return t.summary();
  }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt

index 518f8b9ae745ed4c4936f03adbb1fc78be8e177a..7c63b3aae54751bf62c76d29a3da8647791ff20a 100644 (file)
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -26,6 +26,7 @@ else()
          add_subdirectory(server)
      endif()
      add_subdirectory(tokenize)
+    add_subdirectory(parser)
      add_subdirectory(tts)
      add_subdirectory(mtmd)
      if (GGML_RPC)
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp

index 13bedf31ebea5b86def6fa3f4f7b8b9334d8ca50..d43d105490753521adbbe6ef1de3f8fa99174221 100644 (file)
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -1,3 +1,4 @@
+#include "chat.h"
  #include "common.h"
  #include "arg.h"
  #include "console.h"
@@ -191,7 +192,8 @@ struct cli_context {
          inputs.use_jinja             = chat_params.use_jinja;
          inputs.parallel_tool_calls   = false;
          inputs.add_generation_prompt = true;
-        inputs.enable_thinking       = chat_params.enable_thinking;
+        inputs.reasoning_format      = COMMON_REASONING_FORMAT_DEEPSEEK;
+        inputs.enable_thinking       = common_chat_templates_support_enable_thinking(chat_params.tmpls.get());
  
          // Apply chat template to the list of messages
          return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt

new file mode 100644 (file)

index 0000000..55e0c63
--- /dev/null
+++ b/tools/parser/CMakeLists.txt
@@ -0,0 +1,20 @@
+if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
+    # llama-debug-template-parser is skipped on Windows shared-library builds because it uses internal functions that are not exported with LLAMA_API
+    set(TARGET llama-debug-template-parser)
+    add_executable(${TARGET} debug-template-parser.cpp)
+    target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+    if(LLAMA_TOOLS_INSTALL)
+        install(TARGETS ${TARGET} RUNTIME)
+    endif()
+endif()
+
+set(TARGET llama-template-analysis)
+add_executable(${TARGET} template-analysis.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
diff --git a/tools/parser/debug-template-parser.cpp b/tools/parser/debug-template-parser.cpp

new file mode 100644 (file)

index 0000000..ffa3a5a
--- /dev/null
+++ b/tools/parser/debug-template-parser.cpp
@@ -0,0 +1,452 @@
+#include "../src/llama-grammar.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "gguf.h"
+#include "jinja/runtime.h"
+#include "log.h"
+
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include <string>
+
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+using json = nlohmann::ordered_json;
+
+enum class output_mode {
+    ANALYSIS,  // Only output analysis results
+    TEMPLATE,  // Only output rendered template
+    BOTH       // Output both (default, see debug_options::mode)
+};
+
+enum class input_message_type {
+    NONE,                    // Don't render any message scenarios (only analysis); default in debug_options
+    CONTENT_ONLY,            // Simple assistant message with content
+    REASONING_CONTENT,       // Message with reasoning_content + content
+    TOOL_CALL_ONLY,          // Message with tool_calls only (content is null)
+    CONTENT_TOOL_CALL,       // Message with content + tool_calls
+    REASONING_TOOL_CALL,     // Message with reasoning_content + tool_calls (content is null)
+    CONTENT_FAKE_TOOL_CALL,  // Message with content but no actual tool_calls (for testing)
+    ALL                      // Render all scenarios
+};
+
+struct debug_options {
+    std::string      template_path;                        // argv[1]: the <template_or_gguf_path> argument
+    bool             with_tools        = true;             // cleared by --no-tools
+    bool             generation_prompt = true;             // --generation-prompt=0|1
+    bool             enable_reasoning  = true;             // --enable-reasoning=0|1
+    bool             debug_jinja       = false;            // set by --debug-jinja
+    bool             force_tool_call   = false;            // set by --force-tool-call
+    output_mode      mode              = output_mode::BOTH;  // --output=analysis|template|both
+    input_message_type input_message   = input_message_type::NONE;  // --input-message=TYPE
+};
+
+// Returns the full contents of the file at `path`, read in binary mode.
+// Throws std::runtime_error when the file cannot be opened.
+static std::string read_file(const std::string & path) {
+    std::ifstream input(path, std::ios::binary);
+    if (input.is_open()) {
+        std::ostringstream contents;
+        contents << input.rdbuf();
+        return contents.str();
+    }
+    throw std::runtime_error("Could not open file: " + path);
+}
+
+static std::string read_gguf_chat_template(const std::string & path) {
+    struct gguf_init_params params = { /*no_alloc =*/true,  // We only need metadata, not tensor data
+                                       /*ctx=*/nullptr };
+
+    struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+    if (ctx == nullptr) {
+        throw std::runtime_error("Could not open GGUF file: " + path);
+    }
+
+    const char * key    = "tokenizer.chat_template";
+    int64_t      key_id = gguf_find_key(ctx, key);
+
+    if (key_id == -1) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
+    }
+
+    const char * template_str = gguf_get_val_str(ctx, key_id);
+    if (template_str == nullptr) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file contains chat template key but value is null");
+    }
+
+    std::string result = template_str;
+    gguf_free(ctx);
+    return result;
+}
+
+static void print_usage(const char * program_name) {  // Logs the usage line, option list and examples via LOG_ERR; program_name fills the %s placeholders
+    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --no-tools              Disable tool definitions\n");
+    LOG_ERR("  --force-tool-call       Set tool calls to forced\n");
+    LOG_ERR("  --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
+    LOG_ERR("  --enable-reasoning=0|1  Enable reasoning parsing (default: 1)\n");
+    LOG_ERR("  --output=MODE           Output mode: analysis, template, both (default: both)\n");
+    LOG_ERR("  --debug-jinja           Enable Jinja fine-grained debug\n");
+    LOG_ERR("  --input-message=TYPE    Message type to render:\n");
+    LOG_ERR("                          content_only, reasoning_content, tool_call_only,\n");
+    LOG_ERR("                          content_tool_call, reasoning_tool_call,\n");
+    LOG_ERR("                          content_fake_tool_call, all\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
+    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
+}
+
+// Interprets a flag value: exactly "1", "true" or "yes" yields true, anything else false.
+static bool parse_bool_option(const std::string & value) {
+    for (const char * truthy : { "1", "true", "yes" }) {
+        if (value == truthy) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool parse_options(int argc, char ** argv, debug_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    opts.template_path = argv[1];
+
+    for (int i = 2; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--force-tool-call") {
+            opts.force_tool_call = true;
+        } else if (arg == "--debug-jinja") {
+            opts.debug_jinja = true;
+        } else if (arg == "--no-tools") {
+            opts.with_tools = false;
+        } else if (arg.rfind("--generation-prompt=", 0) == 0) {
+            opts.generation_prompt = parse_bool_option(arg.substr(20));
+        } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
+            opts.enable_reasoning = parse_bool_option(arg.substr(19));
+        } else if (arg.rfind("--output=", 0) == 0) {
+            std::string mode = arg.substr(9);
+            if (mode == "analysis") {
+                opts.mode = output_mode::ANALYSIS;
+            } else if (mode == "template") {
+                opts.mode = output_mode::TEMPLATE;
+            } else if (mode == "both") {
+                opts.mode = output_mode::BOTH;
+            } else {
+                LOG_ERR("Unknown output mode: %s\n", mode.c_str());
+                return false;
+            }
+        } else if (arg.rfind("--input-message=", 0) == 0) {
+            std::string type = arg.substr(16);
+            if (type == "content_only") {
+                opts.input_message = input_message_type::CONTENT_ONLY;
+            } else if (type == "reasoning_content") {
+                opts.input_message = input_message_type::REASONING_CONTENT;
+            } else if (type == "tool_call_only") {
+                opts.input_message = input_message_type::TOOL_CALL_ONLY;
+            } else if (type == "content_tool_call") {
+                opts.input_message = input_message_type::CONTENT_TOOL_CALL;
+            } else if (type == "reasoning_tool_call") {
+                opts.input_message = input_message_type::REASONING_TOOL_CALL;
+            } else if (type == "content_fake_tool_call") {
+                opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
+            } else if (type == "all") {
+                opts.input_message = input_message_type::ALL;
+            } else {
+                LOG_ERR("Unknown input message type: %s\n", type.c_str());
+                return false;
+            }
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Returns a fixed sample user message with "role" and "content" keys.
+static json build_user_message() {
+    json message       = json::object();
+    message["role"]    = "user";
+    message["content"] = "Hello, please help me with a task.";
+    return message;
+}
+
+// Returns a fixed sample assistant message that carries only plain content.
+static json build_content_only_message() {
+    json message       = json::object();
+    message["role"]    = "assistant";
+    message["content"] = "Hello! I'm here to help you with your task.";
+    return message;
+}
+
+static json build_reasoning_content_message() {
+    return json{
+        { "role",              "assistant"                                                               },
+        { "content",           "Hello! I'm here to help you with your task."                             },
+        { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
+    };
+}
+
+static json build_tool_call_only_message() {
+    return json{
+        { "role",       "assistant"      },
+        { "content",    nullptr          },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function", json{ { "name", "test_function_name" },
+                                  { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
+              { "id", "123456789" } } }) }
+    };
+}
+
+static json build_content_tool_call_message() {
+    return json{
+        { "role",       "assistant"                                                                              },
+        { "content",    "I'll help you by calling a function."                                                   },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_reasoning_tool_call_message() {
+    return json{
+        { "role",              "assistant"                                                                       },
+        { "content",           nullptr                                                                           },
+        { "reasoning_content", "I need to call a function to help with this task."                               },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+// Returns an assistant message whose content announces a function call but which deliberately
+// has NO tool_calls field. It's used to test if a template renders tool definitions but not
+// tool calls.
+static json build_content_fake_tool_call_message() {
+    json message       = json::object();
+    message["role"]    = "assistant";
+    message["content"] = "I'll help you by calling a function.";
+    return message;
+}
+
+// Builds the tools array used by the scenarios: a single function "test_function_name" taking
+// two string parameters, of which only "param1" is required.
+static json build_tools_definition() {
+    const json properties = json::object({
+        { "param1", json::object({ { "type", "string" }, { "description", "First parameter" } })  },
+        { "param2", json::object({ { "type", "string" }, { "description", "Second parameter" } }) }
+    });
+
+    const json parameters_schema = json::object({
+        { "type",       "object"                   },
+        { "properties", properties                 },
+        { "required",   json::array({ "param1" })  }
+    });
+
+    const json function_def = json::object({
+        { "name",        "test_function_name"            },
+        { "description", "A test function for debugging" },
+        { "parameters",  parameters_schema               }
+    });
+
+    json tools = json::array();
+    tools.push_back(json::object({ { "type", "function" }, { "function", function_def } }));
+    return tools;
+}
+
+// Renders one message list through the template and logs both the inputs and the rendered text.
+//   tmpl                  - template to apply
+//   scenario_name         - label printed in the scenario banner
+//   messages              - conversation to render
+//   tools                 - tool definitions; only forwarded when it is a non-empty array
+//   add_generation_prompt - forwarded to the template inputs
+//   enable_thinking       - exposed to the template as extra_context["enable_thinking"]
+// Exceptions thrown while rendering are caught and logged, not propagated.
+static void render_scenario(const common_chat_template & tmpl,
+                            const std::string &          scenario_name,
+                            const json &                 messages,
+                            const json &                 tools,
+                            bool                         add_generation_prompt,
+                            bool                         enable_thinking) {
+    const char * gen_prompt_label = add_generation_prompt ? "true" : "false";
+    const char * thinking_label   = enable_thinking ? "true" : "false";
+
+    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
+    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", gen_prompt_label, thinking_label);
+
+    // If a generation prompt is requested and the conversation ends on an assistant turn,
+    // append one more user turn so the prompt follows a user message.
+    json final_messages = messages;
+    if (add_generation_prompt) {
+        const bool ends_on_assistant = !messages.empty() && messages.back().value("role", "") == "assistant";
+        if (ends_on_assistant) {
+            json follow_up       = json::object();
+            follow_up["role"]    = "user";
+            follow_up["content"] = "Now please continue with another response.";
+            final_messages.push_back(follow_up);
+        }
+    }
+
+    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
+
+    try {
+        autoparser::templates_params inputs;
+        inputs.messages                         = final_messages;
+        inputs.add_generation_prompt            = add_generation_prompt;
+        inputs.extra_context["enable_thinking"] = enable_thinking;
+
+        // is_array() already excludes null, so no separate null check is needed.
+        if (tools.is_array() && !tools.empty()) {
+            inputs.tools = tools;
+        }
+
+        const std::string rendered = common_chat_template_direct_apply(tmpl, inputs);
+
+        LOG_ERR("\n--- Rendered Output ---\n");
+        LOG_ERR("%s\n", rendered.c_str());
+        LOG_ERR("--- End Output (length: %zu) ---\n", rendered.length());
+    } catch (const std::exception & err) {
+        LOG_ERR("Rendering failed: %s\n", err.what());
+    }
+}
+
+// Renders every (user, assistant) scenario selected by `message_type`; with
+// input_message_type::ALL it additionally renders two generation-prompt-only scenarios.
+//   tmpl                  - template to apply
+//   tools                 - tool definitions forwarded to each scenario
+//   add_generation_prompt - forwarded to the (user, assistant) scenarios
+//   enable_thinking       - forwarded to all scenarios except the "thinking_disabled" one
+//   message_type          - ALL, or the single scenario kind to render
+static void render_all_scenarios(const common_chat_template & tmpl,
+                                 const json &                 tools,
+                                 bool                         add_generation_prompt,
+                                 bool                         enable_thinking,
+                                 input_message_type           message_type) {
+    struct scenario_entry {
+        input_message_type type;
+        const char *       name;
+        json (*build_assistant)();
+    };
+
+    const scenario_entry entries[] = {
+        { input_message_type::CONTENT_ONLY,           "content_only",           build_content_only_message           },
+        { input_message_type::REASONING_CONTENT,      "reasoning_content",      build_reasoning_content_message      },
+        { input_message_type::TOOL_CALL_ONLY,         "tool_call_only",         build_tool_call_only_message         },
+        { input_message_type::CONTENT_TOOL_CALL,      "content_tool_call",      build_content_tool_call_message      },
+        { input_message_type::REASONING_TOOL_CALL,    "reasoning_tool_call",    build_reasoning_tool_call_message    },
+        { input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call", build_content_fake_tool_call_message },
+    };
+
+    const json user_msg   = build_user_message();
+    const bool render_all = message_type == input_message_type::ALL;
+
+    for (const scenario_entry & entry : entries) {
+        // The assistant message is built for every entry, selected or not.
+        const json assistant_msg = entry.build_assistant();
+        if (!render_all && message_type != entry.type) {
+            continue;
+        }
+        const json messages = json::array({ user_msg, assistant_msg });
+        render_scenario(tmpl, entry.name, messages, tools, add_generation_prompt, enable_thinking);
+    }
+
+    if (!render_all) {
+        return;
+    }
+
+    // Also render with add_generation_prompt=true to show how the prompt ends
+    LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
+
+    const json prompt_messages = json::array({ user_msg });
+    render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
+
+    // Same prompt again, this time with thinking forced off
+    render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
+}
+
+// Entry point of the template debugging tool.
+// Reads a chat template (from a .gguf file or a plain text file), optionally renders the test
+// scenarios, and optionally runs the autoparser analysis and dumps the generated parser,
+// grammar, triggers and preserved tokens.
+// Returns 0 on success and 1 on bad options, unreadable template, or a failed analysis.
+int main(int argc, char ** argv) {
+    // Set log level to most verbose to capture all debug output
+    common_log_set_verbosity_thold(99);
+
+    debug_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    // Jinja debugging can be switched on by option or by environment variable
+    if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
+        jinja::enable_debug(true);
+    }
+
+    std::string template_source;
+    try {
+        // Check if the file is a GGUF file
+        if (opts.template_path.size() >= 5 &&
+            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
+            template_source = read_gguf_chat_template(opts.template_path);
+        } else {
+            template_source = read_file(opts.template_path);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return 1;
+    }
+
+    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
+    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
+            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+
+        // Build tools definition
+        json tools = opts.with_tools ? build_tools_definition() : json();
+
+        // Render template scenarios if requested
+        if (opts.input_message != input_message_type::NONE &&
+            (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                         TEMPLATE RENDERING OUTPUT\n");
+            LOG_ERR("================================================================================\n");
+
+            render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
+                                 opts.input_message);
+        }
+
+        // Output analysis if requested
+        if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
+            LOG_ERR("\n");
+            LOG_ERR("================================================================================\n");
+            LOG_ERR("                           TEMPLATE ANALYSIS\n");
+            LOG_ERR("================================================================================\n");
+
+            autoparser::autoparser analysis;
+            analysis.analyze_template(chat_template);
+
+            // Generate Parser
+            autoparser::templates_params params;
+            params.messages = json::array({ build_user_message() });
+            params.reasoning_format =
+                opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
+            params.enable_thinking       = opts.enable_reasoning;
+            params.add_generation_prompt = opts.generation_prompt;
+
+            if (opts.with_tools) {
+                params.tools       = tools;
+                params.tool_choice = opts.force_tool_call ? COMMON_CHAT_TOOL_CHOICE_REQUIRED : COMMON_CHAT_TOOL_CHOICE_AUTO;
+            } else {
+                params.tools       = json();
+                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+            }
+            params.parallel_tool_calls = false;
+
+            auto parser_data = autoparser::peg_generator::generate_parser(chat_template, params, analysis);
+
+            LOG_ERR("\n=== Generated Parser ===\n");
+            common_peg_arena arena;
+            arena.load(parser_data.parser);
+            LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
+
+            LOG_ERR("\n=== Generated Grammar ===\n");
+            LOG_ERR("%s\n", parser_data.grammar.c_str());
+
+            LOG_ERR("\n=== Generated Lazy Grammar ===\n");
+            LOG_ERR("%d\n", parser_data.grammar_lazy);
+
+            LOG_ERR("\n=== Generated Grammar Triggers ===\n");
+            for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
+                // Cast explicitly so the argument matches the %d conversion
+                LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, static_cast<int>(cgt.type),
+                        cgt.value.c_str());
+            }
+
+            LOG_ERR("\n=== Preserved Tokens ===\n");
+            for (const std::string & token : parser_data.preserved_tokens) {
+                LOG_ERR("  '%s'\n", token.c_str());
+            }
+
+            if (!parser_data.grammar.empty()) {
+                LOG_ERR("\n=== Verifying created grammar ===\n");
+                auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
+                                                         parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
+                if (grammar != nullptr) {
+                    LOG_ERR("\n=== Grammar successfully created ===\n");
+                    // The grammar was only built to validate it; release it instead of leaking it.
+                    llama_grammar_free_impl(grammar);
+                } else {
+                    // Make a failed verification visible instead of silently passing over it.
+                    LOG_ERR("\n=== Grammar creation FAILED ===\n");
+                }
+            }
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/tools/parser/template-analysis.cpp b/tools/parser/template-analysis.cpp

new file mode 100644 (file)

index 0000000..a92e104
--- /dev/null
+++ b/tools/parser/template-analysis.cpp
@@ -0,0 +1,611 @@
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat.h"
+#include "log.h"
+#include "jinja/caps.h"
+#include "jinja/runtime.h"
+
+#include <algorithm>
+#include <cctype>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+// ANSI color codes - using 256-color palette for brighter colors (all bold)
+// Each colored macro is "ESC[1m" (bold) followed by "ESC[38;5;<n>m" (256-color foreground);
+// "\033" and "\x1b" are two spellings of the same ESC byte.
+#define ANSI_RESET       "\033[0m"
+#define ANSI_PURPLE      "\033[1m\x1b[38;5;126m"  // Bold bright purple for main headers
+#define ANSI_CYAN        "\033[1m\x1b[38;5;81m"   // Bold bright cyan for section headers
+#define ANSI_BLUE        "\033[1m\x1b[38;5;12m"   // Bold bright blue for labels
+#define ANSI_ORANGE      "\033[1m\x1b[38;5;209m"  // Bold orange for right differences
+#define ANSI_GREEN       "\033[1m\x1b[38;5;83m"   // Bold bright green for left differences
+#define ANSI_GRAY        "\033[1m\x1b[38;5;240m"  // Bold gray (used for "no variables" message)
+#define ANSI_BOLD        "\033[1m"                // Standalone bold
+#define ANSI_PREFIX      "\033[1m\x1b[38;5;176m"  // Bold color for common prefix
+#define ANSI_SUFFIX      "\033[1m\x1b[38;5;61m"   // Bold color for common suffix
+
+// All template paths extracted from tests/test-chat.cpp
+// The entries are relative paths and are opened as-is by read_file(), so they resolve against
+// the process working directory. `--all` analyzes every entry; `--template <name>` selects the
+// entries whose path contains <name> (case-insensitive substring match).
+static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
+    "models/templates/Apertus-8B-Instruct.jinja",
+    "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
+    "models/templates/ByteDance-Seed-OSS.jinja",
+    "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
+    "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
+    "models/templates/GLM-4.6.jinja",
+    "models/templates/GLM-4.7-Flash.jinja",
+    "models/templates/Kimi-K2-Instruct.jinja",
+    "models/templates/Kimi-K2-Thinking.jinja",
+    "models/templates/MiMo-VL.jinja",
+    "models/templates/MiniMax-M2.jinja",
+    "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
+    "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
+    "models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
+    "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
+    "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+    "models/templates/Qwen-QwQ-32B.jinja",
+    "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
+    "models/templates/Qwen3-Coder.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
+    "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
+    "models/templates/google-gemma-2-2b-it.jinja",
+    "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
+    "models/templates/llama-cpp-deepseek-r1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.2.jinja",
+    "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
+    "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
+    "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
+    "models/templates/moonshotai-Kimi-K2.jinja",
+    "models/templates/openai-gpt-oss-120b.jinja",
+    "models/templates/unsloth-Apriel-1.5.jinja",
+    "models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
+};
+
+// Command-line options of the analysis tool, filled in by parse_options().
+struct analysis_options {
+    // Templates to analyze, in the order they were selected on the command line
+    std::vector<std::string> template_paths;
+    // Set by --all; parse_options() then replaces template_paths with ALL_TEMPLATE_PATHS
+    bool                     analyze_all = false;
+};
+
+// Returns the full contents of the file at `path`, read in binary mode.
+// Throws std::runtime_error if the file cannot be opened.
+static std::string read_file(const std::string & path) {
+    std::ifstream input(path, std::ios::binary);
+    if (input.is_open()) {
+        std::ostringstream contents;
+        contents << input.rdbuf();
+        return contents.str();
+    }
+    throw std::runtime_error("Could not open file: " + path);
+}
+
+// Logs the command-line help text.
+//   program_name - name shown in the usage line and in the examples (callers pass argv[0])
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --template <name>       Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
+    LOG_ERR("  --template-file <path>  Analyze custom template file\n");
+    LOG_ERR("  --all                   Analyze all templates from test suite\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s --all\n", program_name);
+    LOG_ERR("  %s --template deepseek\n", program_name);
+    LOG_ERR("  %s --template-file my-template.jinja\n", program_name);
+}
+
+// Returns a lower-cased copy of `text`.
+// Every character is passed through unsigned char first: calling std::tolower with a negative
+// value (a plain char holding a non-ASCII byte) is undefined behaviour.
+static std::string to_lower_copy(std::string text) {
+    std::transform(text.begin(), text.end(), text.begin(),
+                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
+    return text;
+}
+
+// Parses the command line into `opts`.
+//   --all                   select every entry of ALL_TEMPLATE_PATHS (overrides other selections)
+//   --template <name>       select the known templates whose path contains <name>, ignoring case
+//   --template-file <path>  select an arbitrary template file
+// Returns false (after logging the reason) when no arguments are given, an option is unknown or
+// lacks its argument, a --template pattern matches nothing, or no template ends up selected.
+static bool parse_options(int argc, char ** argv, analysis_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    for (int i = 1; i < argc; ++i) {
+        const std::string arg = argv[i];
+
+        if (arg == "--all") {
+            opts.analyze_all = true;
+        } else if (arg == "--template") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template requires an argument\n");
+                return false;
+            }
+            const std::string pattern = to_lower_copy(argv[++i]);
+
+            // Find matching templates
+            bool found = false;
+            for (const auto & path : ALL_TEMPLATE_PATHS) {
+                if (to_lower_copy(path).find(pattern) != std::string::npos) {
+                    opts.template_paths.push_back(path);
+                    found = true;
+                }
+            }
+
+            if (!found) {
+                LOG_ERR("No templates found matching: %s\n", pattern.c_str());
+                return false;
+            }
+        } else if (arg == "--template-file") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template-file requires an argument\n");
+                return false;
+            }
+            opts.template_paths.push_back(argv[++i]);
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    // --all wins over any individually selected templates
+    if (opts.analyze_all) {
+        opts.template_paths = ALL_TEMPLATE_PATHS;
+    }
+
+    if (opts.template_paths.empty()) {
+        LOG_ERR("No templates specified\n");
+        print_usage(argv[0]);
+        return false;
+    }
+
+    return true;
+}
+
+// Builds the tools array used by the analysis: a single function "test_function_name" taking
+// two string parameters, both of which are required.
+static json build_tools_definition() {
+    const json properties = json::object({
+        { "param1", json::object({ { "type", "string" }, { "description", "First parameter" } })  },
+        { "param2", json::object({ { "type", "string" }, { "description", "Second parameter" } }) }
+    });
+
+    const json parameters_schema = json::object({
+        { "type",       "object"                             },
+        { "properties", properties                           },
+        { "required",   json::array({ "param1", "param2" })  }
+    });
+
+    const json function_def = json::object({
+        { "name",        "test_function_name"            },
+        { "description", "A test function for debugging" },
+        { "parameters",  parameters_schema               }
+    });
+
+    json tools = json::array();
+    tools.push_back(json::object({ { "type", "function" }, { "function", function_def } }));
+    return tools;
+}
+
+// Builds one tool-call entry of type "function".
+//   name        - function name
+//   args_object - arguments, stored as a JSON object (not as a serialized string)
+//   id          - call id, "call_001" unless specified
+static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
+    json function_part         = json::object();
+    function_part["name"]      = name;
+    function_part["arguments"] = args_object;
+
+    json call        = json::object();
+    call["id"]       = id;
+    call["type"]     = "function";
+    call["function"] = function_part;
+    return call;
+}
+
+// Helper functions to create repeating message definitions
+// First user turn shared by all analysis scenarios.
+static json make_user_msg() {
+    json msg       = json::object();
+    msg["role"]    = "user";
+    msg["content"] = "Hello, please help me.";
+    return msg;
+}
+
+// Follow-up user turn with the text "Thank you.".
+static json make_user_msg2() {
+    json msg       = json::object();
+    msg["role"]    = "user";
+    msg["content"] = "Thank you.";
+    return msg;
+}
+
+// Follow-up user turn with the text "Continue.".
+static json make_user_msg2_continue() {
+    json msg       = json::object();
+    msg["role"]    = "user";
+    msg["content"] = "Continue.";
+    return msg;
+}
+
+// Assistant turn with plain text content and no tool calls.
+static json make_assistant_no_tool() {
+    json msg       = json::object();
+    msg["role"]    = "assistant";
+    msg["content"] = "Let me help you.";
+    return msg;
+}
+
+// Assistant turn with null content and a single call to "test_function_name".
+static json make_assistant_one_tool() {
+    json args      = json::object();
+    args["param1"] = "value1";
+    args["param2"] = "value2";
+
+    json calls = json::array();
+    calls.push_back(build_tool_call("test_function_name", args));
+
+    json msg          = json::object();
+    msg["role"]       = "assistant";
+    msg["content"]    = nullptr;
+    msg["tool_calls"] = calls;
+    return msg;
+}
+
+// Assistant turn with null content and two calls to "test_function_name"; the second call uses
+// different argument values and the id "call_002".
+static json make_assistant_two_tools() {
+    json first_args      = json::object();
+    first_args["param1"] = "value1";
+    first_args["param2"] = "value2";
+
+    json second_args      = json::object();
+    second_args["param1"] = "value3";
+    second_args["param2"] = "value4";
+
+    json calls = json::array();
+    calls.push_back(build_tool_call("test_function_name", first_args));
+    calls.push_back(build_tool_call("test_function_name", second_args, "call_002"));
+
+    json msg          = json::object();
+    msg["role"]       = "assistant";
+    msg["content"]    = nullptr;
+    msg["tool_calls"] = calls;
+    return msg;
+}
+
+// Assistant turn with text content and no reasoning_content.
+static json make_assistant_no_reasoning() {
+    json msg       = json::object();
+    msg["role"]    = "assistant";
+    msg["content"] = "I can help you with that.";
+    return msg;
+}
+
+// Assistant turn with the same text content as make_assistant_no_reasoning() plus a
+// reasoning_content field.
+static json make_assistant_with_reasoning() {
+    json msg                 = json::object();
+    msg["role"]              = "assistant";
+    msg["content"]           = "I can help you with that.";
+    msg["reasoning_content"] = "The user is asking for help. I should respond positively.";
+    return msg;
+}
+
+// Assistant turn with null content, a single call to "test_function_name" and a
+// reasoning_content field (inserted after tool_calls).
+static json make_assistant_one_tool_with_reasoning() {
+    json args      = json::object();
+    args["param1"] = "value1";
+    args["param2"] = "value2";
+
+    json calls = json::array();
+    calls.push_back(build_tool_call("test_function_name", args));
+
+    json msg                 = json::object();
+    msg["role"]              = "assistant";
+    msg["content"]           = nullptr;
+    msg["tool_calls"]        = calls;
+    msg["reasoning_content"] = "I need to call the tool first.";
+    return msg;
+}
+
+// Logs the four parts of a diff_split under a colored section header.
+//   title - text shown in the "=== ... ===" header
+//   diff  - split to print; its prefix, suffix, left and right strings are each quoted with '...'
+static void print_diff_split(const std::string & title, const diff_split & diff) {
+    LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
+    LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str());
+    LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str());
+    LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str());
+    LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str());
+}
+
+// Probes which reasoning/thinking-related variable names the template reads.
+// The template program is executed once with a fixed (user, assistant) conversation while a
+// list of candidate names is registered as undefined values; afterwards every candidate that is
+// still undefined and has stats.used set is reported.
+//   tmpl - template whose program (tmpl.prog) is executed
+// Nothing is returned; results and errors are only logged.
+static void check_reasoning_variables(const common_chat_template & tmpl) {
+    LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
+
+    try {
+        // Create a list of candidate reasoning/thinking variable names to probe
+        std::vector<std::string> candidate_vars = {
+            "enable_reasoning",
+            "use_reasoning",
+            "reasoning_enabled",
+            "has_reasoning",
+            "reasoning_mode",
+            "reasoning_format",
+            "reasoning_active",
+            "with_reasoning",
+            "use_thinking",
+            "thinking_enabled",
+            "has_thinking",
+            "thinking_mode",
+            "thinking_format",
+            "thinking_active",
+            "with_thinking",
+            "enable_reason",
+            "reason_enabled",
+            "enable_think",
+            "think_enabled",
+        };
+
+        jinja::context ctx;
+        // NOTE(review): presumably turns on the usage tracking read back via stats.used below - confirm
+        ctx.is_get_stats = true;
+
+        // Minimal conversation whose assistant turn carries reasoning_content
+        json messages = json::array({
+            json{
+                {"role", "user"},
+                {"content", "Test message"}
+            },
+            json{
+                {"role", "assistant"},
+                {"content", "Response"},
+                {"reasoning_content", "Some reasoning"}
+            }
+        });
+
+        // Set up base context
+        jinja::global_from_json(ctx, json{
+            {"messages", messages},
+            {"tools", json::array()},
+            {"bos_token", ""},
+            {"eos_token", ""},
+            {"add_generation_prompt", false},
+            {"enable_thinking", true}  // Already passed, so we'll exclude this from results
+        }, true);
+
+        // Add candidate variables as undefined to probe which ones are accessed
+        for (const auto & var_name : candidate_vars) {
+            ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
+        }
+
+        try {
+            jinja::runtime runtime(ctx);
+            runtime.execute(tmpl.prog);
+        } catch (const std::exception & e) {
+            // Execution may fail, that's okay - we just want to see what variables were accessed
+        }
+
+        // Check which candidate variables were accessed (stats.used = true)
+        std::vector<std::string> accessed_vars;
+        for (const auto & var_name : candidate_vars) {
+            auto val = ctx.get_val(var_name);
+            if (!val->is_undefined()) {
+                // Variable was overwritten, skip it
+                continue;
+            }
+            if (val->stats.used) {
+                accessed_vars.push_back(var_name);
+            }
+        }
+
+        if (accessed_vars.empty()) {
+            LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
+        } else {
+            LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
+            for (const auto & var : accessed_vars) {
+                LOG_ERR("  %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
+            }
+        }
+
+    } catch (const std::exception & e) {
+        // Failures while setting up the probe or reporting its results end up here
+        LOG_ERR("Error checking reasoning variables: %s\n", e.what());
+    }
+}
+
+// Renders `tmpl` once with `left` and once with `right`, then prints the diff split of the two
+// outputs (left first, right second) under `title`.
+static void print_render_diff(const common_chat_template &         tmpl,
+                              const std::string &                  title,
+                              const autoparser::templates_params & left,
+                              const autoparser::templates_params & right) {
+    const std::string output_left  = common_chat_template_direct_apply(tmpl, left);
+    const std::string output_right = common_chat_template_direct_apply(tmpl, right);
+
+    print_diff_split(title, calculate_diff_split(output_left, output_right));
+}
+
+// Analyzes one template file and logs the results:
+//   1. the capability flags reported by the template,
+//   2. nine differential renderings, each comparing two inputs that differ in one aspect
+//      (tools, generation prompt, reasoning_content, tool calls),
+//   3. the reasoning/thinking variables the template queries.
+//   template_path - path of the template file to read
+// Read errors and analysis errors are logged; the function then returns without rethrowing.
+static void analyze_template(const std::string & template_path) {
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                    ANALYZING TEMPLATE: %s\n", template_path.c_str());
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    std::string template_source;
+    try {
+        template_source = read_file(template_path);
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return;
+    }
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+        const json tools    = build_tools_definition();
+        const json user_msg = make_user_msg();
+
+        // ===== CAPABILITIES ANALYSIS =====
+        LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
+        auto caps = chat_template.original_caps();
+        LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
+        LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
+        LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
+        LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
+        LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false");
+        LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false");
+
+        // ===== DIFFERENTIAL ANALYSIS =====
+        // In every test the first parameter set is the baseline (left side of the diff) and the
+        // second one is a copy that differs in exactly the aspect under test (right side).
+
+        // Test 1: With and without tools (single user message)
+        {
+            autoparser::templates_params params_no_tools;
+            params_no_tools.messages              = json::array({ user_msg });
+            params_no_tools.add_generation_prompt = false;
+            params_no_tools.tools                 = json::array();
+
+            autoparser::templates_params params_with_tools = params_no_tools;
+            params_with_tools.tools                        = tools;
+
+            print_render_diff(chat_template, "Diff: With vs Without Tools (single user message)",
+                              params_no_tools, params_with_tools);
+        }
+
+        // Test 2: With and without add_generation_prompt (single user message)
+        {
+            autoparser::templates_params params_no_prompt;
+            params_no_prompt.messages              = json::array({ user_msg });
+            params_no_prompt.add_generation_prompt = false;
+            params_no_prompt.tools                 = json::array();
+
+            autoparser::templates_params params_with_prompt = params_no_prompt;
+            params_with_prompt.add_generation_prompt        = true;
+
+            print_render_diff(chat_template, "Diff: With vs Without add_generation_prompt (single user message)",
+                              params_no_prompt, params_with_prompt);
+        }
+
+        // Test 3: Assistant with reasoning_content (user, assistant)
+        {
+            autoparser::templates_params params_no_reasoning;
+            params_no_reasoning.messages              = json::array({ user_msg, make_assistant_no_reasoning() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking       = true;
+
+            autoparser::templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
+
+            print_render_diff(chat_template, "Diff: With vs Without reasoning_content (user, assistant)",
+                              params_no_reasoning, params_with_reasoning);
+        }
+
+        // Test 4: Assistant with reasoning_content (user, assistant, user)
+        {
+            const json user_msg2 = make_user_msg2();
+
+            autoparser::templates_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking       = true;
+
+            autoparser::templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
+
+            print_render_diff(chat_template, "Diff: With vs Without reasoning_content (user, assistant, user)",
+                              params_no_reasoning, params_with_reasoning);
+        }
+
+        // Test 5: Tool call in last assistant message (user, assistant)
+        {
+            autoparser::templates_params params_no_tool;
+            params_no_tool.messages              = json::array({ user_msg, make_assistant_no_tool() });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools                 = tools;
+
+            autoparser::templates_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+
+            print_render_diff(chat_template, "Diff: With vs Without tool call (user, assistant)",
+                              params_no_tool, params_with_tool);
+        }
+
+        // Test 6: Tool call in last assistant message (user, assistant, user)
+        {
+            const json user_msg2 = make_user_msg2_continue();
+
+            autoparser::templates_params params_no_tool;
+            params_no_tool.messages              = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools                 = tools;
+
+            autoparser::templates_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+
+            print_render_diff(chat_template, "Diff: With vs Without tool call (user, assistant, user)",
+                              params_no_tool, params_with_tool);
+        }
+
+        // Test 7: One vs two tool calls (user, assistant)
+        {
+            autoparser::templates_params params_one_tool;
+            params_one_tool.messages              = json::array({ user_msg, make_assistant_one_tool() });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools                 = tools;
+
+            autoparser::templates_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
+
+            print_render_diff(chat_template, "Diff: One vs Two tool calls (user, assistant)",
+                              params_one_tool, params_two_tools);
+        }
+
+        // Test 8: One vs two tool calls (user, assistant, user)
+        {
+            const json user_msg2 = make_user_msg2_continue();
+
+            autoparser::templates_params params_one_tool;
+            params_one_tool.messages              = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools                 = tools;
+
+            autoparser::templates_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
+
+            print_render_diff(chat_template, "Diff: One vs Two tool calls (user, assistant, user)",
+                              params_one_tool, params_two_tools);
+        }
+
+        // Test 9: Tool call with vs without reasoning_content (user, assistant)
+        {
+            autoparser::templates_params params_no_reasoning;
+            params_no_reasoning.messages              = json::array({ user_msg, make_assistant_one_tool() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.tools                 = tools;
+            params_no_reasoning.enable_thinking       = true;
+
+            autoparser::templates_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
+
+            print_render_diff(chat_template, "Diff: Tool call with vs without reasoning_content (user, assistant)",
+                              params_no_reasoning, params_with_reasoning);
+        }
+
+        // Check reasoning variables
+        check_reasoning_variables(chat_template);
+
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+    }
+}
+
+// Entry point of the template analysis tool: parses the options, analyzes every selected
+// template in order and frames the output with a start and an end banner.
+// Returns 1 when the options are invalid, 0 otherwise.
+int main(int argc, char ** argv) {
+    // Set log level to capture all output
+    common_log_set_verbosity_thold(99);
+
+    analysis_options options;
+    const bool       parsed = parse_options(argc, argv, options);
+    if (!parsed) {
+        return 1;
+    }
+
+    const auto template_count = options.template_paths.size();
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      TEMPLATE ANALYSIS TOOL\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+    LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, template_count, ANSI_RESET);
+
+    for (auto it = options.template_paths.begin(); it != options.template_paths.end(); ++it) {
+        analyze_template(*it);
+    }
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_GREEN);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      ANALYSIS COMPLETE\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    return 0;
+}
diff --git a/tools/server/public_legacy/json-schema-to-grammar.mjs b/tools/server/public_legacy/json-schema-to-grammar.mjs

index 38576c45fa071b0889025104f434cf94f18c68a9..bb25887a144fb5f7b64226acc26537db21afd542 100644 (file)
--- a/tools/server/public_legacy/json-schema-to-grammar.mjs
+++ b/tools/server/public_legacy/json-schema-to-grammar.mjs
@@ -729,6 +729,10 @@ export class SchemaConverter {
        return this._addRule(ruleName, out.join(''));
      } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
        return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
+    } else if (schemaType === undefined && typeof schema === 'object' && !Array.isArray(schema) && schema !== null) {
+      // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+      // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+      return this._addRule(ruleName, this._addPrimitive('value', PRIMITIVE_RULES['value']));
      } else {
        if (!(schemaType in PRIMITIVE_RULES)) {
          throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp

index d3aba18489b836b36099689bdfd1f28b07e90cff..32c0d8f481d071cbfefe2a66898fceee01103ec0 100644 (file)
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -1,12 +1,12 @@
-#include "server-common.h"
  #include "server-task.h"
  
+#include "chat.h"
  #include "common.h"
+#include "json-schema-to-grammar.h"
  #include "llama.h"
-#include "chat.h"
  #include "sampling.h"
  #include "speculative.h"
-#include "json-schema-to-grammar.h"
+#include "server-common.h"
  
  using json = nlohmann::ordered_json;
  
@@ -157,7 +157,8 @@ json task_params::to_json(bool only_metrics) const {
  common_chat_msg task_result_state::update_chat_msg(
          const std::string & text_added,
          bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs) {
+        std::vector<common_chat_msg_diff> & diffs,
+        bool filter_tool_calls) {
      generated_text += text_added;
      auto msg_prv_copy = chat_msg;
      SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
@@ -168,7 +169,64 @@ common_chat_msg task_result_state::update_chat_msg(
      if (!new_msg.empty()) {
          new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
          chat_msg = new_msg;
-        diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
+        auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
+
+        if (!filter_tool_calls) {
+            diffs = std::move(all_diffs);
+        } else {
+            for (auto & d : all_diffs) {
+                // If this is a new type of delta, flush all currently pending tool call names
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
+                        continue;
+                    }
+                    if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+
+                if (d.tool_call_index == std::string::npos) {
+                    diffs.push_back(std::move(d));
+                } else {
+                    size_t i = d.tool_call_index;
+                    if (sent_tool_call_names.count(i)) {
+                        if (!d.tool_call_delta.arguments.empty()) {
+                            d.tool_call_delta.name = "";
+                            d.tool_call_delta.id   = "";
+                            diffs.push_back(std::move(d));
+                        }
+                    } else {
+                        // Not sent yet.
+                        if (!d.tool_call_delta.arguments.empty() || !is_partial) {
+                            d.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                            d.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                            diffs.push_back(std::move(d));
+                            sent_tool_call_names.insert(i);
+                        } else {
+                            // Suppress
+                        }
+                    }
+                }
+            }
+            // Final check at EOF
+            if (!is_partial) {
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index      = i;
+                        header.tool_call_delta.id   = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+            }
+        }
      }
      return chat_msg;
  }
diff --git a/tools/server/server-task.h b/tools/server/server-task.h

index e2e3e5a582846045edcbe8e3b87f291718ef3360..1e342531d8efc81694c690e96170bf2a057e4efb 100644 (file)
--- a/tools/server/server-task.h
+++ b/tools/server/server-task.h
@@ -98,6 +98,7 @@ struct task_result_state {
      common_chat_msg chat_msg;
      std::string generated_text; // append new chunks of generated text here
      std::vector<std::string> generated_tool_call_ids;
+    std::unordered_set<size_t> sent_tool_call_names;
  
      // for OpenAI Responses and Anthropic streaming API:
      // track output item / content block state across chunks
@@ -120,7 +121,8 @@ struct task_result_state {
      common_chat_msg update_chat_msg(
          const std::string & text_added,
          bool is_partial,
-        std::vector<common_chat_msg_diff> & diffs);
+        std::vector<common_chat_msg_diff> & diffs,
+        bool filter_tool_calls = false);
  };
  
  struct server_task {
diff --git a/tools/server/tests/unit/test_tool_call.py b/tools/server/tests/unit/test_tool_call.py

index b8f0f10863fb83962368c2443480b98b3e92d6fe..ba41cd44ea93e72b1274a0c7dfc021e675e68664 100755 (executable)
--- a/tools/server/tests/unit/test_tool_call.py
+++ b/tools/server/tests/unit/test_tool_call.py
@@ -100,18 +100,19 @@ def do_test_completion_with_required_tool_tiny(server: ServerProcess, tool: dict
      assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
      # assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
      expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
-    assert expected_function_name == tool_call["function"]["name"]
+    assert expected_function_name == tool_call["function"]["name"], f'Expected tool name to be {tool_call["function"]["name"]} in {choice["message"]}'
      actual_arguments = tool_call["function"]["arguments"]
-    assert isinstance(actual_arguments, str)
+    assert isinstance(actual_arguments, dict) or isinstance(actual_arguments, str), f'Expected arguments to be a dict or str, got: {actual_arguments}'
      if argument_key is not None:
-        actual_arguments = json.loads(actual_arguments)
-        assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
+        if (isinstance(actual_arguments, str)):
+            actual_arguments = json.loads(actual_arguments)
+        assert argument_key in actual_arguments, f"tool arguments: {actual_arguments}, expected: {argument_key}"
  
  
  @pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
  @pytest.mark.parametrize("template_name,tool,argument_key", [
-    ("google-gemma-2-2b-it",                          TEST_TOOL,            "success"),
-    ("google-gemma-2-2b-it",                          TEST_TOOL,            "success"),
+    ("Qwen3-Coder",                                   TEST_TOOL,            "success"),
+    ("Qwen3-Coder",                                   TEST_TOOL,            "success"),
      ("meta-llama-Llama-3.3-70B-Instruct",             TEST_TOOL,            "success"),
      ("meta-llama-Llama-3.3-70B-Instruct",             TEST_TOOL,            "success"),
      ("meta-llama-Llama-3.3-70B-Instruct",             PYTHON_TOOL,          "code"),
author	Piotr Wilkin (ilintar) <redacted>
	Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)
committer	GitHub <redacted>
	Fri, 6 Mar 2026 20:01:00 +0000 (21:01 +0100)
common/CMakeLists.txt		patch \| blob \| history
common/chat-auto-parser-generator.cpp	[new file with mode: 0644]	patch \| blob
common/chat-auto-parser-helpers.cpp	[new file with mode: 0644]	patch \| blob
common/chat-auto-parser-helpers.h	[new file with mode: 0644]	patch \| blob
common/chat-auto-parser.h	[new file with mode: 0644]	patch \| blob
common/chat-diff-analyzer.cpp	[new file with mode: 0644]	patch \| blob
common/chat-parser-xml-toolcall.cpp	[deleted file]	patch \| blob \| history
common/chat-parser-xml-toolcall.h	[deleted file]	patch \| blob \| history
common/chat-parser.cpp	[deleted file]	patch \| blob \| history
common/chat-parser.h	[deleted file]	patch \| blob \| history
common/chat-peg-parser.cpp		patch \| blob \| history
common/chat-peg-parser.h		patch \| blob \| history
common/chat.cpp		patch \| blob \| history
common/chat.h		patch \| blob \| history
common/common.cpp		patch \| blob \| history
common/jinja/caps.cpp		patch \| blob \| history
common/jinja/runtime.cpp		patch \| blob \| history
common/jinja/value.h		patch \| blob \| history
common/json-schema-to-grammar.cpp		patch \| blob \| history
common/peg-parser.cpp		patch \| blob \| history
common/peg-parser.h		patch \| blob \| history
common/unicode.cpp		patch \| blob \| history
common/unicode.h		patch \| blob \| history
docs/autoparser.md	[new file with mode: 0644]	patch \| blob
docs/development/parsing.md		patch \| blob \| history
examples/json_schema_to_grammar.py		patch \| blob \| history
models/templates/Apertus-8B-Instruct.jinja		patch \| blob \| history
models/templates/Apriel-1.6-15b-Thinker-fixed.jinja	[new file with mode: 0755]	patch \| blob
models/templates/Bielik-11B-v3.0-Instruct.jinja	[new file with mode: 0644]	patch \| blob
models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja		patch \| blob \| history
models/templates/GLM-4.7-Flash.jinja	[new file with mode: 0644]	patch \| blob
models/templates/LFM2-8B-A1B.jinja	[new file with mode: 0644]	patch \| blob
models/templates/Qwen-QwQ-32B.jinja		patch \| blob \| history
models/templates/Qwen3-Coder.jinja		patch \| blob \| history
models/templates/StepFun3.5-Flash.jinja	[new file with mode: 0644]	patch \| blob
models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja		patch \| blob \| history
models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja		patch \| blob \| history
models/templates/deepseek-ai-DeepSeek-V3.1.jinja		patch \| blob \| history
models/templates/fireworks-ai-llama-3-firefunction-v2.jinja		patch \| blob \| history
models/templates/moonshotai-Kimi-K2.jinja		patch \| blob \| history
models/templates/unsloth-Apriel-1.5.jinja		patch \| blob \| history
scripts/server-test-model.py	[new file with mode: 0644]	patch \| blob
tests/CMakeLists.txt		patch \| blob \| history
tests/peg-parser/test-basic.cpp		patch \| blob \| history
tests/peg-parser/test-python-dict-parser.cpp	[new file with mode: 0644]	patch \| blob
tests/peg-parser/tests.h		patch \| blob \| history
tests/test-backend-ops.cpp		patch \| blob \| history
tests/test-chat-auto-parser.cpp	[new file with mode: 0644]	patch \| blob
tests/test-chat-parser.cpp	[deleted file]	patch \| blob \| history
tests/test-chat-peg-parser.cpp		patch \| blob \| history
tests/test-chat-template.cpp		patch \| blob \| history
tests/test-chat.cpp		patch \| blob \| history
tests/test-json-schema-to-grammar.cpp		patch \| blob \| history
tests/test-peg-parser.cpp		patch \| blob \| history
tools/CMakeLists.txt		patch \| blob \| history
tools/cli/cli.cpp		patch \| blob \| history
tools/parser/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
tools/parser/debug-template-parser.cpp	[new file with mode: 0644]	patch \| blob
tools/parser/template-analysis.cpp	[new file with mode: 0644]	patch \| blob
tools/server/public_legacy/json-schema-to-grammar.mjs		patch \| blob \| history
tools/server/server-task.cpp		patch \| blob \| history
tools/server/server-task.h		patch \| blob \| history
tools/server/tests/unit/test_tool_call.py		patch \| blob \| history