fix: gemma 4 template (#21326)

author Piotr Wilkin (ilintar) <redacted>

Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)

committer GitHub <redacted>

Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)
author Piotr Wilkin (ilintar) <redacted>
Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)
committer GitHub <redacted>
Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp

index a368a110a06b9105324ca71644fed301ded6b6dd..60b269c42de83f28f7dd3f945417e9036396e22c 100644 (file)
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -7,11 +7,109 @@
  #include "log.h"
  #include "nlohmann/json.hpp"
  
+#include <algorithm>
  #include <stdexcept>
  #include <string>
  
  using json = nlohmann::ordered_json;
  
+namespace {
+
+// Gemma4-specific PEG builder extending the standard chat builder.
+// Adds value type parsers that use <|\"|> as string delimiters
+// instead of JSON's double quotes, and disables json-to-schema
+// conversion for these types.
+class common_peg_gemma4_builder {
+    common_chat_peg_builder & p_;
+    static constexpr const char * QUOTE = "<|\"|>";
+
+public:
+    explicit common_peg_gemma4_builder(common_chat_peg_builder & p) : p_(p) {}
+
+    common_peg_parser gemma4_string() {
+        return p_.rule("gemma4-string", [&]() {
+            return p_.literal(QUOTE) + p_.until(QUOTE) + p_.literal(QUOTE);
+        });
+    }
+
+    common_peg_parser gemma4_number() {
+        return p_.rule("gemma4-number", [&]() {
+            auto digit1_9 = p_.chars("[1-9]", 1, 1);
+            auto digits   = p_.chars("[0-9]");
+            auto int_part = p_.choice({p_.literal("0"), p_.sequence({digit1_9, p_.chars("[0-9]", 0, -1)})});
+            auto frac     = p_.sequence({p_.literal("."), digits});
+            auto exp      = p_.sequence({p_.choice({p_.literal("e"), p_.literal("E")}),
+                                         p_.optional(p_.chars("[+-]", 1, 1)), digits});
+            auto not_number_continuation = p_.negate(p_.chars("[0-9.eE+-]", 1, 1));
+            return p_.sequence({p_.optional(p_.literal("-")), int_part, p_.optional(frac),
+                                p_.optional(exp), not_number_continuation});
+        });
+    }
+
+    common_peg_parser gemma4_bool() {
+        return p_.rule("gemma4-bool", [&]() {
+            return p_.choice({p_.literal("true"), p_.literal("false")});
+        });
+    }
+
+    common_peg_parser gemma4_null() {
+        return p_.rule("gemma4-null", [&]() {
+            return p_.literal("null");
+        });
+    }
+
+    common_peg_parser gemma4_dict() {
+        return p_.rule("gemma4-dict", [&]() {
+            auto ws = p_.space();
+            auto key = p_.until(":");
+            auto member = p_.sequence({key, p_.literal(":"), ws, gemma4_value()});
+            auto members = p_.sequence({member, p_.zero_or_more(p_.sequence({p_.literal(","), ws, member}))});
+            return p_.sequence({
+                p_.literal("{"), ws,
+                p_.choice({p_.literal("}"), p_.sequence({members, ws, p_.literal("}")})})
+            });
+        });
+    }
+
+    common_peg_parser gemma4_array() {
+        return p_.rule("gemma4-array", [&]() {
+            auto ws = p_.space();
+            auto elements = p_.sequence({gemma4_value(), p_.zero_or_more(p_.sequence({p_.literal(","), ws, gemma4_value()}))});
+            return p_.sequence({
+                p_.literal("["), ws,
+                p_.choice({p_.literal("]"), p_.sequence({elements, ws, p_.literal("]")})})
+            });
+        });
+    }
+
+    common_peg_parser gemma4_value() {
+        return p_.rule("gemma4-value", [&]() {
+            return p_.choice({gemma4_string(), gemma4_dict(), gemma4_array(),
+                              gemma4_number(), gemma4_bool(), gemma4_null()});
+        });
+    }
+
+    // Select the appropriate value parser based on JSON schema type.
+    // Does NOT use schema() - the gemma4 types are pure PEG without
+    // JSON schema metadata, so GBNF is generated directly from the
+    // PEG structure.
+    common_peg_parser gemma4_value_for_type(const json & schema) {
+        if (!schema.contains("type") || !schema.at("type").is_string()) {
+            return gemma4_value();
+        }
+        std::string type = schema.at("type").get<std::string>();
+        if (type == "string")  { return gemma4_string(); }
+        if (type == "number")  { return gemma4_number(); }
+        if (type == "integer") { return gemma4_number(); }
+        if (type == "boolean") { return gemma4_bool(); }
+        if (type == "object")  { return gemma4_dict(); }
+        if (type == "array")   { return gemma4_array(); }
+        return gemma4_value();
+    }
+};
+
+}  // anonymous namespace
+
  // Helper to iterate over tools/functions
  static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
      for (const auto & tool : tools) {
@@ -43,7 +141,9 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
      // Create the result structure
      common_chat_params data;
      data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
-    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.format           = (autoparser.tools.format.mode == tool_format::TAG_WITH_GEMMA4_DICT)
+                            ? COMMON_CHAT_FORMAT_PEG_GEMMA4
+                            : COMMON_CHAT_FORMAT_PEG_NATIVE;
      data.preserved_tokens = autoparser.preserved_tokens;
  
      auto parser = autoparser.build_parser(inputs);
@@ -92,6 +192,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
  
          ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
          ctx.content              = &content;
+        ctx.reasoning            = &reasoning;
  
          // Build reasoning parser
          ctx.reasoning_parser = reasoning.build_parser(ctx);
@@ -440,7 +541,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
      const auto & inputs      = ctx.inputs;
      bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
  
-    // The Gemma4 string quote token used in place of JSON "
+    common_peg_gemma4_builder g4(p);
      static const std::string QUOTE = "<|\"|>";
  
      common_peg_parser tool_choice = p.choice();
@@ -451,7 +552,6 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
          const auto & params = func.at("parameters");
  
          if (!params.contains("properties") || !params.at("properties").is_object()) {
-            // No arguments - just match the function name with empty braces
              auto func_parser = p.atomic(
                  p.tool_open(p.literal(function.name_prefix) + p.tool_name(p.literal(name)) + p.literal("{")) +
                  p.tool_args(p.eps()) +
@@ -486,9 +586,18 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
                      p.tool_arg_string_value(p.schema(p.until(QUOTE),
                          "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) +
                      p.literal(QUOTE);
+            } else if (type == "number" || type == "integer") {
+                value_parser = p.tool_arg_value(g4.gemma4_number());
+            } else if (type == "boolean") {
+                value_parser = p.tool_arg_value(g4.gemma4_bool());
+            } else if (type == "null") {
+                value_parser = p.tool_arg_value(g4.gemma4_null());
+            } else if (type == "object") {
+                value_parser = p.tool_arg_value(g4.gemma4_dict());
+            } else if (type == "array") {
+                value_parser = p.tool_arg_value(g4.gemma4_array());
              } else {
-                // Numbers, booleans: raw text up to the next comma or closing brace
-                value_parser = p.tool_arg_value(p.until_one_of({",", "}"}));
+                value_parser = p.tool_arg_value(g4.gemma4_value());
              }
  
              auto arg = p.tool_arg(
@@ -538,9 +647,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
          tool_calls = p.optional(tool_calls);
      }
  
-    auto content_before_tools = p.until(format.per_call_start);
+    auto content_before_tools = p.until_one_of({ format.per_call_start, ctx.reasoning->start });
      return ctx.reasoning_parser +
-           (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) +
+           (force_tools ? p.eps() : p.optional(p.content(content_before_tools) + p.optional(ctx.reasoning_parser))) +
             tool_calls + p.end();
  }
  
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h

index 514c76576e640b6a86510772630119420c1f5589..9d7d4e69e6d3d7225f51c48537591ff9ad4f21c7 100644 (file)
--- a/common/chat-auto-parser.h
+++ b/common/chat-auto-parser.h
@@ -215,12 +215,14 @@ struct tool_id_analysis {
  // ============================================================================
  
  struct analyze_content;
+struct analyze_reasoning;
  
  struct parser_build_context {
      common_chat_peg_builder & p;
-    const generation_params &          inputs;
+    const generation_params &         inputs;
      common_peg_parser                 reasoning_parser;
      bool                              extracting_reasoning = false;
+    const analyze_reasoning *         reasoning            = nullptr;
      const analyze_content *           content              = nullptr;
  
      parser_build_context(common_chat_peg_builder & p, const generation_params & inputs);
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp

index 042da92e4cca5c209fca8ac6ddb01b252b49e34e..aadade60fa74b7f90b99f4193b603be74c014426 100644 (file)
--- a/common/chat-diff-analyzer.cpp
+++ b/common/chat-diff-analyzer.cpp
@@ -104,10 +104,11 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
                analysis.tools.function.name_suffix  = "";
                analysis.tools.arguments.start       = "{";
                analysis.tools.arguments.end         = "}";
+              analysis.tools.arguments.name_prefix = "";
                analysis.tools.arguments.name_suffix = ":";
                analysis.tools.arguments.separator   = ",";
                analysis.reasoning.mode              = reasoning_mode::TAG_BASED;
-              analysis.reasoning.start             = "<|channel>thought\n";
+              analysis.reasoning.start             = "<|channel>thought";
                analysis.reasoning.end               = "<channel|>";
                analysis.preserved_tokens.clear();
                analysis.preserved_tokens.push_back("<|tool_call>");
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp

index 07b487e15cc9f867996b1a7243fa20e2ad871710..f2ed77c44022c7ed53b7e14bb1be42699f7b4fdd 100644 (file)
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -75,6 +75,84 @@ static std::string escape_json_string_inner(const std::string & s) {
      return escaped;
  }
  
+// Token that delimits strings in Gemma4 dict syntax; normalize_gemma4_to_json() turns it into '"'.
+static const std::string GEMMA4_QUOTE = "<|\"|>";
+
+static std::string normalize_gemma4_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() * 2);
+
+    enum Ctx { DICT, ARRAY };
+    std::vector<Ctx> ctx;
+
+    auto is_ws = [](char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; };
+    auto skip_ws = [&](size_t & pos) {
+        while (pos < input.size() && is_ws(input[pos])) {
+            result += input[pos++];
+        }
+    };
+
+    auto quote_unquoted_key = [&](size_t & pos) {
+        if (pos < input.size() && input[pos] != '"' && input[pos] != '}') {
+            result += '"';
+            while (pos < input.size() && input[pos] != ':' && !is_ws(input[pos])) {
+                result += input[pos++];
+            }
+            result += '"';
+            skip_ws(pos);
+        }
+    };
+
+    size_t i = 0;
+    while (i < input.size()) {
+        if (i + GEMMA4_QUOTE.size() <= input.size() &&
+            input.compare(i, GEMMA4_QUOTE.size(), GEMMA4_QUOTE) == 0) {
+            result += '"';
+            i += GEMMA4_QUOTE.size();
+            continue;
+        }
+
+        char c = input[i];
+
+        if (c == '{') {
+            result += c;
+            ctx.push_back(DICT);
+            ++i;
+            skip_ws(i);
+            quote_unquoted_key(i);
+            continue;
+        }
+        if (c == '}') {
+            result += c;
+            if (!ctx.empty()) ctx.pop_back();
+            ++i;
+            continue;
+        }
+        if (c == '[') {
+            result += c;
+            ctx.push_back(ARRAY);
+            ++i;
+            continue;
+        }
+        if (c == ']') {
+            result += c;
+            if (!ctx.empty()) ctx.pop_back();
+            ++i;
+            continue;
+        }
+        if (c == ',' && !ctx.empty() && ctx.back() == DICT) {
+            result += c;
+            ++i;
+            skip_ws(i);
+            quote_unquoted_key(i);
+            continue;
+        }
+
+        result += c;
+        ++i;
+    }
+    return result;
+}
+
  // Convert Python-style single-quoted strings to JSON double-quoted strings
  // Only converts outer string delimiters, properly handling escape sequences:
  // - {'key': 'value'} -> {"key": "value"}
@@ -214,6 +292,14 @@ std::string & common_chat_peg_mapper::args_target() {
      return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer;
  }
  
+// Base mapper hook: container values are only passed through normalize_quotes_to_json().
+std::string common_chat_peg_mapper::normalize_container_value(const std::string & input) {
+    std::string json_text = normalize_quotes_to_json(input);
+    return json_text;
+}
+
+// Gemma4 mapper hook: convert Gemma4 dict syntax to JSON first, then apply the
+// same quote normalization as the base mapper.
+std::string common_chat_peg_gemma4_mapper::normalize_container_value(const std::string & input) {
+    const std::string as_json = normalize_gemma4_to_json(input);
+    return normalize_quotes_to_json(as_json);
+}
+
  void common_chat_peg_mapper::from_ast(const common_peg_ast_arena &    arena,
                                        const common_peg_parse_result & parse_result_arg) {
      arena.visit(parse_result_arg, [this](const common_peg_ast_node & node) { map(node); });
@@ -352,7 +438,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
              // For potential containers, normalize Python-style single quotes to JSON double quotes
              bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
              if (is_potential_container) {
-                value_content = normalize_quotes_to_json(value_content);
+                value_content = normalize_container_value(value_content);
              }
  
              // Try to parse as JSON value (number, bool, null, object, array)
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h

index 62402923c566e1f3e793720b090ddaed60764295..dd1388ec148ba30e2efc640b94efb8b3765b1a98 100644 (file)
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -17,7 +17,9 @@ class common_chat_peg_mapper {
  
      virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
      virtual void map(const common_peg_ast_node & node);
-    private:
+  protected:
+    virtual std::string normalize_container_value(const std::string & input);
+  private:
        // Tool call handling state
        std::optional<common_chat_tool_call> pending_tool_call;  // Tool call waiting for name
        common_chat_tool_call *              current_tool          = nullptr;
@@ -30,6 +32,13 @@ class common_chat_peg_mapper {
        std::string & args_target();
  };
  
+// Mapper selected for COMMON_CHAT_FORMAT_PEG_GEMMA4. It only overrides
+// normalize_container_value(); everything else is inherited from
+// common_chat_peg_mapper.
+class common_chat_peg_gemma4_mapper : public common_chat_peg_mapper {
+  public:
+    // explicit: a common_chat_msg must not convert implicitly into a mapper.
+    explicit common_chat_peg_gemma4_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+  protected:
+    // Translates Gemma4 dict syntax to JSON before the usual quote normalization.
+    std::string normalize_container_value(const std::string & input) override;
+};
+
  struct content_structure;
  struct tool_call_structure;
  
diff --git a/common/chat.cpp b/common/chat.cpp

index f92b2bd2903a32b2b3eb2e9a9f15b1c3b74ce6d6..9cd2dd7076e4108a052b0f2a5bf50f169ddd531c 100644 (file)
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -694,6 +694,8 @@ const char * common_chat_format_name(common_chat_format format) {
              return "peg-simple";
          case COMMON_CHAT_FORMAT_PEG_NATIVE:
              return "peg-native";
+        case COMMON_CHAT_FORMAT_PEG_GEMMA4:
+            return "peg-gemma4";
          default:
              throw std::runtime_error("Unknown chat format");
      }
@@ -1905,8 +1907,13 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena &          src_pars
              // Try to extract any partial results from what was successfully parsed
              common_chat_msg msg;
              msg.role = "assistant";
-            auto mapper = common_chat_peg_mapper(msg);
-            mapper.from_ast(ctx.ast, result);
+            std::unique_ptr<common_chat_peg_mapper> mapper;
+            if (params.format == COMMON_CHAT_FORMAT_PEG_GEMMA4) {
+                mapper = std::make_unique<common_chat_peg_gemma4_mapper>(msg);
+            } else {
+                mapper = std::make_unique<common_chat_peg_mapper>(msg);
+            }
+            mapper->from_ast(ctx.ast, result);
  
              if (ctx.is_debug()) {
                  fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
@@ -1921,8 +1928,13 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena &          src_pars
      common_chat_msg msg;
      msg.role = "assistant";
  
-    auto mapper = common_chat_peg_mapper(msg);
-    mapper.from_ast(ctx.ast, result);
+    std::unique_ptr<common_chat_peg_mapper> mapper;
+    if (params.format == COMMON_CHAT_FORMAT_PEG_GEMMA4) {
+        mapper = std::make_unique<common_chat_peg_gemma4_mapper>(msg);
+    } else {
+        mapper = std::make_unique<common_chat_peg_mapper>(msg);
+    }
+    mapper->from_ast(ctx.ast, result);
  
      if (ctx.is_debug()) {
          fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
diff --git a/common/chat.h b/common/chat.h

index 6358a1893c6d6921b8dd538b3963eb80b603e363..50c73d4817a1c3dc2a23d4c9537b674e35d9862b 100644 (file)
--- a/common/chat.h
+++ b/common/chat.h
@@ -184,6 +184,7 @@ enum common_chat_format {
      // These are intended to be parsed by the PEG parser
      COMMON_CHAT_FORMAT_PEG_SIMPLE,
      COMMON_CHAT_FORMAT_PEG_NATIVE,
+    COMMON_CHAT_FORMAT_PEG_GEMMA4,
  
      COMMON_CHAT_FORMAT_COUNT,  // Not a format, just the # formats
  };
diff --git a/models/templates/gemma4.jinja b/models/templates/gemma4.jinja

new file mode 100644 (file)

index 0000000..33c51c2
--- /dev/null
+++ b/models/templates/gemma4.jinja
@@ -0,0 +1,266 @@
+{#- format_parameters: renders every entry of `properties` (sorted by key) as key:{...} for a tool declaration. -#}
+{#- Entries whose key is listed in standard_keys are skipped. Per entry: description, nullable, then enum (STRING), -#}
+{#- nested properties/required (OBJECT) or items (ARRAY), and finally the upper-cased type. -#}
+{#- NOTE(review): the `required` argument is accepted but never read inside this macro. -#}
+{%- macro format_parameters(properties, required) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- set add_comma = false -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'OBJECT' -%}
+                ,properties:{
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                {%- elif value is mapping -%}
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                {%- endif -%}
+                }
+                {%- if value['required'] -%}
+                    ,required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    ,items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{#- format_function_declaration: renders one tool as declaration:NAME followed by a dict holding its description, -#}
+{#- then parameters (properties, required, type) when the tool has parameters, then response when that key is present. -#}
+{#- NOTE(review): the brace closing parameters is only emitted inside the params['type'] branch, and likewise for -#}
+{#- response inside its OBJECT branch. Left untouched here; confirm against the reference template before changing. -#}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{#- format_argument: renders one value recursively. Strings are wrapped in the quote token, booleans become -#}
+{#- true/false, mappings become a brace-delimited key:value list sorted by key (keys are wrapped in the quote -#}
+{#- token only when escape_keys is true), sequences become a bracketed list, anything else is emitted as-is. -#}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{#- strip_thinking: splits text on the channel-closing token and, for each piece, keeps only what precedes the -#}
+{#- channel-opening token (the whole piece when there is none); the concatenation is emitted trimmed. -#}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+
+{#- Prompt assembly: bos token, an optional system turn (thinking flag, system/developer text, tool declarations), -#}
+{#- one turn per remaining message (tool calls, tool responses, content), then the generation prompt. -#}
+{#- ns.prev_message_type records what was emitted last; it is reset at the start of every message. -#}
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{ bos_token }}
+{#- Handle System/Tool Definitions Block -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {{- messages[0]['content'] | trim -}}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+
+    {{- '<turn|>\n' -}}
+{%- endif %}
+
+{#- Loop through messages -#}
+{%- for message in loop_messages -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+        {{- '<|turn>' + role + '\n' }}
+
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+
+            {%- if message['tool_responses'] -%}
+                {#- Tool Response handling -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- '<|tool_response>' -}}
+                    {%- if tool_response['response'] is mapping -%}
+                        {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}
+                        {%- for key, value in tool_response['response'] | dictsort -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- if not loop.last %},{% endif -%}
+                        {%- endfor -%}
+                        {{- '}' -}}
+                    {%- else -%}
+                        {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}
+                    {%- endif -%}
+                    {{- '<tool_response|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_response' -%}
+            {%- endif -%}
+
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '\n\n<|image|>\n\n' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '\n\n<|video|>\n\n' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+        {%- if not (message['tool_responses'] and not message['content']) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' -%}
+        {{- '<|turn>model\n' -}}
+    {%- endif -%}
+    {%- if not enable_thinking | default(false) -%}
+        {{- '<|channel>thought\n<channel|>' -}}
+    {%- endif -%}
+{%- endif -%}
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp

index a716c35eb481f93f5d9c2b11b2db8d44c16afd91..34d50124c4f1dedd402bbd68b4cd1029eda465df 100644 (file)
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -589,6 +589,51 @@ static common_chat_tool amount_tool{
      })",
  };
  
+static common_chat_tool toggle_tool{  // Test fixture: tool "toggle" with a single required boolean parameter "enabled".
+    /* .name = */ "toggle",
+    /* .description = */ "Toggle a feature",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "enabled": {
+                "type": "boolean",
+                "description": "Whether to enable the feature"
+            }
+        },
+        "required": ["enabled"]
+    })",
+};
+
+static common_chat_tool nullable_tool{  // Test fixture: tool "set_nullable" with a single required parameter "value" whose schema type is "null".
+    /* .name = */ "set_nullable",
+    /* .description = */ "Set a nullable value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "value": {
+                "type": "null",
+                "description": "A null value"
+            }
+        },
+        "required": ["value"]
+    })",
+};
+
+static common_chat_tool config_tool{  // Test fixture: tool "set_config" with a single required parameter "config" of type "object" (no nested properties declared).
+    /* .name = */ "set_config",
+    /* .description = */ "Set configuration",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "config": {
+                "type": "object",
+                "description": "Configuration dict"
+            }
+        },
+        "required": ["config"]
+    })",
+};
+
  static common_chat_tool imaginary_number_tool{
      /* .name = */ "imaginary_number",
      /* .description = */ "Imaginary number converter",
@@ -1869,6 +1914,130 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
          tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).expect_reconstruction().run();
      }
  
+    {
+        // Google Gemma 4 (tool calling with Gemma4 dict format: unquoted keys, strings delimited by <|"|>)
+        auto tst = peg_tester("models/templates/gemma4.jinja");
+
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();  // plain content, no tool call
+
+        // Simple tool call with string argument
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>London<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", R"({"city": "London"})"))
+            .run();
+
+        // Tool call with string argument containing a space
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>San Francisco<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", R"({"city": "San Francisco"})"))
+            .run();
+
+        // Tool call with empty args
+        tst.test(
+                "<|tool_call>call:empty_args{}<tool_call|>")
+            .tools({ empty_args_tool })
+            .expect(message_with_tool_calls("empty_args", "{}"))
+            .run();
+
+        // Tool call with a string argument, preceded by multi-line text content
+        tst.test(
+                "Hello, world!\nWhat's up?<|tool_call>call:get_time{city:<|\"|>Paris<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_content_and_tool_call("Hello, world!\nWhat's up?", "get_time", R"({"city": "Paris"})"))
+            .run();
+
+        // Parallel tool calls (two back-to-back calls with parallel_tool_calls enabled)
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>London<|\"|>}<tool_call|>"
+                "<|tool_call>call:get_weather{city:<|\"|>Paris<|\"|>}<tool_call|>")
+            .tools({ get_time_tool, get_weather_tool })
+            .parallel_tool_calls(true)
+            .expect_tool_calls({
+                { "get_time", R"({"city": "London"})", "" },
+                { "get_weather", R"({"city": "Paris"})", "" },
+            })
+            .run();
+
+        // Tool call with integer argument (number type)
+        tst.test(
+                "<|tool_call>call:special_function{arg1:42}<tool_call|>")
+            .tools({ special_function_tool })
+            .expect(message_with_tool_calls("special_function", R"({"arg1": 42})"))
+            .run();
+
+        // Tool call with negative number argument
+        tst.test(
+                "<|tool_call>call:special_function{arg1:-7}<tool_call|>")
+            .tools({ special_function_tool })
+            .expect(message_with_tool_calls("special_function", R"({"arg1": -7})"))
+            .run();
+
+        // Tool call with decimal number argument
+        tst.test(
+                "<|tool_call>call:amount{orig:3.14}<tool_call|>")
+            .tools({ amount_tool })
+            .expect(message_with_tool_calls("amount", R"({"orig": 3.14})"))
+            .run();
+
+        // Tool call with boolean argument (true)
+        tst.test(
+                "<|tool_call>call:toggle{enabled:true}<tool_call|>")
+            .tools({ toggle_tool })
+            .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
+            .run();
+
+        // Tool call with boolean argument (false)
+        tst.test(
+                "<|tool_call>call:toggle{enabled:false}<tool_call|>")
+            .tools({ toggle_tool })
+            .expect(message_with_tool_calls("toggle", R"({"enabled": false})"))
+            .run();
+
+        // Tool call with null argument
+        tst.test(
+                "<|tool_call>call:set_nullable{value:null}<tool_call|>")
+            .tools({ nullable_tool })
+            .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
+            .run();
+
+        // Tool call with array argument (todo list of two <|"|>-delimited strings)
+        tst.test(
+                "<|tool_call>call:todo_list{todos:[<|\"|>buy milk<|\"|>,<|\"|>walk dog<|\"|>]}<tool_call|>")
+            .tools({ todo_list })
+            .expect(message_with_tool_calls("todo_list", R"({"todos":["buy milk","walk dog"]})"))
+            .run();
+
+        // Tool call with object/dict argument (nested dict mixing a string and a number value)
+        tst.test(
+                "<|tool_call>call:set_config{config:{theme:<|\"|>dark<|\"|>,count:3}}<tool_call|>")
+            .tools({ config_tool })
+            .expect(message_with_tool_calls("set_config", R"({"config":{"theme":"dark","count":3}})"))
+            .run();
+
+        // Tool call with empty array
+        tst.test(
+                "<|tool_call>call:todo_list{todos:[]}<tool_call|>")
+            .tools({ todo_list })
+            .expect(message_with_tool_calls("todo_list", R"({"todos":[]})"))
+            .run();
+
+        // Tool call with empty dict
+        tst.test(
+                "<|tool_call>call:set_config{config:{}}<tool_call|>")
+            .tools({ config_tool })
+            .expect(message_with_tool_calls("set_config", R"({"config":{}})"))
+            .run();
+
+        // Tool call with scientific notation number
+        tst.test(
+                "<|tool_call>call:amount{orig:1.5e10}<tool_call|>")
+            .tools({ amount_tool })
+            .expect(message_with_tool_calls("amount", R"({"orig": 1.5e10})"))
+            .run();
+    }
+
      {
          // Qwen-QwQ-32B (reasoning model)
          auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja");
author	Piotr Wilkin (ilintar) <redacted>
	Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)
committer	GitHub <redacted>
	Thu, 2 Apr 2026 21:31:02 +0000 (23:31 +0200)
common/chat-auto-parser-generator.cpp		patch \| blob \| history
common/chat-auto-parser.h		patch \| blob \| history
common/chat-diff-analyzer.cpp		patch \| blob \| history
common/chat-peg-parser.cpp		patch \| blob \| history
common/chat-peg-parser.h		patch \| blob \| history
common/chat.cpp		patch \| blob \| history
common/chat.h		patch \| blob \| history
models/templates/gemma4.jinja	[new file with mode: 0644]	patch \| blob
tests/test-chat.cpp		patch \| blob \| history