git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
common : fix tool call type detection for nullable and enum schemas (#21327)
author: Samanvya Tripathi <redacted>
Fri, 3 Apr 2026 15:51:23 +0000 (11:51 -0400)
committer: GitHub <redacted>
Fri, 3 Apr 2026 15:51:23 +0000 (17:51 +0200)
* common : fix tool call type detection for nullable and enum schemas

* common, tests : fix grammar delegation for nullable/enum schemas and add tests

Fix enum type inference to scan all enum values (not just index 0) so
schemas like {"enum": [0, "celsius"]} are correctly detected as string type.

Fix schema_delegates in peg-parser to handle nullable type arrays
(["string", "null"]) and typeless enum schemas in raw mode, allowing
the tagged parser to use raw text instead of JSON-formatted strings.

Add test cases for Qwen3-Coder (TAG_WITH_TAGGED format):
- nullable string ["string", "null"]
- nullable string with null first ["null", "string"]
- nullable integer ["integer", "null"]
- enum without explicit type key

common/chat-auto-parser-generator.cpp
common/peg-parser.cpp
tests/test-chat.cpp

index 60b269c42de83f28f7dd3f945417e9036396e22c..efa251b99bac0f127a5c820bde4112040a580087 100644 (file)
@@ -400,12 +400,34 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
         for (const auto & [param_name, param_schema] : properties.items()) {
             bool        is_required = required.find(param_name) != required.end();
             std::string type        = "object";
-            auto        type_obj    = param_schema.contains("type") ? param_schema.at("type") : json::object();
-            if (type_obj.is_string()) {
-                type_obj.get_to(type);
-            } else if (type_obj.is_object()) {
-                if (type_obj.contains("type") && type_obj.at("type").is_string()) {
-                    type_obj.at("type").get_to(type);
+            if (param_schema.contains("type")) {
+                const auto & type_obj = param_schema.at("type");
+                if (type_obj.is_string()) {
+                    type_obj.get_to(type);
+                } else if (type_obj.is_array()) {
+                    // Handle nullable types like ["string", "null"]
+                    for (const auto & t : type_obj) {
+                        if (t.is_string() && t.get<std::string>() != "null") {
+                            type = t.get<std::string>();
+                            break;
+                        }
+                    }
+                } else if (type_obj.is_object()) {
+                    if (type_obj.contains("type") && type_obj.at("type").is_string()) {
+                        type_obj.at("type").get_to(type);
+                    }
+                }
+            }
+            // Infer string type from enum values when type is unspecified
+            if (type == "object" && param_schema.contains("enum")) {
+                const auto & enum_vals = param_schema.at("enum");
+                if (enum_vals.is_array()) {
+                    for (const auto & v : enum_vals) {
+                        if (v.is_string()) {
+                            type = "string";
+                            break;
+                        }
+                    }
                 }
             }
 
@@ -574,9 +596,33 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
         std::vector<arg_entry> arg_entries;
 
         for (const auto & [param_name, param_schema] : properties.items()) {
-            std::string type    = "object";
-            auto        type_v  = param_schema.contains("type") ? param_schema.at("type") : json::object();
-            if (type_v.is_string()) type_v.get_to(type);
+            std::string type = "object";
+            if (param_schema.contains("type")) {
+                const auto & type_v = param_schema.at("type");
+                if (type_v.is_string()) {
+                    type_v.get_to(type);
+                } else if (type_v.is_array()) {
+                    // Handle nullable types like ["string", "null"]
+                    for (const auto & t : type_v) {
+                        if (t.is_string() && t.get<std::string>() != "null") {
+                            type = t.get<std::string>();
+                            break;
+                        }
+                    }
+                }
+            }
+            // Infer string type from enum values when type is unspecified
+            if (type == "object" && param_schema.contains("enum")) {
+                const auto & enum_vals = param_schema.at("enum");
+                if (enum_vals.is_array()) {
+                    for (const auto & v : enum_vals) {
+                        if (v.is_string()) {
+                            type = "string";
+                            break;
+                        }
+                    }
+                }
+            }
 
             common_peg_parser value_parser = p.eps();
             if (type == "string") {
index 694f9b850ac7cdd3de376b3a5d50b49f36249142..86faacd61f812a2c9d62dd06b79daeaa0913212c 100644 (file)
@@ -1561,7 +1561,23 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
         if (!s.schema) {
             return true;
         }
-        if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") {
+        if (s.raw && s.schema->contains("type")) {
+            const auto & type_val = s.schema->at("type");
+            if (type_val.is_string() && type_val == "string") {
+                return true;
+            }
+            // Handle nullable types like ["string", "null"] - delegate when the
+            // non-null type is string, since the tagged format uses raw text
+            if (type_val.is_array()) {
+                for (const auto & t : type_val) {
+                    if (t.is_string() && t.get<std::string>() != "null") {
+                        return t.get<std::string>() == "string";
+                    }
+                }
+            }
+        }
+        // Delegate for enum schemas in raw mode - enum values are literal strings
+        if (s.raw && !s.schema->contains("type") && s.schema->contains("enum")) {
             return true;
         }
         return false;
index 34d50124c4f1dedd402bbd68b4cd1029eda465df..8605a7678481756863a86b9853559dc41469cbb8 100644 (file)
@@ -657,6 +657,66 @@ static common_chat_tool imaginary_number_tool{
     })",
 };
 
+static common_chat_tool nullable_string_tool{
+    /* .name = */ "set_nullable_str",
+    /* .description = */ "Set a nullable string value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": ["string", "null"],
+                "description": "A nullable string"
+            }
+        },
+        "required": ["name"]
+    })",
+};
+
+static common_chat_tool nullable_string_null_first_tool{
+    /* .name = */ "set_nullable_str_nf",
+    /* .description = */ "Set a nullable string value with null first in type array",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": ["null", "string"],
+                "description": "A nullable string with null first"
+            }
+        },
+        "required": ["name"]
+    })",
+};
+
+static common_chat_tool nullable_int_tool{
+    /* .name = */ "set_nullable_int",
+    /* .description = */ "Set a nullable integer value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "count": {
+                "type": ["integer", "null"],
+                "description": "A nullable integer"
+            }
+        },
+        "required": ["count"]
+    })",
+};
+
+static common_chat_tool enum_no_type_tool{
+    /* .name = */ "set_unit",
+    /* .description = */ "Set a temperature unit",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "unit": {
+                "enum": ["celsius", "fahrenheit"],
+                "description": "Temperature unit"
+            }
+        },
+        "required": ["unit"]
+    })",
+};
+
 static common_chat_tool string_param_tool{
     /* .name = */ "string_param",
     /* .description = */ "Tool with string parameter for testing",
@@ -2200,6 +2260,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
                 }
             })
             .run();
+
     }
 
     {
@@ -2383,6 +2444,58 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             })
             .expect_reconstruction()
             .run();
+
+        // nullable string type ["string", "null"]
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_str>\n"
+               "<parameter=name>\nhello world\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_string_tool })
+            .expect_tool_calls({
+                { "set_nullable_str", R"({"name": "hello world"})", {} },
+            })
+            .run();
+
+        // nullable string with null first in type array ["null", "string"]
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_str_nf>\n"
+               "<parameter=name>\nhello world\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_string_null_first_tool })
+            .expect_tool_calls({
+                { "set_nullable_str_nf", R"({"name": "hello world"})", {} },
+            })
+            .run();
+
+        // nullable integer type ["integer", "null"] - should use JSON value path, not string
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_int>\n"
+               "<parameter=count>\n42\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_int_tool })
+            .expect_tool_calls({
+                { "set_nullable_int", R"({"count": 42})", {} },
+            })
+            .run();
+
+        // enum without explicit type key - should infer string from enum values
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_unit>\n"
+               "<parameter=unit>\ncelsius\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ enum_no_type_tool })
+            .expect_tool_calls({
+                { "set_unit", R"({"unit": "celsius"})", {} },
+            })
+            .run();
     }
     {
         auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);