From: David Ribeiro Alves Date: Wed, 17 Sep 2025 08:08:02 +0000 (-0700) Subject: common : Fix corrupted memory error on json grammar initialization (#16038) X-Git-Tag: upstream/0.0.6527~30 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=cd08fc3ecc0264b4414b68af3874a6c689ed60c1;p=pkg%2Fggml%2Fsources%2Fllama.cpp common : Fix corrupted memory error on json grammar initialization (#16038) Initalizing RESERVED_NAME in is_reserved_name() is not thread safe and leads to corrupted memory when used from multiple threads as can be seen in the asan trace below. This fixes the initialization to make it thread-safe. #0 0x000100abd018 in std::__1::pair, std::__1::allocator>, void*>*>, bool> std::__1::__hash_table, std::__1::allocator>, std::__1::hash, std::__1::allocator>>, std::__1::equal_to, std::__1::allocator>>, std::__1::allocator, std::__1::allocator>>>::__emplace_unique_key_args, std::__1::allocator>, std::__1::basic_string, std::__1::allocator> const&>(std::__1::basic_string, std::__1::allocator> const&, std::__1::basic_string, std::__1::allocator> const&) __hash_table:1565 #1 0x000100ab0320 in SchemaConverter::visit(nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&, std::__1::basic_string, std::__1::allocator> const&) json-schema-to-grammar.cpp:802 #2 0x000100aafc48 in std::__1::__function::__func const&, common_grammar_options const&)::$_2, std::__1::allocator const&, common_grammar_options const&)::$_2>, std::__1::basic_string, std::__1::allocator> (std::__1::basic_string, std::__1::allocator> const&, nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&)>::operator()(std::__1::basic_string, std::__1::allocator> const&, nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&) function.h:319 #3 0x000100a2c938 in std::__1::__function::__func, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&), std::__1::allocator, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&)>, void (nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&)>::operator()(nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&) function.h:319 #4 0x000100a139f8 in foreach_function(nlohmann::json_abi_v3_12_0::basic_json, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&, std::__1::function, std::__1::allocator>, bool, long long, unsigned long long, double, std::__1::allocator, nlohmann::json_abi_v3_12_0::adl_serializer, std::__1::vector>, void> const&)> const&) chat.cpp:762 #5 0x000100a2a7f4 in std::__1::__function::__func, void (common_grammar_builder const&)>::operator()(common_grammar_builder const&) function.h:319 #6 0x000100aa98f4 in build_grammar(std::__1::function const&, common_grammar_options const&) json-schema-to-grammar.cpp:982 #7 0x0001009c9314 in common_chat_params_init_llama_3_x(minja::chat_template const&, templates_params const&, bool) chat.cpp:1110 #8 0x0001009b8afc in common_chat_templates_apply_jinja(common_chat_templates const*, common_chat_templates_inputs const&) chat.cpp:1992 #9 0x0001009b533c in common_chat_templates_apply(common_chat_templates const*, common_chat_templates_inputs const&) chat.cpp:2074 #10 0x000100810120 in llamacpp_apply_chat_template+0x724 (predict_oai-98384e17fb94e863:arm64+0x100090120) ... ==45482==Register values: x[0] = 0x00006020004147f8 x[1] = 0x00006080000013c8 x[2] = 0x0000000000000000 x[3] = 0x0000604006289738 x[4] = 0x0000000000000002 x[5] = 0x0000000000000001 x[6] = 0x04034000004b4000 x[7] = 0x0000000000000001 x[8] = 0xbebebebebebebebe x[9] = 0x17d7d7d7d7d7d7d7 x[10] = 0x00000c04000828ff x[11] = 0x0000000000000001 x[12] = 0x000000002018d383 x[13] = 0x0000000000000000 x[14] = 0xfa0000000000fafa x[15] = 0x000010700001ffff x[16] = 0x000000019dc012c0 x[17] = 0x00000001021284f8 x[18] = 0x0000000000000000 x[19] = 0x00000001700acdc0 x[20] = 0x0000000000000002 x[21] = 0x000000002018d384 x[22] = 0x16dd16fd2e731151 x[23] = 0x0000007000020000 x[24] = 0x0000000100c69c08 x[25] = 0x0000000100c69c20 x[26] = 0x00006080000013c7 x[27] = 0x0000000100c69c00 x[28] = 0x00000001700acd60 fp = 0x00000001700aceb0 lr = 0x0000000100abce30 sp = 0x00000001700acd60 AddressSanitizer can not provide additional info. SUMMARY: AddressSanitizer: SEGV __hash_table:1565 in std::__1::pair, std::__1::allocator>, void*>*>, bool> std::__1::__hash_table, std::__1::allocator>, std::__1::hash, std::__1::allocator>>, std::__1::equal_to, std::__1::allocator>>, std::__1::allocator, std::__1::allocator>>>::__emplace_unique_key_args, std::__1::allocator>, std::__1::basic_string, std::__1::allocator> const&>(std::__1::basic_string, std::__1::allocator> const&, std::__1::basic_string, std::__1::allocator> const&) Thread T5 created by T0 here: #0 0x0001020b99d4 in pthread_create+0x5c (libclang_rt.asan_osx_dynamic.dylib:arm64e+0x359d4) #1 0x000100873910 in std::sys::pal::unix::thread::Thread::new::h77254fdd87a28e05+0x118 (predict_oai-98384e17fb94e863:arm64+0x1000f3910) #2 0x0001007c7a1c in test::run_test::haeb3c2bcd5ed6cf6+0x76c (predict_oai-98384e17fb94e863:arm64+0x100047a1c) #3 0x0001007aedb0 in test::console::run_tests_console::he9d142d704f3a986+0x149c (predict_oai-98384e17fb94e863:arm64+0x10002edb0) #4 0x0001007c5758 in test::test_main::hf86a5e20735245b9+0x118 (predict_oai-98384e17fb94e863:arm64+0x100045758) #5 0x0001007c5da0 in test::test_main_static::h61ee9c8fd30abca0+0x54 (predict_oai-98384e17fb94e863:arm64+0x100045da0) ... ==45482==ABORTING --- diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 182c7875..db1f0b23 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -257,12 +257,13 @@ std::unordered_map STRING_FORMAT_RULES = { }; static bool is_reserved_name(const std::string & name) { - static std::unordered_set RESERVED_NAMES; - if (RESERVED_NAMES.empty()) { - RESERVED_NAMES.insert("root"); - for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first); - for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first); - } + static const std::unordered_set RESERVED_NAMES = [] { + std::unordered_set s; + s.insert("root"); + for (const auto & p : PRIMITIVE_RULES) s.insert(p.first); + for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first); + return s; + }(); return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); }