From: DAN™
Date: Tue, 17 Dec 2024 22:24:22 +0000 (-0500)
Subject: Use model->gguf_kv for loading the template instead of using the C API. (#10868)
X-Git-Tag: upstream/0.0.4488~138
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=d62b532c52e0118323277eaa5f442e11ce6505ed;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Use model->gguf_kv for loading the template instead of using the C API. (#10868)

* Bump model_template to 16384 bytes to support larger chat templates.

* Use `model->gguf_kv` for efficiency.
---

diff --git a/src/llama.cpp b/src/llama.cpp
index 1cc8a933..c7b180f2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -22651,15 +22651,15 @@ int32_t llama_chat_apply_template(
     std::string curr_tmpl(tmpl == nullptr ? "" : tmpl);
     if (tmpl == nullptr) {
         GGML_ASSERT(model != nullptr);
-        // load template from model
-        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
-        std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
-        if (res < 0) {
+
+        // load template from model, if available
+        const auto & it = model->gguf_kv.find("tokenizer.chat_template");
+        if (it != model->gguf_kv.end() && it->second.size() > 0) {
+            curr_tmpl = it->second;
+        }
+        else {
             // worst case: there is no information about template, we will use chatml by default
-            curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
-        } else {
-            curr_tmpl = std::string(model_template.data(), model_template.size());
+            curr_tmpl = "chatml"; // see llama_chat_apply_template_internal
         }
     }
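
Note (not part of the commit): a minimal caller-side sketch of the path this change affects. Passing tmpl == nullptr makes llama_chat_apply_template() pick up "tokenizer.chat_template" from the model's metadata (after this commit, via model->gguf_kv), falling back to chatml when the key is absent. The helper name and buffer size below are illustrative assumptions, not code from the repository.

#include <string>
#include <vector>
#include "llama.h"

// Format a short chat using the template embedded in the model, if any.
static std::string format_with_model_template(const llama_model * model) {
    std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!" },
    };

    // First pass with a reasonably sized buffer; tmpl == nullptr selects the
    // template stored in the model (or "chatml" if none is stored).
    std::vector<char> buf(4096);
    int32_t n = llama_chat_apply_template(model, /*tmpl=*/nullptr,
                                          chat.data(), chat.size(),
                                          /*add_ass=*/true,
                                          buf.data(), (int32_t) buf.size());
    if (n < 0) {
        return ""; // template not recognized by llama_chat_apply_template_internal
    }
    if (n > (int32_t) buf.size()) {
        // Output did not fit; retry with the exact size reported.
        buf.resize(n);
        n = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                      true, buf.data(), (int32_t) buf.size());
    }
    return std::string(buf.data(), n);
}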