From: Sigbjørn Skjæret
Date: Thu, 27 Jun 2024 07:46:41 +0000 (+0200)
Subject: llama : fix CodeLlama FIM token checks (#8144)
X-Git-Tag: upstream/0.0.4488~1240
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=911e35bb8bb2fd1c7d3f40f27e96ff432eae7e14;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : fix CodeLlama FIM token checks (#8144)

* account for space prefix character
* use find instead
---
diff --git a/src/llama.cpp b/src/llama.cpp
index f78594a6..08005733 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5152,10 +5152,10 @@ static void llm_load_vocab(
         if (gen_name.find("code") != std::string::npos) {
             if (model.arch == LLM_ARCH_LLAMA
               && 32010 < vocab.id_to_token.size()
-              && vocab.id_to_token[32007].text == "<PRE>"
-              && vocab.id_to_token[32008].text == "<SUF>"
-              && vocab.id_to_token[32009].text == "<MID>"
-              && vocab.id_to_token[32010].text == "<EOT>") {
+              && vocab.id_to_token[32007].text.find("<PRE>") != std::string::npos
+              && vocab.id_to_token[32008].text.find("<SUF>") != std::string::npos
+              && vocab.id_to_token[32009].text.find("<MID>") != std::string::npos
+              && vocab.id_to_token[32010].text.find("<EOT>") != std::string::npos) {
                 vocab.special_prefix_id = 32007;
                 vocab.special_suffix_id = 32008;
                 vocab.special_middle_id = 32009;