git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llama : fix CodeLlama FIM token checks (#8144)
author Sigbjørn Skjæret <redacted>
Thu, 27 Jun 2024 07:46:41 +0000 (09:46 +0200)
committer GitHub <redacted>
Thu, 27 Jun 2024 07:46:41 +0000 (10:46 +0300)
* account for space prefix character

* use find instead

src/llama.cpp

index f78594a6f7c497f0ec06ffa711448f2e27e7f906..080057332b4dca399e13b4a86aff4563ea63ae21 100644 (file)
@@ -5152,10 +5152,10 @@ static void llm_load_vocab(
         if (gen_name.find("code") != std::string::npos) {
             if (model.arch == LLM_ARCH_LLAMA
               && 32010 < vocab.id_to_token.size()
-              && vocab.id_to_token[32007].text == "<PRE>"
-              && vocab.id_to_token[32008].text == "<SUF>"
-              && vocab.id_to_token[32009].text == "<MID>"
-              && vocab.id_to_token[32010].text == "<EOT>") {
+              && vocab.id_to_token[32007].text.find("<PRE>") != std::string::npos
+              && vocab.id_to_token[32008].text.find("<SUF>") != std::string::npos
+              && vocab.id_to_token[32009].text.find("<MID>") != std::string::npos
+              && vocab.id_to_token[32010].text.find("<EOT>") != std::string::npos) {
                 vocab.special_prefix_id = 32007;
                 vocab.special_suffix_id = 32008;
                 vocab.special_middle_id = 32009;