llama : fix typo in <|im_end|> token text (#6745)

author Georgi Gerganov <redacted>

Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)

committer Georgi Gerganov <redacted>

Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)
author Georgi Gerganov <redacted>
Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)
committer Georgi Gerganov <redacted>
Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)
diff --git a/llama.cpp b/llama.cpp

index 7440c740fefbc65b3c073ddbba6c4b66c9e64bb0..a25d115c1d82af93c3f69df8055b626ab59dcdb9 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -4340,7 +4340,7 @@ static void llm_load_vocab(
              }
          }
  
-        // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+        // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
          //
          // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
          //       for now, we apply this workaround to find the EOT token based on its text
@@ -4351,7 +4351,7 @@ static void llm_load_vocab(
                          //       need to fix convert script
                          //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
                          (t.first == "<|eot_id|>" ||
-                         t.first == "<|im_emd|>" ||
+                         t.first == "<|im_end|>" ||
                           t.first == "<end_of_turn>"
                          )
                     ) {
author	Georgi Gerganov <redacted>
	Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)
committer	Georgi Gerganov <redacted>
	Mon, 22 Apr 2024 12:41:11 +0000 (15:41 +0300)