llama : recognize IBM Granite 3.3 FIM tokens (#12988)
author    Mikko Juola <redacted>
          Thu, 17 Apr 2025 08:37:05 +0000 (01:37 -0700)
committer GitHub <redacted>
          Thu, 17 Apr 2025 08:37:05 +0000 (11:37 +0300)
Granite's FIM tokens are very similar to Qwen's; the only difference is
that they use an underscore instead of a dash, so e.g. <fim_middle>
instead of <fim-middle>.
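
For context, these tokens are used to assemble prefix/suffix/middle (PSM)
infill prompts. A hedged illustration of that layout with Granite's
spellings (the serving-side template itself is not part of this change,
and the code snippet inside the string is just an example):

```
// Illustrative only: PSM-style infill prompt using Granite's token strings.
// The model is asked to fill the gap between the prefix and the suffix.
const char * infill_prompt =
    "<fim_prefix>" "def add(a, b):\n    "  // code before the cursor
    "<fim_suffix>" "\n"                    // code after the cursor
    "<fim_middle>";                        // generation continues from here
```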

Opening up tokenizer_config.json in ibm-granite/granite-3.3-8b-base
shows:

```
    "<fim_prefix>",
    "<fim_middle>",
    "<fim_suffix>",
    "<fim_pad>",
    ...
    "<reponame>",
```
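
With this change the loader resolves these strings to the special FIM
token ids, which are then reachable through the public API. A minimal
sketch, assuming the llama_vocab_fim_* accessors from llama.h and a
hypothetical local GGUF path:

```
// Sketch: query the FIM ids after loading a Granite 3.3 GGUF.
#include "llama.h"
#include <stdio.h>

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    // hypothetical path; any Granite 3.3 base conversion would do
    struct llama_model * model =
        llama_model_load_from_file("granite-3.3-8b-base.gguf", mparams);
    if (model == NULL) return 1;

    const struct llama_vocab * vocab = llama_model_get_vocab(model);

    // each accessor returns LLAMA_TOKEN_NULL if the token is absent
    printf("fim_pre: %d\n", llama_vocab_fim_pre(vocab));
    printf("fim_suf: %d\n", llama_vocab_fim_suf(vocab));
    printf("fim_mid: %d\n", llama_vocab_fim_mid(vocab));
    printf("fim_pad: %d\n", llama_vocab_fim_pad(vocab));
    printf("fim_rep: %d\n", llama_vocab_fim_rep(vocab));

    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```

Before this patch, the Granite spellings fell through every comparison
in the loader below, so these accessors returned LLAMA_TOKEN_NULL.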

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 464ff01e06fe181135f31319ac6cda36e676fe49..480605173dd910129218d8f2b75589ba1ebf37d0 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1841,6 +1841,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 if (false
                         || t.first == "<|fim_prefix|>"  // Qwen
                         || t.first == "<fim-prefix>"
+                        || t.first == "<fim_prefix>"    // Granite
                         || t.first == "<|fim▁begin|>" // DeepSeek
                         || t.first == "<PRE>"
                         || t.first == "▁<PRE>"          // CodeLlama
@@ -1859,6 +1860,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 if (false
                         || t.first == "<|fim_suffix|>" // Qwen
                         || t.first == "<fim-suffix>"
+                        || t.first == "<fim_suffix>"   // Granite
                         || t.first == "<|fim▁hole|>" // DeepSeek
                         || t.first == "<SUF>"
                         || t.first == "▁<SUF>"         // CodeLlama
@@ -1877,6 +1879,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 if (false
                         || t.first == "<|fim_middle|>" // Qwen
                         || t.first == "<fim-middle>"
+                        || t.first == "<fim_middle>"   // Granite
                         || t.first == "<|fim▁end|>"  // DeepSeek
                         || t.first == "<MID>"
                         || t.first == "▁<MID>"         // CodeLlama
@@ -1895,6 +1898,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 if (false
                         || t.first == "<|fim_pad|>" // Qwen
                         || t.first == "<fim-pad>"
+                        || t.first == "<fim_pad>"   // Granite
                         || t.first == "<PAD>"
                         ) {
                     special_fim_pad_id = t.second;
@@ -1913,6 +1917,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<|repo_name|>"
                         || t.first == "<fim-repo>"
                         || t.first == "<REPO>"
+                        || t.first == "<reponame>"    // Granite
                         ) {
                     special_fim_rep_id = t.second;
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {