vocab : reduce debug logs about non-EOG control tokens (#18541)

author Georgi Gerganov <redacted>

Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)

committer GitHub <redacted>

Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)
author Georgi Gerganov <redacted>
Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)
committer GitHub <redacted>
Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp

index bd311bea45b382cc8d78fbc92cbfd80decb159f9..a20c6525e46ac35e95c786d08c8e9fe07295fd32 100644 (file)
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2203,6 +2203,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
          //       for now, we apply this workaround to find the tokens based on their text
  
          for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
              // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
              if (special_eot_id == LLAMA_TOKEN_NULL) {
                  if (false
@@ -2218,10 +2220,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<end_of_utterance>" // smoldocling
                     ) {
                      special_eot_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2232,10 +2234,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<|eom_id|>"
                          ) {
                      special_eom_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2252,10 +2254,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<|code_prefix|>" // GLM-4.5
                          ) {
                      special_fim_pre_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2272,10 +2274,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<|code_suffix|>" // GLM-4.5
                          ) {
                      special_fim_suf_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2292,10 +2294,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<|code_middle|>" // GLM-4.5
                          ) {
                      special_fim_mid_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2309,10 +2311,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<PAD>"
                          ) {
                      special_fim_pad_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2327,10 +2329,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<reponame>"    // Granite
                          ) {
                      special_fim_rep_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
@@ -2341,15 +2343,41 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                          || t.first == "<|file_sep|>" // Qwen
                          ) {
                      special_fim_sep_id = t.second;
-                    if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                          LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                                  __func__, t.second, t.first.c_str());
-                        id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                      }
                  }
              }
          }
  
+        // auto-detect unused tokens: e.g. control tokens with the word "unused"
+        // ideally, these tokens should be marked as unused during conversion
+        {
+            uint32_t n_unused = 0;
+
+            for (const auto & t : token_to_id) {
+                auto & attr = id_to_token[t.second].attr;
+
+                if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                    continue;
+                }
+
+                if ((attr & LLAMA_TOKEN_ATTR_UNUSED) == 0) {
+                    if (strstr(t.first.c_str(), "unused") != NULL) {
+                        attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_UNUSED);
+                    }
+                }
+
+                if (attr & LLAMA_TOKEN_ATTR_UNUSED) {
+                    n_unused++;
+                }
+            }
+
+            LLAMA_LOG_INFO("%s: %u unused tokens\n", __func__, n_unused);
+        }
+
          // maintain a list of tokens that cause end-of-generation
          // this is currently determined based on the token text, which is obviously not ideal
          // ref: https://github.com/ggerganov/llama.cpp/issues/9606
@@ -2368,6 +2396,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
          }
  
          for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
              if (false
                      || t.first == "<|eot_id|>"
                      || t.first == "<|im_end|>"
@@ -2385,24 +2415,28 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                      || t.first == "<end_of_utterance>" // smoldocling
                 ) {
                  special_eog_ids.insert(t.second);
-                if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
+                if ((attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
                      LLAMA_LOG_WARN("%s: control-looking token: %6d '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
                              __func__, t.second, t.first.c_str());
-                    id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
+                    attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_CONTROL);
                  }
              } else {
-                // token is control, but not marked as EOG -> print a debug log
-                if (id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && special_eog_ids.count(t.second) == 0) {
-                    LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
-                            __func__, t.second, t.first.c_str());
+                if (attr & LLAMA_TOKEN_ATTR_CONTROL && !(attr & LLAMA_TOKEN_ATTR_UNUSED)) {
+                    // token is control, but not marked as EOG -> print a debug log
+                    if (special_eog_ids.count(t.second) == 0) {
+                        LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
+                                __func__, t.second, t.first.c_str());
+                    }
                  }
              }
          }
  
          // @ngxson : quick hack for gpt-oss, always render these tokens
          for (const auto & t : token_to_id) {
+            auto & attr = id_to_token[t.second].attr;
+
              if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
-                id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
+                attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
              }
          }
  
@@ -2435,15 +2469,17 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
  
              LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__);
              for (auto tid : special_eog_ids) {
-                LLAMA_LOG_INFO("%s:   - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str());
+                auto & text = id_to_token[tid].text;
+
+                LLAMA_LOG_INFO("%s:   - %d ('%s')\n", __func__, tid, text.c_str());
  
-                if (id_to_token[tid].text == "<|return|>") {
+                if (text == "<|return|>") {
                      has_return = true;
-                } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
+                } else if (text == "<|call|>" || text == "<|calls|>") {
                      has_call = true;
-                } else if (id_to_token[tid].text == "<|flush|>") {
+                } else if (text == "<|flush|>") {
                      has_flush = true;
-                } else if (id_to_token[tid].text == "<|end|>") {
+                } else if (text == "<|end|>") {
                      has_end = true;
                      end_id = tid;
                  }
@@ -2451,7 +2487,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
  
              if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
                  special_eog_ids.erase(end_id);
-                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
+
+                auto & attr = id_to_token[end_id].attr;
+                attr = (llama_token_attr) (attr | LLAMA_TOKEN_ATTR_USER_DEFINED);
+
                  LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
              }
          }
author	Georgi Gerganov <redacted>
	Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)
committer	GitHub <redacted>
	Fri, 2 Jan 2026 14:17:33 +0000 (16:17 +0200)