vocab : add dummy tokens for "no_vocab" type (#11231)

author Georgi Gerganov <redacted>

Tue, 14 Jan 2025 10:54:58 +0000 (12:54 +0200)

committer GitHub <redacted>

Tue, 14 Jan 2025 10:54:58 +0000 (11:54 +0100)
author Georgi Gerganov <redacted>
Tue, 14 Jan 2025 10:54:58 +0000 (12:54 +0200)
committer GitHub <redacted>
Tue, 14 Jan 2025 10:54:58 +0000 (11:54 +0100)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 96b74e93a51ee0e8e7aa88ac9142fab031093a82..4969d2628c131e1187d4cb85e934936381c88974 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1356,8 +1356,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
  
              // read vocab size from metadata
              uint32_t n_tokens = 0;
-            if (!ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
-                LLAMA_LOG_WARN("%s: there is no vocab_size in metadata\n", __func__);
+            if (ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
+                LLAMA_LOG_WARN("%s: adding %u dummy tokens\n", __func__, n_tokens);
+                id_to_token.resize(n_tokens);
              }
  
              return;
author	Georgi Gerganov <redacted>
	Tue, 14 Jan 2025 10:54:58 +0000 (12:54 +0200)
committer	GitHub <redacted>
	Tue, 14 Jan 2025 10:54:58 +0000 (11:54 +0100)