From: Georgi Gerganov
Date: Tue, 14 Jan 2025 10:54:58 +0000 (+0200)
Subject: vocab : add dummy tokens for "no_vocab" type (#11231)
X-Git-Tag: upstream/0.0.4488~5
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=bbf3e55e352d309573bdafee01a014b0a2492155;p=pkg%2Fggml%2Fsources%2Fllama.cpp

vocab : add dummy tokens for "no_vocab" type (#11231)

* vocab : add dummy tokens for "no_vocab" type

ggml-ci

* vocab : minor

[no ci]
---

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 96b74e93..4969d262 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1356,8 +1356,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         // read vocab size from metadata
         uint32_t n_tokens = 0;
-        if (!ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
-            LLAMA_LOG_WARN("%s: there is no vocab_size in metadata\n", __func__);
+        if (ml.get_key(LLM_KV_VOCAB_SIZE, n_tokens, false)) {
+            LLAMA_LOG_WARN("%s: adding %u dummy tokens\n", __func__, n_tokens);
+            id_to_token.resize(n_tokens);
         }
         return;