* gguf-py, llama : add constants and methods related to Llama-3.1 <|eom_id|> token
* llama : find Llama-3.1 <|eom_id|> token id during vocab loading
* llama-vocab : add the Llama-3.1 <|eom_id|> token to the set of tokens that stop generation
---------
Co-authored-by: Stanisław Szymczyk <redacted>
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
EOT_ID = "tokenizer.ggml.eot_token_id"
+ EOM_ID = "tokenizer.ggml.eom_token_id"
class Adapter:
TYPE = "adapter.type"
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
def add_eot_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
+ def add_eom_token_id(self, id: int) -> None:
+ self.add_uint32(Keys.Tokenizer.EOM_ID, id)
+
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix:
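For a quick end-to-end check of the new metadata, the key written by add_eom_token_id() can be read back with ggml's gguf C API. A minimal sketch (not part of this diff; the model path is an assumption and error handling is trimmed):

#include "ggml.h"   // the gguf_* API is declared here in this tree

#include <cstdio>

int main() {
    struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
    struct gguf_context * ctx = gguf_init_from_file("model.gguf", params); // hypothetical path
    if (!ctx) {
        return 1;
    }

    // the value is stored as uint32, matching add_uint32() in GGUFWriter above
    const int key_id = gguf_find_key(ctx, "tokenizer.ggml.eom_token_id");
    if (key_id >= 0) {
        printf("eom token id: %u\n", gguf_get_val_u32(ctx, key_id));
    }

    gguf_free(ctx);
    return 0;
}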
bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
return token != -1 && (
token == llama_token_eos_impl(vocab) ||
- token == llama_token_eot_impl(vocab)
+ token == llama_token_eot_impl(vocab) ||
+ token == llama_token_eom_impl(vocab)
);
}
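Since the public llama_token_is_eog() forwards to this impl, existing generation loops pick up the new stop token without changes. A hedged sketch of such a loop (model/context setup omitted; next_token() is a hypothetical stand-in for the sampling step):

// assumes an initialized llama_model * model
for (int i = 0; i < n_predict; ++i) {
    const llama_token tok = next_token(); // hypothetical sampling helper
    if (llama_token_is_eog(model, tok)) {
        break; // EOS, EOT, or (with this change) EOM ends generation
    }
    // ... decode and emit tok ...
}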
return vocab.special_eot_id;
}
+llama_token llama_token_eom_impl(const struct llama_vocab & vocab) {
+ return vocab.special_eom_id;
+}
+
int32_t llama_tokenize_impl(
const struct llama_vocab & vocab,
const char * text,
id special_suffix_id = -1;
id special_middle_id = -1;
id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
+ id special_eom_id = -1;
// tokenizer flags
bool tokenizer_add_space_prefix = false;
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
llama_token llama_token_eot_impl (const struct llama_vocab & vocab);
+llama_token llama_token_eom_impl (const struct llama_vocab & vocab);
int32_t llama_tokenize_impl(
const struct llama_vocab & vocab,
LLM_KV_TOKENIZER_SUFFIX_ID,
LLM_KV_TOKENIZER_MIDDLE_ID,
LLM_KV_TOKENIZER_EOT_ID,
+ LLM_KV_TOKENIZER_EOM_ID,
LLM_KV_ADAPTER_TYPE,
LLM_KV_ADAPTER_LORA_ALPHA,
{ LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
{ LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
{ LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
+ { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },
{ LLM_KV_ADAPTER_TYPE, "adapter.type" },
{ LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
{ LLM_KV_TOKENIZER_SUFFIX_ID, vocab.special_suffix_id },
{ LLM_KV_TOKENIZER_MIDDLE_ID, vocab.special_middle_id },
{ LLM_KV_TOKENIZER_EOT_ID, vocab.special_eot_id },
+ { LLM_KV_TOKENIZER_EOM_ID, vocab.special_eom_id },
};
for (const auto & it : special_token_types) {
}
}
}
+
+ // find EOM token: "<|eom_id|>"
+ //
+ // TODO: convert scripts should provide this token through the KV metadata LLM_KV_TOKENIZER_EOM_ID
+ // for now, we apply this workaround to find the EOM token based on its text
+ if (vocab.special_eom_id == -1) {
+ const auto & t = vocab.token_to_id.find("<|eom_id|>");
+ if (t != vocab.token_to_id.end()) {
+ vocab.special_eom_id = t->second;
+ }
+ }
}
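The workaround can be sanity-checked through the public API: tokenizing the literal text with special-token parsing enabled should return the single id the loader just found (128008 in the released Llama-3.1 vocab, though the id is model-dependent). A sketch, assuming an initialized llama_model * model:

llama_token buf[4];
const int n = llama_tokenize(model, "<|eom_id|>", 10, buf, 4,
                             /*add_special=*/false, /*parse_special=*/true);
if (n == 1) {
    printf("<|eom_id|> -> %d\n", buf[0]); // expect vocab.special_eom_id
}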
// build special tokens cache