From: Sigbjørn Skjæret Date: Sat, 21 Jun 2025 16:12:05 +0000 (+0200) Subject: gguf-py : fix Qwen3-Embedding eos token (#14314) X-Git-Tag: upstream/0.0.5760~28 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=aa0ef5c578eef4c2adc7be1282f21bab5f3e8d26;p=pkg%2Fggml%2Fsources%2Fllama.cpp gguf-py : fix Qwen3-Embedding eos token (#14314) --- diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py index a792d56f..3b08f613 100644 --- a/gguf-py/gguf/vocab.py +++ b/gguf-py/gguf/vocab.py @@ -197,6 +197,16 @@ class SpecialVocab: if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'): if not tokenizer_config: special_eos = special_last + elif special_last != special_eos: + if 'eot' not in self.special_token_types: + self.special_token_types = tuple(self.special_token_types) + ('eot', ) + tokenizer_config['eot_token'] = special_eos + elif 'eom' not in self.special_token_types: + self.special_token_types = tuple(self.special_token_types) + ('eom', ) + tokenizer_config['eom_token'] = special_eos + else: + logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!') + tokenizer_config['eos_token'] = special_eos = special_last self.add_special_token['eos'] = True if special_last == special_eos else False if special_last != special_eos: logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing')