git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert : fix Qwen/Qwen-7b conversion (#7308)
author	amd-lalithnc <redacted>
Fri, 17 May 2024 07:01:58 +0000 (12:31 +0530)
committer	GitHub <redacted>
Fri, 17 May 2024 07:01:58 +0000 (10:01 +0300)
convert-hf-to-gguf.py

index cd875fa4af6afeeefc272e1ddffc3a4bd66e5343..2810e1e41941a81df7fe75ff76200134c5ff5bc1 100755 (executable)
@@ -526,7 +526,7 @@ class Model:
 
         # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
         added_vocab = tokenizer.special_tokens
-        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
+        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()}
 
         for i in range(vocab_size):
             if i not in reverse_vocab: