git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert : remove fsep token from GPTRefactForCausalLM (#8237)
author Jiří Podivín <redacted>
Fri, 12 Jul 2024 08:06:33 +0000 (10:06 +0200)
committer GitHub <redacted>
Fri, 12 Jul 2024 08:06:33 +0000 (11:06 +0300)
The <filename> token used by Refact doesn't serve
the same purpose as the <file_separator> from CodeGemma.

Signed-off-by: Jiri Podivin <redacted>
convert_hf_to_gguf.py

index ebb5ca376133b9eea2c7a163aa2afe4b57bcaaea..cf930be17a6e075405e4569e2807788e0ddac11b 100755 (executable)
@@ -1203,11 +1203,10 @@ class RefactModel(Model):
 
         # TODO: how to determine special FIM tokens automatically?
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
-                                          special_token_types = ['prefix', 'suffix', 'middle', 'fsep', 'eot'])
+                                          special_token_types = ['prefix', 'suffix', 'middle', 'eot'])
         special_vocab._set_special_token("prefix", 1)
         special_vocab._set_special_token("suffix", 3)
         special_vocab._set_special_token("middle", 2)
-        special_vocab._set_special_token("fsep",   4) # is this correct?
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def set_gguf_parameters(self):