git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert : fix duplicate key DeepSeek-R1 conversion error (#14103)
author: Sigbjørn Skjæret <redacted>
Tue, 10 Jun 2025 21:29:52 +0000 (23:29 +0200)
committer: GitHub <redacted>
Tue, 10 Jun 2025 21:29:52 +0000 (23:29 +0200)
convert_hf_to_gguf.py

index a208c42ba9a8b9f385dc5801db7f5535d669accf..173a103badc6019658db45312d469bad6d4ad31c 100755 (executable)
@@ -556,8 +556,11 @@ class TextModel(ModelBase):
             logger.info(f"gguf: experts used count = {n_experts_used}")
 
         if (head_dim := self.hparams.get("head_dim")) is not None:
-            self.gguf_writer.add_key_length(head_dim)
-            self.gguf_writer.add_value_length(head_dim)
+            # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class)
+            # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210
+            if self.hparams.get("model_type") != "deepseek_v3":
+                self.gguf_writer.add_key_length(head_dim)
+                self.gguf_writer.add_value_length(head_dim)
 
         self.gguf_writer.add_file_type(self.ftype)
         logger.info(f"gguf: file type = {self.ftype}")