From: Sigbjørn Skjæret
Date: Tue, 10 Jun 2025 21:29:52 +0000 (+0200)
Subject: convert : fix duplicate key DeepSeek-R1 conversion error (#14103)
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=55f6b9fa6563f6ae49113f9abdc980c12348cc1c;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : fix duplicate key DeepSeek-R1 conversion error (#14103)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a208c42b..173a103b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -556,8 +556,11 @@ class TextModel(ModelBase):
             logger.info(f"gguf: experts used count = {n_experts_used}")
 
         if (head_dim := self.hparams.get("head_dim")) is not None:
-            self.gguf_writer.add_key_length(head_dim)
-            self.gguf_writer.add_value_length(head_dim)
+            # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class)
+            # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210
+            if self.hparams.get("model_type") != "deepseek_v3":
+                self.gguf_writer.add_key_length(head_dim)
+                self.gguf_writer.add_value_length(head_dim)
 
         self.gguf_writer.add_file_type(self.ftype)
         logger.info(f"gguf: file type = {self.ftype}")