From: Piotr Wilkin (ilintar)
Date: Thu, 17 Jul 2025 23:17:16 +0000 (+0200)
Subject: convert : fix Ernie4.5 MoE without shared experts (#14746)
X-Git-Tag: upstream/0.0.6073~147
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=670e1360cd40f242ae76ba0966542fae6cb59392;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : fix Ernie4.5 MoE without shared experts (#14746)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3f35a310..d9185c80 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2910,11 +2910,12 @@ class Ernie4_5MoeModel(Ernie4_5Model):
         self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
         self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
         self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"])
-        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
         if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
-        if (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
-            self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
+        if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None:
+            self.gguf_writer.add_expert_shared_count(shared_expert_count)
+        if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
+            self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Modify correction bias name as in DeepseekV2
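
Editor's note: the sketch below is not part of the commit. It is a minimal, self-contained illustration of the patched control flow, using a hypothetical StubGGUFWriter and made-up hparams values for an Ernie4.5 MoE checkpoint without shared experts (moe_num_shared_experts == 0). Before the fix, the shared-expert feed-forward length was written whenever intermediate_size and num_key_value_heads were present, even for models with no shared experts; the fix records the shared-expert count and writes the shared FFN length only when that count is positive.

#!/usr/bin/env python3
# Standalone sketch of the patched logic; StubGGUFWriter and the hparams
# values are hypothetical, not llama.cpp code.

class StubGGUFWriter:
    """Prints each metadata call instead of writing a GGUF file."""
    def __getattr__(self, name):
        return lambda value: print(f"{name}({value})")

# Hypothetical hparams for an Ernie4.5 MoE variant without shared experts.
hparams = {
    "moe_num_shared_experts": 0,
    "moe_intermediate_size": 1536,
    "intermediate_size": 12288,
    "num_key_value_heads": 4,
}
gguf_writer = StubGGUFWriter()

# Same control flow as the patched set_gguf_parameters():
if (moe_intermediate_size := hparams.get("moe_intermediate_size")) is not None:
    gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
if (shared_expert_count := hparams.get("moe_num_shared_experts")) is not None:
    gguf_writer.add_expert_shared_count(shared_expert_count)
if shared_expert_count > 0 and (shared_expert_intermediate_size := hparams.get("intermediate_size")) is not None and (num_key_value_heads := hparams.get("num_key_value_heads")) is not None:
    # Not reached here: shared_expert_count == 0, so no shared-expert FFN
    # length is written (the pre-fix code wrote 12288 // 4 = 3072 anyway).
    gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)

Expected output for this config: add_expert_feed_forward_length(1536) and add_expert_shared_count(0), with no shared feed-forward length entry.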