From: Sigbjørn Skjæret
Date: Tue, 6 May 2025 09:12:06 +0000 (+0200)
Subject: convert : qwen2/3moe : set yarn metadata if present (#13331)
X-Git-Tag: upstream/0.0.5318~28
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=764b85627b46f43d7ea801867cd1b6abef484574;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : qwen2/3moe : set yarn metadata if present (#13331)

* set yarn metadata if present

* add comment about enabling YaRN

Co-authored-by: Xuan-Son Nguyen

---------

Co-authored-by: Xuan-Son Nguyen
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a47d7df6..de6d55cb 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2761,6 +2761,13 @@ class Qwen2MoeModel(TextModel):
         if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
             self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
             logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
+        # YaRN is not enabled by default
+        # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
 
     _experts: list[dict[str, Tensor]] | None = None
 