From: Robert Collins
Date: Sat, 7 Dec 2024 21:12:27 +0000 (-0500)
Subject: llama : add 128k yarn context for Qwen (#10698)
X-Git-Tag: upstream/0.0.4488~201
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=62e84d984875372f4b0fb89a67658e012ff0cc9f;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : add 128k yarn context for Qwen (#10698)

* add 128k yarn context for Qwen

* added property for model tensors

* removing useless line
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a4eece93..c63d929c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1992,6 +1992,14 @@ class Qwen2Model(Model):
         except FileNotFoundError:
             self._set_vocab_gpt2()
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 66247b80..4c8710b3 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -761,6 +761,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,