From: Xuan-Son Nguyen
Date: Tue, 18 Mar 2025 18:16:19 +0000 (+0100)
Subject: llama : support converting Mistral Small text-only (#12450)
X-Git-Tag: upstream/0.0.5028~111
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=29fff308c704c1c752cdb5153361e545e2bac09d;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : support converting Mistral Small text-only (#12450)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index d13d57c5..7a2ef4c7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1747,6 +1747,50 @@ class LlamaModel(Model):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("Mistral3ForConditionalGeneration")
+class Mistral3Model(LlamaModel):
+    """Text-only conversion of a Mistral3ForConditionalGeneration checkpoint.
+
+    The class uses the LLAMA model architecture: hyperparameters nested under
+    "text_config" are merged into the root level, and multimodal projector /
+    vision tower tensors are skipped.
+    """
+
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    def __init__(self, *args, **kwargs):
+        """Load hparams, merge "text_config" into the root level and pass them on.
+
+        Raises:
+            KeyError: if dir_model is given neither by keyword nor positionally.
+        """
+        # dir_model is normally passed by keyword; fall back to the first positional
+        # argument (assumed to be dir_model, as in the base constructor -- confirm)
+        # so that a positional call does not fail with a KeyError
+        if "dir_model" not in kwargs and args:
+            dir_model = args[0]
+        else:
+            dir_model = kwargs["dir_model"]
+        hparams = Model.load_hparams(dir_model)
+        # we need to merge the text_config into the root level of hparams
+        # (text_config values take precedence over root-level keys of the same name)
+        if "text_config" in hparams:
+            hparams = {**hparams, **hparams["text_config"]}
+        kwargs["hparams"] = hparams
+        super().__init__(*args, **kwargs)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        """Remove "language_model." from the tensor name and skip non-text tensors.
+
+        Returns an empty list for multimodal projector and vision tower tensors,
+        otherwise whatever LlamaModel.modify_tensors returns for the renamed tensor.
+        """
+        name = name.replace("language_model.", "")
+        if "multi_modal_projector" in name or "vision_tower" in name:
+            return []
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @Model.register("DeciLMForCausalLM")
 class DeciModel(Model):
     model_arch = gguf.MODEL_ARCH.DECI