llama : support converting Mistral Small text-only (#12450)

author Xuan-Son Nguyen <redacted>

Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)

committer GitHub <redacted>

Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)
author Xuan-Son Nguyen <redacted>
Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)
committer GitHub <redacted>
Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index d13d57c54154a55df3b8a50c1595f4e551a9fb7f..7a2ef4c7e38ce88db69fd878a0f7185d129afd46 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1747,6 +1747,25 @@ class LlamaModel(Model):
                  raise ValueError(f"Unprocessed experts: {experts}")
  
  
+@Model.register("Mistral3ForConditionalGeneration")
+class Mistral3Model(LlamaModel):
+    """Converter for the text part of Mistral3 checkpoints, emitted as LLAMA arch.
+
+    Tensors whose names mention the vision tower or the multi-modal projector
+    are dropped; every other tensor is passed to ``LlamaModel`` after the
+    ``language_model.`` prefix has been removed from its name.
+    """
+
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    def __init__(self, *args, **kwargs):
+        """Lift the ``text_config`` entries to the root of hparams, then init the base.
+
+        NOTE: ``dir_model`` is read from ``kwargs``, so it has to be passed by
+        keyword; a positional-only call raises ``KeyError`` here.
+        """
+        loaded = Model.load_hparams(kwargs["dir_model"])
+        if "text_config" in loaded:
+            # Entries of text_config take precedence over root entries of the
+            # same name; the merged dict is handed on via the "hparams" kwarg.
+            kwargs["hparams"] = {**loaded, **loaded["text_config"]}
+        super().__init__(*args, **kwargs)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        """Skip vision/projector tensors and delegate the rest to ``LlamaModel``.
+
+        Returns an empty list for skipped tensors, otherwise whatever the
+        parent ``modify_tensors`` returns for the prefix-stripped name.
+        """
+        stripped = name.replace("language_model.", "")
+        # Substrings that mark tensors this converter leaves out of the output.
+        skipped_parts = ("multi_modal_projector", "vision_tower")
+        if any(part in stripped for part in skipped_parts):
+            return []
+        return super().modify_tensors(data_torch, stripped, bid)
+
+
  @Model.register("DeciLMForCausalLM")
  class DeciModel(Model):
      model_arch = gguf.MODEL_ARCH.DECI
author	Xuan-Son Nguyen <redacted>
	Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)
committer	GitHub <redacted>
	Tue, 18 Mar 2025 18:16:19 +0000 (19:16 +0100)