convert : fix tensor naming conflict for llama 4 vision (#13836)

author Xuan-Son Nguyen <redacted>

Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)

committer GitHub <redacted>

Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)
author Xuan-Son Nguyen <redacted>
Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)
committer GitHub <redacted>
Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index a015ecee08328bbe56acff98462a2efbb6ac9cce..7f935d091a70b785dadfe6958f63687fd099fdf5 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2169,6 +2169,9 @@ class Llama4VisionModel(MmprojModel):
              # process vision tensors
              if "positional_embedding_vlm" in name and ".weight" not in name:
                  name += ".weight"
+            if "multi_modal_projector.linear_1" in name:
+                # despite the numeric suffix in its name, this is a single fully connected layer
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
              return [(self.map_tensor_name(name), data_torch)]
          return []
  
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py

index 000ffd00615b580d73419433e437fcade4863d67..48167dd648c0aa5941c3748d5f6cd54eccb3010f 100644 (file)
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -902,7 +902,6 @@ class TensorNameMap:
  
          MODEL_TENSOR.V_MMPROJ_FC: (
              "model.connector.modality_projection.proj", # SmolVLM
-            "multi_modal_projector.linear_1", # llama 4
          ),
  
          MODEL_TENSOR.V_MMPROJ_MLP: (
author	Xuan-Son Nguyen <redacted>
	Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)
committer	GitHub <redacted>
	Wed, 28 May 2025 08:05:54 +0000 (10:05 +0200)
convert_hf_to_gguf.py		patch | blob | history
gguf-py/gguf/tensor_mapping.py		patch | blob | history