mtmd: fix "v.patch_embd" quant and unsupported im2col ops on Metal for deepseek-ocr...

author Saba Fallah <redacted>

Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)

committer GitHub <redacted>

Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)
author Saba Fallah <redacted>
Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)
committer GitHub <redacted>
Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index 82d1004c65cc03f613dc7f0c5c532ef3a90026ae..bcf98cfae76f48a2035d9e9af7acc8cb65ac273c 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -7150,6 +7150,8 @@ class DeepseekOCRVisionModel(MmprojModel):
              return gguf.GGMLQuantizationType.F32
          if ".rel_pos_h" in name or '.rel_pos_w' in name:
              return gguf.GGMLQuantizationType.F32
+        if ".neck." in name or ".net_" in name:
+            return gguf.GGMLQuantizationType.F32
          return super().tensor_force_quant(name, new_name, bid, n_dims)
  
      def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp

index a1f87b3b356e65bd6eacfa3d444194ef3e9112ff..3c8b32be08487215f00979679a01b9412eba6fe8 100644 (file)
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -345,9 +345,12 @@ static bool tensor_allows_quantization(const llama_model_quantize_params * param
  
      // do not quantize specific multimodal tensors
      quantize &= name.find(".position_embd") == std::string::npos;
-    quantize &= name.find("sam.patch_embd") == std::string::npos;
      quantize &= name.find("sam.pos_embd")   == std::string::npos;
+    quantize &= name.find("sam.neck.")      == std::string::npos;
+    quantize &= name.find("sam.net_")       == std::string::npos;
      quantize &= name.find(".rel_pos")       == std::string::npos;
+    quantize &= name.find(".patch_embd")    == std::string::npos;
+    quantize &= name.find(".patch_merger")  == std::string::npos;
  
      return quantize;
  }
author	Saba Fallah <redacted>
	Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)
committer	GitHub <redacted>
	Thu, 26 Mar 2026 23:07:55 +0000 (00:07 +0100)
convert_hf_to_gguf.py		patch | blob | history
src/llama-quant.cpp		patch | blob | history