* mtmd: fix "v.patch_embd" quant and unsupported im2col ops on Metal for deepseek-ocr
* Update src/llama-quant.cpp
Co-authored-by: Sigbjørn Skjæret <redacted>
---------
Co-authored-by: Sigbjørn Skjæret <redacted>
return gguf.GGMLQuantizationType.F32
if ".rel_pos_h" in name or '.rel_pos_w' in name:
return gguf.GGMLQuantizationType.F32
+ if ".neck." in name or ".net_" in name:
+ return gguf.GGMLQuantizationType.F32
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
// do not quantize specific multimodal tensors
quantize &= name.find(".position_embd") == std::string::npos;
- quantize &= name.find("sam.patch_embd") == std::string::npos;
quantize &= name.find("sam.pos_embd") == std::string::npos;
+ quantize &= name.find("sam.neck.") == std::string::npos;
+ quantize &= name.find("sam.net_") == std::string::npos;
quantize &= name.find(".rel_pos") == std::string::npos;
+ quantize &= name.find(".patch_embd") == std::string::npos;
+ quantize &= name.find(".patch_merger") == std::string::npos;
return quantize;
}