From: compilade
Date: Sat, 25 Oct 2025 00:52:00 +0000 (-0400)
Subject: convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
X-Git-Tag: upstream/0.0.7011~176
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5cca2542ac3f3f86831d32bce744d08fc2b353b0;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999..859c1443 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16
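
Note (not part of the commit): the override returns early for MXFP4 checkpoints so the base class does not expand them to floats, presumably letting the GPT-OSS-specific handling elsewhere in the class (such as transform_nibble_layout) repack the quantized tensors directly. As a rough illustration of the work being skipped, the sketch below dequantizes one plausible MXFP4 layout by hand. Per the OCP Microscaling spec, a block is 32 FP4 (E2M1) values packed two per byte (consistent with the shape[-1] == 16 assertion above) plus one shared E8M0 power-of-two scale with bias 127. The names dequant_mxfp4, blocks, and scales are hypothetical, and the low-nibble-first ordering is an assumption; real checkpoints differ in nibble layout, which is exactly why this model class carries its own transform_nibble_layout.

    import torch

    # The 16 representable FP4 (E2M1) values, indexed by nibble value;
    # the top bit of the nibble is the sign, mirroring the magnitudes.
    FP4_VALUES = torch.tensor(
        [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
         -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0]
    )

    def dequant_mxfp4(blocks: torch.Tensor, scales: torch.Tensor) -> torch.Tensor:
        # blocks: uint8 [..., 16], two FP4 values per byte (assumed low nibble first)
        # scales: uint8 [...], one E8M0 exponent byte per 32-value block
        lo = blocks & 0x0F
        hi = blocks >> 4
        nibbles = torch.stack((lo, hi), dim=-1).reshape(*blocks.shape[:-1], 32)
        values = FP4_VALUES[nibbles.long()]
        # E8M0 encodes a power-of-two scale as a biased exponent (bias 127).
        scale = torch.exp2(scales.float() - 127)
        return values * scale.unsqueeze(-1)

Skipping this step at convert time avoids a float round-trip over every expert tensor; the quantized payload only needs its byte layout adjusted for GGUF.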