git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
author: compilade <redacted>
Sat, 25 Oct 2025 00:52:00 +0000 (20:52 -0400)
committer: GitHub <redacted>
Sat, 25 Oct 2025 00:52:00 +0000 (20:52 -0400)
convert_hf_to_gguf.py

index 3e3db999c92edb3051a69633a5572f8e0a3573b2..859c1443f5f8494d42f83ef2d325aadc978421a9 100755 (executable)
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16