From: compilade
Date: Sat, 25 Oct 2025 00:52:00 +0000 (-0400)
Subject: convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
X-Git-Tag: upstream/0.0.7011~176
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=5cca2542ac3f3f86831d32bce744d08fc2b353b0;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 3e3db999..859c1443 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16
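
Note (not part of the commit): the override returns early for MXFP4 checkpoints so the base class does not expand them to floats, presumably letting the GPT-OSS-specific handling elsewhere in the class (such as transform_nibble_layout) repack the quantized tensors directly. As a rough illustration of the work being skipped, the sketch below dequantizes one plausible MXFP4 layout by hand. Per the OCP Microscaling spec, a block is 32 FP4 (E2M1) values packed two per byte (consistent with the shape[-1] == 16 assertion above) plus one shared E8M0 power-of-two scale with bias 127. The names dequant_mxfp4, blocks, and scales are hypothetical, and the low-nibble-first ordering is an assumption; real checkpoints differ in nibble layout, which is exactly why this model class carries its own transform_nibble_layout.

    import torch

    # The 16 representable FP4 (E2M1) values, indexed by nibble value;
    # the top bit of the nibble is the sign, mirroring the magnitudes.
    FP4_VALUES = torch.tensor(
        [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
         -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0]
    )

    def dequant_mxfp4(blocks: torch.Tensor, scales: torch.Tensor) -> torch.Tensor:
        # blocks: uint8 [..., 16], two FP4 values per byte (assumed low nibble first)
        # scales: uint8 [...], one E8M0 exponent byte per 32-value block
        lo = blocks & 0x0F
        hi = blocks >> 4
        nibbles = torch.stack((lo, hi), dim=-1).reshape(*blocks.shape[:-1], 32)
        values = FP4_VALUES[nibbles.long()]
        # E8M0 encodes a power-of-two scale as a biased exponent (bias 127).
        scale = torch.exp2(scales.float() - 127)
        return values * scale.unsqueeze(-1)

Skipping this step at convert time avoids a float round-trip over every expert tensor; the quantized payload only needs its byte layout adjusted for GGUF.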