class GptOssModel(TextModel):
    model_arch = gguf.MODEL_ARCH.GPT_OSS

+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
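+        # Note (added explanation, not part of the original patch): MXFP4 tensors are
+        # intentionally left packed here; presumably they are written out in GGUF's
+        # native MXFP4 layout later in the conversion. The check below matches configs
+        # such as: "quantization_config": {"quant_method": "mxfp4", ...}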
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
    def transform_nibble_layout(self, tensor):
        assert tensor.dtype == torch.uint8
        assert tensor.shape[-1] == 16
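        # Note (added explanation): each 16-byte row packs 32 four-bit values
        # (two nibbles per byte), i.e. one MXFP4 block of quantized weights.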