def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
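    # MXFP4 checkpoints store each expert weight as two tensors, *_blocks (packed 4-bit
    # values) and *_scales (per-block scales); each (blocks, scales) pair is repacked
    # here into a single ggml MXFP4 weight tensor via repack_mxfp4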
    blocks0: Tensor = torch.zeros(1)
    blocks1: Tensor = torch.zeros(1)
-    found_mxfp4_tensors = False
    # we assume that tensors are loaded in the correct order
    for name, data_torch in self.get_tensors():
        if "mlp.experts.down_proj_blocks" in name:
            blocks0 = data_torch
        elif "mlp.experts.down_proj_scales" in name:
            new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
            self.repack_mxfp4(new_name, blocks0, data_torch)
-            found_mxfp4_tensors = True
        elif "mlp.experts.gate_up_proj_blocks" in name:
            blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
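            # gate and up are interleaved along dim 1 (even rows: gate, odd rows: up);
            # the matching gate_up_proj_scales tensor is split the same way below and
            # repacked together with blocks0/blocks1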
elif "mlp.experts.gate_up_proj_scales" in name:
new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
self.repack_mxfp4(new_name_gate, blocks0, scales0)
self.repack_mxfp4(new_name_up, blocks1, scales1)
- found_mxfp4_tensors = True
- if not found_mxfp4_tensors:
- raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using MXFP4 model.")
return []
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
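    # biases and any non-MXFP4 weights are renamed/split here; MXFP4 tensors already
    # repacked by generate_extra_tensors() are skipped by returning []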
if "down_proj" in name:
if name.endswith("_bias"):
name = name.replace("down_proj_bias", "down_proj.bias")
+ elif "_blocks" not in name and "_scales" not in name:
+ logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+ name = name.replace("down_proj", "down_proj.weight")
+ data_torch = data_torch.transpose(-1, -2)
else:
+ # otherwise, it should already be repacked to ggml MXFP4 format
return []
    # split the gate_up into gate and up
    if "gate_up_proj" in name:
        if name.endswith("_bias"):
            name_up = name.replace("gate_up_proj_bias", "up_proj.bias")
            name_gate = name.replace("gate_up_proj_bias", "gate_proj.bias")
            gate_proj_bias, up_proj_bias = data_torch[..., ::2], data_torch[..., 1::2]
            return [
                (self.map_tensor_name(name_gate), gate_proj_bias),
                (self.map_tensor_name(name_up), up_proj_bias)
            ]
+ elif "_blocks" not in name and "_scales" not in name:
+ logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+ name_up = name.replace("gate_up_proj", "up_proj.weight")
+ name_gate = name.replace("gate_up_proj", "gate_proj.weight")
+ data_torch = data_torch.transpose(-1, -2)
+ gate_proj_weight, up_proj_weight = data_torch[:, ::2, :], data_torch[:, 1::2, :]
+ return [
+ (self.map_tensor_name(name_gate), gate_proj_weight),
+ (self.map_tensor_name(name_up), up_proj_weight)
+ ]
else:
+ # otherwise, it should already be repacked to ggml MXFP4 format
return []
return [(self.map_tensor_name(name), data_torch)]