merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
yield from super().modify_tensors(data_torch, merged_name, bid)
else:
- yield from super().modify_tensors(data_torch, name, bid)
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
if new_name.endswith("conv_stem.conv.bias") or new_name.endswith("layer_scale.gamma"):
data_torch = data_torch.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) # [1, C, 1, 1]
- yield from super().modify_tensors(data_torch, new_name, bid)
+ yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
@ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration")
# Continue with normal processing
name = name.replace("language_model.", "")
- yield from super().modify_tensors(data_torch, name, bid)
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
return
if "altup_unembed_projections" in name:
raise ValueError(f"Unknown name: {name}")
out = self._stack_matrices(self._altup_unembd)
if out is not None:
- yield from super().modify_tensors(out, "model.altup_unembed_projections.weight", bid)
+ yield from ModelBase.modify_tensors(self, out, "model.altup_unembed_projections.weight", bid)
return
else:
return
raise ValueError(f"Unknown name: {name}")
out = self._stack_matrices(self._altup_proj)
if out is not None:
- yield from super().modify_tensors(out, "model.altup_projections.weight", bid)
+ yield from ModelBase.modify_tensors(self, out, "model.altup_projections.weight", bid)
return
else:
return
ffn_dim = self.hparams["intermediate_size"]
assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
gate, up = data_torch.split(ffn_dim, dim=-2)
- yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid)
- yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid)
+ yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid)
+ yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid)
has_experts = bool(self.hparams.get('num_local_experts'))
assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
gate, up = data_torch.split(ffn_dim, dim=-2)
if has_experts:
- yield from super().modify_tensors(gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid)
- yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid)
+ yield from ModelBase.modify_tensors(self, gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid)
+ yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid)
return
- yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
- yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
+ yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
+ yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
return
if not has_experts and name.endswith("shared_mlp.output_linear.weight"):
- yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid)
+ yield from ModelBase.modify_tensors(self, data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid)
return
yield from super().modify_tensors(data_torch, name, bid)