raise NotImplementedError("set_gguf_parameters() must be implemented in subclasses")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
+ del bid # unused
return [(self.map_tensor_name(name), data_torch)]
def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
- tensors: list[tuple[str, Tensor]] = []
-
if re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.weight", name):
# Map bloom-style qkv_linear to gpt-style qkv_linear
# bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
)
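+ # Illustrative layout note (assuming head_dim = n_embed // n_head): the
+ # bloom-style fused weight stores per-head blocks [q_i, k_i, v_i], i.e. a view
+ # of shape (n_head, 3, head_dim, n_embed), while the gpt-style layout expects
+ # all q rows, then all k rows, then all v rows, which the reshape above produces.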
logger.info("re-format attention.linear_qkv.bias")
- tensors.append((self.map_tensor_name(name), data_torch))
-
- return tensors
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("BloomForCausalLM", "BloomModel")
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
name = re.sub(r'transformer\.', '', name)
- tensors: list[tuple[str, Tensor]] = []
-
if re.match(r"h\.\d+\.self_attention\.query_key_value\.weight", name):
# Map bloom-style qkv_linear to gpt-style qkv_linear
# bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252 # noqa
)
logger.info("re-format attention.linear_qkv.bias")
- tensors.append((self.map_tensor_name(name), data_torch))
-
- return tensors
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("MPTForCausalLM")
self.gguf_writer.add_max_alibi_bias(0.0)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if "scales" in name:
new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias", ".scales"))
new_name = new_name.replace("scales", "act.scales")
else:
new_name = self.map_tensor_name(name, try_suffixes=(".weight", ".bias"))
- return [(new_name, data_torch)]
+ yield (new_name, data_torch)
@ModelBase.register("OrionForCausalLM")
head_count = self.hparams["num_attention_heads"]
head_count_kv = self.hparams.get("num_key_value_heads", head_count)
- tensors: list[tuple[str, Tensor]] = []
-
if bid is not None and name == f"model.layers.{bid}.self_attn.W_pack.weight":
logger.info(f"Unpacking and permuting layer {bid}")
- tensors = [
+ yield from [
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid),
self._reverse_hf_permute_part(data_torch, 0, head_count, head_count)),
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid),
self._reverse_hf_permute_part(data_torch, 1, head_count, head_count_kv)),
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid),
self._reverse_hf_part(data_torch, 2)),
]
else:
- tensors = [(self.map_tensor_name(name), data_torch)]
-
- return tensors
+ yield from super().modify_tensors(data_torch, name, bid)
def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
if n_kv_head is not None and n_head != n_kv_head:
self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
head_count = self.hparams["num_attention_heads"]
head_count_kv = self.hparams.get("num_key_value_heads", head_count)
if name.endswith("k_proj.weight"):
data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
if n_kv_head is not None and n_head != n_kv_head:
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# QKV tensor transform
# The original query_key_value tensor contains n_head_kv "kv groups",
# each consisting of n_head/n_head_kv query weights followed by one key
v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
data_torch = torch.cat((q, k, v)).reshape_as(data_torch)
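+ # Worked example of the kv-group layout (illustrative): with n_head = 8 and
+ # n_head_kv = 2, qkv is viewed as (n_head_kv, n_head // n_head_kv + 2, head_dim, ...),
+ # so each group holds 4 query heads plus one shared key and one shared value head;
+ # q = qkv[:, :-2], k = qkv[:, [-2]], v = qkv[:, [-1]] then separate the blocks.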
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("GPTBigCodeForCausalLM")
n_head_kv = 1
head_dim = self.hparams["n_embd"] // n_head
- tensors: list[tuple[str, Tensor]] = []
-
if bid is not None:
if name == f"transformer.h.{bid}.attn.kv.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), data_torch[:n_head_kv * head_dim]))
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), data_torch[n_head_kv * head_dim:]))
- elif name == f"transformer.h.{bid}.attn.q.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), data_torch))
- elif name == f"transformer.h.{bid}.mlp.gate_up_proj.weight":
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), data_torch[:ff_dim]))
- tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), data_torch[ff_dim:]))
-
- if len(tensors) == 0:
- tensors.append((self.map_tensor_name(name), data_torch))
+ yield from super().modify_tensors(data_torch[:n_head_kv * head_dim], self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), bid)
+ yield from super().modify_tensors(data_torch[n_head_kv * head_dim:], self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), bid)
+ return
+ if name == f"transformer.h.{bid}.attn.q.weight":
+ yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), bid)
+ return
+ if name == f"transformer.h.{bid}.mlp.gate_up_proj.weight":
+ yield from super().modify_tensors(data_torch[:ff_dim], self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
+ yield from super().modify_tensors(data_torch[ff_dim:], self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
+ return
- return tensors
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
if len(self._q_norms[bid]) >= n_head:
- return self._stack_qk_norm(bid, n_head, self._q_norms[bid], "q_layernorm")
+ yield from self._stack_qk_norm(bid, n_head, self._q_norms[bid], "q_layernorm")
+ return
else:
- return []
+ return
if name.find("k_layernorm.norms") != -1:
assert bid is not None
if len(self._k_norms[bid]) >= n_kv_head:
- return self._stack_qk_norm(bid, n_kv_head, self._k_norms[bid], "k_layernorm")
+ yield from self._stack_qk_norm(bid, n_kv_head, self._k_norms[bid], "k_layernorm")
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.self_attn.{layer_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, merged_name, bid)
def prepare_tensors(self):
super().prepare_tensors()
)
if is_multimodal_tensor:
- return [] # skip vision tensors
+ return # skip vision tensors
elif self.hf_arch == "LlamaModel":
name = "model." + name
elif name.startswith("model.text_model"):
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
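+ # (illustrative: n_experts 2D matrices of shape (n_ff, n_embd) are stacked into
+ #  a single (n_experts, n_ff, n_embd) tensor so each expert set becomes one gguf entry)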
for wid in ["w1", "w2", "w3"]:
datas: list[Tensor] = []
merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
if rope_params := self.rope_parameters.get("full_attention", self.rope_parameters):
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["gate_proj", "up_proj", "down_proj"]:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
+ yield from super().modify_tensors(data_torch, merged_name, bid)
- return tensors
+ return
else:
- return []
+ return
if name.endswith(".expert_bias"):
name = name.replace(".expert_bias", ".expert_bias.bias")
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register(
self.gguf_writer.add_vision_spatial_merge_size(self.global_config["spatial_merge_size"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
n_head = (
self.hparams["num_attention_heads"] if not self.is_mistral_format else self.find_vparam(["num_attention_heads"])
)
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
if name.endswith(("k_proj.weight", "k_proj.bias")) and not self.is_mistral_format:
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
embed_key = "embed_tokens.weight" if not self.is_mistral_format else "tok_embeddings.weight"
if self.img_break_tok_id > 0 and embed_key in name:
# for pixtral model, we need to extract the [IMG_BREAK] token embedding
img_break_embd = data_torch[self.img_break_tok_id]
name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK]
- return [(self.map_tensor_name(name), img_break_embd)]
+ yield from super().modify_tensors(img_break_embd, name, bid)
- return [] # skip other tensors
+ return # skip other tensors
@ModelBase.register("Idefics3ForConditionalGeneration", "SmolVLMForConditionalGeneration")
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
is_vision_tensor = "vision_tower" in name or "vision_model" in name or "model.connector" in name
if is_vision_tensor:
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
- return [] # skip other tensors
+ return # skip other tensors
@ModelBase.register(
name_gate = name.replace("gate_up_proj", "gate_proj.weight")
dim_half = data_torch.shape[-1] // 2
gate_proj_weight, up_proj_weight = data_torch.transpose(-1, -2).split(dim_half, dim=-2)
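+ # (shape note, illustrative: gate_up_proj is stored transposed here, roughly
+ #  (n_embd, 2 * n_ff), so transposing first moves the fused axis to dim -2 and a
+ #  single split yields the gate half followed by the up half)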
- return [
- (self.map_tensor_name(name_gate), gate_proj_weight),
- (self.map_tensor_name(name_up), up_proj_weight)
- ]
+ yield from super().modify_tensors(gate_proj_weight, name_gate, bid)
+ yield from super().modify_tensors(up_proj_weight, name_up, bid)
+ return
if name.endswith("down_proj"):
name += ".weight"
data_torch = data_torch.transpose(-1, -2)
if "multi_modal_projector" in name or "vision_model" in name:
- return []
- return super().modify_tensors(data_torch, name, bid)
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Llama4ForConditionalGeneration")
self.gguf_writer.add_vision_use_gelu(True)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
if "multi_modal_projector" in name or "vision_model" in name:
# process vision tensors
if "positional_embedding_vlm" in name and ".weight" not in name:
name += ".weight"
if "multi_modal_projector.linear_1" in name:
# despite the name with number postfix, this is a single fully connected layer
- return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC] + '.weight', data_torch)]
- return [(self.map_tensor_name(name), data_torch)]
- return []
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC] + '.weight', data_torch)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register(
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("language_model.", "")
if "multi_modal_projector" in name or "vision_tower" in name:
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("DeciLMForCausalLM")
data_torch = DeciModel.permute(data_torch, n_head, n_head)
if name.endswith(("k_proj.weight", "k_proj.bias")):
data_torch = DeciModel.permute(data_torch, n_head, n_kv_head)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
if rope_params := self.rope_parameters.get("full_attention", self.rope_parameters):
# transform weight into 1/0/-1 (in fp32)
data_torch = self.weight_quant(data_torch)
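+ # Rough sketch of the ternarisation, assuming weight_quant follows the usual
+ # BitNet recipe: scale = w.abs().mean(); w = (w / scale).round().clamp(-1, 1) * scale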
- yield (new_name, data_torch)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("GrokForCausalLM", "Grok1ForCausalLM")
_cur_expert = ""
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- tensors: list[tuple[str, Tensor]] = []
+ deferred: list[tuple[Tensor, str, int | None]] = []
is_expert = ".moe." in name or ".block_sparse_moe.experts." in name
if not is_expert:
- tensors.append((self.map_tensor_name(name), data_torch))
+ deferred.append((data_torch, name, bid))
# process the experts separately
if is_expert or self._cur_expert:
if name in self._experts[bid]:
self._cur_expert = name
self._experts[bid][name].append(data_torch)
- return []
+ return
elif is_expert:
self._cur_expert = name
self._experts[bid][name] = [data_torch]
- return []
+ return
else:
self._cur_expert = ""
merged_name = f"transformer.decoder_layer.{bid}.moe.{wid[0]}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- yield (new_name, data_torch)
+ yield from super().modify_tensors(data_torch, merged_name, bid)
- yield from tensors
+ for t in deferred:
+ yield from super().modify_tensors(*t)
@ModelBase.register("DbrxForCausalLM")
logger.info(f"gguf: file type = {self.ftype}")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
n_expert = self.hparams["ffn_config"]["moe_num_experts"]
n_ff = self.hparams["ffn_config"]["ffn_hidden_size"]
n_embd = self.hparams["d_model"]
# https://huggingface.co/databricks/dbrx-instruct/blob/main/model.safetensors.index.json#L15
new_name = self.map_tensor_name(name if not experts else name + ".weight", try_suffixes=(".weight",))
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, new_name, bid)
def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
del name, new_name, bid # unused
self._set_vocab_sentencepiece()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")
if name.endswith(("k_proj.weight")):
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("MiniCPM3ForCausalLM")
or name.startswith("vision_model") or name.startswith("audio_tower") \
or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector"):
# skip vision and audio tensors
- return []
+ return
yield from super().modify_tensors(data_torch, name, bid)
total_k_dim = num_kv_heads * head_dim
total_v_dim = num_kv_heads * head_dim
q_proj_weight, k_proj_weight, v_proj_weight = data_torch.split([total_q_dim, total_k_dim, total_v_dim], dim=0)
- return [
- (self.map_tensor_name(name_q), q_proj_weight),
- (self.map_tensor_name(name_k), k_proj_weight),
- (self.map_tensor_name(name_v), v_proj_weight)
- ]
+ yield from super().modify_tensors(q_proj_weight, name_q, bid)
+ yield from super().modify_tensors(k_proj_weight, name_k, bid)
+ yield from super().modify_tensors(v_proj_weight, name_v, bid)
# split the up_gate_proj into gate and up
# up_gate_proj shape: [2 * intermediate_size, hidden_size]
- if "up_gate_proj" in name:
+ elif "up_gate_proj" in name:
name_up = name.replace("up_gate_proj.weight", "up_proj.weight")
name_gate = name.replace("up_gate_proj.weight", "gate_proj.weight")
dim_half = data_torch.shape[0] // 2
gate_proj_weight, up_proj_weight = data_torch.split(dim_half, dim=0)
- return [
- (self.map_tensor_name(name_gate), gate_proj_weight),
- (self.map_tensor_name(name_up), up_proj_weight)
- ]
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(gate_proj_weight, name_gate, bid)
+ yield from super().modify_tensors(up_proj_weight, name_up, bid)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Ernie4_5_MoeForCausalLM")
# skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2)
match = re.match(r"model.mtp_block.(\d+)", name)
if match:
- return []
+ return
# skip all other MTP tensors for now
match = re.match(r"model.mtp_emb_norm.(\d+)", name)
if match:
- return []
+ return
match = re.match(r"model.mtp_hidden_norm.(\d+)", name)
if match:
- return []
+ return
match = re.match(r"model.mtp_linear_proj.(\d+)", name)
if match:
- return []
+ return
# process the experts separately
if name.find("mlp.experts") != -1:
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["gate_proj", "up_proj", "down_proj"]:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
-
- return tensors
- else:
- return []
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
self._set_vocab_gpt2()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
if name.startswith("thinker."):
name = name.replace("thinker.", "")
if name.startswith("visual") or name.startswith("audio") or \
name.startswith("talker") or name.startswith("token2wav"):
# skip multimodal tensors
- return []
- return [(self.map_tensor_name(name), data_torch)]
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
if name.startswith("visual."):
# process visual tensors
# split QKV tensors if needed
wq = data_torch[:c]
wk = data_torch[c: c * 2]
wv = data_torch[c * 2:]
- return [
- (self.map_tensor_name(name.replace("qkv", "q")), wq),
- (self.map_tensor_name(name.replace("qkv", "k")), wk),
- (self.map_tensor_name(name.replace("qkv", "v")), wv),
- ]
+ yield from super().modify_tensors(wq, name.replace("qkv", "q"), bid)
+ yield from super().modify_tensors(wk, name.replace("qkv", "k"), bid)
+ yield from super().modify_tensors(wv, name.replace("qkv", "v"), bid)
elif 'patch_embed.proj.weight' in name:
# split Conv3D into Conv2Ds
c1, c2, kt, kh, kw = data_torch.shape
del c1, c2, kh, kw # unused
assert kt == 2, "Current implementation only supports temporal_patch_size of 2"
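+ # Why the split is lossless (illustrative): a Conv3D kernel of shape
+ # (out_c, in_c, kt=2, kh, kw) applied to two stacked frames equals the sum of
+ # two Conv2D passes, one per temporal slice, so slicing at t=0 and t=1 below
+ # preserves the original convolution.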
- return [
- (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight" , data_torch[:, :, 0, ...]),
- (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...]),
- ]
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight" , data_torch[:, :, 0, ...])
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...])
else:
- return [(self.map_tensor_name(name), data_torch)]
- return [] # skip other tensors
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen2_5OmniModel")
if "audio_bos_eos_token" in name:
# this tensor is left unused in transformers code
# https://github.com/huggingface/transformers/blob/6e3063422c4b1c014aa60c32b9254fd2902f0f28/src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py#L1809
- return []
- return [(self.map_tensor_name(name), data_torch)]
-
- return super().modify_tensors(data_torch, name, bid)
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("InternVisionModel")
return name
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
vision_prefix = ['vision_model', 'mlp', 'model.vision_tower', 'model.multi_modal_projector']
# deal with intern-s1 special case
name = self._mapping_interns1_name(name)
wq = data_torch[:c]
wk = data_torch[c: c * 2]
wv = data_torch[c * 2:]
- return [
- (self.map_tensor_name(name.replace("attn.qkv", "self_attn.q_proj")), wq),
- (self.map_tensor_name(name.replace("attn.qkv", "self_attn.k_proj")), wk),
- (self.map_tensor_name(name.replace("attn.qkv", "self_attn.v_proj")), wv),
- ]
- return [(self.map_tensor_name(name), data_torch)]
- return [] # skip other tensors
+ yield from super().modify_tensors(wq, name.replace("attn.qkv", "self_attn.q_proj"), bid)
+ yield from super().modify_tensors(wk, name.replace("attn.qkv", "self_attn.k_proj"), bid)
+ yield from super().modify_tensors(wv, name.replace("attn.qkv", "self_attn.v_proj"), bid)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("WavTokenizerDec")
model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if \
name.endswith("codebook.cluster_size") or \
name.endswith("codebook.embed_avg") or \
name.endswith("codebook.inited"):
logger.debug(f"Skipping {name!r}")
- return []
+ return
logger.info(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def set_vocab(self):
self._set_vocab_none()
# Need PyTorch: (128, 2048, 768) [reversed of GGML]
# So: permute(0, 2, 1): (128, 768, 2048) -> (128, 2048, 768)
permuted = data_torch.permute(0, 2, 1).contiguous()
- return [(self.map_tensor_name(mapped), permuted)]
+ yield from super().modify_tensors(permuted, mapped, bid)
+ return
if name.endswith("mlp.experts.gate_up_proj") or name.endswith("mlp.experts.gate_up_proj.weight"):
if data_torch.ndim < 3 or data_torch.shape[-1] % 2 != 0:
mapped_up = f"{base}.up_proj.weight"
perm_gate = gate.permute(0, 2, 1).contiguous()
perm_up = up.permute(0, 2, 1).contiguous()
- return [
- (self.map_tensor_name(mapped_gate), perm_gate),
- (self.map_tensor_name(mapped_up), perm_up),
- ]
+ yield from super().modify_tensors(perm_gate, mapped_gate, bid)
+ yield from super().modify_tensors(perm_up, mapped_up, bid)
+ return
if name.startswith("mlp") or name.startswith("vision_model") or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector") or name.startswith("model.visual"):
# skip visual tensors
- return []
+ return
if name.find("experts") != -1:
n_experts = self.hparams["num_experts"]
assert bid is not None
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if "model.vision_" in name:
# skip multimodal tensors
- return []
+ return
if self.is_rerank:
is_tied_head = self.is_tied_embeddings and "embed_tokens" in name
gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.CLS_OUT] + ".weight",
self._get_cls_out_tensor(data_torch),
)
+ yield cls_out_head
if is_tied_head:
- embed = (self.map_tensor_name(name), data_torch)
- return [cls_out_head, embed]
- if is_real_head:
- return [cls_out_head]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen3MoeForCausalLM")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if name.startswith("mtp"):
- return [] # ignore MTP layers for now
+ return # ignore MTP layers for now
if name.endswith(".A_log"):
data_torch = -torch.exp(data_torch)
elif name.endswith(".dt_bias"):
assert self.hparams_vision is not None
# Skip text model tensors - they go in the text model file
if name.startswith("model.language_model.") or name.startswith("lm_head."):
- return []
+ return
if name.startswith("model.visual."):
name = name.replace("model.visual.", "visual.", 1)
raise ValueError(f"Unexpected deepstack tensor: {name}")
new_name = self.format_tensor_name(tensor_type, idx, suffix=f".{suffix}")
- return [(new_name, data_torch)]
+ yield (new_name, data_torch)
+ return
if name.startswith("visual.merger."):
suffix = name.split(".", 2)[2]
new_name = self.format_tensor_name(gguf.MODEL_TENSOR.V_POST_NORM, suffix=f".{suffix.split('.', 1)[1]}")
else:
raise ValueError(f"Unexpected merger tensor: {name}")
- return [(new_name, data_torch)]
+ yield (new_name, data_torch)
+ return
if name == "visual.patch_embed.proj.weight":
# split Conv3D into Conv2Ds along temporal dimension
del c1, c2
if kt != 2:
raise ValueError("Current implementation only supports temporal_patch_size of 2")
- return [
- (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight", data_torch[:, :, 0, ...]),
- (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...]),
- ]
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight", data_torch[:, :, 0, ...])
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...])
+ return
if name == "visual.patch_embed.proj.bias":
# Include the bias - it's used by the C++ code
- return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".bias", data_torch)]
+ yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".bias", data_torch)
+ return
if name.startswith("visual."):
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
# Fall back to parent class for other tensors
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Glm4vForConditionalGeneration", "Glm4vMoeForConditionalGeneration")
if name.startswith("model.visual."):
name = name.replace("model.visual.", "visual.")
if name.startswith("visual.merger."):
- return [(self.map_tensor_name(name), data_torch)]
- return super().modify_tensors(data_torch, name, bid)
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen3VLForConditionalGeneration")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# Skip vision tensors - they go in the mmproj file
if name.startswith("model.visual."):
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen3VLMoeForConditionalGeneration")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# Skip vision tensors - they go in the mmproj file
if name.startswith("model.visual."):
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("GPT2LMHeadModel")
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- tensors: list[tuple[str, Tensor]] = []
-
# we don't need these
if name.endswith((".attn.bias", ".attn.masked_bias")):
- return tensors
+ return
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")):
data_torch = data_torch.transpose(1, 0)
new_name = self.map_tensor_name(name)
- tensors.append((new_name, data_torch))
-
- return tensors
+ yield from super().modify_tensors(data_torch, new_name, bid)
@ModelBase.register("PhiForCausalLM")
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["w1", "w2", "w3"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
return data_torch
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
new_name = self.map_tensor_name(name)
# shuffle for broadcasting of gqa in ggml_mul_mat
elif new_name.endswith("attn_output.weight"):
data_torch = self.shuffle_attn_output_weight(data_torch)
- return [(new_name, data_torch)]
+ yield (new_name, data_torch)
@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if name.endswith(".A_log"):
data_torch = -torch.exp(data_torch)
elif name.endswith(".dt_bias"):
elif name.endswith(".norm.weight"):
data_torch += 1.0
- new_name = self.map_tensor_name(name)
-
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Plamo3ForCausalLM", "PLaMo3ForCausalLM")
elif name.endswith(".norm.weight"):
data_torch = data_torch + 1.0
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("CodeShellForCausalLM")
name = name.replace("language_model.", "") # InternVL
if name.startswith("mlp") or name.startswith("vision_model"):
# skip visual tensors
- return []
+ return
if bid is not None and f"model.layers.{bid}.attention.wqkv" in name:
qkv = data_torch
k = LlamaModel.permute(k.reshape((-1, k.shape[-1])), num_heads, num_kv_heads)
v = v.reshape((-1, v.shape[-1]))
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), q),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), k),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v),
- ]
+ yield from super().modify_tensors(q, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), bid)
+ yield from super().modify_tensors(k, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), bid)
+ yield from super().modify_tensors(v, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), bid)
else:
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("InternLM3ForCausalLM")
name = name.replace("language_model.", "") # InternVL
if name.startswith("mlp") or name.startswith("vision_model"):
# skip visual tensors
- return []
+ return
if name.endswith(("q_proj.weight", "q_proj.bias")):
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
if name.endswith(("k_proj.weight", "k_proj.bias")):
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel", "BertForSequenceClassification")
special_vocab.add_to_gguf(self.gguf_writer)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if name.startswith("bert."):
name = name[5:]
# we are only using BERT for embeddings so we don't need the pooling layer
if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
- return [] # we don't need these
+ return # we don't need these
if name.startswith("cls.predictions"):
- return []
+ return
if name.startswith("cls.seq_relationship"):
- return []
+ return
if self.cls_out_labels:
# For BertForSequenceClassification (direct projection layer)
if name == "classifier.bias":
name = "classifier.out_proj.bias"
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def _xlmroberta_tokenizer_init(self) -> None:
# we need the pad_token_id to know how to chop down position_embd matrix
# These layers act as MLM head, so we don't need them
if name.startswith("vocab_"):
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
if self._position_offset is not None:
data_torch = data_torch[self._position_offset:,:]
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("NomicBertModel")
def modify_tensors(self, data_torch: torch.Tensor, name: str, bid: int | None) -> Iterable[tuple[str, torch.Tensor]]:
- # If the tensor is an experts bias tensor, skip it by returning an empty list.
+ # If the tensor is an experts bias tensor, skip it.
if "mlp.experts.bias" in name:
- return [] # Explicitly return an empty list.
+ return
if "mlp.experts.mlp.w1" in name:
data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
data_torch = data_torch.transpose(1, 2)
name += ".weight"
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def set_gguf_parameters(self):
super().set_gguf_parameters()
def modify_tensors(self, data_torch, name, bid):
if name.startswith("decoder."):
- return []
+ return
if name.startswith("model."):
name = name[6:]
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
if name.endswith(".0.lora_A") or name.endswith(".0.lora_B"):
if name.startswith("pooler.dense"):
- return []
+ return
num_loras = data_torch.size(0)
assert num_loras == len(self._lora_names)
new_name = new_name[:-1] + ("a" if new_name[-1:] == "b" else "b")
lora_writer.add_tensor(new_name, data.float().numpy(), raw_dtype=gguf.GGMLQuantizationType.F32)
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
def set_gguf_parameters(self):
super().set_gguf_parameters()
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# lm_head is not used in llama.cpp, while autoawq will include this tensor in model
# To prevent errors, skip loading lm_head.weight.
if name == "lm_head.weight":
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
- return []
+ return
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
if name.endswith("norm.weight"):
data_torch = data_torch + 1
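+ # Why the +1: per the reference above, Gemma's RMSNorm multiplies by
+ # (1 + weight), storing the weight as an offset around zero, while ggml's
+ # RMSNorm multiplies by the stored weight directly, so the offset is folded in
+ # once here.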
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Gemma2ForCausalLM")
self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# lm_head is not used in llama.cpp, while autoawq will include this tensor in model
# To prevent errors, skip loading lm_head.weight.
if name == "lm_head.weight":
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
- return []
+ return
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
if name.endswith("norm.weight"):
data_torch = data_torch + 1
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration")
self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4))
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if "language_model." in name:
name = name.replace("language_model.", "")
elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
- return [] # skip vision tensors
+ return # skip vision tensors
# remove OOV (out-of-vocabulary) rows in token_embd
if "embed_tokens.weight" in name:
if name.endswith("norm.weight"):
data_torch = data_torch + self.norm_shift
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Gemma3TextModel")
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if "vision_model.head." in name:
- return [] # skip redundant tensors for tinygemma3
+ return # skip redundant tensors for tinygemma3
if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
logger.info(f"Correcting norm value for '{name}'")
data_torch = data_torch + 1
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
- return [] # skip other tensors
+ return # skip other tensors
class ConformerAudioModel(MmprojModel):
self._batch_norm_tensors[bid][name] = data_torch
if len(self._batch_norm_tensors[bid]) < 5:
- return []
+ return
weight = self._batch_norm_tensors[bid][f"conformer.layers.{bid}.conv.batch_norm.weight"]
bias = self._batch_norm_tensors[bid][f"conformer.layers.{bid}.conv.batch_norm.bias"]
a = weight / torch.sqrt(running_var + eps)
b = bias - running_mean * a
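+ # Folding BatchNorm into an affine pair: y = (x - mean) / sqrt(var + eps) * w + b
+ # rearranges to y = a * x + b2 with a = w / sqrt(var + eps) and b2 = b - mean * a,
+ # so only a per-channel scale and shift need to be stored.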
- return [
- (self.map_tensor_name(f"conformer.layers.{bid}.conv.batch_norm.weight"), a),
- (self.map_tensor_name(f"conformer.layers.{bid}.conv.batch_norm.bias"), b),
- ]
+ yield from super().modify_tensors(a, f"conformer.layers.{bid}.conv.batch_norm.weight", bid)
+ yield from super().modify_tensors(b, f"conformer.layers.{bid}.conv.batch_norm.bias", bid)
+ return
# reshape conv weights
if name.startswith("conformer.pre_encode.conv.") and name.endswith(".bias"):
assert data_torch.shape[2] == 1
data_torch = data_torch.reshape(data_torch.shape[0], data_torch.shape[1])
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Gemma3nForConditionalGeneration")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if (ConformerAudioModel.is_audio_tensor(name)):
name = name.replace("model.audio_tower.conformer.", "conformer.layers.")
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
# Gemma3n uses
# - model.embed_vision.* for projection layers
# - model.vision_tower.* for vision encoder
# Skip non-vision tensors
if not (name.startswith("model.embed_vision.") or name.startswith("model.vision_tower.")):
- return []
+ return
if name.startswith("model.vision_tower.timm_model.blocks."):
# Double-indexed block tensors through custom logic
if new_name.endswith("conv_stem.conv.bias") or new_name.endswith("layer_scale.gamma"):
data_torch = data_torch.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) # [1, C, 1, 1]
- return [(new_name, data_torch)]
+ yield (new_name, data_torch)
@ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration")
# TODO: implement self.prediction_coefs.weight.clamp_(...)
if "language_model." not in name:
- return [] # skip non-language model tensors
+ return # skip non-language model tensors
# Pad token embeddings for vision/audio special tokens (262144-262399)
if "embed_tokens.weight" in name or "embed_tokens_per_layer" in name:
# Continue with normal processing
name = name.replace("language_model.", "")
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
if "altup_unembed_projections" in name:
data_torch = data_torch.to(device="cpu")
raise ValueError(f"Unknown name: {name}")
out = self._stack_matrices(self._altup_unembd)
if out is not None:
- return [(self.map_tensor_name("model.altup_unembed_projections.weight"), out)]
+ yield from super().modify_tensors(out, "model.altup_unembed_projections.weight", bid)
+ return
else:
- return []
+ return
if "altup_projections" in name:
data_torch = data_torch.to(device="cpu")
raise ValueError(f"Unknown name: {name}")
out = self._stack_matrices(self._altup_proj)
if out is not None:
- return [(self.map_tensor_name("model.altup_projections.weight"), out)]
+ yield from super().modify_tensors(out, "model.altup_projections.weight", bid)
+ return
else:
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Starcoder2ForCausalLM")
if self._tok_embd is not None and new_name == output_name:
if torch.equal(self._tok_embd, data_torch):
logger.debug(f"{output_name} is equivalent to {tok_embd_name}, omitting")
- return []
+ return
elif new_name == tok_embd_name:
self._tok_embd = data_torch
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, new_name, bid)
@ModelBase.register("Mamba2ForCausalLM")
# Same as super class, but permuting q_proj, k_proj
# Copied from: LlamaModel
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")
if name.endswith("k_proj.weight"):
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("SeedOssForCausalLM")
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
# Copied from: Qwen2MoeModel
def prepare_tensors(self):
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for wid in ["w1", "w2", "w3"]:
datas: list[Tensor] = []
merged_name = f"layers.{bid}.feed_forward.experts.{wid}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# skip vision tensors and remove "language_model." for Kimi-VL
if "vision_tower" in name or "multi_modal_projector" in name:
- return []
+ return
if name.startswith("siglip2.") or name.startswith("merger."):
- return []
+ return
if name.startswith("language_model."):
name = name.replace("language_model.", "")
if self.hparams.get("tie_word_embeddings", False):
if name == "lm_head.weight" or name == "model.lm_head.weight":
logger.info("Skipping tied output layer 'lm_head.weight' (will use token_embd.weight)")
- return []
+ return
# rename e_score_correction_bias tensors
if name.endswith("e_score_correction_bias"):
block_count = self.hparams["num_hidden_layers"]
match = re.match(r"model.layers.(\d+)", name)
if match and int(match.group(1)) >= block_count:
- return []
+ return
# process the experts separately
if name.find("mlp.experts") != -1:
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
# note: MLA with the absorption optimization, needs these two split and k_b_proj transposed
if name.endswith("kv_b_proj.weight"):
k_b, v_b = torch.split(kv_b, [qk_nope_head_dim, v_head_dim], dim=1)
k_b = k_b.transpose(1, 2)
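+ # (MLA note: kv_b_proj packs, per head, the no-position key block followed by
+ #  the value block, hence the [qk_nope_head_dim, v_head_dim] split above; k_b is
+ #  transposed so the absorbed attention path can consume it directly.)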
- return [
- (self.map_tensor_name(name_kb), k_b),
- (self.map_tensor_name(name_vb), v_b)
- ]
+ yield from super().modify_tensors(k_b, name_kb, bid)
+ yield from super().modify_tensors(v_b, name_vb, bid)
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
# not enough expert weights to merge
if len(expert_cache) < n_experts * len(expert_weights):
- return []
+ return
- tensors: list[tuple[str, Tensor]] = []
for w_name in expert_weights:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
+ yield from super().modify_tensors(data_torch, new_name, bid)
del self._experts_cache[bid]
- return tensors
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("MiMoV2FlashForCausalLM")
# TODO: mimo v2 does not indicate the number of next-token-prediction layers, therefore we cannot do the same way as GLM4_MOE
if "model.mtp." in name:
- return []
+ return
# process the experts separately
if name.find("mlp.experts") != -1:
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["gate_proj", "up_proj", "down_proj"]:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
- return [(self.map_tensor_name(name), data_torch)]
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
if name == "lm_head.weight":
if self.hparams.get("tie_word_embeddings", False):
logger.info("Skipping tied output layer 'lm_head.weight'")
- return []
- return [(self.map_tensor_name(name), data_torch)]
+ return
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Dots1ForCausalLM")
if name.endswith("e_score_correction_bias"):
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
if "shared_experts" in name:
- return [(self.map_tensor_name(name), data_torch)]
- return super().modify_tensors(data_torch, name, bid)
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("PLMForCausalLM")
self.gguf_writer.add_value_length(hparams["v_head_dim"])
self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
- def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- return [(self.map_tensor_name(name), data_torch)]
-
def prepare_tensors(self):
super().prepare_tensors()
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
# "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
# in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
self.shared_token_embeddings_found = True
else:
logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("T5EncoderModel")
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
# "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
# in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
self.shared_token_embeddings_found = True
else:
logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("JAISLMHeadModel")
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
- tensors: list[tuple[str, Tensor]] = []
-
# we don't need these
if name.endswith((".attn.bias")):
- return tensors
+ return
if name.endswith(("relative_pe.slopes")):
# Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
first_val = float(data_torch[0].item())
self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
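+ # (derivation, assuming the standard ALiBi slopes 2^(-8 * i / n) for heads
+ #  i = 1..n with n rounded to a power-of-two head count: the first slope is
+ #  2^(-8 / n), so log2(first_val) * n == -8 and negating recovers the maximum bias of 8)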
- return tensors
+ return
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
data_torch = data_torch.transpose(1, 0)
new_name = self.map_tensor_name(name)
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
- tensors.append((new_name, data_torch * self.embeddings_scale))
+ yield from super().modify_tensors(data_torch * self.embeddings_scale, new_name, bid)
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
- tensors.append((new_name, data_torch * self.width_scale))
+ yield from super().modify_tensors(data_torch * self.width_scale, new_name, bid)
else:
- tensors.append((new_name, data_torch))
-
- return tensors
+ yield from super().modify_tensors(data_torch, new_name, bid)
def prepare_tensors(self):
super().prepare_tensors()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if name.startswith("model.visual."): # ignore visual part of Glm4v
- return []
+ return
elif name.startswith("model.language_model."):
name = name.replace("language_model.", "") # for Glm4v
if self.use_mrope:
data_torch = Glm4Model.normal_to_neox(data_torch, n_head, n_head, head_dim, self.partial_rotary_factor)
if name.endswith(("k_proj.weight", "k_proj.bias")):
data_torch = Glm4Model.normal_to_neox(data_torch, n_head, n_kv_head, head_dim, self.partial_rotary_factor)
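+ # (rotation layout, illustrative: "normal" RoPE rotates adjacent dim pairs
+ #  (0, 1), (2, 3), ..., while the NeoX layout pairs dim i with dim i + d/2, so
+ #  normal_to_neox permutes the q/k rows into the half-split order.)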
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Glm4MoeForCausalLM", "Glm4vMoeForConditionalGeneration")
self, data_torch: Tensor, name: str, bid: int | None
) -> Iterable[tuple[str, Tensor]]:
if name.startswith("model.visual."): # ignore visual part
- return []
+ return
elif name.startswith("model.language_model."):
name = name.replace("language_model.", "") # for multimodal variants
# Handle main token embedding (but not layer-specific NextN embeddings)
if name == "model.embed_tokens.weight" and ".layers." not in name:
- return [(self.map_tensor_name("token_embd.weight"), data_torch)]
+ yield from super().modify_tensors(data_torch, "token_embd.weight", bid)
+ return
# Handle routed experts
if name.find("mlp.experts") != -1:
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
if name.endswith("e_score_correction_bias"):
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
- new_name = self.map_tensor_name(name)
-
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
self.gguf_writer.add_rope_freq_base(rope_freq)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if name.endswith(".rotary_pos_emb.inv_freq") or name.startswith("model.vision."):
- return []
+ return
name = name.removeprefix("transformer.")
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("NemotronForCausalLM")
if name.endswith("norm.weight"):
data_torch = data_torch + 1
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
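# A minimal sketch (hypothetical values): Nemotron checkpoints store
# zero-centered norm weights, so the effective scale is (stored + 1); the +1
# above bakes that back in at conversion time.
def _nemotron_norm_sketch(stored: Tensor) -> Tensor:
    return stored + 1  # e.g. stored [-0.1, 0.0, 0.2] -> effective [0.9, 1.0, 1.2]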
@ModelBase.register("ExaoneForCausalLM")
new_name = remapper[_n.stem] + _n.suffix
# set shared weights for all NextN/MTP layers
- tensors = []
for bid in range(self.hparams['num_hidden_layers'], self.block_count):
- new_name = new_name.format(bid=bid)
- tensors.append((self.map_tensor_name(new_name), data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, new_name.format(bid=bid), bid)
+ return
if name.endswith("e_score_correction_bias"):
name = name.replace("e_score_correction_bias", "e_score_correction.bias")
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
ffn_dim = self.hparams["intermediate_size"]
assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
gate, up = data_torch.split(ffn_dim, dim=-2)
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), gate),
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), up),
- ]
+ yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid)
+ yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid)
has_experts = bool(self.hparams.get('num_local_experts'))
assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
gate, up = data_torch.split(ffn_dim, dim=-2)
if has_experts:
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), gate),
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up),
- ]
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), gate),
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), up),
- ]
+ yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid)
+ yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid)
+ return
+ yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
+ yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
+ return
if not has_experts and name.endswith("shared_mlp.output_linear.weight"):
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), data_torch)
- ]
+ yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid)
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
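# A minimal sketch (hypothetical shapes) of the merged-FFN split used above:
# the first ffn_dim rows along dim=-2 are the gate projection, the rest are
# the up projection.
def _split_gate_up_sketch(merged: Tensor, ffn_dim: int) -> tuple[Tensor, Tensor]:
    assert merged.shape[-2] == 2 * ffn_dim
    gate, up = merged.split(ffn_dim, dim=-2)
    return gate, up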
@ModelBase.register("GraniteMoeHybridForCausalLM", "BambaForCausalLM")
- return Mamba2Model.modify_tensors(self, data_torch, name, bid)
+ yield from Mamba2Model.modify_tensors(self, data_torch, name, bid)
+ return
elif bid in self._attn_layers:
- return GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
+ yield from GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def set_gguf_parameters(self):
"""This method merges params from both parents and some that are
if self.is_moe and bid is not None:
if name.endswith("mixer.gate.e_score_correction_bias"):
new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
- mapped_name = self.map_tensor_name(new_name)
- return [(mapped_name, data_torch)]
+ yield from super().modify_tensors(data_torch, new_name, bid)
+ return
if name.endswith("mixer.dt_bias"):
new_name = name.replace("dt_bias", "dt.bias")
- mapped_name = self.map_tensor_name(new_name)
- return [(mapped_name, data_torch)]
+ yield from super().modify_tensors(data_torch, new_name, bid)
+ return
if name.endswith("mixer.conv1d.weight"):
squeezed_data = data_torch.squeeze()
- mapped_name = self.map_tensor_name(name)
- return [(mapped_name, squeezed_data)]
+ yield from super().modify_tensors(squeezed_data, name, bid)
+ return
if name.endswith("mixer.A_log"):
transformed_data = -torch.exp(data_torch)
reshaped_data = transformed_data.squeeze().reshape(-1, 1)
- mapped_name = self.map_tensor_name(name)
- return [(mapped_name, reshaped_data)]
+ yield from super().modify_tensors(reshaped_data, name, bid)
+ return
if name.endswith("mixer.D"):
reshaped_data = data_torch.squeeze().reshape(-1, 1)
- mapped_name = self.map_tensor_name(name)
- return [(mapped_name, reshaped_data)]
+ yield from super().modify_tensors(reshaped_data, name, bid)
+ return
if name.endswith("mixer.norm.weight"):
reshaped_data = data_torch.reshape(self.n_group, -1)
- mapped_name = self.map_tensor_name(name)
- return [(mapped_name, reshaped_data)]
+ yield from super().modify_tensors(reshaped_data, name, bid)
+ return
if name.find("mixer.experts") != -1:
n_experts = self.hparams["n_routed_experts"]
if len(self._experts[bid]) >= n_experts * 2:
# merge the experts into a single tensor
- tensors: list[tuple[str, Tensor]] = []
for w_name in ["down_proj", "up_proj"]:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
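# A minimal sketch (hypothetical shapes) of the Mamba reparameterization above:
# checkpoints store A_log, while the runtime expects A = -exp(A_log), one
# strictly negative value per row.
def _mamba_a_sketch(a_log: Tensor) -> Tensor:
    return -torch.exp(a_log).squeeze().reshape(-1, 1)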
def prepare_tensors(self):
super().prepare_tensors()
output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
if name.endswith("attention.dense.weight"):
- return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), data_torch)]
+ yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_OUT, bid), bid)
+ return
elif name.endswith("query_key_value.weight"):
q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)
- return [
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), BailingMoeModel.permute(q, n_head, n_head)),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), BailingMoeModel.permute(k, n_head, n_kv_head)),
- (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v)
- ]
+ yield from super().modify_tensors(BailingMoeModel.permute(q, n_head, n_head), self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), bid)
+ yield from super().modify_tensors(BailingMoeModel.permute(k, n_head, n_kv_head), self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), bid)
+ yield from super().modify_tensors(v, self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), bid)
+ return
elif name.find("mlp.experts") != -1:
n_experts = self.hparams["num_experts"]
assert bid is not None
- tensors: list[tuple[str, Tensor]] = []
-
if self._experts is None:
self._experts = [{} for _ in range(self.block_count)]
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
+ yield from super().modify_tensors(data_torch, merged_name, bid)
- return tensors
+ return
- new_name = self.map_tensor_name(name)
data_torch = data_torch.float()
data_torch /= torch.norm(data_torch, p=2, dim=0, keepdim=True) + 1e-7
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
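# A minimal sketch (hypothetical dims) of the fused query_key_value split used
# in the branch above: with GQA, Q gets n_head heads while K and V each get
# n_kv_head heads along dim=-2.
def _split_qkv_sketch(qkv: Tensor, n_head: int, n_kv_head: int, head_dim: int):
    return qkv.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)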
def prepare_tensors(self):
super().prepare_tensors()
n_experts = self.hparams["num_experts"]
assert bid is not None
- tensors: list[tuple[str, Tensor]] = []
-
if self._experts is None:
self._experts = [{} for _ in range(self.block_count)]
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
-
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
if name.endswith(".expert_bias"):
name = name.replace(".expert_bias", ".expert_bias.bias")
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if name.endswith(".expert_bias"):
# FIXME?: Unused https://huggingface.co/inclusionAI/GroveMoE-Inst/blob/c4c69e5970d18907b5e6ddccdfd55176fe292df1/modeling_grove_moe.py#L303
- return []
+ return
# process the experts separately
if name.find("chunk_experts") != -1:
self._chunk_experts[bid][name] = data_torch
if len(self._chunk_experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.chunk_experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
elif name.find("experts") != -1:
n_experts = self.hparams["num_experts"]
assert bid is not None
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
# ignore image tokenizer for now
# TODO: remove this once image support is implemented for Chameleon
if name.startswith("model.vqmodel"):
- return []
+ return
n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")
if name.endswith(("k_norm.weight", "k_norm.bias")):
data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_kv_head, hidden_dim)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
# see: https://github.com/huggingface/transformers/blob/72fb02c47dbbe1999ae105319f24631cad6e2e00/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py#L176-L203
@staticmethod
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if name.startswith("model.") or name.startswith("lm_head."):
# skip language model tensors
- return []
+ return
if name.startswith("audio_encoder.whisper."):
name = name.replace("audio_encoder.whisper.","audio_tower.")
name = name.replace("audio_encoder.", "audio_encoder.adapting.")
if name.startswith("audio_encoder.audio_bos_eos_token."):
- return [(self.map_tensor_name("model.vision.boi"), data_torch[0]), (self.map_tensor_name("model.vision.eoi"), data_torch[1])]
+ yield from super().modify_tensors(data_torch[0], "model.vision.boi", bid)
+ yield from super().modify_tensors(data_torch[1], "model.vision.eoi", bid)
+ return
if name.startswith("audio_encoder.adapting."):
name = name.replace("audio_encoder.adapting.","audio.multi_modal_projector.")
if ".2." in name:
name = name.replace(".2.", ".linear_2.")
if ".proj." in name:
- return []
+ return
if "conv1.bias" in name or "conv2.bias" in name:
# transpose conv1 and conv2 bias
data_torch = data_torch.unsqueeze(-1)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("Qwen2AudioForConditionalGeneration")
return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if name.startswith("language_model."):
# skip language model tensors
- return []
+ return
# prevent clash naming with vision tensors
if name.startswith("multi_modal_projector"):
# transpose conv1 and conv2 bias
data_torch = data_torch.unsqueeze(-1)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("UltravoxModel")
if name == "lm_head.weight":
if self.hparams.get("tie_word_embeddings", False):
logger.info("Skipping tied output layer 'lm_head.weight'")
- return []
+ return
if name.find("mlp.experts") != -1:
n_experts = self.hparams["num_experts"]
if len(self._experts[bid]) >= n_experts * 3:
# merge the experts into a single 3d tensor
- tensors: list[tuple[str, Tensor]] = []
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down_proj", "gate_proj", "up_proj"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
# Copied from: Qwen2MoeModel
def prepare_tensors(self):
if name == "lm_head.weight":
if self.hparams.get("tie_word_embeddings", False):
logger.info("Skipping tied output layer 'lm_head.weight'")
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("SmolLM3ForCausalLM")
return []
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if "sinks" in name:
name += ".weight"
data_torch = data_torch.transpose(-1, -2)
else:
# otherwise, it should already be repacked to ggml MXFP4 format
- return []
+ return
# split the gate_up into gate and up
if "gate_up_proj" in name:
name_up = name.replace("gate_up_proj_bias", "up_proj.bias")
name_gate = name.replace("gate_up_proj_bias", "gate_proj.bias")
gate_proj_bias, up_proj_bias = data_torch[..., ::2], data_torch[..., 1::2]
- return [
- (self.map_tensor_name(name_gate), gate_proj_bias),
- (self.map_tensor_name(name_up), up_proj_bias)
- ]
+ yield from super().modify_tensors(gate_proj_bias, name_gate, bid)
+ yield from super().modify_tensors(up_proj_bias, name_up, bid)
elif "_blocks" not in name and "_scales" not in name:
logger.warning(f"{name} is not in MXFP4, performance may be degraded")
name_up = name.replace("gate_up_proj", "up_proj.weight")
name_gate = name.replace("gate_up_proj", "gate_proj.weight")
data_torch = data_torch.transpose(-1, -2)
gate_proj_weight, up_proj_weight = data_torch[:, ::2, :], data_torch[:, 1::2, :]
- return [
- (self.map_tensor_name(name_gate), gate_proj_weight),
- (self.map_tensor_name(name_up), up_proj_weight)
- ]
- else:
- # otherwise, it should already be repacked to ggml MXFP4 format
- return []
-
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(gate_proj_weight, name_gate, bid)
+ yield from super().modify_tensors(up_proj_weight, name_up, bid)
+ else:
+ # otherwise, it should already be repacked to ggml MXFP4 format
+ return
+
+ yield from super().modify_tensors(data_torch, name, bid)
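# A minimal sketch (hypothetical values) of the interleaved layout split above:
# gate_up_proj stores [g0, u0, g1, u1, ...] along the last axis, so even
# indices select the gate and odd indices the up projection.
def _split_interleaved_sketch(fused: Tensor) -> tuple[Tensor, Tensor]:
    return fused[..., ::2], fused[..., 1::2]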
def set_vocab(self):
self._set_vocab_gpt2()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if self._is_vision_tensor(name) or ConformerAudioModel.is_audio_tensor(name):
# skip multimodal tensors
- return []
+ return
name = name.replace("language_model.", "") # vision
name = name.replace("lfm.", "model.") # audio
if 'conv.conv' in name:
data_torch = data_torch.squeeze(1)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def _is_vision_tensor(self, name: str) -> bool:
return "vision_tower" in name or "multi_modal_projector" in name
if not name.startswith(self.dense_tensor_name):
name = "model." + name
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
# dense tensor is stored in a separate safetensors file
# not enough expert weights to merge
if len(expert_cache) < n_experts * len(expert_weights):
- return []
+ return
- tensors: list[tuple[str, Tensor]] = []
for w_name in expert_weights:
datas: list[Tensor] = []
data_torch = torch.stack(datas, dim=0)
merged_name = f"layers.{bid}.feed_forward.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
- tensors.append((new_name, data_torch))
+
+ yield from super().modify_tensors(data_torch, merged_name, bid)
del self._experts_cache[bid]
- return tensors
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
self.gguf_writer.add_vision_block_count(self.find_vparam(self.n_block_keys) - vision_feature_layers_to_drop)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
if is_vision_tensor:
if "patch_embedding.weight" in name:
data_torch = data_torch.view(data_torch.shape[0], 16, 16, 3).permute(0, 3, 1, 2)
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
- return [] # skip other tensors
+ return # skip other tensors
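# A minimal sketch (hypothetical sizes) of the patch-embedding reshape above:
# a flattened (n_embd, 16*16*3) matrix becomes conv-style NCHW weights.
def _patch_embed_sketch(flat: Tensor) -> Tensor:
    return flat.view(flat.shape[0], 16, 16, 3).permute(0, 3, 1, 2)  # (n_embd, 3, 16, 16)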
@ModelBase.register("Lfm2AudioForConditionalGeneration")
def modify_tensors(self, data_torch, name, bid):
# skip language model tensors
if name.startswith("lfm."):
- return []
+ return
# for training only
if any(p in name for p in ["audio_loss_weight"]):
- return []
+ return
# for audio output
if any(p in name for p in ["codebook_offsets", "depth_embeddings", "depth_linear", "depthformer"]):
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("SmallThinkerForCausalLM")
self._experts[bid][name] = data_torch
if len(self._experts[bid]) >= n_experts * 3:
- tensors: list[tuple[str, Tensor]] = []
-
# merge the experts into a single 3d tensor
for w_name in ["down", "gate", "up"]:
datas: list[Tensor] = []
merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
- new_name = self.map_tensor_name(merged_name)
-
- tensors.append((new_name, data_torch))
- return tensors
+ yield from super().modify_tensors(data_torch, merged_name, bid)
+ return
else:
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
def prepare_tensors(self):
super().prepare_tensors()
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# these layers act as MLM head, so we don't need them
if name.startswith("decoder."):
- return []
+ return
if name.startswith("model."):
name = name[6:]
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("ApertusForCausalLM")
self._alpha_n[bid] = data_torch.to("cpu").float().item()
if (len(self._alpha_n) == n_layers):
self.gguf_writer.add_xielu_alpha_n([self._alpha_n[k] for k in sorted(self._alpha_n)])
- return []
+ return
if name.endswith(".act_fn.alpha_p"):
self._alpha_p[bid] = data_torch.to("cpu").float().item()
if (len(self._alpha_p) == n_layers):
self.gguf_writer.add_xielu_alpha_p([self._alpha_p[k] for k in sorted(self._alpha_p)])
- return []
+ return
if name.endswith(".act_fn.beta"):
self._beta[bid] = data_torch.to("cpu").float().item()
if (len(self._beta) == n_layers):
self.gguf_writer.add_xielu_beta([self._beta[k] for k in sorted(self._beta)])
- return []
+ return
if name.endswith(".act_fn.eps"):
self._eps[bid] = data_torch.to("cpu").float().item()
if (len(self._eps) == n_layers):
self.gguf_writer.add_xielu_eps([self._eps[k] for k in sorted(self._eps)])
- return []
+ return
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
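# A minimal sketch (hypothetical inputs) of the buffering pattern above: each
# per-layer xIELU scalar is collected until all n_layers have reported, then
# written once in ascending layer order.
def _collect_scalars_sketch(collected: dict[int, float], n_layers: int) -> list[float] | None:
    return [collected[k] for k in sorted(collected)] if len(collected) == n_layers else None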
class MistralModel(LlamaModel):
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
if name.startswith("vision_") or name.startswith("patch_merger.") or "mm_projector" in name:
- return []
+ return
# rename certain tensors so that we can reuse DeepseekV2Model modify_tensors logic
if name.endswith(".qscale_act"):
name = name.replace(".w3.", ".up_proj.")
name = "model." + name
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
class PixtralModel(LlavaVisionModel):
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("model.vision_encoder.", "vision_tower.")
name = name.replace("model.vision_projection.", "multi_modal_projector.")
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("KimiVLForConditionalGeneration")
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-5))
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
if is_vision_tensor:
if "pos_emb.weight" in name:
data_torch = data_torch.view(data_torch.shape[0] * data_torch.shape[1], data_torch.shape[2])
- elif "wqkv" in name:
+
+ if "wqkv" in name:
split_dim = 0 if "weight" in name else -1
wq, wk, wv = data_torch.chunk(3, dim=split_dim)
- return [
- (self.map_tensor_name(name.replace("wqkv", "wq")), wq),
- (self.map_tensor_name(name.replace("wqkv", "wk")), wk),
- (self.map_tensor_name(name.replace("wqkv", "wv")), wv)
- ]
-
- return [(self.map_tensor_name(name), data_torch)]
-
- return [] # skip other tensors
+ yield from super().modify_tensors(wq, name.replace("wqkv", "wq"), bid)
+ yield from super().modify_tensors(wk, name.replace("wqkv", "wk"), bid)
+ yield from super().modify_tensors(wv, name.replace("wqkv", "wv"), bid)
+ else:
+ yield from super().modify_tensors(data_torch, name, bid)
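# A minimal sketch (hypothetical shapes) of the fused wqkv split above: the
# tensor holds three equal parts, so a 3-way chunk along the split dim
# recovers wq, wk and wv.
def _split_wqkv_sketch(wqkv: Tensor, split_dim: int) -> tuple[Tensor, Tensor, Tensor]:
    wq, wk, wv = wqkv.chunk(3, dim=split_dim)
    return wq, wk, wv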
@ModelBase.register("CogVLMForCausalLM")
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.COGVLM)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
if not name.startswith("model.vision."):
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("CogVLMForCausalLM")
model_arch = gguf.MODEL_ARCH.COGVLM
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# block vision tensors
if name.startswith("model.vision."):
- return []
+ return
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("JanusForConditionalGeneration")
'model.generation_head.',
)
if name.startswith(skip_prefixes):
- return []
+ return
if name.startswith('model.language_model.'):
name = name.replace('model.language_model.', 'model.')
elif name.startswith('language_model.'):
name = name.replace('language_model.', '')
- return super().modify_tensors(data_torch, name, bid)
+ yield from super().modify_tensors(data_torch, name, bid)
@ModelBase.register("JanusForConditionalGeneration")
return [(tensor_name, data_torch)]
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# Skip language model tensors as they will be handled by `JanusProModel`
if name.startswith(('model.language_model.', 'language_model.')):
- return []
+ return
# Skip generation-related components
skip_generation_prefixes = (
'generation_head.',
)
if name.startswith(skip_generation_prefixes):
- return []
+ return
# Handle aligner tensors
if name.startswith(('model.aligner.', 'aligner.')):
- return list(self._map_aligner_tensor(data_torch, name))
+ yield from self._map_aligner_tensor(data_torch, name)
+ return
# Handle vision tensors
if name.startswith(('model.vision_model.', 'vision_model.')):
- return [(self.map_tensor_name(name), data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
+ return
- return []
+ return
@ModelBase.register("YoutuVLForConditionalGeneration")
self.gguf_writer.add_vision_wa_layer_indexes(layers=fullatt_block_indexes)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
- del bid # unused
-
# Skip language model tensors
skip_prefixes = ('lm_head.', 'model.layers.', 'model.embed_tokens.', 'model.norm.')
if name.startswith(skip_prefixes):
- return []
+ return
# Try to map the tensor using TensorNameMap (handles vision encoder and projector)
try:
- new_name = self.map_tensor_name(name)
- return [(new_name, data_torch)]
+ yield from super().modify_tensors(data_torch, name, bid)
except ValueError:
# If mapping fails, log warning and skip
logger.warning(f"Cannot map tensor: {name}")
- return []
+ return
@ModelBase.register("SolarOpenForCausalLM")