            return None
        raise KeyError(f"could not find any of: {keys}")
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, name, n_dims  # unused
+        if ".patch_embd.weight" in new_name:
+            return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
+        return False
+
@ModelBase.register("GPTNeoXForCausalLM")
class GPTNeoXModel(TextModel):
        self.gguf_writer.add_vision_use_gelu(True)

    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
        if ".embeddings." in name:
            return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused
        self.gguf_writer.add_vision_attention_layernorm_eps(self.global_config.get("rms_norm_eps", 1e-6))

    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
        if ".position_embd." in new_name:
            return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused
yield ("audio_tower.embed_positions.weight", pos_embd)
def tensor_force_quant(self, name, new_name, bid, n_dims):
- del bid, new_name, n_dims # unused
if ".conv" in name and ".weight" in name:
return gguf.GGMLQuantizationType.F16
- return False
+ return super().tensor_force_quant(name, new_name, bid, n_dims)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
if name.startswith("thinker."):
        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))

    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
        if ".position_embd." in new_name:
            return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def _mapping_interns1_name(self, name):
        names_map = {
        self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)

    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
        # related to https://github.com/ggml-org/llama.cpp/issues/13025
        if "input_projection" in name:
            return gguf.GGMLQuantizationType.F16
        if ".embeddings." in name:
            return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused
        self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))

    def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
        if ".conv" in name and ".weight" in name:
            return gguf.GGMLQuantizationType.F16
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused
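For context: `tensor_force_quant` lets a model pin the output type of individual tensors regardless of the file-wide quantization. A minimal sketch of the assumed dispatch; the helper name `pick_qtype`, its signature, and `default_qtype` are illustrative, not the converter's actual code:

```python
import gguf

# Hypothetical helper for illustration only; the real type-selection logic
# in convert_hf_to_gguf.py is more involved.
def pick_qtype(model, name: str, new_name: str, bid: int | None, n_dims: int,
               default_qtype: gguf.GGMLQuantizationType) -> gguf.GGMLQuantizationType:
    forced = model.tensor_force_quant(name, new_name, bid, n_dims)
    if isinstance(forced, gguf.GGMLQuantizationType):
        return forced     # the hook pins this tensor's type (e.g. F32 embeddings)
    return default_qtype  # False means no override: keep the file-wide choice
```

With the rule hoisted into the base class, each subclass keeps only its model-specific cases and inherits the shared `.patch_embd.weight` handling through the `super()` call, so vision and audio models stay consistent.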