BYTE = 6
-AnyModel = TypeVar("AnyModel", bound="type[Model]")
+class ModelType(IntEnum):
+ TEXT = 1
+ VISION = 2
-class Model:
- _model_classes: dict[str, type[Model]] = {}
+AnyModel = TypeVar("AnyModel", bound="type[ModelBase]")
+
+
+class ModelBase:
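+    # registry of conversion classes, keyed by model type, then by HF architecture name
+    # (populated via the @ModelBase.register(...) decorator below)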
+ _model_classes: dict[ModelType, dict[str, type[ModelBase]]] = {
+ ModelType.TEXT: {},
+ ModelType.VISION: {},
+ }
dir_model: Path
ftype: gguf.LlamaFileType
metadata_override: Path | None = None, model_name: str | None = None,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None):
- if type(self) is Model:
+ if type(self) is ModelBase or \
+ type(self) is TextModel or \
+ type(self) is VisionModel:
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
self.dir_model = dir_model
self.get_tensors = get_remote_tensors
else:
- self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
+ self.part_names = ModelBase.get_model_part_names(self.dir_model, "model", ".safetensors")
self.is_safetensors = len(self.part_names) > 0
if not self.is_safetensors:
- self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
- self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams
+ self.part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+ self.hparams = ModelBase.load_hparams(self.dir_model) if hparams is None else hparams
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
self.tensor_names = None
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
@classmethod
- def __init_subclass__(cls):
- # can't use an abstract property, because overriding it without type errors
- # would require using decorated functions instead of simply defining the property
- if "model_arch" not in cls.__dict__:
- raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
+ def add_prefix_to_filename(cls, path: Path, prefix: str) -> Path:
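+        # prepend `prefix` to the file name while keeping directory and suffix,
+        # e.g. ("out/model.gguf", "mmproj-") -> "out/mmproj-model.gguf"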
+ stem, suffix = path.stem, path.suffix
+ new_name = f"{prefix}{stem}{suffix}"
+ return path.with_name(new_name)
def find_hparam(self, keys: Iterable[str], optional: bool = False) -> Any:
        key = next((k for k in keys if k in self.hparams), None)
        if key is not None:
            return self.hparams[key]
        if optional:
            return None
        raise KeyError(f"could not find any of: {keys}")
- def set_vocab(self):
- self._set_vocab_gpt2()
-
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
tensor_names_from_parts: set[str] = set()
return new_name
def set_gguf_parameters(self):
- self.gguf_writer.add_block_count(self.block_count)
-
- if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
- self.gguf_writer.add_context_length(n_ctx)
- logger.info(f"gguf: context length = {n_ctx}")
-
- if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
- self.gguf_writer.add_embedding_length(n_embd)
- logger.info(f"gguf: embedding length = {n_embd}")
-
- if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
- self.gguf_writer.add_feed_forward_length(n_ff)
- logger.info(f"gguf: feed forward length = {n_ff}")
-
- if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
- self.gguf_writer.add_head_count(n_head)
- logger.info(f"gguf: head count = {n_head}")
-
- if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
- self.gguf_writer.add_head_count_kv(n_head_kv)
- logger.info(f"gguf: key-value head count = {n_head_kv}")
-
- if (rope_theta := self.hparams.get("rope_theta")) is not None:
- self.gguf_writer.add_rope_freq_base(rope_theta)
- logger.info(f"gguf: rope theta = {rope_theta}")
- if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
- self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
- logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
- if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
- self.gguf_writer.add_layer_norm_eps(f_norm_eps)
- logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
- if (n_experts := self.hparams.get("num_local_experts")) is not None:
- self.gguf_writer.add_expert_count(n_experts)
- logger.info(f"gguf: expert count = {n_experts}")
- if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
- self.gguf_writer.add_expert_used_count(n_experts_used)
- logger.info(f"gguf: experts used count = {n_experts_used}")
-
- if (head_dim := self.hparams.get("head_dim")) is not None:
- self.gguf_writer.add_key_length(head_dim)
- self.gguf_writer.add_value_length(head_dim)
-
- self.gguf_writer.add_file_type(self.ftype)
- logger.info(f"gguf: file type = {self.ftype}")
+ raise NotImplementedError("set_gguf_parameters() must be implemented in subclasses")
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused
if self.metadata.size_label is None and total_params > 0:
self.metadata.size_label = gguf.size_label(total_params, shared_params, expert_params, expert_count)
- # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0'
- output_type: str = self.ftype.name.partition("_")[2]
-
- # Filename Output
- if self.fname_out.is_dir():
- # Generate default filename based on model specification and available metadata
- if not vocab_only:
- fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type, model_type="LoRA" if total_params < 0 else None)
- else:
- fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, size_label=None, output_type=None, model_type="vocab")
-
- # Use the default filename
- self.fname_out = self.fname_out / f"{fname_default}.gguf"
- else:
- # Output path is a custom defined templated filename
- # Note: `not is_dir()` is used because `.is_file()` will not detect
- # file template strings as it doesn't actually exist as a file
-
- # Process templated file name with the output ftype, useful with the "auto" ftype
- self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type)
-
self.set_type()
logger.info("Set meta model")
logger.info("Set model parameters")
self.set_gguf_parameters()
- logger.info("Set model tokenizer")
- self.set_vocab()
-
logger.info("Set model quantization version")
self.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
+ def write_vocab(self):
+ raise NotImplementedError("write_vocab() must be implemented in subclasses")
+
def write(self):
self.prepare_tensors()
self.prepare_metadata(vocab_only=False)
self.gguf_writer.write_tensors_to_file(progress=True)
self.gguf_writer.close()
- def write_vocab(self):
- if len(self.gguf_writer.tensors) != 1:
- raise ValueError('Splitting the vocabulary is not supported')
-
- self.prepare_metadata(vocab_only=True)
- self.gguf_writer.write_header_to_file(path=self.fname_out)
- self.gguf_writer.write_kv_data_to_file()
- self.gguf_writer.close()
-
@staticmethod
def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
part_names: list[str] = []
@staticmethod
def load_hparams(dir_model: Path):
with open(dir_model / "config.json", "r", encoding="utf-8") as f:
- return json.load(f)
+ hparams = json.load(f)
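+            # for multimodal checkpoints, merge the nested text_config into the
+            # top level so text hparams can be looked up uniformly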
+ if "text_config" in hparams:
+ hparams = {**hparams, **hparams["text_config"]}
+ return hparams
@classmethod
def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
assert names
def func(modelcls: AnyModel) -> AnyModel:
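+            # vision components are registered separately from text models,
+            # since both may share the same HF architecture name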
+ model_type = ModelType.VISION if modelcls.model_arch == gguf.MODEL_ARCH.CLIP_VISION else ModelType.TEXT
for name in names:
- cls._model_classes[name] = modelcls
+ cls._model_classes[model_type][name] = modelcls
return modelcls
return func
@classmethod
def print_registered_models(cls):
- for name in sorted(cls._model_classes.keys()):
- logger.error(f"- {name}")
+ for model_type, model_classes in cls._model_classes.items():
+ logger.error(f"{model_type.name} models:")
+ for name in sorted(model_classes.keys()):
+ logger.error(f" - {name}")
@classmethod
- def from_model_architecture(cls, arch: str) -> type[Model]:
+    def from_model_architecture(cls, arch: str, model_type: ModelType = ModelType.TEXT) -> type[ModelBase]:
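+        # e.g. from_model_architecture("Gemma3ForConditionalGeneration", ModelType.VISION)
+        #      -> Gemma3VisionModel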
try:
- return cls._model_classes[arch]
+ return cls._model_classes[model_type][arch]
except KeyError:
raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
+
+class TextModel(ModelBase):
+ @classmethod
+ def __init_subclass__(cls):
+ # can't use an abstract property, because overriding it without type errors
+ # would require using decorated functions instead of simply defining the property
+ if "model_arch" not in cls.__dict__:
+ raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
+
+ def set_vocab(self):
+ self._set_vocab_gpt2()
+
+ def prepare_metadata(self, vocab_only: bool):
+ super().prepare_metadata(vocab_only=vocab_only)
+
+ total_params = self.gguf_writer.get_total_parameter_count()[0]
+ # Extract the encoding scheme from the file type name. e.g. 'gguf.LlamaFileType.MOSTLY_Q8_0' --> 'Q8_0'
+ output_type: str = self.ftype.name.partition("_")[2]
+
+ # Filename Output
+ if self.fname_out.is_dir():
+ # Generate default filename based on model specification and available metadata
+ if not vocab_only:
+ fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type, model_type="LoRA" if total_params < 0 else None)
+ else:
+ fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, size_label=None, output_type=None, model_type="vocab")
+
+ # Use the default filename
+ self.fname_out = self.fname_out / f"{fname_default}.gguf"
+ else:
+ # Output path is a custom defined templated filename
+            # Note: `not is_dir()` is used because `.is_file()` will not detect
+            # file template strings, as the templated name doesn't actually exist as a file
+
+ # Process templated file name with the output ftype, useful with the "auto" ftype
+ self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type)
+
+ logger.info("Set model tokenizer")
+ self.set_vocab()
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_block_count(self.block_count)
+
+ if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
+ self.gguf_writer.add_context_length(n_ctx)
+ logger.info(f"gguf: context length = {n_ctx}")
+
+ if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
+ self.gguf_writer.add_embedding_length(n_embd)
+ logger.info(f"gguf: embedding length = {n_embd}")
+
+ if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
+ self.gguf_writer.add_feed_forward_length(n_ff)
+ logger.info(f"gguf: feed forward length = {n_ff}")
+
+ if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
+ self.gguf_writer.add_head_count(n_head)
+ logger.info(f"gguf: head count = {n_head}")
+
+ if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
+ self.gguf_writer.add_head_count_kv(n_head_kv)
+ logger.info(f"gguf: key-value head count = {n_head_kv}")
+
+ if (rope_theta := self.hparams.get("rope_theta")) is not None:
+ self.gguf_writer.add_rope_freq_base(rope_theta)
+ logger.info(f"gguf: rope theta = {rope_theta}")
+ if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+ self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+ logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
+ if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
+ self.gguf_writer.add_layer_norm_eps(f_norm_eps)
+ logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
+ if (n_experts := self.hparams.get("num_local_experts")) is not None:
+ self.gguf_writer.add_expert_count(n_experts)
+ logger.info(f"gguf: expert count = {n_experts}")
+ if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+ self.gguf_writer.add_expert_used_count(n_experts_used)
+ logger.info(f"gguf: experts used count = {n_experts_used}")
+
+ if (head_dim := self.hparams.get("head_dim")) is not None:
+ self.gguf_writer.add_key_length(head_dim)
+ self.gguf_writer.add_value_length(head_dim)
+
+ self.gguf_writer.add_file_type(self.ftype)
+ logger.info(f"gguf: file type = {self.ftype}")
+
+ def write_vocab(self):
+ if len(self.gguf_writer.tensors) != 1:
+ raise ValueError('Splitting the vocabulary is not supported')
+
+ self.prepare_metadata(vocab_only=True)
+ self.gguf_writer.write_header_to_file(path=self.fname_out)
+ self.gguf_writer.write_kv_data_to_file()
+ self.gguf_writer.close()
+
def does_token_look_special(self, token: str | bytes) -> bool:
if isinstance(token, (bytes, bytearray)):
token_text = token.decode(encoding="utf-8")
self.gguf_writer.add_add_eos_token(field.parts[-1].tolist()[0])
-@Model.register("GPTNeoXForCausalLM")
-class GPTNeoXModel(Model):
+class VisionModel(ModelBase):
+ model_arch = gguf.MODEL_ARCH.CLIP_VISION
+    n_embd_text = 0
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ if self.model_arch != gguf.MODEL_ARCH.CLIP_VISION:
+ raise TypeError("VisionModel must be subclassed with model_arch = gguf.MODEL_ARCH.CLIP_VISION")
+
+ # small hack to correct the number of layers
+ self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, 128)
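+        # (128 presumably serves as a generous upper bound on the block count,
+        # since the real vision layer count is only known from vision_config below)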
+ self.n_embd_text = self.find_hparam(["hidden_size", "n_embd"])
+ assert self.n_embd_text > 0, "n_embd not found in hparams"
+
+ if "vision_config" not in self.hparams:
+ raise ValueError("vision_config not found in hparams")
+ # move vision config to the top level
+ self.hparams = self.hparams["vision_config"]
+
+ def set_type(self):
+ self.gguf_writer.add_type(gguf.GGUFType.CLIP_VISION)
+
+ def set_gguf_parameters(self):
+ self.gguf_writer.add_file_type(self.ftype)
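+        # the projector output dimension must match the text model's embedding size,
+        # which was captured from the text hparams in __init__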
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.PROJECTION_DIM, self.n_embd_text)
+ self.gguf_writer.add_bool(gguf.Keys.ClipVision.HAS_VISION_ENCODER, True)
+
+ # vision config
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.IMAGE_SIZE, self.find_hparam(["image_size"]))
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.PATCH_SIZE, self.find_hparam(["patch_size"]))
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.EMBEDDING_LENGTH, self.find_hparam(["hidden_size"]))
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.FEED_FORWARD_LENGTH, self.find_hparam(["intermediate_size"]))
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.BLOCK_COUNT, self.find_hparam(["num_hidden_layers"]))
+ self.gguf_writer.add_uint32(gguf.Keys.ClipVision.Attention.HEAD_COUNT, self.find_hparam(["num_attention_heads"]))
+
+ def write_vocab(self):
+ raise ValueError("VisionModel does not support vocab writing")
+
+
+@ModelBase.register("GPTNeoXForCausalLM")
+class GPTNeoXModel(TextModel):
model_arch = gguf.MODEL_ARCH.GPTNEOX
def set_gguf_parameters(self):
return tensors
-@Model.register("BloomForCausalLM", "BloomModel")
-class BloomModel(Model):
+@ModelBase.register("BloomForCausalLM", "BloomModel")
+class BloomModel(TextModel):
model_arch = gguf.MODEL_ARCH.BLOOM
def set_gguf_parameters(self):
return tensors
-@Model.register("MPTForCausalLM")
-class MPTModel(Model):
+@ModelBase.register("MPTForCausalLM")
+class MPTModel(TextModel):
model_arch = gguf.MODEL_ARCH.MPT
def set_vocab(self):
return [(new_name, data_torch)]
-@Model.register("OrionForCausalLM")
-class OrionModel(Model):
+@ModelBase.register("OrionForCausalLM")
+class OrionModel(TextModel):
model_arch = gguf.MODEL_ARCH.ORION
def set_vocab(self):
self.gguf_writer.add_layer_norm_eps(self.hparams["rms_norm_eps"])
-@Model.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
-class BaichuanModel(Model):
+@ModelBase.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
+class BaichuanModel(TextModel):
model_arch = gguf.MODEL_ARCH.BAICHUAN
def set_vocab(self):
return weights[r * n_part:r * n_part + r, ...]
-@Model.register("XverseForCausalLM")
-class XverseModel(Model):
+@ModelBase.register("XverseForCausalLM")
+class XverseModel(TextModel):
model_arch = gguf.MODEL_ARCH.XVERSE
def set_vocab(self):
)
-@Model.register("FalconForCausalLM", "RWForCausalLM")
-class FalconModel(Model):
+@ModelBase.register("FalconForCausalLM", "RWForCausalLM")
+class FalconModel(TextModel):
model_arch = gguf.MODEL_ARCH.FALCON
def set_gguf_parameters(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("GPTBigCodeForCausalLM")
-class StarCoderModel(Model):
+@ModelBase.register("GPTBigCodeForCausalLM")
+class StarCoderModel(TextModel):
model_arch = gguf.MODEL_ARCH.STARCODER
def set_gguf_parameters(self):
self.gguf_writer.add_file_type(self.ftype)
-@Model.register("GPTRefactForCausalLM")
-class RefactModel(Model):
+@ModelBase.register("GPTRefactForCausalLM")
+class RefactModel(TextModel):
model_arch = gguf.MODEL_ARCH.REFACT
def set_vocab(self):
return tensors
-@Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
-class StableLMModel(Model):
+@ModelBase.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
+class StableLMModel(TextModel):
model_arch = gguf.MODEL_ARCH.STABLELM
def set_vocab(self):
raise ValueError(f"Unprocessed norms: {norms}")
-@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
-class LlamaModel(Model):
+@ModelBase.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+class LlamaModel(TextModel):
model_arch = gguf.MODEL_ARCH.LLAMA
undo_permute = True
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("Llama4ForConditionalGeneration")
+@ModelBase.register("Llama4ForConditionalGeneration")
class Llama4Model(LlamaModel):
model_arch = gguf.MODEL_ARCH.LLAMA4
- has_vision: bool = False
undo_permute = False
- # TODO @ngxson : avoid duplicate this code everywhere by at least support "text_config"
- # same with llama, but we need to merge the text_config into the root level of hparams
def __init__(self, *args, **kwargs):
- hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
- if "text_config" in hparams:
- hparams = {**hparams, **hparams["text_config"]}
- kwargs["hparams"] = hparams
super().__init__(*args, **kwargs)
- if "vision_config" in hparams:
- logger.info("Has vision encoder, but it will be ignored")
- self.has_vision = True
# IMPORTANT: the normal "intermediate_size" is renamed to "intermediate_size_mlp", we need to undo this
self.hparams["intermediate_size_moe"] = self.hparams["intermediate_size"]
self.hparams["intermediate_size"] = self.hparams["intermediate_size_mlp"]
return super().modify_tensors(data_torch, name, bid)
-@Model.register("Mistral3ForConditionalGeneration")
+@ModelBase.register("Mistral3ForConditionalGeneration")
class Mistral3Model(LlamaModel):
model_arch = gguf.MODEL_ARCH.LLAMA
- # we need to merge the text_config into the root level of hparams
- def __init__(self, *args, **kwargs):
- hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
- if "text_config" in hparams:
- hparams = {**hparams, **hparams["text_config"]}
- kwargs["hparams"] = hparams
- super().__init__(*args, **kwargs)
-
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
name = name.replace("language_model.", "")
if "multi_modal_projector" in name or "vision_tower" in name:
return super().modify_tensors(data_torch, name, bid)
-@Model.register("DeciLMForCausalLM")
-class DeciModel(Model):
+@ModelBase.register("DeciLMForCausalLM")
+class DeciModel(TextModel):
model_arch = gguf.MODEL_ARCH.DECI
@staticmethod
super().prepare_tensors()
-@Model.register("BitnetForCausalLM")
-class BitnetModel(Model):
+@ModelBase.register("BitnetForCausalLM")
+class BitnetModel(TextModel):
model_arch = gguf.MODEL_ARCH.BITNET
def set_vocab(self):
yield (new_name, data_torch)
-@Model.register("GrokForCausalLM")
-class GrokModel(Model):
+@ModelBase.register("GrokForCausalLM")
+class GrokModel(TextModel):
model_arch = gguf.MODEL_ARCH.GROK
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("DbrxForCausalLM")
-class DbrxModel(Model):
+@ModelBase.register("DbrxForCausalLM")
+class DbrxModel(TextModel):
model_arch = gguf.MODEL_ARCH.DBRX
def set_gguf_parameters(self):
return n_dims > 1
-@Model.register("MiniCPMForCausalLM")
-class MiniCPMModel(Model):
+@ModelBase.register("MiniCPMForCausalLM")
+class MiniCPMModel(TextModel):
model_arch = gguf.MODEL_ARCH.MINICPM
def set_gguf_parameters(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("MiniCPM3ForCausalLM")
-class MiniCPM3Model(Model):
+@ModelBase.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(TextModel):
model_arch = gguf.MODEL_ARCH.MINICPM3
def set_gguf_parameters(self):
)
-@Model.register("QWenLMHeadModel")
-class QwenModel(Model):
+@ModelBase.register("QWenLMHeadModel")
+class QwenModel(TextModel):
model_arch = gguf.MODEL_ARCH.QWEN
@staticmethod
self.gguf_writer.add_file_type(self.ftype)
-@Model.register("Qwen2ForCausalLM")
-class Qwen2Model(Model):
+@ModelBase.register("Qwen2ForCausalLM")
+class Qwen2Model(TextModel):
model_arch = gguf.MODEL_ARCH.QWEN2
def set_vocab(self):
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
-@Model.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
-class Qwen2VLModel(Model):
+@ModelBase.register("Qwen2VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration")
+class Qwen2VLModel(TextModel):
model_arch = gguf.MODEL_ARCH.QWEN2VL
def set_gguf_parameters(self):
yield name, data
-@Model.register("WavTokenizerDec")
-class WavTokenizerDecModel(Model):
+@ModelBase.register("WavTokenizerDec")
+class WavTokenizerDecModel(TextModel):
model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
self.gguf_writer.add_causal_attention(False)
-@Model.register("Qwen2MoeForCausalLM")
-class Qwen2MoeModel(Model):
+@ModelBase.register("Qwen2MoeForCausalLM")
+class Qwen2MoeModel(TextModel):
model_arch = gguf.MODEL_ARCH.QWEN2MOE
def set_gguf_parameters(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("Qwen3ForCausalLM")
+@ModelBase.register("Qwen3ForCausalLM")
class Qwen3Model(Qwen2Model):
model_arch = gguf.MODEL_ARCH.QWEN3
-@Model.register("Qwen3MoeForCausalLM")
+@ModelBase.register("Qwen3MoeForCausalLM")
class Qwen3MoeModel(Qwen2MoeModel):
model_arch = gguf.MODEL_ARCH.QWEN3MOE
-@Model.register("GPT2LMHeadModel")
-class GPT2Model(Model):
+@ModelBase.register("GPT2LMHeadModel")
+class GPT2Model(TextModel):
model_arch = gguf.MODEL_ARCH.GPT2
def set_gguf_parameters(self):
return tensors
-@Model.register("PhiForCausalLM")
-class Phi2Model(Model):
+@ModelBase.register("PhiForCausalLM")
+class Phi2Model(TextModel):
model_arch = gguf.MODEL_ARCH.PHI2
def set_gguf_parameters(self):
self.gguf_writer.add_add_bos_token(False)
-@Model.register("Phi3ForCausalLM")
-class Phi3MiniModel(Model):
+@ModelBase.register("Phi3ForCausalLM")
+class Phi3MiniModel(TextModel):
model_arch = gguf.MODEL_ARCH.PHI3
def set_vocab(self):
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
-@Model.register("PhiMoEForCausalLM")
+@ModelBase.register("PhiMoEForCausalLM")
class PhiMoeModel(Phi3MiniModel):
model_arch = gguf.MODEL_ARCH.PHIMOE
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("PlamoForCausalLM")
-class PlamoModel(Model):
+@ModelBase.register("PlamoForCausalLM")
+class PlamoModel(TextModel):
model_arch = gguf.MODEL_ARCH.PLAMO
def set_vocab(self):
return [(new_name, data_torch)]
-@Model.register("CodeShellForCausalLM")
-class CodeShellModel(Model):
+@ModelBase.register("CodeShellForCausalLM")
+class CodeShellModel(TextModel):
model_arch = gguf.MODEL_ARCH.CODESHELL
def set_gguf_parameters(self):
return [(new_name, data_torch)]
-@Model.register("InternLM2ForCausalLM")
-class InternLM2Model(Model):
+@ModelBase.register("InternLM2ForCausalLM")
+class InternLM2Model(TextModel):
model_arch = gguf.MODEL_ARCH.INTERNLM2
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("InternLM3ForCausalLM")
-class InternLM3Model(Model):
+@ModelBase.register("InternLM3ForCausalLM")
+class InternLM3Model(TextModel):
model_arch = gguf.MODEL_ARCH.LLAMA
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("BertModel", "BertForMaskedLM", "CamembertModel")
-class BertModel(Model):
+@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel")
+class BertModel(TextModel):
model_arch = gguf.MODEL_ARCH.BERT
def __init__(self, *args, **kwargs):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("RobertaModel")
+@ModelBase.register("RobertaModel")
class RobertaModel(BertModel):
model_arch = gguf.MODEL_ARCH.BERT
return super().modify_tensors(data_torch, name, bid)
-@Model.register("NomicBertModel")
+@ModelBase.register("NomicBertModel")
class NomicBertModel(BertModel):
model_arch = gguf.MODEL_ARCH.NOMIC_BERT
self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-@Model.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
+@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
class XLMRobertaModel(BertModel):
model_arch = gguf.MODEL_ARCH.BERT
return super().modify_tensors(data_torch, name, bid)
-@Model.register("GemmaForCausalLM")
-class GemmaModel(Model):
+@ModelBase.register("GemmaForCausalLM")
+class GemmaModel(TextModel):
model_arch = gguf.MODEL_ARCH.GEMMA
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("Gemma2ForCausalLM")
-class Gemma2Model(Model):
+@ModelBase.register("Gemma2ForCausalLM")
+class Gemma2Model(TextModel):
model_arch = gguf.MODEL_ARCH.GEMMA2
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration")
-class Gemma3Model(Model):
+@ModelBase.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration")
+class Gemma3Model(TextModel):
model_arch = gguf.MODEL_ARCH.GEMMA3
- has_vision: bool = False
-
- # we need to merge the text_config into the root level of hparams
- def __init__(self, *args, **kwargs):
- hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
- if "text_config" in hparams:
- hparams = {**hparams, **hparams["text_config"]}
- kwargs["hparams"] = hparams
- super().__init__(*args, **kwargs)
- if "vision_config" in hparams:
- logger.info("Has vision encoder, but it will be ignored")
- self.has_vision = True
-
- def write(self):
- super().write()
- if self.has_vision:
- logger.info("NOTE: this script only convert the language model to GGUF")
- logger.info(" for the vision model, please use gemma3_convert_encoder_to_gguf.py")
def set_vocab(self):
self._set_vocab_sentencepiece()
if name.startswith("language_model."):
name = name.replace("language_model.", "")
+
elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
- or name.startswith("multimodal_projector.") or name.startswith("vision_model."): # this is for old HF model, should be removed later
- # ignore vision tensors
- return []
+ or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
+ return [] # skip vision tensors
# remove OOV (out-of-vocabulary) rows in token_embd
if "embed_tokens.weight" in name:
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("Starcoder2ForCausalLM")
-class StarCoder2Model(Model):
+@ModelBase.register("Gemma3ForConditionalGeneration")
+class Gemma3VisionModel(VisionModel):
+ def set_gguf_parameters(self):
+ super().set_gguf_parameters()
+ hparams = self.hparams
+ self.gguf_writer.add_string(gguf.Keys.ClipVision.PROJECTOR_TYPE, "gemma3")
+        # default values below are taken from HF transformers code
+ self.gguf_writer.add_float32(gguf.Keys.ClipVision.Attention.LAYERNORM_EPS, hparams.get("layer_norm_eps", 1e-6))
+ self.gguf_writer.add_array(gguf.Keys.ClipVision.IMAGE_MEAN, [0.5, 0.5, 0.5])
+ self.gguf_writer.add_array(gguf.Keys.ClipVision.IMAGE_STD, [0.5, 0.5, 0.5])
+ self.gguf_writer.add_bool (gguf.Keys.ClipVision.USE_GELU, True)
+
+    def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
+ del bid, new_name, n_dims # unused
+ # related to https://github.com/ggml-org/llama.cpp/issues/13025
+ if "input_projection" in name:
+ return gguf.GGMLQuantizationType.F16
+ if ".embeddings." in name:
+ return gguf.GGMLQuantizationType.F32
+ return False
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+ del bid # unused
+
+ if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
+ or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
+ # process vision tensors
+ name = name.replace("_weight", ".weight")
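+            # fc1/fc2 in the HF checkpoint appear to map to the opposite
+            # up/down projections expected by the GGUF tensor map, so swap them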
+ if "fc1" in name:
+ name = name.replace("fc1", "fc2")
+ else:
+ name = name.replace("fc2", "fc1")
+
+            # correct norm value; only this "soft_emb_norm" needs to be corrected, as it is part of the Gemma projector
+            # the other norm values are part of the SigLIP model and are already correct
+ # ref code: Gemma3RMSNorm
+ if "soft_emb_norm.weight" in name:
+ logger.info(f"Correcting norm value for '{name}'")
+ data_torch = data_torch + 1
+
+ return [(self.map_tensor_name(name), data_torch)]
+
+ return [] # skip other tensors
+
+
+@ModelBase.register("Starcoder2ForCausalLM")
+class StarCoder2Model(TextModel):
model_arch = gguf.MODEL_ARCH.STARCODER2
-@Model.register("Rwkv6ForCausalLM")
-class Rwkv6Model(Model):
+@ModelBase.register("Rwkv6ForCausalLM")
+class Rwkv6Model(TextModel):
model_arch = gguf.MODEL_ARCH.RWKV6
def set_vocab(self):
yield (new_name, data_torch)
-@Model.register("RWKV6Qwen2ForCausalLM")
+@ModelBase.register("RWKV6Qwen2ForCausalLM")
class RWKV6Qwen2Model(Rwkv6Model):
model_arch = gguf.MODEL_ARCH.RWKV6QWEN2
yield (new_name, data)
-@Model.register("Rwkv7ForCausalLM", "RWKV7ForCausalLM")
-class Rwkv7Model(Model):
+@ModelBase.register("Rwkv7ForCausalLM", "RWKV7ForCausalLM")
+class Rwkv7Model(TextModel):
model_arch = gguf.MODEL_ARCH.RWKV7
def set_vocab(self):
yield (new_name, data_torch)
-@Model.register("RwkvHybridForCausalLM")
+@ModelBase.register("RwkvHybridForCausalLM")
class ARwkv7Model(Rwkv7Model):
model_arch = gguf.MODEL_ARCH.ARWKV7
self.gguf_writer.add_head_count(0)
-@Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
-class MambaModel(Model):
+@ModelBase.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
+class MambaModel(TextModel):
model_arch = gguf.MODEL_ARCH.MAMBA
def set_vocab(self):
return [(new_name, data_torch)]
-@Model.register("CohereForCausalLM")
-class CommandR2Model(Model):
+@ModelBase.register("CohereForCausalLM")
+class CommandR2Model(TextModel):
model_arch = gguf.MODEL_ARCH.COMMAND_R
def __init__(self, *args, **kwargs):
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
-@Model.register("Cohere2ForCausalLM")
-class Cohere2Model(Model):
+@ModelBase.register("Cohere2ForCausalLM")
+class Cohere2Model(TextModel):
model_arch = gguf.MODEL_ARCH.COHERE2
def set_gguf_parameters(self):
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
-@Model.register("OlmoForCausalLM")
-@Model.register("OLMoForCausalLM")
-class OlmoModel(Model):
+@ModelBase.register("OlmoForCausalLM")
+@ModelBase.register("OLMoForCausalLM")
+class OlmoModel(TextModel):
model_arch = gguf.MODEL_ARCH.OLMO
def set_gguf_parameters(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("Olmo2ForCausalLM")
-class Olmo2Model(Model):
+@ModelBase.register("Olmo2ForCausalLM")
+class Olmo2Model(TextModel):
model_arch = gguf.MODEL_ARCH.OLMO2
-@Model.register("OlmoeForCausalLM")
-class OlmoeModel(Model):
+@ModelBase.register("OlmoeForCausalLM")
+class OlmoeModel(TextModel):
model_arch = gguf.MODEL_ARCH.OLMOE
def set_gguf_parameters(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("JinaBertModel", "JinaBertForMaskedLM")
+@ModelBase.register("JinaBertModel", "JinaBertForMaskedLM")
class JinaBertV2Model(BertModel):
model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
return super().modify_tensors(data_torch, name, bid)
-@Model.register("OpenELMForCausalLM")
-class OpenELMModel(Model):
+@ModelBase.register("OpenELMForCausalLM")
+class OpenELMModel(TextModel):
model_arch = gguf.MODEL_ARCH.OPENELM
@staticmethod
yield (self.map_tensor_name(name), data_torch)
-@Model.register("ArcticForCausalLM")
-class ArcticModel(Model):
+@ModelBase.register("ArcticForCausalLM")
+class ArcticModel(TextModel):
model_arch = gguf.MODEL_ARCH.ARCTIC
def set_vocab(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("DeepseekForCausalLM")
-class DeepseekModel(Model):
+@ModelBase.register("DeepseekForCausalLM")
+class DeepseekModel(TextModel):
model_arch = gguf.MODEL_ARCH.DEEPSEEK
def set_vocab(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("DeepseekV2ForCausalLM")
-@Model.register("DeepseekV3ForCausalLM")
-class DeepseekV2Model(Model):
+@ModelBase.register("DeepseekV2ForCausalLM")
+@ModelBase.register("DeepseekV3ForCausalLM")
+class DeepseekV2Model(TextModel):
model_arch = gguf.MODEL_ARCH.DEEPSEEK2
def set_vocab(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("PLMForCausalLM")
-class PLMModel(Model):
+@ModelBase.register("PLMForCausalLM")
+class PLMModel(TextModel):
model_arch = gguf.MODEL_ARCH.PLM
def set_vocab(self):
super().prepare_tensors()
-@Model.register("T5WithLMHeadModel")
-@Model.register("T5ForConditionalGeneration")
-@Model.register("MT5ForConditionalGeneration")
-@Model.register("UMT5ForConditionalGeneration")
-class T5Model(Model):
+@ModelBase.register("T5WithLMHeadModel")
+@ModelBase.register("T5ForConditionalGeneration")
+@ModelBase.register("MT5ForConditionalGeneration")
+@ModelBase.register("UMT5ForConditionalGeneration")
+class T5Model(TextModel):
model_arch = gguf.MODEL_ARCH.T5
def __init__(self, *args, **kwargs):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("T5EncoderModel")
-class T5EncoderModel(Model):
+@ModelBase.register("T5EncoderModel")
+class T5EncoderModel(TextModel):
model_arch = gguf.MODEL_ARCH.T5ENCODER
def __init__(self, *args, **kwargs):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("JAISLMHeadModel")
-class JaisModel(Model):
+@ModelBase.register("JAISLMHeadModel")
+class JaisModel(TextModel):
model_arch = gguf.MODEL_ARCH.JAIS
def __init__(self, *args, **kwargs):
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
-@Model.register("Glm4ForCausalLM")
-class Glm4Model(Model):
+@ModelBase.register("Glm4ForCausalLM")
+class Glm4Model(TextModel):
model_arch = gguf.MODEL_ARCH.GLM4
def set_vocab(self):
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
-@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
-class ChatGLMModel(Model):
+@ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
+class ChatGLMModel(TextModel):
model_arch = gguf.MODEL_ARCH.CHATGLM
def set_vocab_chatglm3(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("NemotronForCausalLM")
-class NemotronModel(Model):
+@ModelBase.register("NemotronForCausalLM")
+class NemotronModel(TextModel):
model_arch = gguf.MODEL_ARCH.NEMOTRON
def set_vocab(self):
return [(self.map_tensor_name(name), data_torch)]
-@Model.register("ExaoneForCausalLM")
-class ExaoneModel(Model):
+@ModelBase.register("ExaoneForCausalLM")
+class ExaoneModel(TextModel):
model_arch = gguf.MODEL_ARCH.EXAONE
def set_gguf_parameters(self):
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
-@Model.register("GraniteForCausalLM")
+@ModelBase.register("GraniteForCausalLM")
class GraniteModel(LlamaModel):
"""Conversion for IBM's GraniteForCausalLM"""
model_arch = gguf.MODEL_ARCH.GRANITE
logger.info("gguf: (granite) logits_scale = %s", logits_scale)
-@Model.register("GraniteMoeForCausalLM")
+@ModelBase.register("GraniteMoeForCausalLM")
class GraniteMoeModel(GraniteModel):
"""Conversion for IBM's GraniteMoeForCausalLM"""
model_arch = gguf.MODEL_ARCH.GRANITE_MOE
return super().modify_tensors(data_torch, name, bid)
-@Model.register("BailingMoeForCausalLM")
-class BailingMoeModel(Model):
+@ModelBase.register("BailingMoeForCausalLM")
+class BailingMoeModel(TextModel):
model_arch = gguf.MODEL_ARCH.BAILINGMOE
def set_vocab(self):
raise ValueError(f"Unprocessed experts: {experts}")
-@Model.register("ChameleonForConditionalGeneration")
-@Model.register("ChameleonForCausalLM") # obsolete
-class ChameleonModel(Model):
+@ModelBase.register("ChameleonForConditionalGeneration")
+@ModelBase.register("ChameleonForCausalLM") # obsolete
+class ChameleonModel(TextModel):
model_arch = gguf.MODEL_ARCH.CHAMELEON
def set_gguf_parameters(self):
"--remote", action="store_true",
help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token.",
)
+ parser.add_argument(
+ "--mmproj", action="store_true",
+ help="(Experimental) Export multimodal projector (mmproj) for vision models. This will only work on some vision models. A prefix 'mmproj-' will be added to the output file name.",
+ )
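+    # example: invoking the script with `--mmproj` converts the model through its
+    # registered VisionModel subclass and prefixes the output file with "mmproj-"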
args = parser.parse_args()
if not args.print_supported_models and args.model is None:
if args.print_supported_models:
logger.error("Supported models:")
- Model.print_registered_models()
+ ModelBase.print_registered_models()
sys.exit(0)
if args.verbose:
logger.info(f"Loading model: {dir_model.name}")
- hparams = Model.load_hparams(dir_model)
+ hparams = ModelBase.load_hparams(dir_model)
+
+ if args.mmproj:
+ if "mmproj" not in fname_out.name:
+ fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
with torch.inference_mode():
output_type = ftype_map[args.outtype]
model_architecture = hparams["architectures"][0]
+ model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
try:
- model_class = Model.from_model_architecture(model_architecture)
+ model_class = ModelBase.from_model_architecture(model_architecture, model_type=model_type)
except NotImplementedError:
logger.error(f"Model {model_architecture} is not supported")
sys.exit(1)