From: Aaron Teo
Date: Mon, 22 Dec 2025 12:03:49 +0000 (+0800)
Subject: convert: rework ftype heuristics (#18214)
X-Git-Tag: upstream/0.0.7599~94
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a28310488c3fc68511a0d404d9836b0eae4e498b;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert: rework ftype heuristics (#18214)

* convert: rework ftype heuristics

Signed-off-by: Aaron Teo

convert: fix type-check

Signed-off-by: Aaron Teo

convert: bring back heuristics comment

Signed-off-by: Aaron Teo

* convert: revert to using first tensor

Signed-off-by: Aaron Teo

* convert: rework heuristics logic

Signed-off-by: Aaron Teo

* convert: rm redundant float32 check

Co-authored-by: Sigbjørn Skjæret

---------

Signed-off-by: Aaron Teo
Co-authored-by: Sigbjørn Skjæret
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 432be599..22f703e6 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -141,16 +141,24 @@ class ModelBase:
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
 
-        # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
+        # Apply heuristics to figure out typical tensor encoding based on first tensor's dtype
+        # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
         if self.ftype == gguf.LlamaFileType.GUESSED:
-            # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
-            if first_tensor.dtype == torch.float16:
-                logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+            for _, tensor in self.get_tensors():
+                if tensor.dim() < 2:
+                    continue
+
+                if tensor.dtype == torch.bfloat16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                    logger.info("heuristics detected bfloat16 tensor dtype, setting --outtype bf16")
+                    break
+                elif tensor.dtype == torch.float16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                    logger.info("heuristics detected float16 tensor dtype, setting --outtype f16")
+                    break
             else:
-                logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                logger.info("heuristics unable to detect tensor dtype, defaulting to --outtype f16")
 
         self.dequant_model()
@@ -10557,8 +10565,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
-        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="auto",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
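
For illustration, a minimal standalone sketch of the reworked heuristic,
assuming plain torch tensors; guess_outtype is a hypothetical helper written
for this note, not part of the patch:

    import torch

    def guess_outtype(tensors):
        # Scan tensors in order; skip 1D tensors (e.g. biases, norm weights),
        # whose dtype may not reflect the checkpoint's main precision.
        for tensor in tensors:
            if tensor.dim() < 2:
                continue
            if tensor.dtype == torch.bfloat16:
                return "bf16"  # first 16-bit weight tensor found decides
            if tensor.dtype == torch.float16:
                return "f16"
        # mirrors the for-else in the patch: no bf16/f16 tensor was found
        return "f16"

    # A float32 1D tensor is skipped; the bf16 matrix decides:
    print(guess_outtype([torch.zeros(8), torch.zeros(4, 4, dtype=torch.bfloat16)]))  # bf16

Note that a 2D float32 tensor does not stop the scan; the loop keeps looking
for a 16-bit tensor and only falls back to f16 if none exists.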