From: Aaron Teo
Date: Mon, 22 Dec 2025 12:03:49 +0000 (+0800)
Subject: convert: rework ftype heuristics (#18214)
X-Git-Tag: upstream/0.0.7599~94
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=a28310488c3fc68511a0d404d9836b0eae4e498b;p=pkg%2Fggml%2Fsources%2Fllama.cpp

convert: rework ftype heuristics (#18214)

* convert: rework ftype heuristics

Signed-off-by: Aaron Teo

convert: fix type-check

Signed-off-by: Aaron Teo

convert: bring back heuristics comment

Signed-off-by: Aaron Teo

* convert: revert to using first tensor

Signed-off-by: Aaron Teo

* convert: rework heuristics logic

Signed-off-by: Aaron Teo

* convert: rm redundant float32 check

Co-authored-by: Sigbjørn Skjæret

---------

Signed-off-by: Aaron Teo
Co-authored-by: Sigbjørn Skjæret
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 432be599..22f703e6 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -141,16 +141,24 @@ class ModelBase:
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
 
-        # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
+        # Apply heuristics to figure out typical tensor encoding based on first tensor's dtype
+        # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
         if self.ftype == gguf.LlamaFileType.GUESSED:
-            # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
-            if first_tensor.dtype == torch.float16:
-                logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+            for _, tensor in self.get_tensors():
+                if tensor.dim() < 2:
+                    continue
+
+                if tensor.dtype == torch.bfloat16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                    logger.info("heuristics detected bfloat16 tensor dtype, setting --outtype bf16")
+                    break
+                elif tensor.dtype == torch.float16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                    logger.info("heuristics detected float16 tensor dtype, setting --outtype f16")
+                    break
             else:
-                logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                logger.info("heuristics unable to detect tensor dtype, defaulting to --outtype f16")
 
         self.dequant_model()
@@ -10557,8 +10565,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
-        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="auto",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
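
For illustration, a minimal standalone sketch of the reworked heuristic,
assuming plain torch tensors; guess_outtype is a hypothetical helper written
for this note, not part of the patch:

    import torch

    def guess_outtype(tensors):
        # Scan tensors in order; skip 1D tensors (e.g. biases, norm weights),
        # whose dtype may not reflect the checkpoint's main precision.
        for tensor in tensors:
            if tensor.dim() < 2:
                continue
            if tensor.dtype == torch.bfloat16:
                return "bf16"  # first 16-bit weight tensor found decides
            if tensor.dtype == torch.float16:
                return "f16"
        # mirrors the for-else in the patch: no bf16/f16 tensor was found
        return "f16"

    # A float32 1D tensor is skipped; the bf16 matrix decides:
    print(guess_outtype([torch.zeros(8), torch.zeros(4, 4, dtype=torch.bfloat16)]))  # bf16

Note that a 2D float32 tensor does not stop the scan; the loop keeps looking
for a 16-bit tensor and only falls back to f16 if none exists.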