         self.model_name = model_name
         self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
-        # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
+        # Apply heuristics to figure out typical tensor encoding based on first tensor's dtype
+        # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
         if self.ftype == gguf.LlamaFileType.GUESSED:
-            # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
-            _, first_tensor = next(self.get_tensors())
-            if first_tensor.dtype == torch.float16:
-                logger.info(f"choosing --outtype f16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+            for _, tensor in self.get_tensors():
+                if tensor.dim() < 2:
+                    continue
+
+                if tensor.dtype == torch.bfloat16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                    logger.info("heuristics detected bfloat16 tensor dtype, setting --outtype bf16")
+                    break
+                elif tensor.dtype == torch.float16:
+                    self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                    logger.info("heuristics detected float16 tensor dtype, setting --outtype f16")
+                    break
             else:
-                logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
-                self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+                self.ftype = gguf.LlamaFileType.MOSTLY_F16
+                logger.info("heuristics unable to detect tensor dtype, defaulting to --outtype f16")
         self.dequant_model()
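The replacement heuristic leans on Python's for/else: the else branch now belongs to the for loop and runs only when the loop finishes without a break, i.e. when no tensor with two or more dimensions and a 16-bit float dtype was found, in which case the conversion falls back to F16. Skipping tensors with dim() < 2 keeps 1-D tensors such as biases and norm weights, whose dtype does not always match the main weights, from deciding the output type. A minimal standalone sketch of the same control flow (guess_outtype and the toy tensors are illustrative, not part of the patch):

import torch

def guess_outtype(tensors: list[torch.Tensor]) -> str:
    # Mirror the patch: the first tensor with >= 2 dims and a 16-bit float dtype decides.
    outtype = None
    for tensor in tensors:
        if tensor.dim() < 2:
            continue  # skip 1-D biases/norms, whose dtype may differ from the weights
        if tensor.dtype == torch.bfloat16:
            outtype = "bf16"
            break
        elif tensor.dtype == torch.float16:
            outtype = "f16"
            break
    else:
        # for/else: runs only when the loop finished without hitting a break
        outtype = "f16"
    return outtype

# the 1-D f32 norm vector is skipped; the bf16 weight matrix decides
print(guess_outtype([torch.zeros(16), torch.zeros(16, 16, dtype=torch.bfloat16)]))  # bf16
# an all-f32 model never breaks out of the loop and falls back to f16
print(guess_outtype([torch.zeros(16, 16)]))  # f16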
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
)
parser.add_argument(
- "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
- help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+ "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="auto",
+ help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type",
)
parser.add_argument(
"--bigendian", action="store_true",