imatrix : warn when GGUF imatrix is saved without .gguf suffix (#15076)

author compilade <redacted>

Mon, 4 Aug 2025 21:26:52 +0000 (17:26 -0400)

committer GitHub <redacted>

Mon, 4 Aug 2025 21:26:52 +0000 (23:26 +0200)
author compilade <redacted>
Mon, 4 Aug 2025 21:26:52 +0000 (17:26 -0400)
committer GitHub <redacted>
Mon, 4 Aug 2025 21:26:52 +0000 (23:26 +0200)
diff --git a/common/arg.cpp b/common/arg.cpp

index 0b216ec0d0c02b5bb0297d351215127048637c60..a02db0b0a0db6a1af4d61419021f67a6b8a5fb51 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2649,10 +2649,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
      add_opt(common_arg(
          {"--output-format"}, "{gguf,dat}",
-        string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
+        string_format("output format for imatrix file (default: %s)", params.imat_dat > 0 ? "dat" : "gguf"),
          [](common_params & params, const std::string & value) {
-            /**/ if (value == "gguf") { params.imat_dat = false; }
-            else if (value == "dat")  { params.imat_dat = true;  }
+            /**/ if (value == "gguf") { params.imat_dat = -1; }
+            else if (value == "dat")  { params.imat_dat = 1;  }
              else { throw std::invalid_argument("invalid output format"); }
          }
      ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
diff --git a/common/common.h b/common/common.h

index 6b900b795f438b3e7b39ca6b326e1bc99f15e9c7..6a47dac4b9dbdb87387c1ae9338fe5edc80fe148 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -439,7 +439,7 @@ struct common_params {
      int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
      int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
      int32_t i_chunk     =  0; // start processing from this chunk
-    bool    imat_dat    = false; // whether the legacy imatrix.dat format should be output
+    int8_t  imat_dat    =  0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)
  
      bool process_output  = false; // collect data for the output tensor
      bool compute_ppl     = true;  // whether to compute perplexity
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp

index 9ceceb478df4f4b9e5aa3de3198055abf2e34bb3..f28a036deebe37b9dade96b673fc245658233e7b 100644 (file)
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -506,13 +506,17 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
  
  void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
      auto fname = m_params.out_file;
-    bool use_legacy_format = m_params.imat_dat;
+    int8_t use_legacy_format = m_params.imat_dat;
  
-    if (use_legacy_format) {
+    if (use_legacy_format > 0) {
          this->save_imatrix_legacy(n_chunk);
          return;
      }
-    // else, default to GGUF imatrix
+    // only warn when `--output-format gguf` is not specified
+    if (use_legacy_format == 0 && !string_ends_with(fname, ".gguf")) {
+        LOG_WRN("\n%s: saving imatrix using GGUF format with a different suffix than .gguf\n", __func__);
+        LOG_WRN("%s: if you want the previous imatrix format, use --output-format dat\n", __func__);
+    }
  
      if (n_chunk > 0) {
          fname += ".at_";
author	compilade <redacted>
	Mon, 4 Aug 2025 21:26:52 +0000 (17:26 -0400)
committer	GitHub <redacted>
	Mon, 4 Aug 2025 21:26:52 +0000 (23:26 +0200)
common/arg.cpp		patch | blob | history
common/common.h		patch | blob | history
tools/imatrix/imatrix.cpp		patch | blob | history