imatrix : use GGUF by default (#14842)

author compilade <redacted>

Sun, 3 Aug 2025 20:00:05 +0000 (16:00 -0400)

committer GitHub <redacted>

Sun, 3 Aug 2025 20:00:05 +0000 (22:00 +0200)
author compilade <redacted>
Sun, 3 Aug 2025 20:00:05 +0000 (16:00 -0400)
committer GitHub <redacted>
Sun, 3 Aug 2025 20:00:05 +0000 (22:00 +0200)
diff --git a/common/arg.cpp b/common/arg.cpp

index cd853119131e9cf437b62c625a3c110d11d4d558..0b216ec0d0c02b5bb0297d351215127048637c60 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2647,6 +2647,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
              params.n_out_freq = value;
          }
      ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--output-format"}, "{gguf,dat}",
+        string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
+        [](common_params & params, const std::string & value) {
+            /**/ if (value == "gguf") { params.imat_dat = false; }
+            else if (value == "dat")  { params.imat_dat = true;  }
+            else { throw std::invalid_argument("invalid output format"); }
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
      add_opt(common_arg(
          {"--save-frequency"}, "N",
          string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
diff --git a/common/common.h b/common/common.h

index b8b01a7e99790cc11e5696ef952b85dbe7476d98..6b900b795f438b3e7b39ca6b326e1bc99f15e9c7 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -439,6 +439,7 @@ struct common_params {
      int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
      int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
      int32_t i_chunk     =  0; // start processing from this chunk
+    bool    imat_dat    = false; // whether the legacy imatrix.dat format should be output
  
      bool process_output  = false; // collect data for the output tensor
      bool compute_ppl     = true;  // whether to compute perplexity
diff --git a/tools/imatrix/README.md b/tools/imatrix/README.md

index 7417a2dec9e6cad121ace63dc8c0c63f955af1b5..4505cb4ce8c7d2f6e211098b7cfcb90562b7fb94 100644 (file)
--- a/tools/imatrix/README.md
+++ b/tools/imatrix/README.md
@@ -7,7 +7,7 @@ More information is available in <https://github.com/ggml-org/llama.cpp/pull/486
  
  ```
  ./llama-imatrix \
-    -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \
+    -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \
      [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \
      [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \
      [--show-statistics] [...]
@@ -20,6 +20,7 @@ The parameters in square brackets are optional and have the following meaning:
  * `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
  * `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used.
  * `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks)
+* `--output-format` specifies the output format of the generated imatrix file: either `gguf` or `dat` (the legacy format). Defaults to `gguf`. The format is selected only by this option, not by the suffix of the output file name.
  * `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never)
  * `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
  * `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.
@@ -45,14 +46,19 @@ Recent versions of `llama-imatrix` store data in GGUF format by default. For the
  
  ```bash
  # generate and save the imatrix using legacy format
-./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix-legcy-format.dat -ngl 99
+./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix-legacy-format.dat -ngl 99
  ```
  
  ```bash
-# covert legacy (binary) imatrix format to new (GGUF) format
+# convert legacy (binary) imatrix format to new (GGUF) format
  ./llama-imatrix --in-file imatrix-legacy-format.dat -o imatrix-new-format.gguf
  ```
  
+```bash
+# convert new (GGUF) imatrix format to legacy (binary) format
+./llama-imatrix --in-file imatrix-new-format.gguf --output-format dat -o imatrix-legacy-format.dat
+```
+
  ```bash
  # combine existing imatrices
  ./llama-imatrix --in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf -o imatrix-combined.gguf
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp

index f5262e5e83da9bacd05eae3c818347e4271e118a..9ceceb478df4f4b9e5aa3de3198055abf2e34bb3 100644 (file)
--- a/tools/imatrix/imatrix.cpp
+++ b/tools/imatrix/imatrix.cpp
@@ -26,7 +26,7 @@
  static void print_usage(int, char ** argv) {
      LOG("\nexample usage:\n");
      LOG("\n    %s \\\n"
-            "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
+            "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
              "       [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
              "       [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
              "       [--show-statistics] [...]\n" , argv[0]);
@@ -506,13 +506,13 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
  
  void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
      auto fname = m_params.out_file;
+    bool use_legacy_format = m_params.imat_dat;
  
-    // TODO: use the new format in more cases
-    if (!string_ends_with(fname, ".gguf")) {
-        LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__);
+    if (use_legacy_format) {
          this->save_imatrix_legacy(n_chunk);
          return;
      }
+    // else, default to GGUF imatrix
  
      if (n_chunk > 0) {
          fname += ".at_";
author	compilade <redacted>
	Sun, 3 Aug 2025 20:00:05 +0000 (16:00 -0400)
committer	GitHub <redacted>
	Sun, 3 Aug 2025 20:00:05 +0000 (22:00 +0200)
common/arg.cpp		patch \| blob \| history
common/common.h		patch \| blob \| history
tools/imatrix/README.md		patch \| blob \| history
tools/imatrix/imatrix.cpp		patch \| blob \| history