params.n_out_freq = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+ add_opt(common_arg(
+ {"--output-format"}, "{gguf,dat}",
+ string_format("output format for imatrix file (default: %s)", params.imat_dat ? "dat" : "gguf"),
+ [](common_params & params, const std::string & value) {
+ /**/ if (value == "gguf") { params.imat_dat = false; }
+ else if (value == "dat") { params.imat_dat = true; }
+ else { throw std::invalid_argument("invalid output format"); }
+ }
+ ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"--save-frequency"}, "N",
string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
int32_t i_chunk = 0; // start processing from this chunk
+ bool imat_dat = false; // whether the legacy imatrix.dat format should be output
bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
```
./llama-imatrix \
- -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \
+ -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \
[--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \
[--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \
[--show-statistics] [...]
* `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
* `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing `imatrix.gguf` is used.
* `-ofreq | --output-frequency` specifies how often the so far computed result is saved to disk. Default is 10 (i.e., every 10 chunks)
+* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf".
* `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never)
* `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
* `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.
```bash
# generate and save the imatrix using legacy format
-./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt -o imatrix-legcy-format.dat -ngl 99
+./llama-imatrix -m ggml-model-f16.gguf -f calibration-data.txt --output-format dat -o imatrix-legcy-format.dat -ngl 99
```
```bash
-# covert legacy (binary) imatrix format to new (GGUF) format
+# convert legacy (binary) imatrix format to new (GGUF) format
./llama-imatrix --in-file imatrix-legacy-format.dat -o imatrix-new-format.gguf
```
+```bash
+# convert new (GGUF) imatrix format to legacy (binary) format
+./llama-imatrix --in-file imatrix-new-format.gguf --output-format dat -o imatrix-legacy-format.dat
+```
+
```bash
# combine existing imatrices
./llama-imatrix --in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf -o imatrix-combined.gguf
static void print_usage(int, char ** argv) {
LOG("\nexample usage:\n");
LOG("\n %s \\\n"
- " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
+ " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
" [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
" [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
" [--show-statistics] [...]\n" , argv[0]);
void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
auto fname = m_params.out_file;
+ bool use_legacy_format = m_params.imat_dat;
- // TODO: use the new format in more cases
- if (!string_ends_with(fname, ".gguf")) {
- LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__);
+ if (use_legacy_format) {
this->save_imatrix_legacy(n_chunk);
return;
}
+ // else, default to GGUF imatrix
if (n_chunk > 0) {
fname += ".at_";