params.i_chunk = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+ add_opt(common_arg(
+ {"--parse-special"},
+ string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
+ [](common_params & params) {
+ params.parse_special = true;
+ }
+ ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"-pps"},
string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),
bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
+ bool parse_special = false; // whether to parse special tokens during imatrix tokenization
// cvector-generator params
int n_pca_batch = 100;
LOG("\n %s \\\n"
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
- " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
+ " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
+ " [--parse-special]\n" , argv[0]);
LOG("\n");
}
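For reference, a typical invocation of the imatrix tool with the new flag might look like the following (the model and data file names are placeholders, not from this patch):

    llama-imatrix -m model.gguf -f chat-data.txt -o imatrix.dat --parse-special

This lets calibration text containing chat-template markup (e.g. <|im_start|>) be tokenized with its special tokens intact instead of as literal text.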
auto tim1 = std::chrono::high_resolution_clock::now();
LOG_INF("%s: tokenizing the input ..\n", __func__);
- std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
auto tim2 = std::chrono::high_resolution_clock::now();
LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());