params.i_chunk = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+ add_opt(common_arg(
+ {"--parse-special"},
+ string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
+ [](common_params & params) {
+ params.parse_special = true;
+ }
+ ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
add_opt(common_arg(
{"-pps"},
string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),
bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity
+ bool parse_special = false; // whether to parse special tokens during imatrix tokenization
// cvector-generator params
int n_pca_batch = 100;
LOG("\n %s \\\n"
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
- " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
+ " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
+ " [--parse-special]\n" , argv[0]);
LOG("\n");
}
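For reference, a typical invocation of the imatrix tool with the new flag might look like the following (the model and data file names are placeholders, not from this patch):

    llama-imatrix -m model.gguf -f chat-data.txt -o imatrix.dat --parse-special

This lets calibration text containing chat-template markup (e.g. <|im_start|>) be tokenized with its special tokens intact instead of as literal text.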
auto tim1 = std::chrono::high_resolution_clock::now();
LOG_INF("%s: tokenizing the input ..\n", __func__);
- std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
auto tim2 = std::chrono::high_resolution_clock::now();
LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());