add_opt(common_arg(
{"--chat-template-kwargs"}, "STRING",
string_format("sets additional params for the json template parser"),
- [](common_params & params, const std::string & value) {
+ [](common_params & params, const std::string & value) {
auto parsed = json::parse(value);
for (const auto & item : parsed.items()) {
params.default_template_kwargs[item.key()] = item.value().dump();
common_log_set_file(common_log_main(), value.c_str());
}
));
- add_opt(common_arg({ "--log-colors" }, "[on|off|auto]",
- "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
- "'auto' enables colors when output is to a terminal",
- [](common_params &, const std::string & value) {
- if (is_truthy(value)) {
- common_log_set_colors(common_log_main(), LOG_COLORS_ENABLED);
- } else if (is_falsey(value)) {
- common_log_set_colors(common_log_main(), LOG_COLORS_DISABLED);
- } else if (is_autoy(value)) {
- common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
- } else {
- throw std::invalid_argument(
- string_format("error: unkown value for --log-colors: '%s'\n", value.c_str()));
- }
- }).set_env("LLAMA_LOG_COLORS"));
+ add_opt(common_arg(
+ {"--log-colors"}, "[on|off|auto]",
+ "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
+ "'auto' enables colors when output is to a terminal",
+ [](common_params &, const std::string & value) {
+ if (is_truthy(value)) {
+ common_log_set_colors(common_log_main(), LOG_COLORS_ENABLED);
+ } else if (is_falsey(value)) {
+ common_log_set_colors(common_log_main(), LOG_COLORS_DISABLED);
+ } else if (is_autoy(value)) {
+ common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
+ } else {
+ throw std::invalid_argument(
+ string_format("error: unkown value for --log-colors: '%s'\n", value.c_str()));
+ }
+ }
+ ).set_env("LLAMA_LOG_COLORS"));
add_opt(common_arg(
{"-v", "--verbose", "--log-verbose"},
"Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
}
).set_examples({LLAMA_EXAMPLE_TTS}));
- // model-specific
+ add_opt(common_arg(
+ {"--diffusion-steps"}, "N",
+ string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
+ [](common_params & params, int value) { params.diffusion.steps = value; }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-visual"},
+ string_format("enable visual diffusion mode (show progressive generation) (default: %s)", params.diffusion.visual_mode ? "true" : "false"),
+ [](common_params & params) { params.diffusion.visual_mode = true; }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-eps"}, "F",
+ string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
+ [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-algorithm"}, "N",
+ string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)", params.diffusion.algorithm),
+ [](common_params & params, int value) { params.diffusion.algorithm = value; }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-alg-temp"}, "F",
+ string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+ [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-block-length"}, "N",
+ string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+ [](common_params & params, int value) { params.diffusion.block_length = value; }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-cfg-scale"}, "F",
+ string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+ [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ {"--diffusion-add-gumbel-noise"}, "F",
+ string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+ [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ add_opt(common_arg(
+ { "-lr", "--learning-rate" }, "ALPHA",
+ string_format("adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)", (double) params.lr.lr0),
+ [](common_params & params, const std::string & value) { params.lr.lr0 = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg({ "-lr-min", "--learning-rate-min" }, "ALPHA",
+ string_format("(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
+ (double) params.lr.lr_min),
+ [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"-decay-epochs", "--learning-rate-decay-epochs"}, "ALPHA",
+ string_format("(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)", (double) params.lr.decay_epochs),
+ [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"-wd", "--weight-decay"}, "WD",
+ string_format("adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).", (double) params.lr.wd),
+ [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"-val-split", "--val-split"}, "FRACTION",
+ string_format("fraction of data to use as validation set for training (default: %.2g).", (double) params.val_split),
+ [](common_params & params, const std::string & value) { params.val_split = std::stof(value); }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"-epochs", "--epochs"}, "N",
+ string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
+ [](common_params & params, int epochs) { params.lr.epochs = epochs; }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"-opt", "--optimizer"}, "sgd|adamw", "adamw or sgd",
+ [](common_params & params, const std::string & name) {
+ params.optimizer = common_opt_get_optimizer(name.c_str());
+ if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
+ throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
+ }
+ }
+ ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+
+ // presets
add_opt(common_arg(
{"--tts-oute-default"},
string_format("use default OuteTTS models (note: can download weights from the internet)"),
).set_examples({LLAMA_EXAMPLE_TTS}));
add_opt(common_arg(
- {"--embd-bge-small-en-default"},
- string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
- [](common_params & params) {
- params.model.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
- params.model.hf_file = "bge-small-en-v1.5-q8_0.gguf";
- params.embd_normalize = 2;
- params.n_ctx = 512;
- params.verbose_prompt = true;
- params.embedding = true;
- }
- ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
-
- add_opt(common_arg(
- {"--embd-e5-small-en-default"},
- string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
- [](common_params & params) {
- params.model.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
- params.model.hf_file = "e5-small-v2-q8_0.gguf";
- params.embd_normalize = 2;
- params.n_ctx = 512;
- params.verbose_prompt = true;
- params.embedding = true;
- }
- ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
-
- add_opt(common_arg(
- {"--embd-gte-small-default"},
- string_format("use default gte-small model (note: can download weights from the internet)"),
+ {"--embd-gemma-default"},
+ string_format("use default EmbeddingGemma model (note: can download weights from the internet)"),
[](common_params & params) {
- params.model.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
- params.model.hf_file = "gte-small-q8_0.gguf";
- params.embd_normalize = 2;
- params.n_ctx = 512;
+ params.model.hf_repo = "ggml-org/embeddinggemma-300M-qat-q4_0-GGUF";
+ params.model.hf_file = "embeddinggemma-300M-qat-Q4_0.gguf";
+ params.port = 8011;
+ params.n_ubatch = 2048;
+ params.n_batch = 2048;
+ params.n_parallel = 32;
+ params.n_ctx = 2048*params.n_parallel;
params.verbose_prompt = true;
params.embedding = true;
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
- { "--diffusion-steps" }, "N",
- string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
- [](common_params & params, int value) { params.diffusion.steps = value; }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
- add_opt(common_arg(
- { "--diffusion-visual" },
- string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
- params.diffusion.visual_mode ? "true" : "false"),
- [](common_params & params) { params.diffusion.visual_mode = true; }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ {"--gpt-oss-20b-default"},
+ string_format("use gpt-oss-20b (note: can download weights from the internet)"),
+ [](common_params & params) {
+ params.model.hf_repo = "ggml-org/gpt-oss-20b-GGUF";
+ params.model.hf_file = "gpt-oss-20b-mxfp4.gguf";
+ params.port = 8013;
+ params.n_ubatch = 2048;
+ params.n_batch = 32768;
+ params.n_parallel = 2;
+ params.n_ctx = 131072*params.n_parallel;
+ params.sampling.temp = 1.0f;
+ params.sampling.top_p = 1.0f;
+ params.sampling.top_k = 0;
+ params.sampling.min_p = 0.01f;
+ params.use_jinja = true;
+ //params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
- { "--diffusion-eps" }, "F",
- string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
- [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
- add_opt(common_arg(
- { "--diffusion-algorithm" }, "N",
- string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)",
- params.diffusion.algorithm),
- [](common_params & params, int value) { params.diffusion.algorithm = value; }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
- add_opt(common_arg(
- { "--diffusion-alg-temp" }, "F",
- string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
- [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+ {"--gpt-oss-120b-default"},
+ string_format("use gpt-oss-120b (note: can download weights from the internet)"),
+ [](common_params & params) {
+ params.model.hf_repo = "ggml-org/gpt-oss-120b-GGUF";
+ params.port = 8013;
+ params.n_ubatch = 2048;
+ params.n_batch = 32768;
+ params.n_parallel = 2;
+ params.n_ctx = 131072*params.n_parallel;
+ params.sampling.temp = 1.0f;
+ params.sampling.top_p = 1.0f;
+ params.sampling.top_k = 0;
+ params.sampling.min_p = 0.01f;
+ params.use_jinja = true;
+ //params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
- { "--diffusion-block-length" }, "N",
- string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
- [](common_params & params, int value) { params.diffusion.block_length = value; }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
- add_opt(common_arg(
- { "--diffusion-cfg-scale" }, "F",
- string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
- [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
- add_opt(common_arg(
- { "--diffusion-add-gumbel-noise" }, "F",
- string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
- [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
- ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-
+ {"--vision-gemma-4b-default"},
+ string_format("use Gemma 3 4B QAT (note: can download weights from the internet)"),
+ [](common_params & params) {
+ params.model.hf_repo = "ggml-org/gemma-3-4b-it-qat-GGUF";
+ params.port = 8014;
+ params.n_ctx = 0;
+ params.use_jinja = true;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
- add_opt(
- common_arg({ "-lr", "--learning-rate" }, "ALPHA",
- string_format(
- "adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)",
- (double) params.lr.lr0),
- [](common_params & params, const std::string & value) { params.lr.lr0 = std::stof(value); })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(
- common_arg({ "-lr-min", "--learning-rate-min" }, "ALPHA",
- string_format(
- "(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
- (double) params.lr.lr_min),
- [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(
- common_arg({ "-decay-epochs", "--learning-rate-decay-epochs" }, "ALPHA",
- string_format(
- "(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)",
- (double) params.lr.decay_epochs),
- [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(common_arg(
- { "-wd", "--weight-decay" }, "WD",
- string_format(
- "adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).",
- (double) params.lr.wd),
- [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(common_arg({ "-val-split", "--val-split" }, "FRACTION",
- string_format("fraction of data to use as validation set for training (default: %.2g).",
- (double) params.val_split),
- [](common_params & params, const std::string & value) { params.val_split = std::stof(value); })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(common_arg({ "-epochs", "--epochs" }, "N",
- string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
- [](common_params & params, int epochs) { params.lr.epochs = epochs; })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
- add_opt(common_arg({ "-opt", "--optimizer" }, "sgd|adamw", "adamw or sgd",
- [](common_params & params, const std::string & name) {
- params.optimizer = common_opt_get_optimizer(name.c_str());
- if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
- throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
- }
- })
- .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+ add_opt(common_arg(
+ {"--vision-gemma-12b-default"},
+ string_format("use Gemma 3 12B QAT (note: can download weights from the internet)"),
+ [](common_params & params) {
+ params.model.hf_repo = "ggml-org/gemma-3-12b-it-qat-GGUF";
+ params.port = 8014;
+ params.n_ctx = 0;
+ params.use_jinja = true;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER}));
return ctx_arg;
}