#include "log.h"
#include "sampling.h"
#include "download.h"
+#include "preset.h"
// fix problem with std::min and std::max
#if defined(_WIN32)
}
}
+// Produce a flat file name from `fname`: every backslash and every forward slash
+// is replaced by an underscore, so the result contains no path separators.
+static std::string clean_file_name(const std::string & fname) {
+    std::string flattened(fname);
+    // same replacement order as before: backslashes first, then forward slashes
+    for (const char * separator : { "\\", "/" }) {
+        string_replace_all(flattened, separator, "_");
+    }
+    return flattened;
+}
+
+// Look for an optional `preset.ini` in the HF repo named by `params.model.hf_repo`
+// and, when one is obtained, apply its default section to `params`.
+//
+// returns true when the preset file was obtained (status in [200, 400)), false otherwise
+// throws std::runtime_error when the file was obtained but has no
+// [COMMON_PRESET_DEFAULT_NAME] section
+static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
+    GGML_ASSERT(!params.model.hf_repo.empty());
+
+    const std::string endpoint   = get_model_endpoint();
+    const std::string preset_url = endpoint + params.model.hf_repo + "/resolve/main/preset.ini";
+
+    // the file is stored in the local cache under a name derived from the repo
+    const std::string cache_name  = clean_file_name(params.model.hf_repo + "_preset.ini");
+    const std::string preset_path = fs_get_cache_file(cache_name);
+
+    const int  status     = common_download_file_single(preset_url, preset_path, params.hf_token, params.offline);
+    const bool has_preset = status >= 200 && status < 400;
+
+    if (!has_preset) {
+        // remote preset is optional, so we don't error out if not found
+        LOG_INF("%s", "no remote preset found, skipping\n");
+        return false;
+    }
+
+    LOG_INF("applying remote preset from %s\n", preset_url.c_str());
+    common_preset_context ctx(ex, /* only_remote_allowed */ true);
+    common_preset global; // unused for now
+    auto remote_presets = ctx.load_from_ini(preset_path, global);
+
+    const auto found = remote_presets.find(COMMON_PRESET_DEFAULT_NAME);
+    if (found == remote_presets.end()) {
+        throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
+    }
+
+    common_preset & preset = found->second;
+    LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline
+    preset.apply_to_params(params);
+
+    return true;
+}
+
struct handle_model_result {
bool found_mmproj = false;
common_params_model mmproj;
// make sure model path is present (for caching purposes)
if (model.path.empty()) {
// this is to avoid different repo having same file name, or same file name in different subdirs
- std::string filename = model.hf_repo + "_" + model.hf_file;
- // to make sure we don't have any slashes in the filename
- string_replace_all(filename, "/", "_");
+ std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file);
model.path = fs_get_cache_file(filename);
}
}
};
- std::set<std::string> seen_args;
+ auto parse_cli_args = [&]() {
+ std::set<std::string> seen_args;
- for (int i = 1; i < argc; i++) {
- const std::string arg_prefix = "--";
+ for (int i = 1; i < argc; i++) {
+ const std::string arg_prefix = "--";
- std::string arg = argv[i];
- if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
- std::replace(arg.begin(), arg.end(), '_', '-');
- }
- if (arg_to_options.find(arg) == arg_to_options.end()) {
- throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
- }
- if (!seen_args.insert(arg).second) {
- LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
- }
- auto & tmp = arg_to_options[arg];
- auto opt = *tmp.first;
- bool is_positive = tmp.second;
- if (opt.has_value_from_env()) {
- fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
- }
- try {
- if (opt.handler_void) {
- opt.handler_void(params);
- continue;
+ std::string arg = argv[i];
+ if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+ std::replace(arg.begin(), arg.end(), '_', '-');
}
- if (opt.handler_bool) {
- opt.handler_bool(params, is_positive);
- continue;
+ if (arg_to_options.find(arg) == arg_to_options.end()) {
+ throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
}
-
- // arg with single value
- check_arg(i);
- std::string val = argv[++i];
- if (opt.handler_int) {
- opt.handler_int(params, std::stoi(val));
- continue;
+ if (!seen_args.insert(arg).second) {
+ LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
+ }
+ auto & tmp = arg_to_options[arg];
+ auto opt = *tmp.first;
+ bool is_positive = tmp.second;
+ if (opt.has_value_from_env()) {
+ fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
}
- if (opt.handler_string) {
- opt.handler_string(params, val);
- continue;
+ try {
+ if (opt.handler_void) {
+ opt.handler_void(params);
+ continue;
+ }
+ if (opt.handler_bool) {
+ opt.handler_bool(params, is_positive);
+ continue;
+ }
+
+ // arg with single value
+ check_arg(i);
+ std::string val = argv[++i];
+ if (opt.handler_int) {
+ opt.handler_int(params, std::stoi(val));
+ continue;
+ }
+ if (opt.handler_string) {
+ opt.handler_string(params, val);
+ continue;
+ }
+
+ // arg with 2 values
+ check_arg(i);
+ std::string val2 = argv[++i];
+ if (opt.handler_str_str) {
+ opt.handler_str_str(params, val, val2);
+ continue;
+ }
+ } catch (std::exception & e) {
+ throw std::invalid_argument(string_format(
+ "error while handling argument \"%s\": %s\n\n"
+ "usage:\n%s\n\nto show complete usage, run with -h",
+ arg.c_str(), e.what(), opt.to_string().c_str()));
}
+ }
+ };
- // arg with 2 values
- check_arg(i);
- std::string val2 = argv[++i];
- if (opt.handler_str_str) {
- opt.handler_str_str(params, val, val2);
- continue;
- }
- } catch (std::exception & e) {
- throw std::invalid_argument(string_format(
- "error while handling argument \"%s\": %s\n\n"
- "usage:\n%s\n\nto show complete usage, run with -h",
- arg.c_str(), e.what(), opt.to_string().c_str()));
+ // parse the first time to get -hf option (used for remote preset)
+ parse_cli_args();
+
+ // maybe handle remote preset
+ if (!params.model.hf_repo.empty()) {
+ std::string cli_hf_repo = params.model.hf_repo;
+ bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex);
+
+ // special case: if hf_repo explicitly set by preset, we need to preserve it (ignore CLI value)
+ // this is useful when we have one HF repo pointing to other HF repos (one model - multiple GGUFs)
+ std::string preset_hf_repo = params.model.hf_repo;
+ bool preset_has_hf_repo = preset_hf_repo != cli_hf_repo;
+
+ if (has_preset) {
+ // re-parse CLI args to override preset values
+ parse_cli_args();
+ }
+
+ // preserve hf_repo from preset if needed
+ if (preset_has_hf_repo) {
+ params.model.hf_repo = preset_hf_repo;
}
}
return none;
}
+// Tells whether `status` is an acceptable HTTP status code:
+// anything from 200 up to (but not including) 400 is accepted, everything else is rejected.
+static bool is_http_status_ok(int status) {
+    const bool below_range = status < 200;
+    const bool above_range = status >= 400;
+    return !(below_range || above_range);
+}
+
#ifdef LLAMA_USE_CURL
//
}
// download one single file from remote URL to local path
-static bool common_download_file_single_online(const std::string & url,
+// returns status code or -1 on error
+static int common_download_file_single_online(const std::string & url,
const std::string & path,
const std::string & bearer_token,
const common_header_list & custom_headers) {
static const int max_attempts = 3;
static const int retry_delay_seconds = 2;
+
for (int i = 0; i < max_attempts; ++i) {
std::string etag;
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
if (remove(path.c_str()) != 0) {
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
- return false;
+ return -1;
}
}
if (std::filesystem::exists(path_temporary)) {
if (remove(path_temporary.c_str()) != 0) {
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
- return false;
+ return -1;
}
}
if (std::filesystem::exists(path)) {
if (remove(path.c_str()) != 0) {
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
- return false;
+ return -1;
}
}
}
long http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
- if (http_code < 200 || http_code >= 400) {
+
+ int status = static_cast<int>(http_code);
+ if (!is_http_status_ok(http_code)) {
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
- return false;
+ return status; // TODO: maybe only return on certain codes
}
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
- return false;
+ return -1;
}
+
+ return static_cast<int>(http_code);
} else {
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
- }
- break;
+ return 304; // Not Modified - fake cached response
+ }
}
- return true;
+ return -1; // max attempts reached
}
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
}
// download one single file from remote URL to local path
-static bool common_download_file_single_online(const std::string & url,
+// returns status code or -1 on error
+static int common_download_file_single_online(const std::string & url,
const std::string & path,
const std::string & bearer_token,
const common_header_list & custom_headers) {
LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
if (file_exists) {
LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
- return true;
+ return 304; // 304 Not Modified - fake cached response
}
+ // cannot use cached file: return the raw status code, or -1 when the HEAD request
+ // produced no response at all (`head` is empty in that case, so it must not be dereferenced)
+ return head ? head->status : -1;
+ // TODO: maybe retry only on certain codes
}
std::string etag;
if (file_exists) {
if (!should_download_from_scratch) {
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
- return true;
+ return 304; // 304 Not Modified - fake cached response
}
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
if (remove(path.c_str()) != 0) {
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
- return false;
+ return -1;
}
}
existing_size = std::filesystem::file_size(path_temporary);
} else if (remove(path_temporary.c_str()) != 0) {
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
- return false;
+ return -1;
}
}
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
- return false;
+ return -1;
}
if (!etag.empty()) {
write_etag(path, etag);
}
- break;
+
+ return head->status; // TODO: use actual GET status?
}
- return true;
+ return -1; // max attempts reached
}
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
-static bool common_download_file_single(const std::string & url,
- const std::string & path,
- const std::string & bearer_token,
- bool offline,
- const common_header_list & headers) {
+int common_download_file_single(const std::string & url,
+ const std::string & path,
+ const std::string & bearer_token,
+ bool offline,
+ const common_header_list & headers) {
if (!offline) {
return common_download_file_single_online(url, path, bearer_token, headers);
}
if (!std::filesystem::exists(path)) {
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
- return false;
+ return -1;
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
- return true;
+ return 304; // Not Modified - fake cached response
}
// download multiple files from remote URLs to local paths
std::async(
std::launch::async,
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
- return common_download_file_single(it.first, it.second, bearer_token, offline, headers);
+ const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
+ return is_http_status_ok(http_status);
},
item
)
return false;
}
- if (!common_download_file_single(model.url, model.path, bearer_token, offline, headers)) {
+ const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
+ if (!is_http_status_ok(http_status)) {
return false;
}
} else if (res_code == 401) {
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
} else {
- throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
+ throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str()));
}
// check response
std::string local_path = fs_get_cache_file(model_filename);
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
- if (!common_download_file_single(blob_url, local_path, token, false, {})) {
+ const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
+ if (!is_http_status_ok(http_status)) {
throw std::runtime_error("Failed to download Docker Model");
}
throw std::runtime_error("download functionality is not enabled in this build");
}
+// Definition used when download functionality is not enabled in the build:
+// it ignores all of its arguments and always throws std::runtime_error.
+int common_download_file_single(const std::string & /* url */,
+                                const std::string & /* path */,
+                                const std::string & /* bearer_token */,
+                                bool /* offline */,
+                                const common_header_list & /* headers */) {
+    static const char * const message = "download functionality is not enabled in this build";
+    throw std::runtime_error(message);
+}
+
+
#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
std::vector<common_cached_model_info> common_list_cached_models() {
// returns list of cached models
std::vector<common_cached_model_info> common_list_cached_models();
+// download a single file from url to local path
+// returns the HTTP status code (a file served from the local cache is reported as 304), or -1 on error
+int common_download_file_single(const std::string & url,
+ const std::string & path,
+ const std::string & bearer_token,
+ bool offline,
+ const common_header_list & headers = {});
+
// resolve and download model from Docker registry
// return local path to downloaded model file
std::string common_docker_resolve_model(const std::string & docker);
return str.substr(pos);
}
+// Build the set of keys that a remote preset may contain.
+//
+// only allow a subset of args for remote presets for security reasons
+// do not add more args unless absolutely necessary
+// args that output to files are strictly prohibited
+//
+// An option of `key_to_opt` is permitted when its key is listed below or when it is
+// flagged `is_sparam`. For each permitted option the result holds the key itself,
+// every arg spelling with leading dashes removed, and every env var name.
+static std::set<std::string> get_remote_preset_whitelist(const std::map<std::string, common_arg> & key_to_opt) {
+    static const std::set<std::string> allowed_options = {
+        "model-url",
+        "hf-repo",
+        "hf-repo-draft",
+        "hf-repo-v", // vocoder
+        "hf-file-v", // vocoder
+        "mmproj-url",
+        "pooling",
+        "jinja",
+        "batch-size",
+        "ubatch-size",
+        "cache-reuse",
+        // note: sampling params are automatically allowed by default
+        // negated args will be added automatically
+    };
+
+    std::set<std::string> whitelist;
+
+    for (const auto & [key, opt] : key_to_opt) {
+        const bool explicitly_listed = allowed_options.count(key) > 0;
+        if (!explicitly_listed && !opt.is_sparam) {
+            continue;
+        }
+
+        whitelist.insert(key);
+
+        // also add variant keys (args without leading dashes and env vars)
+        for (const auto & arg : opt.get_args()) {
+            whitelist.insert(rm_leading_dashes(arg));
+        }
+        for (const auto & env : opt.get_env()) {
+            whitelist.insert(env);
+        }
+    }
+
+    return whitelist;
+}
+
+
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
std::vector<std::string> args;
}
}
+// Apply every option stored in this preset to `params` by invoking the option's handler.
+// The handler is picked in this order: string, int (value parsed with std::stoi),
+// bool (value interpreted with common_arg_utils::is_truthy), two-value, void.
+// throws std::runtime_error for an option with a two-value handler (not supported yet)
+// aborts via GGML_ABORT when an option has no handler at all
+void common_preset::apply_to_params(common_params & params) const {
+    for (const auto & [arg, raw_value] : options) {
+        if (arg.handler_string) {
+            arg.handler_string(params, raw_value);
+            continue;
+        }
+        if (arg.handler_int) {
+            arg.handler_int(params, std::stoi(raw_value));
+            continue;
+        }
+        if (arg.handler_bool) {
+            arg.handler_bool(params, common_arg_utils::is_truthy(raw_value));
+            continue;
+        }
+        if (arg.handler_str_str) {
+            // not supported yet
+            throw std::runtime_error(string_format(
+                "%s: option with two values is not supported yet",
+                __func__
+            ));
+        }
+        if (arg.handler_void) {
+            // the stored value is not passed to a void handler
+            arg.handler_void(params);
+            continue;
+        }
+        GGML_ABORT("unknown handler type");
+    }
+}
+
+
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
std::map<std::string, std::map<std::string, std::string>> parsed;
return value;
}
-common_preset_context::common_preset_context(llama_example ex)
+common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed)
: ctx_params(common_params_parser_init(default_params, ex)) {
common_params_add_preset_options(ctx_params.options);
key_to_opt = get_map_key_opt(ctx_params);
+
+ // setup allowed keys if only_remote_allowed is true
+ if (only_remote_allowed) {
+ filter_allowed_keys = true;
+ allowed_keys = get_remote_preset_whitelist(key_to_opt);
+ }
}
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
LOG_DBG("loading preset: %s\n", preset.name.c_str());
for (const auto & [key, value] : section.second) {
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
+ if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
+ throw std::runtime_error(string_format(
+ "option '%s' is not allowed in remote presets",
+ key.c_str()
+ ));
+ }
if (key_to_opt.find(key) != key_to_opt.end()) {
const auto & opt = key_to_opt.at(key);
if (is_bool_arg(opt)) {
#include <string>
#include <vector>
#include <map>
+#include <set>
//
// INI preset parser and writer
// merge another preset into this one, overwriting existing options
void merge(const common_preset & other);
+
+ // apply preset options to common_params
+ void apply_to_params(common_params & params) const;
};
// interface for multiple presets in one file
common_params default_params; // unused for now
common_params_context ctx_params;
std::map<std::string, common_arg> key_to_opt;
- common_preset_context(llama_example ex);
+
+ bool filter_allowed_keys = false;
+ std::set<std::string> allowed_keys;
+
+ // if only_remote_allowed is true, only accept whitelisted keys
+ common_preset_context(llama_example ex, bool only_remote_allowed = false);
// load presets from INI file
common_presets load_from_ini(const std::string & path, common_preset & global) const;
--- /dev/null
+# llama.cpp INI Presets
+
+## Introduction
+
+The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp.
+
+### Using Presets with the Server
+
+When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details.
+
+### Using a Remote Preset
+
+> [!NOTE]
+>
+> This feature is currently only supported via the `-hf` option.
+
+For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model.
+
+Example:
+
+```ini
+hf-repo-draft = username/my-draft-model-GGUF
+temp = 0.5
+top-k = 20
+top-p = 0.95
+```
+
+For security reasons, only certain options are allowed in a remote preset; a `preset.ini` that uses any other option is rejected with an error. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options.
+
+Example usage:
+
+Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above:
+
+```sh
+llama-cli -hf username/my-model-with-preset
+
+# This is equivalent to:
+llama-cli -hf username/my-model-with-preset \
+ --hf-repo-draft username/my-draft-model-GGUF \
+ --temp 0.5 \
+ --top-k 20 \
+ --top-p 0.95
+```
+
+You can also override preset arguments by specifying them on the command line:
+
+```sh
+# Force temp = 0.1, overriding the preset value
+llama-cli -hf username/my-model-with-preset --temp 0.1
+```
+
+If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo for each preset. Each HF repo should contain a `preset.ini` file that references the actual model(s). When the preset sets `hf-repo`, that repository is used instead of the one passed via `-hf`:
+
+```ini
+hf-repo = user/my-model-main
+hf-repo-draft = user/my-model-draft
+temp = 0.8
+ctx-size = 1024
+; (and other configurations)
+```