common: support remote preset (#18520)

author Xuan-Son Nguyen <redacted>

Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)

committer GitHub <redacted>

Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)
author Xuan-Son Nguyen <redacted>
Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)
committer GitHub <redacted>
Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)
diff --git a/common/arg.cpp b/common/arg.cpp

index 9c0e6fbe789214bb9ec030fa685080fba93e54f7..72750a3cba0a3e764bf5d36d1bbf150e69b59664 100644 (file)
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -6,6 +6,7 @@
  #include "log.h"
  #include "sampling.h"
  #include "download.h"
+#include "preset.h"
  
  // fix problem with std::min and std::max
  #if defined(_WIN32)
@@ -268,6 +269,46 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector
      }
  }
  
+static std::string clean_file_name(const std::string & fname) {
+    std::string clean_fname = fname;
+    string_replace_all(clean_fname, "\\", "_");
+    string_replace_all(clean_fname, "/", "_");
+    return clean_fname;
+}
+
+static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
+    GGML_ASSERT(!params.model.hf_repo.empty());
+
+    const bool offline = params.offline;
+    std::string model_endpoint = get_model_endpoint();
+    auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini";
+
+    // prepare local path for caching
+    auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini");
+    auto preset_path = fs_get_cache_file(preset_fname);
+    const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
+    const bool has_preset = status >= 200 && status < 400;
+
+    // remote preset is optional, so we don't error out if not found
+    if (has_preset) {
+        LOG_INF("applying remote preset from %s\n", preset_url.c_str());
+        common_preset_context ctx(ex, /* only_remote_allowed */ true);
+        common_preset global; // unused for now
+        auto remote_presets = ctx.load_from_ini(preset_path, global);
+        if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) {
+            common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME);
+            LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline
+            preset.apply_to_params(params);
+        } else {
+            throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
+        }
+    } else {
+        LOG_INF("%s", "no remote preset found, skipping\n");
+    }
+
+    return has_preset;
+}
+
  struct handle_model_result {
      bool found_mmproj = false;
      common_params_model mmproj;
@@ -309,9 +350,7 @@ static handle_model_result common_params_handle_model(
              // make sure model path is present (for caching purposes)
              if (model.path.empty()) {
                  // this is to avoid different repo having same file name, or same file name in different subdirs
-                std::string filename = model.hf_repo + "_" + model.hf_file;
-                // to make sure we don't have any slashes in the filename
-                string_replace_all(filename, "/", "_");
+                std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file);
                  model.path = fs_get_cache_file(filename);
              }
  
@@ -425,61 +464,87 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
          }
      };
  
-    std::set<std::string> seen_args;
+    auto parse_cli_args = [&]() {
+        std::set<std::string> seen_args;
  
-    for (int i = 1; i < argc; i++) {
-        const std::string arg_prefix = "--";
+        for (int i = 1; i < argc; i++) {
+            const std::string arg_prefix = "--";
  
-        std::string arg = argv[i];
-        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
-            std::replace(arg.begin(), arg.end(), '_', '-');
-        }
-        if (arg_to_options.find(arg) == arg_to_options.end()) {
-            throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
-        }
-        if (!seen_args.insert(arg).second) {
-            LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
-        }
-        auto & tmp = arg_to_options[arg];
-        auto opt = *tmp.first;
-        bool is_positive = tmp.second;
-        if (opt.has_value_from_env()) {
-            fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
-        }
-        try {
-            if (opt.handler_void) {
-                opt.handler_void(params);
-                continue;
+            std::string arg = argv[i];
+            if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+                std::replace(arg.begin(), arg.end(), '_', '-');
              }
-            if (opt.handler_bool) {
-                opt.handler_bool(params, is_positive);
-                continue;
+            if (arg_to_options.find(arg) == arg_to_options.end()) {
+                throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
              }
-
-            // arg with single value
-            check_arg(i);
-            std::string val = argv[++i];
-            if (opt.handler_int) {
-                opt.handler_int(params, std::stoi(val));
-                continue;
+            if (!seen_args.insert(arg).second) {
+                LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
+            }
+            auto & tmp = arg_to_options[arg];
+            auto opt = *tmp.first;
+            bool is_positive = tmp.second;
+            if (opt.has_value_from_env()) {
+                fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
              }
-            if (opt.handler_string) {
-                opt.handler_string(params, val);
-                continue;
+            try {
+                if (opt.handler_void) {
+                    opt.handler_void(params);
+                    continue;
+                }
+                if (opt.handler_bool) {
+                    opt.handler_bool(params, is_positive);
+                    continue;
+                }
+
+                // arg with single value
+                check_arg(i);
+                std::string val = argv[++i];
+                if (opt.handler_int) {
+                    opt.handler_int(params, std::stoi(val));
+                    continue;
+                }
+                if (opt.handler_string) {
+                    opt.handler_string(params, val);
+                    continue;
+                }
+
+                // arg with 2 values
+                check_arg(i);
+                std::string val2 = argv[++i];
+                if (opt.handler_str_str) {
+                    opt.handler_str_str(params, val, val2);
+                    continue;
+                }
+            } catch (std::exception & e) {
+                throw std::invalid_argument(string_format(
+                    "error while handling argument \"%s\": %s\n\n"
+                    "usage:\n%s\n\nto show complete usage, run with -h",
+                    arg.c_str(), e.what(), opt.to_string().c_str()));
              }
+        }
+    };
  
-            // arg with 2 values
-            check_arg(i);
-            std::string val2 = argv[++i];
-            if (opt.handler_str_str) {
-                opt.handler_str_str(params, val, val2);
-                continue;
-            }
-        } catch (std::exception & e) {
-            throw std::invalid_argument(string_format(
-                "error while handling argument \"%s\": %s\n\n"
-                "usage:\n%s\n\nto show complete usage, run with -h",
-                arg.c_str(), e.what(), opt.to_string().c_str()));
+    // parse the first time to get -hf option (used for remote preset)
+    parse_cli_args();
+
+    // maybe handle remote preset
+    if (!params.model.hf_repo.empty()) {
+        std::string cli_hf_repo = params.model.hf_repo;
+        bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex);
+
+        // special case: if hf_repo explicitly set by preset, we need to preserve it (ignore CLI value)
+        // this is useful when we have one HF repo pointing to other HF repos (one model - multiple GGUFs)
+        std::string preset_hf_repo = params.model.hf_repo;
+        bool preset_has_hf_repo = preset_hf_repo != cli_hf_repo;
+
+        if (has_preset) {
+            // re-parse CLI args to override preset values
+            parse_cli_args();
+        }
+
+        // preserve hf_repo from preset if needed
+        if (preset_has_hf_repo) {
+            params.model.hf_repo = preset_hf_repo;
          }
      }
  
diff --git a/common/download.cpp b/common/download.cpp

index 6f56b5518f5d5ea6a92624cec2e1c4fa45582144..a1e0e518e9ae050b5de4d6659eaa52c3434b6e39 100644 (file)
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -157,6 +157,10 @@ static std::string read_etag(const std::string & path) {
      return none;
  }
  
+static bool is_http_status_ok(int status) {
+    return status >= 200 && status < 400;
+}
+
  #ifdef LLAMA_USE_CURL
  
  //
@@ -306,12 +310,14 @@ static bool common_download_head(CURL *              curl,
  }
  
  // download one single file from remote URL to local path
-static bool common_download_file_single_online(const std::string & url,
+// returns status code or -1 on error
+static int common_download_file_single_online(const std::string & url,
                                                 const std::string & path,
                                                 const std::string & bearer_token,
                                                 const common_header_list & custom_headers) {
      static const int max_attempts        = 3;
      static const int retry_delay_seconds = 2;
+
      for (int i = 0; i < max_attempts; ++i) {
          std::string etag;
  
@@ -371,7 +377,7 @@ static bool common_download_file_single_online(const std::string & url,
                  LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
                  if (remove(path.c_str()) != 0) {
                      LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
-                    return false;
+                    return -1;
                  }
              }
  
@@ -380,14 +386,14 @@ static bool common_download_file_single_online(const std::string & url,
                  if (std::filesystem::exists(path_temporary)) {
                      if (remove(path_temporary.c_str()) != 0) {
                          LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
-                        return false;
+                        return -1;
                      }
                  }
  
                  if (std::filesystem::exists(path)) {
                      if (remove(path.c_str()) != 0) {
                          LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
-                        return false;
+                        return -1;
                      }
                  }
              }
@@ -414,23 +420,27 @@ static bool common_download_file_single_online(const std::string & url,
  
              long http_code = 0;
              curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
-            if (http_code < 200 || http_code >= 400) {
+
+            int status = static_cast<int>(http_code);
+            if (!is_http_status_ok(http_code)) {
                  LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
-                return false;
+                return status; // TODO: maybe only return on certain codes
              }
  
              if (rename(path_temporary.c_str(), path.c_str()) != 0) {
                  LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
-                return false;
+                return -1;
              }
+
+            return static_cast<int>(http_code);
          } else {
              LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
-        }
  
-        break;
+            return 304; // Not Modified - fake cached response
+        }
      }
  
-    return true;
+    return -1; // max attempts reached
  }
  
  std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
@@ -625,7 +635,8 @@ static bool common_pull_file(httplib::Client & cli,
  }
  
  // download one single file from remote URL to local path
-static bool common_download_file_single_online(const std::string & url,
+// returns status code or -1 on error
+static int common_download_file_single_online(const std::string & url,
                                                 const std::string & path,
                                                 const std::string & bearer_token,
                                                 const common_header_list & custom_headers) {
@@ -659,8 +670,10 @@ static bool common_download_file_single_online(const std::string & url,
              LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
              if (file_exists) {
                  LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
-                return true;
+                return 304; // 304 Not Modified - fake cached response
              }
+            return head->status; // cannot use cached file, return raw status code
+            // TODO: maybe retry only on certain codes
          }
  
          std::string etag;
@@ -692,12 +705,12 @@ static bool common_download_file_single_online(const std::string & url,
          if (file_exists) {
              if (!should_download_from_scratch) {
                  LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
-                return true;
+                return 304; // 304 Not Modified - fake cached response
              }
              LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
              if (remove(path.c_str()) != 0) {
                  LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
-                return false;
+                return -1;
              }
          }
  
@@ -709,7 +722,7 @@ static bool common_download_file_single_online(const std::string & url,
                  existing_size = std::filesystem::file_size(path_temporary);
              } else if (remove(path_temporary.c_str()) != 0) {
                  LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
-                return false;
+                return -1;
              }
          }
  
@@ -730,15 +743,16 @@ static bool common_download_file_single_online(const std::string & url,
  
          if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
              LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
-            return false;
+            return -1;
          }
          if (!etag.empty()) {
              write_etag(path, etag);
          }
-        break;
+
+        return head->status; // TODO: use actual GET status?
      }
  
-    return true;
+    return -1; // max attempts reached
  }
  
  std::pair<long, std::vector<char>> common_remote_get_content(const std::string          & url,
@@ -777,22 +791,22 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
  
  #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
  
-static bool common_download_file_single(const std::string & url,
-                                        const std::string & path,
-                                        const std::string & bearer_token,
-                                        bool                offline,
-                                        const common_header_list & headers) {
+int common_download_file_single(const std::string & url,
+                                const std::string & path,
+                                const std::string & bearer_token,
+                                bool offline,
+                                const common_header_list & headers) {
      if (!offline) {
          return common_download_file_single_online(url, path, bearer_token, headers);
      }
  
      if (!std::filesystem::exists(path)) {
          LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
-        return false;
+        return -1;
      }
  
      LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
-    return true;
+    return 304; // Not Modified - fake cached response
  }
  
  // download multiple files from remote URLs to local paths
@@ -810,7 +824,8 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
              std::async(
                  std::launch::async,
                  [&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
-                    return common_download_file_single(it.first, it.second, bearer_token, offline, headers);
+                    const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
+                    return is_http_status_ok(http_status);
                  },
                  item
              )
@@ -837,7 +852,8 @@ bool common_download_model(const common_params_model & model,
          return false;
      }
  
-    if (!common_download_file_single(model.url, model.path, bearer_token, offline, headers)) {
+    const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
+    if (!is_http_status_ok(http_status)) {
          return false;
      }
  
@@ -975,7 +991,7 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
      } else if (res_code == 401) {
          throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
      } else {
-        throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
+        throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str()));
      }
  
      // check response
@@ -1094,7 +1110,8 @@ std::string common_docker_resolve_model(const std::string & docker) {
          std::string local_path = fs_get_cache_file(model_filename);
  
          const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
-        if (!common_download_file_single(blob_url, local_path, token, false, {})) {
+        const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
+        if (!is_http_status_ok(http_status)) {
              throw std::runtime_error("Failed to download Docker Model");
          }
  
@@ -1120,6 +1137,14 @@ std::string common_docker_resolve_model(const std::string &) {
      throw std::runtime_error("download functionality is not enabled in this build");
  }
  
+int common_download_file_single(const std::string &,
+                                const std::string &,
+                                const std::string &,
+                                bool,
+                                const common_header_list &) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
  #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
  
  std::vector<common_cached_model_info> common_list_cached_models() {
diff --git a/common/download.h b/common/download.h

index 9ea209393908cf42167f0bbed66cf0e1e17409cd..c79be2f90ebf354a8b1640971cbf994598501236 100644 (file)
--- a/common/download.h
+++ b/common/download.h
@@ -65,6 +65,14 @@ bool common_download_model(
  // returns list of cached models
  std::vector<common_cached_model_info> common_list_cached_models();
  
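+// download a single file from url to local path (or reuse the cached copy already at that path)
+// returns the HTTP status code (304 when a cached file is used, including in offline mode), or -1 on error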
+int common_download_file_single(const std::string & url,
+                                const std::string & path,
+                                const std::string & bearer_token,
+                                bool offline,
+                                const common_header_list & headers = {});
+
  // resolve and download model from Docker registry
  // return local path to downloaded model file
  std::string common_docker_resolve_model(const std::string & docker);
diff --git a/common/preset.cpp b/common/preset.cpp

index e2fc18c5dad22c948e1d0eed077f164884e6a5a2..aec14e07692e81d628b4dbcc3528739312a5557c 100644 (file)
--- a/common/preset.cpp
+++ b/common/preset.cpp
@@ -16,6 +16,46 @@ static std::string rm_leading_dashes(const std::string & str) {
      return str.substr(pos);
  }
  
+// only allow a subset of args for remote presets for security reasons
+// do not add more args unless absolutely necessary
+// args that output to files are strictly prohibited
+static std::set<std::string> get_remote_preset_whitelist(const std::map<std::string, common_arg> & key_to_opt) {
+    static const std::set<std::string> allowed_options = {
+        "model-url",
+        "hf-repo",
+        "hf-repo-draft",
+        "hf-repo-v", // vocoder
+        "hf-file-v", // vocoder
+        "mmproj-url",
+        "pooling",
+        "jinja",
+        "batch-size",
+        "ubatch-size",
+        "cache-reuse",
+        // note: sampling params are automatically allowed by default
+        // negated args will be added automatically
+    };
+
+    std::set<std::string> allowed_keys;
+
+    for (const auto & it : key_to_opt) {
+        const std::string & key = it.first;
+        const common_arg & opt = it.second;
+        if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) {
+            allowed_keys.insert(key);
+            // also add variant keys (args without leading dashes and env vars)
+            for (const auto & arg : opt.get_args()) {
+                allowed_keys.insert(rm_leading_dashes(arg));
+            }
+            for (const auto & env : opt.get_env()) {
+                allowed_keys.insert(env);
+            }
+        }
+    }
+
+    return allowed_keys;
+}
+
  std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
      std::vector<std::string> args;
  
@@ -121,6 +161,29 @@ void common_preset::merge(const common_preset & other) {
      }
  }
  
+void common_preset::apply_to_params(common_params & params) const {
+    for (const auto & [opt, val] : options) {
+        // apply each option to params
+        if (opt.handler_string) {
+            opt.handler_string(params, val);
+        } else if (opt.handler_int) {
+            opt.handler_int(params, std::stoi(val));
+        } else if (opt.handler_bool) {
+            opt.handler_bool(params, common_arg_utils::is_truthy(val));
+        } else if (opt.handler_str_str) {
+            // not supported yet
+            throw std::runtime_error(string_format(
+                "%s: option with two values is not supported yet",
+                __func__
+            ));
+        } else if (opt.handler_void) {
+            opt.handler_void(params);
+        } else {
+            GGML_ABORT("unknown handler type");
+        }
+    }
+}
+
  static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
      std::map<std::string, std::map<std::string, std::string>> parsed;
  
@@ -230,10 +293,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
      return value;
  }
  
-common_preset_context::common_preset_context(llama_example ex)
+common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed)
          : ctx_params(common_params_parser_init(default_params, ex)) {
      common_params_add_preset_options(ctx_params.options);
      key_to_opt = get_map_key_opt(ctx_params);
+
+    // setup allowed keys if only_remote_allowed is true
+    if (only_remote_allowed) {
+        filter_allowed_keys = true;
+        allowed_keys = get_remote_preset_whitelist(key_to_opt);
+    }
  }
  
  common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
@@ -250,6 +319,12 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
          LOG_DBG("loading preset: %s\n", preset.name.c_str());
          for (const auto & [key, value] : section.second) {
              LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
+            if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
+                throw std::runtime_error(string_format(
+                    "option '%s' is not allowed in remote presets",
+                    key.c_str()
+                ));
+            }
              if (key_to_opt.find(key) != key_to_opt.end()) {
                  const auto & opt = key_to_opt.at(key);
                  if (is_bool_arg(opt)) {
diff --git a/common/preset.h b/common/preset.h

index 3a84d1be29ce99139d4a30deba4b7d71bc85bec3..11ba6ef81240eecc926b227a2f2b5b77c2fc845c 100644 (file)
--- a/common/preset.h
+++ b/common/preset.h
@@ -6,6 +6,7 @@
  #include <string>
  #include <vector>
  #include <map>
+#include <set>
  
  //
  // INI preset parser and writer
@@ -40,6 +41,9 @@ struct common_preset {
  
      // merge another preset into this one, overwriting existing options
      void merge(const common_preset & other);
+
+    // apply preset options to common_params
+    void apply_to_params(common_params & params) const;
  };
  
  // interface for multiple presets in one file
@@ -50,7 +54,12 @@ struct common_preset_context {
      common_params default_params; // unused for now
      common_params_context ctx_params;
      std::map<std::string, common_arg> key_to_opt;
-    common_preset_context(llama_example ex);
+
+    bool filter_allowed_keys = false;
+    std::set<std::string> allowed_keys;
+
+    // if only_remote_allowed is true, only accept whitelisted keys
+    common_preset_context(llama_example ex, bool only_remote_allowed = false);
  
      // load presets from INI file
      common_presets load_from_ini(const std::string & path, common_preset & global) const;
diff --git a/docs/preset.md b/docs/preset.md

new file mode 100644 (file)

index 0000000..be50bb9
--- /dev/null
+++ b/docs/preset.md
@@ -0,0 +1,60 @@
+# llama.cpp INI Presets
+
+## Introduction
+
+The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp.
+
+### Using Presets with the Server
+
+When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details.
+
+### Using a Remote Preset
+
+> [!NOTE]
+>
+> This feature is currently only supported via the `-hf` option.
+
+For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model.
+
+Example:
+
+```ini
+hf-repo-draft = username/my-draft-model-GGUF
+temp = 0.5
+top-k = 20
+top-p = 0.95
+```
+
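+For security reasons, only a whitelisted subset of options, plus all sampling parameters, is allowed in a remote preset; a `preset.ini` containing any other option is rejected with an error. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options.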
+
+Example usage:
+
+Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above:
+
+```sh
+llama-cli -hf username/my-model-with-preset
+
+# This is equivalent to:
+llama-cli -hf username/my-model-with-preset \
+  --hf-repo-draft username/my-draft-model-GGUF \
+  --temp 0.5 \
+  --top-k 20 \
+  --top-p 0.95
+```
+
+You can also override preset arguments by specifying them on the command line:
+
+```sh
+# Force temp = 0.1, overriding the preset value
+llama-cli -hf username/my-model-with-preset --temp 0.1
+```
+
+If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo for each preset. Each HF repo should contain a `preset.ini` file that references the actual model(s):
+
+```ini
+hf-repo = user/my-model-main
+hf-repo-draft = user/my-model-draft
+temp = 0.8
+batch-size = 1024
+; (and other configurations)
+```
author	Xuan-Son Nguyen <redacted>
	Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)
committer	GitHub <redacted>
	Thu, 8 Jan 2026 21:35:40 +0000 (22:35 +0100)
common/arg.cpp		patch \| blob \| history
common/download.cpp		patch \| blob \| history
common/download.h		patch \| blob \| history
common/preset.cpp		patch \| blob \| history
common/preset.h		patch \| blob \| history
docs/preset.md	[new file with mode: 0644]	patch \| blob