static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
GGML_ASSERT(!params.model.hf_repo.empty());
+ // note: the returned hf_repo does not include the tag
+ auto [hf_repo, hf_tag] = common_download_split_repo_tag(params.model.hf_repo);
+
+ // "latest" tag (default if not specified) is translated to "default" preset
+ if (hf_tag == "latest") {
+ hf_tag = "default";
+ }
+
const bool offline = params.offline;
std::string model_endpoint = get_model_endpoint();
- auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini";
+ auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
// prepare local path for caching
- auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini");
+ auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
auto preset_path = fs_get_cache_file(preset_fname);
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
const bool has_preset = status >= 200 && status < 400;
if (has_preset) {
LOG_INF("applying remote preset from %s\n", preset_url.c_str());
common_preset_context ctx(ex, /* only_remote_allowed */ true);
- common_preset global; // unused for now
+ common_preset global;
auto remote_presets = ctx.load_from_ini(preset_path, global);
- if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) {
- common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME);
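+ // apply options from the global [*] section to each named preset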
+ remote_presets = ctx.cascade(global, remote_presets);
+ if (remote_presets.find(hf_tag) != remote_presets.end()) {
+ common_preset preset = remote_presets.at(hf_tag);
LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline
preset.apply_to_params(params);
} else {
- throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
+ throw std::runtime_error("Remote preset.ini does not contain [" + hf_tag + "] section");
}
} else {
LOG_INF("%s", "no remote preset found, skipping\n");
}
return has_preset;
}
+std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
+ auto parts = string_split<std::string>(hf_repo_with_tag, ':');
+ std::string tag = parts.size() > 1 ? parts.back() : "latest";
+ std::string hf_repo = parts[0];
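+ // validate the "<user>/<model>" repo format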
+ if (string_split<std::string>(hf_repo, '/').size() != 2) {
+ throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
+ }
+ return {hf_repo, tag};
+}
+
#ifdef LLAMA_USE_CURL
//
const std::string & bearer_token,
bool offline,
const common_header_list & custom_headers) {
- auto parts = string_split<std::string>(hf_repo_with_tag, ':');
- std::string tag = parts.size() > 1 ? parts.back() : "latest";
- std::string hf_repo = parts[0];
- if (string_split<std::string>(hf_repo, '/').size() != 2) {
- throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
- }
+ // note: the returned hf_repo does not include the tag
+ auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
// get remote file content, returns <http_code, raw_response_body>
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
+// split HF repo with tag into <repo, tag>
+// for example: "user/model:tag" -> <"user/model", "tag">
+// if no tag is present, it defaults to "latest":
+// for example: "user/model" -> <"user/model", "latest">
+std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
+
struct common_cached_model_info {
std::string manifest_path;
std::string user;
"batch-size",
"ubatch-size",
"cache-reuse",
+ "chat-template-kwargs",
+ "mmap",
// note: sampling params are automatically allowed by default
- // negated args will be added automatically
+ // negated args will be added automatically if the positive arg is specified above
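+ // (e.g. allowing "mmap" also allows "no-mmap")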
};
std::set<std::string> allowed_keys;
}
LOG_DBG("loading preset: %s\n", preset.name.c_str());
for (const auto & [key, value] : section.second) {
+ if (key == "version") {
+ // skip version key (reserved for future use)
+ continue;
+ }
+
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
throw std::runtime_error(string_format(
}
LOG_DBG("accepted option: %s = %s\n", key.c_str(), preset.options[opt].c_str());
} else {
- // TODO: maybe warn about unknown key?
+ throw std::runtime_error(string_format(
+ "option '%s' not recognized in preset '%s'",
+ key.c_str(), preset.name.c_str()
+ ));
}
}
ctx-size = 1024
; (and other configurations)
```
+
+### Named presets
+
+If you want to define multiple preset configurations for one or more GGUF models, you can create an otherwise empty HF repo containing only a `preset.ini` file that references the actual model(s):
+
+```ini
+[*]
+mmap = 1
+
+[gpt-oss-20b-hf]
+hf = ggml-org/gpt-oss-20b-GGUF
+batch-size = 2048
+ubatch-size = 2048
+top-p = 1.0
+top-k = 0
+min-p = 0.01
+temp = 1.0
+chat-template-kwargs = {"reasoning_effort": "high"}
+
+[gpt-oss-120b-hf]
+hf = ggml-org/gpt-oss-120b-GGUF
+batch-size = 2048
+ubatch-size = 2048
+top-p = 1.0
+top-k = 0
+min-p = 0.01
+temp = 1.0
+chat-template-kwargs = {"reasoning_effort": "high"}
+```
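+
+The `[*]` section holds global options that are cascaded into every named preset, so `mmap = 1` above applies to both models.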
+
+You can then use it with `llama-cli` or `llama-server`, for example:
+
+```sh
+llama-server -hf user/repo:gpt-oss-120b-hf
+```
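+
+The tag after `:` selects the matching section in `preset.ini`, here `[gpt-oss-120b-hf]`. If no tag is given, it defaults to `latest`, which maps to the `[default]` section; since the example `preset.ini` above defines no `[default]` section, running without a tag would fail:
+
+```sh
+# error: Remote preset.ini does not contain [default] section
+llama-server -hf user/repo
+```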
+
+Please make sure to provide the correct `hf` repo for each child preset. Otherwise, you may get an error: `The specified tag is not a valid quantization scheme.`