- [Trending](https://huggingface.co/models?library=gguf&sort=trending)
- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to other options with the environment variable `MODEL_ENDPOINT`. For example, you may opt to download model checkpoints from ModelScope or other model-sharing communities by setting the environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/`.
After downloading a model, use the CLI tools to run it locally - see below.
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+ http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
// Check if hf-token or bearer-token was specified
if (!bearer_token.empty()) {
std::string auth_header = "Authorization: Bearer " + bearer_token;
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
}
+ curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
#if defined(_WIN32)
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
curl_slist_ptr http_headers;
std::string res_str;
- std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
+
+ std::string model_endpoint = get_model_endpoint();
+
+ std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
}
}
- std::string hf_endpoint = "https://huggingface.co/";
- const char * hf_endpoint_env = getenv("HF_ENDPOINT");
- if (hf_endpoint_env) {
- hf_endpoint = hf_endpoint_env;
- if (hf_endpoint.back() != '/') hf_endpoint += '/';
- }
- model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
+ std::string model_endpoint = get_model_endpoint();
+ model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
// make sure model path is present (for caching purposes)
if (model.path.empty()) {
// this is to avoid different repo having same file name, or same file name in different subdirs
return iparams;
}
+// Resolve the base URL used for model downloads.
+// Precedence: MODEL_ENDPOINT env var, then HF_ENDPOINT (legacy), then the
+// Hugging Face default. The returned string always ends with '/'.
+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    // Ignore empty values: an env var set to "" would otherwise yield an empty
+    // string, and calling back() on an empty std::string is undefined behavior.
+    if (endpoint_env && *endpoint_env) {
+        model_endpoint = endpoint_env;
+        // normalize so callers can append "v2/..." or repo paths directly
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
llama_clear_adapter_lora(ctx);
for (auto & la : lora) {
// clear LoRA adapters from context, then apply new list of adapters
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
+std::string get_model_endpoint();
+
//
// Batch utils
//
std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
std::string url;
+ std::string model_endpoint = get_model_endpoint();
+
if (pos == std::string::npos) {
- auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
+ auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
hfr = model_name;
nlohmann::json manifest;
hff = model.substr(pos + 1);
}
- url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+ url = model_endpoint + hfr + "/resolve/main/" + hff;
return download(url, bn, true, headers);
}