- [Trending](https://huggingface.co/models?library=gguf&sort=trending)
- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf)
-You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf <user>/<model>[:quant]`
+You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from [Hugging Face](https://huggingface.co/) or other model hosting sites, such as [ModelScope](https://modelscope.cn/), by using this CLI argument: `-hf <user>/<model>[:quant]`.
+
+By default, the CLI downloads from Hugging Face; you can switch to other options with the environment variable `MODEL_ENDPOINT`. For example, you may opt to download model checkpoints from ModelScope or other model-sharing communities by setting the environment variable, e.g. `MODEL_ENDPOINT=https://www.modelscope.cn/`.
After downloading a model, use the CLI tools to run it locally - see below.
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+ http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
// Check if hf-token or bearer-token was specified
if (!bearer_token.empty()) {
std::string auth_header = "Authorization: Bearer " + bearer_token;
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
}
+ curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
#if defined(_WIN32)
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
curl_slist_ptr http_headers;
std::string res_str;
- std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
+
+ std::string model_endpoint = get_model_endpoint();
+
+ std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
}
}
- std::string hf_endpoint = "https://huggingface.co/";
- const char * hf_endpoint_env = getenv("HF_ENDPOINT");
- if (hf_endpoint_env) {
- hf_endpoint = hf_endpoint_env;
- if (hf_endpoint.back() != '/') hf_endpoint += '/';
- }
- model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
+ std::string model_endpoint = get_model_endpoint();
+ model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
// make sure model path is present (for caching purposes)
if (model.path.empty()) {
// this is to avoid different repo having same file name, or same file name in different subdirs
return iparams;
}
+// Resolve the base URL used for model downloads.
+// Precedence: MODEL_ENDPOINT env var, then HF_ENDPOINT (legacy), then the
+// Hugging Face default. The returned string always ends with '/'.
+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    // Ignore empty values: an env var set to "" would otherwise yield an empty
+    // string, and calling back() on an empty std::string is undefined behavior.
+    if (endpoint_env && *endpoint_env) {
+        model_endpoint = endpoint_env;
+        // normalize so callers can append "v2/..." or repo paths directly
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
llama_clear_adapter_lora(ctx);
for (auto & la : lora) {
// clear LoRA adapters from context, then apply new list of adapters
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
+std::string get_model_endpoint();
+
//
// Batch utils
//
std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
std::string url;
+ std::string model_endpoint = get_model_endpoint();
+
if (pos == std::string::npos) {
- auto [model_name, manifest_url] = extract_model_and_tag(model, "https://huggingface.co/v2/");
+ auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
hfr = model_name;
nlohmann::json manifest;
hff = model.substr(pos + 1);
}
- url = "https://huggingface.co/" + hfr + "/resolve/main/" + hff;
+ url = model_endpoint + hfr + "/resolve/main/" + hff;
return download(url, bn, true, headers);
}