}
// download one single file from remote URL to local path
-static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token) {
- // Initialize libcurl
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
- curl_slist_ptr http_headers;
- if (!curl) {
- LOG_ERR("%s: error initializing libcurl\n", __func__);
- return false;
- }
-
- // Set the URL, allow to follow http redirection
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
-
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
- // Check if hf-token or bearer-token was specified
- if (!bearer_token.empty()) {
- std::string auth_header = "Authorization: Bearer " + bearer_token;
- http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
- }
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
-
-#if defined(_WIN32)
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
- // operating system. Currently implemented under MS-Windows.
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
-#endif
-
+static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
// Check if the file already exists locally
auto file_exists = std::filesystem::exists(path);
std::string last_modified;
if (file_exists) {
+ if (offline) {
+ LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
+ return true; // skip verification/downloading
+ }
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
std::ifstream metadata_in(metadata_path);
if (metadata_in.good()) {
}
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
} else {
+ if (offline) {
+ LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
+ return false;
+ }
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
}
bool head_request_ok = false;
bool should_download = !file_exists; // by default, we should download if the file does not exist
- // get ETag to see if the remote file has changed
- {
- typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
- auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
+ // Initialize libcurl
+ curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
+ curl_slist_ptr http_headers;
+ if (!curl) {
+ LOG_ERR("%s: error initializing libcurl\n", __func__);
+ return false;
+ }
+
+ // Set the URL, allow to follow http redirection
+ curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+ curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
- static std::regex header_regex("([^:]+): (.*)\r\n");
- static std::regex etag_regex("ETag", std::regex_constants::icase);
- static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
+ http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
+ // Check if hf-token or bearer-token was specified
+ if (!bearer_token.empty()) {
+ std::string auth_header = "Authorization: Bearer " + bearer_token;
+ http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
+ }
+ curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
- std::string header(buffer, n_items);
- std::smatch match;
- if (std::regex_match(header, match, header_regex)) {
- const std::string & key = match[1];
- const std::string & value = match[2];
- if (std::regex_match(key, match, etag_regex)) {
- headers->etag = value;
- } else if (std::regex_match(key, match, last_modified_regex)) {
- headers->last_modified = value;
- }
- }
- return n_items;
- };
+#if defined(_WIN32)
+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
+ // operating system. Currently implemented under MS-Windows.
+ curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+#endif
- curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
- curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
- curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+ typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
+ auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
+ common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
- // we only allow retrying once for HEAD requests
- // this is for the use case of using running offline (no internet), retrying can be annoying
- bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
- if (!was_perform_successful) {
- head_request_ok = false;
- }
+ static std::regex header_regex("([^:]+): (.*)\r\n");
+ static std::regex etag_regex("ETag", std::regex_constants::icase);
+ static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
- long http_code = 0;
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
- if (http_code == 200) {
- head_request_ok = true;
- } else {
- LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
- head_request_ok = false;
+ std::string header(buffer, n_items);
+ std::smatch match;
+ if (std::regex_match(header, match, header_regex)) {
+ const std::string & key = match[1];
+ const std::string & value = match[2];
+ if (std::regex_match(key, match, etag_regex)) {
+ headers->etag = value;
+ } else if (std::regex_match(key, match, last_modified_regex)) {
+ headers->last_modified = value;
+ }
}
+ return n_items;
+ };
+
+ curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+
+ // we only allow retrying once for HEAD requests
+ // this is for the use case of running offline (no internet), where retrying can be annoying
+ bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
+ if (!was_perform_successful) {
+ head_request_ok = false;
+ }
+
+ long http_code = 0;
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+ if (http_code == 200) {
+ head_request_ok = true;
+ } else {
+ LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+ head_request_ok = false;
}
// if head_request_ok is false, we don't have the etag or last-modified headers
// download multiple files from remote URLs to local paths
// the input is a vector of pairs <url, path>
-static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token) {
+static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
// Prepare download in parallel
std::vector<std::future<bool>> futures_download;
for (auto const & item : urls) {
- futures_download.push_back(std::async(std::launch::async, [bearer_token](const std::pair<std::string, std::string> & it) -> bool {
- return common_download_file_single(it.first, it.second, bearer_token);
+ futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
+ return common_download_file_single(it.first, it.second, bearer_token, offline);
}, item));
}
static bool common_download_model(
const common_params_model & model,
- const std::string & bearer_token) {
+ const std::string & bearer_token,
+ bool offline) {
// Basic validation of the model.url
if (model.url.empty()) {
LOG_ERR("%s: invalid model url\n", __func__);
return false;
}
- if (!common_download_file_single(model.url, model.path, bearer_token)) {
+ if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
return false;
}
}
// Download in parallel
- common_download_file_multiple(urls, bearer_token);
+ common_download_file_multiple(urls, bearer_token, offline);
}
return true;
*
* Note: we use the Ollama-compatible HF API, but we do not use the blobId. Instead, we use the special "ggufFile" field, which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
*/
-static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
+static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
std::string tag = parts.size() > 1 ? parts.back() : "latest";
std::string hf_repo = parts[0];
long res_code = 0;
std::string res_str;
bool use_cache = false;
- try {
- auto res = common_remote_get_content(url, params);
- res_code = res.first;
- res_str = std::string(res.second.data(), res.second.size());
- } catch (const std::exception & e) {
- LOG_WRN("error: failed to get manifest: %s\n", e.what());
- LOG_WRN("try reading from cache\n");
- // try to read from cache
+ if (!offline) {
try {
+ auto res = common_remote_get_content(url, params);
+ res_code = res.first;
+ res_str = std::string(res.second.data(), res.second.size());
+ } catch (const std::exception & e) {
+ LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
+ }
+ }
+ if (res_code == 0) {
+ if (std::filesystem::exists(cached_response_path)) {
+ LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str());
res_str = read_file(cached_response_path);
res_code = 200;
use_cache = true;
- } catch (const std::exception & e) {
- throw std::runtime_error("error: failed to get manifest (check your internet connection)");
+ } else {
+ throw std::runtime_error(
+ offline ? "error: failed to get manifest (offline mode)"
+ : "error: failed to get manifest (check your internet connection)");
}
}
std::string ggufFile;
return false;
}
// Stub variant of common_download_file_single: every parameter is unnamed and
// therefore ignored; the function only logs an error and returns false.
// The '-'/'+' pair below replaces the three-argument signature with one that
// takes an extra trailing bool, matching the callers in this file that now
// pass `offline` as the fourth argument.
// NOTE(review): judging by the log text this is presumably the definition used
// when the build has no CURL support — confirm against the enclosing #if.
-static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
+static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
LOG_ERR("error: built without CURL, cannot download model from internet\n");
// Always reports failure; no download is attempted here.
return false;
}
// Stub variant of common_download_file_multiple: the <url, path> list, the
// token and the new trailing bool are all unnamed and ignored; the function
// only logs an error and returns false.
// The '-'/'+' pair below adds the trailing bool so the signature matches the
// caller in this file that passes `offline` as the third argument.
// NOTE(review): presumably the no-CURL build path, going by the log text —
// confirm against the enclosing #if.
-static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &) {
+static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
// Always reports failure; nothing is downloaded.
return false;
}
// Stub variant of common_download_model: the model description, the token and
// the new trailing bool are unnamed and ignored; the function only logs an
// error and returns false.
// The '-'/'+' lines below extend the parameter list with a trailing bool so
// the signature matches the caller in this file that passes `offline`.
// NOTE(review): presumably the no-CURL build path, going by the log text —
// confirm against the enclosing #if.
static bool common_download_model(
const common_params_model &,
- const std::string &) {
+ const std::string &,
+ bool) {
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
// Always reports failure; nothing is downloaded.
return false;
}
// Stub variant of common_get_hf_file: both strings and the new trailing bool
// are unnamed and ignored; the function logs an error and returns an
// empty-initialized common_hf_file_res ({}).
// The caller visible in this file (common_params_handle_model) checks
// `repo.empty() || ggufFile.empty()` on the result and calls exit(1) in that
// case, relying on the error message printed here.
// The '-'/'+' pair below adds the trailing bool so the signature matches that
// caller, which now passes `offline`.
// NOTE(review): presumably the no-CURL build path, going by the log text —
// confirm against the enclosing #if.
-static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &) {
+static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
return {};
}
static handle_model_result common_params_handle_model(
struct common_params_model & model,
const std::string & bearer_token,
- const std::string & model_path_default) {
+ const std::string & model_path_default,
+ bool offline) {
handle_model_result result;
// handle pre-fill default model path and url based on hf_repo and hf_file
{
// short-hand to avoid specifying --hf-file -> default it to --model
if (model.hf_file.empty()) {
if (model.path.empty()) {
- auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token);
+ auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
exit(1); // built without CURL, error message already printed
}
// then, download it if needed
if (!model.url.empty()) {
- bool ok = common_download_model(model, bearer_token);
+ bool ok = common_download_model(model, bearer_token, offline);
if (!ok) {
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
exit(1);
// handle model and download
{
- auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
+ auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
if (params.no_mmproj) {
params.mmproj = {};
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
// only download mmproj if the current example is using it
for (auto & ex : mmproj_examples) {
if (ctx_arg.ex == ex) {
- common_params_handle_model(params.mmproj, params.hf_token, "");
+ common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
break;
}
}
- common_params_handle_model(params.speculative.model, params.hf_token, "");
- common_params_handle_model(params.vocoder.model, params.hf_token, "");
+ common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
+ common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
}
if (params.escape) {
common_log_set_verbosity_thold(INT_MAX);
}
));
// Registers the "--offline" command-line option. It takes no value: the
// handler just sets params.offline to true. That flag is later forwarded to
// common_params_handle_model for the main, mmproj, speculative and vocoder
// models.
// NOTE(review): set_env("LLAMA_OFFLINE") presumably lets the LLAMA_OFFLINE
// environment variable enable the same option — confirm against common_arg.
+ add_opt(common_arg(
+ {"--offline"},
+ "Offline mode: forces use of cache, prevents network access",
+ [](common_params & params) {
+ params.offline = true;
+ }
+ ).set_env("LLAMA_OFFLINE"));
add_opt(common_arg(
{"-lv", "--verbosity", "--log-verbosity"}, "N",
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",