From: Eric Curtin Date: Thu, 18 Sep 2025 15:22:50 +0000 (+0100) Subject: Add resumable downloads for llama-server model loading (#15963) X-Git-Tag: upstream/0.0.6527~16 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4ca088b036313c2d8e682f4cfeb7c29edd85d0b9;p=pkg%2Fggml%2Fsources%2Fllama.cpp Add resumable downloads for llama-server model loading (#15963) - Implement resumable downloads in common_download_file_single function - Add detection of partial download files (.downloadInProgress) - Check server support for HTTP Range requests via Accept-Ranges header - Implement HTTP Range request with "bytes=-" header - Open files in append mode when resuming vs create mode for new downloads Signed-off-by: Eric Curtin --- diff --git a/common/arg.cpp b/common/arg.cpp index 9fd8858e..54df2df8 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -57,12 +57,32 @@ static std::string read_file(const std::string & fname) { } static void write_file(const std::string & fname, const std::string & content) { - std::ofstream file(fname); + const std::string fname_tmp = fname + ".tmp"; + std::ofstream file(fname_tmp); if (!file) { throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str())); } - file << content; - file.close(); + + try { + file << content; + file.close(); + + // Makes write atomic + if (rename(fname_tmp.c_str(), fname.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str()); + // If rename fails, try to delete the temporary file + if (remove(fname_tmp.c_str()) != 0) { + LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); + } + } + } catch (...) { + // If anything fails, try to delete the temporary file + if (remove(fname_tmp.c_str()) != 0) { + LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str()); + } + + throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str())); + } } common_arg & common_arg::set_examples(std::initializer_list examples) { @@ -217,250 +237,294 @@ struct curl_slist_ptr { } }; -#define CURL_MAX_RETRY 3 -#define CURL_RETRY_DELAY_SECONDS 2 +static CURLcode common_curl_perf(CURL * curl) { + CURLcode res = curl_easy_perform(curl); + if (res != CURLE_OK) { + LOG_ERR("%s: curl_easy_perform() failed\n", __func__); + } + + return res; +} + +// Send a HEAD request to retrieve the etag and last-modified headers +struct common_load_model_from_url_headers { + std::string etag; + std::string last_modified; + std::string accept_ranges; +}; -static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds, const char * method_name) { - int remaining_attempts = max_attempts; +struct FILE_deleter { + void operator()(FILE * f) const { fclose(f); } +}; - while (remaining_attempts > 0) { - LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method_name, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); +static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) { + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; + static std::regex header_regex("([^:]+): (.*)\r\n"); + static std::regex etag_regex("ETag", std::regex_constants::icase); + static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase); + std::string header(buffer, n_items); + std::smatch match; + if (std::regex_match(header, match, header_regex)) { + const std::string & key = match[1]; + const std::string & value = match[2]; + if (std::regex_match(key, match, etag_regex)) { + headers->etag = value; + } else if (std::regex_match(key, match, last_modified_regex)) { + headers->last_modified = value; + } else if (std::regex_match(key, match, accept_ranges_regex)) { + headers->accept_ranges = value; + } + } + + return n_items; +} - CURLcode res = curl_easy_perform(curl); - if (res == CURLE_OK) { - return true; - } +static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) { + return std::fwrite(data, size, nmemb, static_cast(fd)); +} - int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000; - LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); +// helper function to hide password in URL +static std::string llama_download_hide_password_in_url(const std::string & url) { + // Use regex to match and replace the user[:password]@ pattern in URLs + // Pattern: scheme://[user[:password]@]host[...] + static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)"); + std::smatch match; - remaining_attempts--; - if (remaining_attempts == 0) break; - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + if (std::regex_match(url, match, url_regex)) { + // match[1] = scheme (e.g., "https://") + // match[2] = user[:password]@ part + // match[3] = rest of URL (host and path) + return match[1].str() + "********@" + match[3].str(); } - LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); + return url; // No credentials found or malformed URL +} - return false; +static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) { + // Set the URL, allow to follow http redirection + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + +# if defined(_WIN32) + // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of + // operating system. Currently implemented under MS-Windows. + curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); +# endif + + curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback); } -// download one single file from remote URL to local path -static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) { - // Check if the file already exists locally - auto file_exists = std::filesystem::exists(path); +static void common_curl_easy_setopt_get(CURL * curl) { + curl_easy_setopt(curl, CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback); - // If the file exists, check its JSON metadata companion file. - std::string metadata_path = path + ".json"; - nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead - std::string etag; - std::string last_modified; + // display download progress + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); +} - if (file_exists) { - if (offline) { - LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return true; // skip verification/downloading - } - // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). - std::ifstream metadata_in(metadata_path); - if (metadata_in.good()) { - try { - metadata_in >> metadata; - LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); - if (metadata.contains("etag") && metadata.at("etag").is_string()) { - etag = metadata.at("etag"); - } - if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { - last_modified = metadata.at("lastModified"); - } - } catch (const nlohmann::json::exception & e) { - LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); - } - } - // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again) - } else { - if (offline) { - LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); - return false; - } - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); +static bool common_pull_file(CURL * curl, const std::string & path_temporary) { + if (std::filesystem::exists(path_temporary)) { + const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary)); + LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str()); + const std::string range_str = partial_size + "-"; + curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str()); } - // Send a HEAD request to retrieve the etag and last-modified headers - struct common_load_model_from_url_headers { - std::string etag; - std::string last_modified; - }; + // Always open file in append mode could be resuming + std::unique_ptr outfile(fopen(path_temporary.c_str(), "ab")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str()); + return false; + } - common_load_model_from_url_headers headers; - bool head_request_ok = false; - bool should_download = !file_exists; // by default, we should download if the file does not exist + common_curl_easy_setopt_get(curl); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get()); - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; + return common_curl_perf(curl) == CURLE_OK; +} + +static bool common_download_head(CURL * curl, + curl_slist_ptr & http_headers, + const std::string & url, + const std::string & bearer_token) { if (!curl) { LOG_ERR("%s: error initializing libcurl\n", __func__); return false; } - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); // Check if hf-token or bearer-token was specified if (!bearer_token.empty()) { std::string auth_header = "Authorization: Bearer " + bearer_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); + http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); } - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - -#if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. Currently implemented under MS-Windows. - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { - common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; - - static std::regex header_regex("([^:]+): (.*)\r\n"); - static std::regex etag_regex("ETag", std::regex_constants::icase); - static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr); + common_curl_easy_setopt_head(curl, url); + return common_curl_perf(curl) == CURLE_OK; +} - std::string header(buffer, n_items); - std::smatch match; - if (std::regex_match(header, match, header_regex)) { - const std::string & key = match[1]; - const std::string & value = match[2]; - if (std::regex_match(key, match, etag_regex)) { - headers->etag = value; - } else if (std::regex_match(key, match, last_modified_regex)) { - headers->last_modified = value; +// download one single file from remote URL to local path +static bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline) { + // If the file exists, check its JSON metadata companion file. + std::string metadata_path = path + ".json"; + static const int max_attempts = 3; + static const int retry_delay_seconds = 2; + for (int i = 0; i < max_attempts; ++i) { + nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead + std::string etag; + std::string last_modified; + + // Check if the file already exists locally + const auto file_exists = std::filesystem::exists(path); + if (file_exists) { + if (offline) { + LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); + return true; // skip verification/downloading + } + // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). + std::ifstream metadata_in(metadata_path); + if (metadata_in.good()) { + try { + metadata_in >> metadata; + LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), + metadata.dump().c_str()); + if (metadata.contains("etag") && metadata.at("etag").is_string()) { + etag = metadata.at("etag"); + } + if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { + last_modified = metadata.at("lastModified"); + } + } catch (const nlohmann::json::exception & e) { + LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); + } } + // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again) + } else { + if (offline) { + LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); + return false; + } + LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); } - return n_items; - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress - curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); - curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); + bool head_request_ok = false; + bool should_download = !file_exists; // by default, we should download if the file does not exist - // we only allow retrying once for HEAD requests - // this is for the use case of using running offline (no internet), retrying can be annoying - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD"); - if (!was_perform_successful) { - head_request_ok = false; - } - - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code == 200) { - head_request_ok = true; - } else { - LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); - head_request_ok = false; - } - - // if head_request_ok is false, we don't have the etag or last-modified headers - // we leave should_download as-is, which is true if the file does not exist - if (head_request_ok) { - // check if ETag or Last-Modified headers are different - // if it is, we need to download the file again - if (!etag.empty() && etag != headers.etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); - should_download = true; - } else if (!last_modified.empty() && last_modified != headers.last_modified) { - LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); - should_download = true; + // Initialize libcurl + curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); + common_load_model_from_url_headers headers; + curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); + curl_slist_ptr http_headers; + const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token); + if (!was_perform_successful) { + head_request_ok = false; } - } - if (should_download) { - std::string path_temporary = path + ".downloadInProgress"; - if (file_exists) { - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code == 200) { + head_request_ok = true; + } else { + LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); + head_request_ok = false; + } + + // if head_request_ok is false, we don't have the etag or last-modified headers + // we leave should_download as-is, which is true if the file does not exist + bool should_download_from_scratch = false; + if (head_request_ok) { + // check if ETag or Last-Modified headers are different + // if it is, we need to download the file again + if (!etag.empty() && etag != headers.etag) { + LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), + headers.etag.c_str()); + should_download = true; + should_download_from_scratch = true; + } else if (!last_modified.empty() && last_modified != headers.last_modified) { + LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, + last_modified.c_str(), headers.last_modified.c_str()); + should_download = true; + should_download_from_scratch = true; + } + } + + const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none"; + if (should_download) { + if (file_exists && + !accept_ranges_supported) { // Resumable downloads not supported, delete and start again. + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } } - } - // Set the output file + const std::string path_temporary = path + ".downloadInProgress"; + if (should_download_from_scratch) { + if (std::filesystem::exists(path_temporary)) { + if (remove(path_temporary.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); + return false; + } + } - struct FILE_deleter { - void operator()(FILE * f) const { - fclose(f); + if (std::filesystem::exists(path)) { + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } } - }; - - std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); - return false; - } - - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); - auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd); - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); - // display download progress - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + // Write the updated JSON metadata file. + metadata.update({ + { "url", url }, + { "etag", headers.etag }, + { "lastModified", headers.last_modified } + }); + write_file(metadata_path, metadata.dump(4)); + LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", + __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(), + headers.etag.c_str(), headers.last_modified.c_str()); + const bool was_pull_successful = common_pull_file(curl.get(), path_temporary); + if (!was_pull_successful) { + if (i + 1 < max_attempts) { + const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; + LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); + std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); + } else { + LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); + } - // helper function to hide password in URL - auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { - std::size_t protocol_pos = url.find("://"); - if (protocol_pos == std::string::npos) { - return url; // Malformed URL + continue; } - std::size_t at_pos = url.find('@', protocol_pos + 3); - if (at_pos == std::string::npos) { - return url; // No password in URL + long http_code = 0; + curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; } - return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); - }; - - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, - llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET"); - if (!was_perform_successful) { - return false; - } - - long http_code = 0; - curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } else { + LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); } - // Causes file to be closed explicitly here before we rename it. - outfile.reset(); - - // Write the updated JSON metadata file. - metadata.update({ - {"url", url}, - {"etag", headers.etag}, - {"lastModified", headers.last_modified} - }); - write_file(metadata_path, metadata.dump(4)); - LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); - - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - } else { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); + break; } return true; @@ -770,7 +834,7 @@ static std::string common_docker_get_token(const std::string & repo) { } static std::string common_docker_resolve_model(const std::string & docker) { - // Parse ai/smollm2:135M-Q4_K_M + // Parse ai/smollm2:135M-Q4_0 size_t colon_pos = docker.find(':'); std::string repo, tag; if (colon_pos != std::string::npos) {