#include <sys/syslimits.h>
#endif
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
-#define LLAMA_CURL_MAX_HEADER_LENGTH 256
#endif // LLAMA_USE_CURL
using json = nlohmann::ordered_json;
return false;
}
+// Fills in whichever of params.model / params.hf_file was left empty, based on
+// the model source that was requested:
+//   * --hf-repo set:   an empty hf_file falls back to the value of --model
+//                      (throws std::invalid_argument when both are empty); an
+//                      empty model becomes "models/" + last '/'-separated
+//                      component of hf_file.
+//   * --model-url set: an empty model becomes "models/" + last path component
+//                      of the URL, taken after dropping everything from the
+//                      first '#' and then from the first '?'.
+//   * neither set:     an empty model becomes DEFAULT_MODEL_PATH.
+// A model path that is already set is never overwritten.
+void gpt_params_handle_model_default(gpt_params & params) {
+    const bool model_given = !params.model.empty();
+
+    // Hugging Face repo takes precedence over a plain model URL.
+    if (!params.hf_repo.empty()) {
+        if (!params.hf_file.empty()) {
+            if (!model_given) {
+                params.model = "models/" + string_split(params.hf_file, '/').back();
+            }
+            return;
+        }
+        // short-hand to avoid specifying --hf-file -> default it to --model
+        if (!model_given) {
+            throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
+        }
+        params.hf_file = params.model;
+        return;
+    }
+
+    if (!params.model_url.empty()) {
+        if (model_given) {
+            return;
+        }
+        // Strip the fragment first, then the query string, then keep the last path segment.
+        const auto url_no_fragment = string_split(params.model_url, '#').front();
+        const auto url_no_query    = string_split(url_no_fragment, '?').front();
+        params.model = "models/" + string_split(url_no_query, '/').back();
+        return;
+    }
+
+    if (!model_given) {
+        params.model = DEFAULT_MODEL_PATH;
+    }
+}
+
bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
bool invalid_param = false;
std::string arg;
throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
}
- // short-hand to avoid specifying --hf-file -> default it to --model
- if (!params.hf_repo.empty() && params.hf_file.empty()) {
- params.hf_file = params.model;
- }
+ gpt_params_handle_model_default(params);
if (params.escape) {
process_escapes(params.prompt);
printf(" --control-vector-layer-range START END\n");
printf(" layer range to apply the control vector(s) to, start and end inclusive\n");
printf(" -m FNAME, --model FNAME\n");
- printf(" model path (default: %s)\n", params.model.c_str());
+ printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH);
printf(" -md FNAME, --model-draft FNAME\n");
printf(" draft model for speculative decoding (default: unused)\n");
printf(" -mu MODEL_URL, --model-url MODEL_URL\n");
#ifdef LLAMA_USE_CURL
-static bool llama_download_file(CURL * curl, const char * url, const char * path) {
+// Returns true when `str` begins with `prefix`; an empty prefix always matches.
+// Stand-in until C++20's std::string::starts_with can be used.
+static bool starts_with(const std::string & str, const std::string & prefix) {
+    // compare() clamps the compared length to what is left of `str`, so a
+    // prefix longer than `str` simply compares unequal.
+    return str.compare(0, prefix.size(), prefix) == 0;
+}
+
+static bool llama_download_file(const std::string & url, const std::string & path) {
+
+ // Initialize libcurl
+ std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
+ if (!curl) {
+ fprintf(stderr, "%s: error initializing libcurl\n", __func__);
+ return false;
+ }
+
bool force_download = false;
// Set the URL, allow to follow http redirection
- curl_easy_setopt(curl, CURLOPT_URL, url);
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+ curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+ curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
#if defined(_WIN32)
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
// operating system. Currently implemented under MS-Windows.
- curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+ curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
#endif
// Check if the file already exists locally
struct stat model_file_info;
- auto file_exists = (stat(path, &model_file_info) == 0);
-
- // If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
- char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
- char etag_path[PATH_MAX] = {0};
- snprintf(etag_path, sizeof(etag_path), "%s.etag", path);
+ auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
- char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
- char last_modified_path[PATH_MAX] = {0};
- snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path);
+ // If the file exists, check its JSON metadata companion file.
+ std::string metadata_path = path + ".json";
+ nlohmann::json metadata;
+ std::string etag;
+ std::string last_modified;
if (file_exists) {
- auto * f_etag = fopen(etag_path, "r");
- if (f_etag) {
- if (!fgets(etag, sizeof(etag), f_etag)) {
- fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
- } else {
- fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag);
- }
- fclose(f_etag);
- }
-
- auto * f_last_modified = fopen(last_modified_path, "r");
- if (f_last_modified) {
- if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
- fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
- } else {
- fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path,
- last_modified);
+ // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
+ std::ifstream metadata_in(metadata_path);
+ if (metadata_in.good()) {
+ try {
+ metadata_in >> metadata;
+ fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
+ if (metadata.contains("url") && metadata["url"].is_string()) {
+ auto previous_url = metadata["url"].get<std::string>();
+ if (previous_url != url) {
+ fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
+ return false;
+ }
+ }
+ if (metadata.contains("etag") && metadata["etag"].is_string()) {
+ etag = metadata["etag"];
+ }
+ if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
+ last_modified = metadata["lastModified"];
+ }
+ } catch (const nlohmann::json::exception & e) {
+ fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+ return false;
}
- fclose(f_last_modified);
}
+ } else {
+ fprintf(stderr, "%s: no previous model file found %s\n", __func__, path.c_str());
}
// Send a HEAD request to retrieve the etag and last-modified headers
struct llama_load_model_from_url_headers {
- char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
- char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
+ std::string etag;
+ std::string last_modified;
};
llama_load_model_from_url_headers headers;
{
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
- // Convert header field name to lowercase
- for (size_t i = 0; i < n_items && buffer[i] != ':'; ++i) {
- buffer[i] = tolower(buffer[i]);
- }
-
- const char * etag_prefix = "etag: ";
- if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) {
- strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF
- }
-
- const char * last_modified_prefix = "last-modified: ";
- if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) {
- strncpy(headers->last_modified, buffer + strlen(last_modified_prefix),
- n_items - strlen(last_modified_prefix) - 2); // Remove CRLF
+ static std::regex header_regex("([^:]+): (.*)\r\n");
+ static std::regex etag_regex("ETag", std::regex_constants::icase);
+ static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
+
+ std::string header(buffer, n_items);
+ std::smatch match;
+ if (std::regex_match(header, match, header_regex)) {
+ const std::string & key = match[1];
+ const std::string & value = match[2];
+ if (std::regex_match(key, match, etag_regex)) {
+ headers->etag = value;
+ } else if (std::regex_match(key, match, last_modified_regex)) {
+ headers->last_modified = value;
+ }
}
return n_items;
};
- curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
- curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
- curl_easy_setopt(curl, CURLOPT_HEADERDATA, &headers);
+ curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
- CURLcode res = curl_easy_perform(curl);
+ CURLcode res = curl_easy_perform(curl.get());
if (res != CURLE_OK) {
- curl_easy_cleanup(curl);
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
return false;
}
long http_code = 0;
- curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
if (http_code != 200) {
// HEAD not supported, we don't know if the file has changed
// force trigger downloading
}
}
- // If the ETag or the Last-Modified headers are different: trigger a new download
- bool should_download = !file_exists
- || force_download
- || (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0)
- || (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0);
+ bool should_download = !file_exists || force_download;
+ if (!should_download) {
+ if (!etag.empty() && etag != headers.etag) {
+ fprintf(stderr, "%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+ should_download = true;
+ } else if (!last_modified.empty() && last_modified != headers.last_modified) {
+ fprintf(stderr, "%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+ should_download = true;
+ }
+ }
if (should_download) {
- char path_temporary[PATH_MAX] = {0};
- snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path);
+ std::string path_temporary = path + ".downloadInProgress";
if (file_exists) {
- fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path);
- if (remove(path) != 0) {
- curl_easy_cleanup(curl);
- fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path);
+ fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
+ if (remove(path.c_str()) != 0) {
+ fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
return false;
}
}
// Set the output file
- auto * outfile = fopen(path_temporary, "wb");
+ std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);
if (!outfile) {
- curl_easy_cleanup(curl);
- fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path);
+ fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
return false;
}
auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
return fwrite(data, size, nmemb, (FILE *)fd);
};
- curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile);
+ curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
// display download progress
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
// helper function to hide password in URL
auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
// start the download
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
- llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified);
- auto res = curl_easy_perform(curl);
+ llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
+ auto res = curl_easy_perform(curl.get());
if (res != CURLE_OK) {
- fclose(outfile);
- curl_easy_cleanup(curl);
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
return false;
}
long http_code = 0;
- curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE, &http_code);
+ curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
if (http_code < 200 || http_code >= 400) {
- fclose(outfile);
- curl_easy_cleanup(curl);
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
return false;
}
- // Clean up
- fclose(outfile);
+ // Causes file to be closed explicitly here before we rename it.
+ outfile.reset();
- // Write the new ETag to the .etag file
- if (strlen(headers.etag) > 0) {
- auto * etag_file = fopen(etag_path, "w");
- if (etag_file) {
- fputs(headers.etag, etag_file);
- fclose(etag_file);
- fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag);
- }
- }
+ // Write the updated JSON metadata file.
+ metadata.update({
+ {"url", url},
+ {"etag", headers.etag},
+ {"lastModified", headers.last_modified}
+ });
+ std::ofstream(metadata_path) << metadata.dump(4);
+ fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
- // Write the new lastModified to the .etag file
- if (strlen(headers.last_modified) > 0) {
- auto * last_modified_file = fopen(last_modified_path, "w");
- if (last_modified_file) {
- fputs(headers.last_modified, last_modified_file);
- fclose(last_modified_file);
- fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path,
- headers.last_modified);
- }
- }
-
- if (rename(path_temporary, path) != 0) {
- curl_easy_cleanup(curl);
- fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path);
+ if (rename(path_temporary.c_str(), path.c_str()) != 0) {
+ fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
return false;
}
}
return NULL;
}
- // Initialize libcurl
- auto * curl = curl_easy_init();
-
- if (!curl) {
- fprintf(stderr, "%s: error initializing libcurl\n", __func__);
- return NULL;
- }
-
- if (!llama_download_file(curl, model_url, path_model)) {
+ if (!llama_download_file(model_url, path_model)) {
return NULL;
}
auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
if (!ctx_gguf) {
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model);
- curl_easy_cleanup(curl);
return NULL;
}
gguf_free(ctx_gguf);
}
- curl_easy_cleanup(curl);
-
if (n_split > 1) {
char split_prefix[PATH_MAX] = {0};
char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
- auto * curl = curl_easy_init();
- bool res = llama_download_file(curl, split_url, split_path);
- curl_easy_cleanup(curl);
-
- return res;
+ return llama_download_file(split_url, split_path);
}, idx));
}
fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau);
fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta);
fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false");
- fprintf(stream, "model: %s # default: models/7B/ggml-model.bin\n", params.model.c_str());
+ fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH);
fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false");
fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers);