examples: cache hf model when --model not provided (#7353)

author Amir <redacted>

Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)

committer GitHub <redacted>

Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)
author Amir <redacted>
Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)
committer GitHub <redacted>
Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)
diff --git a/common/common.cpp b/common/common.cpp

index e624fc7f35352d39c7c4fcf175d03906869709c0..ae11650b446a4701047986edb1fd5008766b9567 100644 (file)
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1354,7 +1354,12 @@ void gpt_params_handle_model_default(gpt_params & params) {
              }
              params.hf_file = params.model;
          } else if (params.model.empty()) {
-            params.model = "models/" + string_split(params.hf_file, '/').back();
+            std::string cache_directory = get_cache_directory();
+            const bool success = create_directory_with_parents(cache_directory);
+            if (!success) {
+                throw std::runtime_error("failed to create cache directory: " + cache_directory);
+            }
+            params.model = cache_directory + string_split(params.hf_file, '/').back();
          }
      } else if (!params.model_url.empty()) {
          if (params.model.empty()) {
@@ -2516,6 +2521,31 @@ bool create_directory_with_parents(const std::string & path) {
  #endif // _WIN32
  }
  
+std::string get_cache_directory() {
+    std::string cache_directory = "";
+    if (getenv("LLAMA_CACHE")) {
+        cache_directory = std::getenv("LLAMA_CACHE");
+        if (cache_directory.back() != DIRECTORY_SEPARATOR) {
+            cache_directory += DIRECTORY_SEPARATOR;
+        }
+    } else {
+#ifdef __linux__
+        if (std::getenv("XDG_CACHE_HOME")) {
+            cache_directory = std::getenv("XDG_CACHE_HOME");
+        } else {
+            cache_directory = std::getenv("HOME") + std::string("/.cache/");
+        }
+#elif defined(__APPLE__)
+        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+#elif defined(_WIN32)
+        cache_directory = std::getenv("APPDATA");
+#endif // __linux__
+        cache_directory += "llama.cpp";
+        cache_directory += DIRECTORY_SEPARATOR;
+    }
+    return cache_directory;
+}
+
  void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data) {
      if (data.empty()) {
          fprintf(stream, "%s:\n", prop_name);
diff --git a/common/common.h b/common/common.h

index 566490e2f881acb870818deb941b9ce1563718f2..a8e5e50e6b810a1f0dd004be2970eac50258ee36 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -281,6 +281,7 @@ bool llama_should_add_bos_token(const llama_model * model);
  //
  
  bool create_directory_with_parents(const std::string & path);
+std::string get_cache_directory();
  void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
  void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
  void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
diff --git a/examples/main/README.md b/examples/main/README.md

index 97e2ae4c2dc431d7d451773e33ad71a5d7aa8aee..ee930f4e79a0d8db83847b6db183cf6c55c460cd 100644 (file)
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -325,3 +325,5 @@ These options provide extra functionality and customization when running the LLa
  -   `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance.
  -   `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
  -   `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
+
+-   `-hfr URL, --hf-repo URL`: The URL of the Hugging Face model repository. Used in conjunction with `--hf-file` or `-hff`. The model is downloaded and stored in the file provided by `-m` or `--model`. If `-m` is not provided, the model is automatically stored in the path specified by the `LLAMA_CACHE` environment variable or in an OS-specific local cache.
author	Amir <redacted>
	Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)
committer	GitHub <redacted>
	Tue, 21 May 2024 14:13:12 +0000 (17:13 +0300)
common/common.cpp		patch \| blob \| history
common/common.h		patch \| blob \| history
examples/main/README.md		patch \| blob \| history