Reduce model loading time by giving the model ifstream a 1 MiB read buffer (#43)

author uint256_t <redacted>

Mon, 13 Mar 2023 16:33:43 +0000 (01:33 +0900)

committer GitHub <redacted>

Mon, 13 Mar 2023 16:33:43 +0000 (18:33 +0200)
author uint256_t <redacted>
Mon, 13 Mar 2023 16:33:43 +0000 (01:33 +0900)
committer GitHub <redacted>
Mon, 13 Mar 2023 16:33:43 +0000 (18:33 +0200)
diff --git a/main.cpp b/main.cpp

index d068761e39dcce9137a7f215237a1af96248343c..ee0952f74483f9a74c30da02a6401adc604fa7b3 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -87,7 +87,10 @@ struct llama_model {
  bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
      printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
  
+    std::vector<char> f_buf(1024*1024);
+
      auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
      if (!fin) {
          fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
          return false;
@@ -325,6 +328,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
          printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
  
          fin = std::ifstream(fname_part, std::ios::binary);
+        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
          fin.seekg(file_offset);
  
          // load weights
author	uint256_t <redacted>
	Mon, 13 Mar 2023 16:33:43 +0000 (01:33 +0900)
committer	GitHub <redacted>
	Mon, 13 Mar 2023 16:33:43 +0000 (18:33 +0200)