From: uint256_t
Date: Mon, 13 Mar 2023 16:33:43 +0000 (+0900)
Subject: Reduce model loading time (#43)
X-Git-Tag: gguf-v0.4.0~1254
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=63fd76fbb06f9b723ca11505352387a3148b1814;p=pkg%2Fggml%2Fsources%2Fllama.cpp

Reduce model loading time (#43)

* Use buffering

* Use vector

* Minor

---------

Co-authored-by: Georgi Gerganov
---

diff --git a/main.cpp b/main.cpp
index d068761e..ee0952f7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -87,7 +87,10 @@ struct llama_model {
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
 
+    std::vector<char> f_buf(1024*1024);
+
     auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
@@ -325,6 +328,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
 
         fin = std::ifstream(fname_part, std::ios::binary);
+        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
         fin.seekg(file_offset);
 
         // load weights
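
The change replaces the std::filebuf's default buffer (typically only a few KiB) with
a 1 MiB caller-owned buffer via pubsetbuf, so sequential reads of the large model
file hit the underlying OS read calls far less often. Below is a minimal standalone
sketch of the same technique; the file name "model.bin" and the chunked-read loop are
illustrative assumptions, not part of the commit. Note that pubsetbuf is only
guaranteed to take effect if called before any I/O on the stream; the sketch installs
the buffer before open() for maximum portability, while the commit calls it right
after construction and before the first read.

    #include <cstdio>
    #include <fstream>
    #include <ios>
    #include <vector>

    int main() {
        // 1 MiB user-provided buffer, matching the size used in the commit.
        std::vector<char> f_buf(1024*1024);

        std::ifstream fin;
        // Install the buffer before opening: pubsetbuf must precede any I/O
        // on the stream to be guaranteed to take effect.
        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
        fin.open("model.bin", std::ios::binary);  // placeholder file name
        if (!fin) {
            std::fprintf(stderr, "failed to open 'model.bin'\n");
            return 1;
        }

        // Sequential chunked reads now go through the enlarged buffer.
        std::vector<char> chunk(256*1024);
        size_t total = 0;
        while (fin) {
            fin.read(chunk.data(), chunk.size());
            total += static_cast<size_t>(fin.gcount());
        }
        std::printf("read %zu bytes\n", total);
        return 0;
    }

Using std::vector for f_buf (the "Use vector" bullet above) keeps ownership and
cleanup automatic while still providing the stable, contiguous char array that
pubsetbuf requires; the buffer must outlive all I/O on the stream.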