From: Georgi Gerganov Date: Tue, 13 Jun 2023 17:20:07 +0000 (+0300) Subject: llama : do a warm-up eval at start for better timings (#1824) X-Git-Tag: gguf-v0.4.0~639 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2347e45e7bdb09c9a7d74b2c0bc86c2b65f0c343;p=pkg%2Fggml%2Fsources%2Fllama.cpp llama : do a warm-up eval at start for better timings (#1824) --- diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 66d56314..efa913e1 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -331,6 +331,13 @@ int main(int argc, char ** argv) { std::vector<llama_token> embd; + // do one empty run to warm up the model + { + const std::vector<llama_token> tmp = { llama_token_bos(), }; + llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); + llama_reset_timings(ctx); + } + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (embd.size() > 0) {