From: gtygo Date: Thu, 15 Aug 2024 07:40:12 +0000 (+0800) Subject: retrieval : fix memory leak in retrieval query handling (#8955) X-Git-Tag: upstream/0.0.4488~897 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4b9afbbe9037f8a2d659097c0c7d9fce32c6494c;p=pkg%2Fggml%2Fsources%2Fllama.cpp retrieval : fix memory leak in retrieval query handling (#8955) * retrieval * Reuse querybatch to reduce frequent memory allocation * delete unused white space --- diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp index 65b19ce7..aab9d810 100644 --- a/examples/retrieval/retrieval.cpp +++ b/examples/retrieval/retrieval.cpp @@ -253,6 +253,8 @@ int main(int argc, char ** argv) { chunks[i].tokens.clear(); } + struct llama_batch query_batch = llama_batch_init(n_batch, 0, 1); + // start loop, receive query and return top k similar chunks based on cosine similarity std::string query; while (true) { @@ -260,7 +262,6 @@ int main(int argc, char ** argv) { std::getline(std::cin, query); std::vector query_tokens = llama_tokenize(ctx, query, true); - struct llama_batch query_batch = llama_batch_init(n_batch, 0, 1); batch_add_seq(query_batch, query_tokens, 0); std::vector query_emb(n_embd, 0); @@ -293,6 +294,7 @@ int main(int argc, char ** argv) { } // clean up + llama_batch_free(query_batch); llama_print_timings(ctx); llama_free(ctx); llama_free_model(model);