From: Stephan Walter
Date: Sun, 2 Apr 2023 07:18:53 +0000 (+0000)
Subject: llama : do not allocate KV cache for "vocab_only == true" (#682)
X-Git-Tag: gguf-v0.4.0~1028
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=81040f10aae3160317c5787c9c59acb219927826;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llama : do not allocate KV cache for "vocab_only == true" (#682)

Fixes sanitizer CI
---

diff --git a/llama.cpp b/llama.cpp
index bed24207..1b3157cd 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
     }
 
     // reserve memory for context buffers
-    {
+    if (!params.vocab_only) {
         if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
            fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
            llama_free(ctx);
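
The change matters for callers that load a model only to reach the vocabulary/tokenizer: with the guard above, such a context skips kv_cache_init() instead of allocating a KV cache it never uses. Below is a minimal sketch of that kind of caller, assuming the C API of this era (llama_context_default_params, llama_init_from_file, llama_tokenize); exact signatures may differ between versions, so treat it as illustrative rather than canonical.

    // Sketch: load a model with vocab_only = true to use only the tokenizer.
    // With this patch, no self-attention KV cache is allocated for this context.
    #include <cstdio>
    #include <vector>
    #include "llama.h"

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s <model-file>\n", argv[0]);
            return 1;
        }

        llama_context_params params = llama_context_default_params();
        params.vocab_only = true; // only the vocabulary is needed

        llama_context * ctx = llama_init_from_file(argv[1], params);
        if (ctx == nullptr) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        const char * text = "Hello world";
        std::vector<llama_token> tokens(64);
        // returns the number of tokens written, negative if the buffer is too small
        int n = llama_tokenize(ctx, text, tokens.data(), (int) tokens.size(), true);
        fprintf(stderr, "tokenized into %d tokens\n", n);

        llama_free(ctx);
        return 0;
    }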