From: bssrdf
Date: Thu, 13 Mar 2025 18:29:19 +0000 (-0400)
Subject: gpt-2 : add comment about KV cache type (#1142)
X-Git-Tag: upstream/0.0.1898~92
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=ef094525080b661166f07a0d688fcfbf96e68e56;p=pkg%2Fggml%2Fsources%2Fggml

gpt-2 : add comment about KV cache type (#1142)

* change KV cache to fp16 to take advantage of tensor cores
* added a note/comment to indicate kv can be FP16
---
diff --git a/examples/gpt-2/main-backend.cpp b/examples/gpt-2/main-backend.cpp
index ca1f761a..29f197a7 100644
--- a/examples/gpt-2/main-backend.cpp
+++ b/examples/gpt-2/main-backend.cpp
@@ -337,6 +337,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 const int n_mem = n_layer*n_ctx;
 const int n_elements = n_embd*n_mem;
+ // k and v here can also be GGML_TYPE_F16 to save memory and speed up the computation
+ // if backend supports it
 model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
 model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);