* change the KV cache to FP16 to take advantage of tensor cores
* added a note/comment to indicate that the KV cache can be FP16
const int n_mem = n_layer*n_ctx;
const int n_elements = n_embd*n_mem;
+ // k and v here can also be GGML_TYPE_F16 to save memory and speed up the computation,
+ // if the backend supports it
model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements);
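
For reference, a minimal sketch of what switching the cache type could look like; the `alloc_kv_cache` helper, the `use_f16` flag and the `kv_type` variable below are illustrative and not part of this change, while `n_layer`, `n_ctx` and `n_embd` come from the model hparams as above:

#include <stdbool.h>
#include "ggml.h"

// Hypothetical helper (not part of this PR): allocate the KV cache,
// optionally in FP16 when the backend supports it.
static void alloc_kv_cache(struct ggml_context * ctx,
                           struct ggml_tensor ** memory_k,
                           struct ggml_tensor ** memory_v,
                           int n_layer, int n_ctx, int n_embd, bool use_f16) {
    const int n_mem      = n_layer*n_ctx;
    const int n_elements = n_embd*n_mem;

    // FP16 halves the cache size and lets FP16 (tensor-core) kernels read it directly
    const enum ggml_type kv_type = use_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;

    *memory_k = ggml_new_tensor_1d(ctx, kv_type, n_elements);
    *memory_v = ggml_new_tensor_1d(ctx, kv_type, n_elements);
}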