llama : remove redundant reshape in build_kv_store (#6369)

author Daniel Bevenius <redacted>

Fri, 29 Mar 2024 07:23:22 +0000 (08:23 +0100)

committer GitHub <redacted>

Fri, 29 Mar 2024 07:23:22 +0000 (09:23 +0200)
author Daniel Bevenius <redacted>
Fri, 29 Mar 2024 07:23:22 +0000 (08:23 +0100)
committer GitHub <redacted>
Fri, 29 Mar 2024 07:23:22 +0000 (09:23 +0200)
diff --git a/llama.cpp b/llama.cpp

index 77ec9b7a1935d6ba3b6d422132b13ddc0fd946af..1875e247168411249d407d63af2ec4b78746fb77 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -5523,8 +5523,8 @@ static void llm_build_kv_store(
      GGML_ASSERT(kv.size == n_ctx);
  
      // compute the transposed [n_tokens, n_embd] V matrix
-    struct ggml_tensor * v_cur_t = ggml_transpose(ctx, ggml_reshape_2d(ctx, v_cur, n_embd_v_gqa, n_tokens));
-    //struct ggml_tensor * v_cur_t = ggml_transpose(ctx, v_cur); // TODO: reshape above is likely not needed
+    assert(v_cur->ne[0] == n_embd_v_gqa && v_cur->ne[1] == n_tokens);
+    struct ggml_tensor * v_cur_t = ggml_transpose(ctx, v_cur);
      cb(v_cur_t, "v_cur_t", il);
  
      struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_k_gqa,
author	Daniel Bevenius <redacted>
	Fri, 29 Mar 2024 07:23:22 +0000 (08:23 +0100)
committer	GitHub <redacted>
	Fri, 29 Mar 2024 07:23:22 +0000 (09:23 +0200)