llama : fix embd when offloading non-repeating layers (#1891)

author Johannes Gäßler <redacted>

Fri, 16 Jun 2023 18:25:51 +0000 (20:25 +0200)

committer GitHub <redacted>

Fri, 16 Jun 2023 18:25:51 +0000 (21:25 +0300)
author Johannes Gäßler <redacted>
Fri, 16 Jun 2023 18:25:51 +0000 (20:25 +0200)
committer GitHub <redacted>
Fri, 16 Jun 2023 18:25:51 +0000 (21:25 +0300)
diff --git a/llama.cpp b/llama.cpp

index a9043884460da70940942c9cd21783f5b2140f7a..81f047ed29819296c3c08eab130037b9e6d342a7 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1658,7 +1658,7 @@ static bool llama_eval_internal(
  
          // cur = cur*norm(broadcasted)
          cur = ggml_mul(ctx0, cur, model.norm);
-        offload_func_nr(cur);
+        // offload_func_nr(cur); // TODO CPU + GPU mirrored backend
          ggml_set_name(cur, "result_norm");
  
          embeddings = cur;
author	Johannes Gäßler <redacted>
	Fri, 16 Jun 2023 18:25:51 +0000 (20:25 +0200)
committer	GitHub <redacted>
	Fri, 16 Jun 2023 18:25:51 +0000 (21:25 +0300)