llama : de-duplicate deepseek2 norm

author Georgi Gerganov <redacted>
Mon, 15 Jul 2024 11:10:39 +0000 (14:10 +0300)
committer Georgi Gerganov <redacted>
Mon, 15 Jul 2024 11:10:39 +0000 (14:10 +0300)
diff --git a/src/llama.cpp b/src/llama.cpp

index 400a4232beeb09dff60a71fe8bd8ccf11d0a7a48..ddf0262d443b06ccc81ae5d48be126810e6d226f 100644 (file)
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -12873,12 +12873,12 @@ struct llm_build_context {
              struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
              cb(ffn_inp, "ffn_inp", il);
  
-            if ((uint32_t) il < hparams.n_layer_dense_lead) {
-                cur = llm_build_norm(ctx0, ffn_inp, hparams,
-                        model.layers[il].ffn_norm, NULL,
-                        LLM_NORM_RMS, cb, il);
-                cb(cur, "ffn_norm", il);
+            cur = llm_build_norm(ctx0, ffn_inp, hparams,
+                    model.layers[il].ffn_norm, NULL,
+                    LLM_NORM_RMS, cb, il);
+            cb(cur, "ffn_norm", il);
  
+            if ((uint32_t) il < hparams.n_layer_dense_lead) {
                  cur = llm_build_ffn(ctx0, cur,
                          model.layers[il].ffn_up,   NULL, NULL,
                          model.layers[il].ffn_gate, NULL, NULL,
@@ -12888,11 +12888,6 @@ struct llm_build_context {
                  cb(cur, "ffn_out", il);
              } else {
                  // MoE branch
-                cur = llm_build_norm(ctx0, ffn_inp, hparams,
-                        model.layers[il].ffn_norm, NULL,
-                        LLM_NORM_RMS, cb, il);
-                cb(cur, "ffn_norm", il);
-
                  ggml_tensor * moe_out =
                          llm_build_moe_ffn(ctx0, cur,
                              model.layers[il].ffn_gate_inp,
author	Georgi Gerganov <redacted>
	Mon, 15 Jul 2024 11:10:39 +0000 (14:10 +0300)
committer	Georgi Gerganov <redacted>
	Mon, 15 Jul 2024 11:10:39 +0000 (14:10 +0300)