{ LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+ { LLM_TENSOR_OUTPUT_NORM, "token_embd_norm" }, // note: wrong tensor name
{ LLM_TENSOR_OUTPUT, "output" },
}
},
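
This first hunk updates the LFM2 entry of the per-architecture tensor-name table (llama-arch.cpp). Only the C++ enum changes: the weight is still stored in GGUF files under "token_embd_norm", so the serialized key is kept and existing checkpoints keep loading; the in-line comment records that the enum and the on-disk name now intentionally disagree. A minimal sketch of the lookup this table feeds, assuming a simplified map (llama.cpp's LLM_TN helper adds suffix and per-layer handling on top of this):

    #include <cstdio>
    #include <map>
    #include <string>

    enum llm_tensor { LLM_TENSOR_OUTPUT_NORM };

    int main() {
        // enum -> on-disk GGUF name; deliberately kept as "token_embd_norm"
        const std::map<llm_tensor, std::string> names = {
            { LLM_TENSOR_OUTPUT_NORM, "token_embd_norm" },
        };
        // tn(LLM_TENSOR_OUTPUT_NORM, "weight") resolves to the key printed below
        printf("%s\n", (names.at(LLM_TENSOR_OUTPUT_NORM) + ".weight").c_str());
    }

The next hunk applies the same rename to the LFM2MOE table.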
{ LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
- { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+ { LLM_TENSOR_OUTPUT_NORM, "token_embd_norm" }, // note: wrong tensor name
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
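
The LFM2MOE hunk makes the same enum swap; the surrounding context shows the MoE expert tensors, whose names carry a %d placeholder that is formatted with the block index at load time. A tiny illustration of that formatting (buffer size chosen arbitrarily):

    #include <cstdio>

    int main() {
        char buf[64];
        // "blk.%d.ffn_gate_exps" -> "blk.7.ffn_gate_exps" for layer 7
        snprintf(buf, sizeof(buf), "blk.%d.ffn_gate_exps", 7);
        printf("%s\n", buf);
    }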
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
{LLM_TENSOR_TOKEN_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
{LLM_TENSOR_POS_EMBD, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
- {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
{LLM_TENSOR_TOKEN_TYPES, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+ {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
{LLM_TENSOR_OUTPUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
{LLM_TENSOR_CLS, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
{LLM_TENSOR_CLS_OUT, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
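
In LLM_TENSOR_INFOS each tensor is paired with the layer class it belongs to and the ggml op it feeds, which the loader consults when placing tensors and checking backend support. Moving LLM_TENSOR_TOKEN_EMBD_NORM from GGML_OP_GET_ROWS to GGML_OP_MUL matches how an RMS-norm weight is actually used: it scales the normalized activations element-wise rather than being indexed like an embedding table. A plain-C++ sketch (not ggml) of that multiply, y[i] = x[i] / rms(x) * w[i]:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const std::vector<float> x = {1.f, 2.f, 3.f, 4.f};
        const std::vector<float> w = {.5f, .5f, .5f, .5f}; // the norm weight
        float ss = 0.f;
        for (float v : x) ss += v * v;
        const float inv_rms = 1.f / std::sqrt(ss / x.size() + 1e-6f);
        for (size_t i = 0; i < x.size(); ++i) {
            printf("%f\n", x[i] * inv_rms * w[i]); // the GGML_OP_MUL step
        }
    }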
case LLM_ARCH_LFM2:
case LLM_ARCH_LFM2MOE:
{
- tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
- tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
- output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+ tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+ output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+ output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
if (output == NULL) {
    output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
}
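
In the loader hunk (llama-model.cpp) the final-norm weight now lands in model.output_norm instead of tok_norm, the slot other architectures reserve for true embedding norms, so LFM2 and LFM2MOE store their output norm the same way as other decoder-only models. The output head stays optional: when "output.weight" is absent, the loader reuses the embedding matrix, i.e. tied input/output embeddings. A self-contained sketch of that fallback pattern, with a hypothetical load() helper standing in for create_tensor:

    #include <cstdio>

    struct stub_tensor { const char * name; };

    // hypothetical loader: returns nullptr when an optional tensor is absent
    static stub_tensor * load(const char * name, bool present) {
        static stub_tensor t;
        if (!present) return nullptr;
        t.name = name;
        return &t;
    }

    int main() {
        stub_tensor * output = load("output.weight", /*present=*/false);
        if (output == nullptr) {
            // fall back to token_embd.weight -> tied embeddings
            output = load("token_embd.weight", /*present=*/true);
        }
        printf("lm head uses: %s\n", output->name);
    }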
ggml_tensor * cur = build_inp_embd(model.tok_embd);
cb(cur, "model.embed_tokens", -1);
+ ggml_build_forward_expand(gf, cur);
+
ggml_tensor * inp_pos = build_inp_pos();
auto * inp_hybrid = build_inp_mem_hybrid();
ggml_tensor * inp_out_ids = build_inp_out_ids();
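
In the LFM2 graph builder, the added ggml_build_forward_expand(gf, cur) appends the raw embedding node and its dependencies to the compute graph immediately, presumably so the "model.embed_tokens" tensor stays pinned and observable even after cur is reused by later ops. Conceptually, forward-expand is a topological walk that appends unvisited dependencies first; a minimal stand-alone sketch of that idea (not the ggml implementation):

    #include <cstdio>
    #include <vector>

    struct node { const char * name; std::vector<node *> src; bool visited = false; };

    static void expand(std::vector<node *> & graph, node * n) {
        if (n == nullptr || n->visited) return;
        for (node * s : n->src) expand(graph, s); // dependencies first
        n->visited = true;
        graph.push_back(n);
    }

    int main() {
        node embd{"embed_tokens", {}};
        node scaled{"scaled", {&embd}};
        std::vector<node *> graph;
        expand(graph, &embd);   // the explicit early expand
        expand(graph, &scaled); // later ops find their inputs already pinned
        for (node * n : graph) printf("%s\n", n->name);
    }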
cur = ggml_add(ctx0, cur, ffn_out);
}
- cur = build_norm(cur, model.tok_norm, NULL, LLM_NORM_RMS, -1);
- cb(cur, "model.embedding_norm", -1);
+ cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1);
+ cb(cur, "result_norm", -1);
res->t_embd = cur;
cur = build_lora_mm(model.output, cur);
- cb(cur, "lm_head", -1);
+ cb(cur, "result_output", -1);
res->t_logits = cur;
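
Finally, the debug-callback labels move from the model-specific "model.embedding_norm" and "lm_head" to "result_norm" and "result_output", the names the other architectures in llama.cpp emit for the final hidden state and the logits, which res->t_embd and res->t_logits then record. Tooling that hooks the graph callback can key on those shared labels, along the lines of this sketch (the cb signature here is simplified, not llama.cpp's exact one):

    #include <cstdio>
    #include <cstring>

    static void cb(const char * name, int il) {
        // match the standardized end-of-graph labels
        if (strcmp(name, "result_norm") == 0 || strcmp(name, "result_output") == 0) {
            printf("final tensor: %s (il=%d)\n", name, il);
        }
    }

    int main() {
        cb("model.embed_tokens", -1); // ignored
        cb("result_norm", -1);
        cb("result_output", -1);
    }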