model : support GLM 4.6 (make a few NextN/MTP tensors not required) (#16359)

author Bartowski <redacted>

Tue, 30 Sep 2025 20:24:36 +0000 (16:24 -0400)

committer GitHub <redacted>

Tue, 30 Sep 2025 20:24:36 +0000 (22:24 +0200)
author Bartowski <redacted>
Tue, 30 Sep 2025 20:24:36 +0000 (16:24 -0400)
committer GitHub <redacted>
Tue, 30 Sep 2025 20:24:36 +0000 (22:24 +0200)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index 2470f87850f2b62492919c49adef3891afcb1d97..63655bf6517b4c50da74af9a8e02148ea86c97e7 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -4825,11 +4825,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                          // NextN/MTP tensors (preserved but unused) - conditionally load for last nextn_predict_layers
                          if (hparams.nextn_predict_layers > 0 && static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers) {
                              layer.nextn.eh_proj          = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", i), { 2 * n_embd, n_embd }, flags);
-                            layer.nextn.embed_tokens     = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags);
                              layer.nextn.enorm            = create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", i), { n_embd }, flags);
                              layer.nextn.hnorm            = create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", i), { n_embd }, flags);
-                            layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags);
-                            layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags);
+
+                            // Optional tensors
+                            layer.nextn.embed_tokens     = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                            layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                            layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags | TENSOR_NOT_REQUIRED);
                          }
                      }
                  }
author	Bartowski <redacted>
	Tue, 30 Sep 2025 20:24:36 +0000 (16:24 -0400)
committer	GitHub <redacted>
	Tue, 30 Sep 2025 20:24:36 +0000 (22:24 +0200)