model : fix div-by-zero for Nemotron V2 (#18309)
author Alessandro98-git <redacted>
Tue, 23 Dec 2025 02:04:57 +0000 (03:04 +0100)
committer GitHub <redacted>
Tue, 23 Dec 2025 02:04:57 +0000 (03:04 +0100)
* llama-model : fix Nemotron V2 crash by moving the MoE parameter calculation

* remove whitespace

---------

Co-authored-by: Sigbjørn Skjæret <redacted>
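
For context, a minimal sketch of the failure mode and the guard introduced by this commit. This is an illustration only, not the actual loader code: the standalone struct sketch_hparams and the main() driver are hypothetical, with field names mirroring hparams in src/llama-model.cpp. With a dense (non-MoE) Nemotron V2 checkpoint, n_expert_used is 0, so the fallback n_ff / n_expert_used divides by zero when evaluated unconditionally; moving the calculation inside the n_expert != 0 branch means it only runs for MoE configurations, where n_expert_used is non-zero.

    // Minimal sketch of the div-by-zero and its guard; hypothetical standalone
    // example, not the actual llama.cpp loader.
    #include <cstdint>
    #include <cstdio>

    struct sketch_hparams {
        int64_t n_ff_exp      = 0;    // 0 when the metadata does not set a per-expert FFN size
        int64_t n_ff          = 4096;
        int64_t n_expert      = 0;    // 0 for a dense (non-MoE) model
        int64_t n_expert_used = 0;    // also 0 for dense models
    };

    int main() {
        sketch_hparams hparams;

        // Old placement: computed for every layer, so a dense model with
        // n_expert_used == 0 evaluates n_ff / 0 and crashes.
        // New placement: only inside the MoE branch, where a valid MoE
        // configuration has n_expert_used > 0.
        if (hparams.n_expert != 0) {
            const int64_t n_ff_exp = hparams.n_ff_exp
                ? hparams.n_ff_exp
                : hparams.n_ff / hparams.n_expert_used;
            printf("n_ff_exp = %lld\n", (long long) n_ff_exp);
        } else {
            printf("dense layer: MoE expert FFN size not needed\n");
        }
        return 0;
    }
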
src/llama-model.cpp

index 87fefd576cb0619643d1365984f987ca3e1dafc0..0d5bcc64fe553d44f27d94745062bb226add4e9b 100644 (file)
@@ -5243,9 +5243,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     const int64_t n_group    = hparams.ssm_n_group;
                     const int64_t d_in_proj  = 2*d_inner + 2*n_group*d_state + n_ssm_head;
 
-                    const int64_t n_ff_exp = hparams.n_ff_exp ? hparams.n_ff_exp : n_ff / n_expert_used;
-                    const int64_t n_ff_shexp = hparams.n_ff_shexp;
-
                     // embeddings
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
 
@@ -5297,6 +5294,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                             layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias",   i), {n_embd},         TENSOR_NOT_REQUIRED);
                         }  else {
                             if (n_expert != 0) {
+                                const int64_t n_ff_exp = hparams.n_ff_exp ? hparams.n_ff_exp : n_ff / n_expert_used;
+                                const int64_t n_ff_shexp = hparams.n_ff_shexp;
+
                                 layer.ffn_gate_inp    = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP,  "weight", i), { n_embd, n_expert}, 0);
                                 layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert         }, 0);