model : make rope_yarn_log_mul optional for deepseek2 (#14896)
author    Gabriel Larson <redacted>
Sun, 27 Jul 2025 08:18:37 +0000 (03:18 -0500)
committer GitHub <redacted>
Sun, 27 Jul 2025 08:18:37 +0000 (11:18 +0300)
* make rope_yarn_log_mul optional for deepseek2

* default rope_yarn_log_mul = 0.0f
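The two changes work together: the YaRN log-multiplier key (LLM_KV_ROPE_SCALING_YARN_LOG_MUL) is now read with the non-required form of ml.get_key, and the hparams field carries an in-class default of 0.0f, so DeepSeek-V2 GGUF files that lack the key still load. The snippet below is a minimal, self-contained C++ sketch of that "optional key with default" pattern; the Hparams struct, get_key_optional helper, and key strings are hypothetical stand-ins for illustration, not llama.cpp's actual loader API.

#include <cstdio>
#include <map>
#include <string>

// Hypothetical stand-in for a subset of the model hyperparameters.
struct Hparams {
    // In-class default: stays 0.0f unless the key is present in the metadata.
    float rope_yarn_log_mul = 0.0f;
};

// Hypothetical loader helper mirroring the non-required lookup:
// writes the value only if the key exists, otherwise leaves dst untouched.
static bool get_key_optional(const std::map<std::string, float> & kv,
                             const std::string & key, float & dst) {
    auto it = kv.find(key);
    if (it == kv.end()) {
        return false;   // key absent: keep the default value in dst
    }
    dst = it->second;
    return true;
}

int main() {
    Hparams hp;

    // Simulated metadata that lacks the yarn log-multiplier key.
    std::map<std::string, float> kv = { {"rope.freq_base", 10000.0f} };

    get_key_optional(kv, "rope.scaling.yarn_log_multiplier", hp.rope_yarn_log_mul);

    // Prints 0.0 because the key was missing and the default was kept.
    std::printf("rope_yarn_log_mul = %.1f\n", hp.rope_yarn_log_mul);
    return 0;
}

Compiled and run, this prints rope_yarn_log_mul = 0.0, mirroring what the loader now does when the key is absent.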

src/llama-hparams.h
src/llama-model.cpp

diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index c422cd7be827a08deda24191903853c036a6f856..ec7fd6a42bf54d2006da127e375b200046c851fb 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -98,7 +98,7 @@ struct llama_hparams {
     float    rope_freq_scale_train;
     float    rope_freq_scale_train_swa;
     uint32_t n_ctx_orig_yarn;
-    float    rope_yarn_log_mul;
+    float    rope_yarn_log_mul = 0.0f;
 
     std::array<int, 4> rope_sections;
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index f16789c2a77e927803a76014e121326473579829..71f89e19072ded81e794f7c781ec0f077719475e 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
 
                 switch (hparams.n_layer) {
                     case 27: type = LLM_TYPE_16B; break;
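Both halves of the change are needed: passing false as the third argument to ml.get_key makes the lookup non-required, so a missing LLM_KV_ROPE_SCALING_YARN_LOG_MUL key no longer aborts loading, while the 0.0f default member initializer in llama_hparams guarantees the field holds a well-defined value rather than an uninitialized float when the key is absent.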