model : make rope_yarn_log_mul optional for deepseek2 (#14896)
author    Gabriel Larson <redacted>
Sun, 27 Jul 2025 08:18:37 +0000 (03:18 -0500)
committer GitHub <redacted>
Sun, 27 Jul 2025 08:18:37 +0000 (11:18 +0300)
* make rope_yarn_log_mul optional for deepseek2

* default rope_yarn_log_mul = 0.0f
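The two changes work together: the YaRN log-multiplier key (LLM_KV_ROPE_SCALING_YARN_LOG_MUL) is now read with the non-required form of ml.get_key, and the hparams field carries an in-class default of 0.0f, so DeepSeek-V2 GGUF files that lack the key still load. The snippet below is a minimal, self-contained C++ sketch of that "optional key with default" pattern; the Hparams struct, get_key_optional helper, and key strings are hypothetical stand-ins for illustration, not llama.cpp's actual loader API.

#include <cstdio>
#include <map>
#include <string>

// Hypothetical stand-in for a subset of the model hyperparameters.
struct Hparams {
    // In-class default: stays 0.0f unless the key is present in the metadata.
    float rope_yarn_log_mul = 0.0f;
};

// Hypothetical loader helper mirroring the non-required lookup:
// writes the value only if the key exists, otherwise leaves dst untouched.
static bool get_key_optional(const std::map<std::string, float> & kv,
                             const std::string & key, float & dst) {
    auto it = kv.find(key);
    if (it == kv.end()) {
        return false;   // key absent: keep the default value in dst
    }
    dst = it->second;
    return true;
}

int main() {
    Hparams hp;

    // Simulated metadata that lacks the yarn log-multiplier key.
    std::map<std::string, float> kv = { {"rope.freq_base", 10000.0f} };

    get_key_optional(kv, "rope.scaling.yarn_log_multiplier", hp.rope_yarn_log_mul);

    // Prints 0.0 because the key was missing and the default was kept.
    std::printf("rope_yarn_log_mul = %.1f\n", hp.rope_yarn_log_mul);
    return 0;
}

Compiled and run, this prints rope_yarn_log_mul = 0.0, mirroring what the loader now does when the key is absent.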

src/llama-hparams.h
src/llama-model.cpp

diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index c422cd7be827a08deda24191903853c036a6f856..ec7fd6a42bf54d2006da127e375b200046c851fb 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -98,7 +98,7 @@ struct llama_hparams {
     float    rope_freq_scale_train;
     float    rope_freq_scale_train_swa;
     uint32_t n_ctx_orig_yarn;
-    float    rope_yarn_log_mul;
+    float    rope_yarn_log_mul = 0.0f;
 
     std::array<int, 4> rope_sections;
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index f16789c2a77e927803a76014e121326473579829..71f89e19072ded81e794f7c781ec0f077719475e 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
 
                 switch (hparams.n_layer) {
                     case 27: type = LLM_TYPE_16B; break;
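Both halves of the change are needed: passing false as the third argument to ml.get_key makes the lookup non-required, so a missing LLM_KV_ROPE_SCALING_YARN_LOG_MUL key no longer aborts loading, while the 0.0f default member initializer in llama_hparams guarantees the field holds a well-defined value rather than an uninitialized float when the key is absent.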