]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
llama : fix MiniCPM inference after Granite Four changes (#14850)
author: yummy <redacted>
Thu, 24 Jul 2025 09:50:51 +0000 (17:50 +0800)
committer: GitHub <redacted>
Thu, 24 Jul 2025 09:50:51 +0000 (11:50 +0200)
MiniCPM models use the llm_build_granite constructor which was changed
in the Granite Four PR to use hparams.rope_finetuned instead of a
use_rope parameter. MiniCPM models need rope enabled by default.

Fixes inference output: previously the model produced gibberish; it now produces correct responses.

src/llama-model.cpp

index 35e718aa9896f175003572f17cd05b77bacf635b..a997a1e80f8cfc580de1c0bdc419c201af9d216a 100644 (file)
@@ -646,6 +646,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_RESIDUAL_SCALE,              hparams.f_residual_scale);
                 ml.get_key(LLM_KV_LOGIT_SCALE,                 hparams.f_logit_scale);
 
+                // MiniCPM uses rope by default, unlike Granite which uses it as a switch
+                hparams.rope_finetuned = true;
+
                 switch (hparams.n_layer) {
                     case 52: type = LLM_TYPE_1B; break;
                     case 40: type = LLM_TYPE_2B; break;