convert : enable expert group selection for all models with it (#16691)
author    Sigbjørn Skjæret <redacted>
          Sun, 26 Oct 2025 16:21:23 +0000 (17:21 +0100)
committer GitHub <redacted>
          Sun, 26 Oct 2025 16:21:23 +0000 (17:21 +0100)
convert_hf_to_gguf.py
src/llama-model.cpp

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 05d791806df1e8a73528b3aa23b193484cfd6ab7..093f2ab467f4daa2ccec10bc189fbbc74e02fc43 100755
@@ -742,6 +742,12 @@ class TextModel(ModelBase):
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
             logger.info(f"gguf: experts used count = {n_experts_used}")
+        if (n_expert_groups := self.hparams.get("n_group")) is not None:
+            self.gguf_writer.add_expert_group_count(n_expert_groups)
+            logger.info(f"gguf: expert groups count = {n_expert_groups}")
+        if (n_group_used := self.hparams.get("topk_group")) is not None:
+            self.gguf_writer.add_expert_group_used_count(n_group_used)
+            logger.info(f"gguf: expert groups used count = {n_group_used}")
 
         if (head_dim := self.hparams.get("head_dim")) is not None:
             self.gguf_writer.add_key_length(head_dim)
@@ -8233,8 +8239,6 @@ class BailingMoeV2Model(TextModel):
         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
         self.gguf_writer.add_expert_count(hparams["num_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
-        self.gguf_writer.add_expert_group_count(hparams["n_group"])
-        self.gguf_writer.add_expert_group_used_count(hparams["topk_group"])
         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
 
         if hparams["score_function"] == "sigmoid":
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index a6a6fa7d7198ad22334a80e7d89408bc3509a750..b88ff51f5da1d8ee61f9719fd8f9fbf295803193 100644
@@ -6369,6 +6369,8 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff             = %s\n",     __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
         LLAMA_LOG_INFO("%s: n_expert         = %u\n",     __func__, hparams.n_expert);
         LLAMA_LOG_INFO("%s: n_expert_used    = %u\n",     __func__, hparams.n_expert_used);
+        LLAMA_LOG_INFO("%s: n_expert_groups  = %d\n",     __func__, hparams.n_expert_groups);
+        LLAMA_LOG_INFO("%s: n_group_used     = %d\n",     __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: causal attn      = %d\n",     __func__, hparams.causal_attn);
         LLAMA_LOG_INFO("%s: pooling type     = %d\n",     __func__, hparams.pooling_type);
         LLAMA_LOG_INFO("%s: rope type        = %d\n",     __func__, hparams.rope_type);
@@ -6469,8 +6471,6 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: n_ff_shexp           = %d\n",     __func__, hparams.n_ff_shexp);
         LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
-        LLAMA_LOG_INFO("%s: n_expert_groups      = %d\n",     __func__, hparams.n_expert_groups);
-        LLAMA_LOG_INFO("%s: n_group_used         = %d\n",     __func__, hparams.n_group_used);
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
         LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",     __func__, hparams.expert_weights_norm);
         LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n",     __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));