* Fix GLM 4.7 Lite MoE gating func
* Update src/models/deepseek2.cpp
Co-authored-by: Sigbjørn Skjæret <redacted>
* Update src/llama-model.cpp
Co-authored-by: Xuan-Son Nguyen <redacted>
---------
Co-authored-by: Sigbjørn Skjæret <redacted>
Co-authored-by: Xuan-Son Nguyen <redacted>
if (hparams.expert_gating_func == LLAMA_EXPERT_GATING_FUNC_TYPE_NONE) {
// for compatibility with existing DeepSeek V2 and V2.5 GGUFs
// that have no expert_gating_func model parameter set
- hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
+ if ((hparams.n_layer == 47 || hparams.n_layer == 48) && n_vocab == 154880) {
+ // GLM 4.7 Lite
+ hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID;
+ } else {
+ hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
+ }
}
if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f)) {