From: Sigbjørn Skjæret
Date: Tue, 9 Dec 2025 11:15:06 +0000 (+0100)
Subject: model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B (#12652)
X-Git-Tag: upstream/0.0.7446~113
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=42b12b560886dc2093b17af11c97ef6d276a3b97;p=pkg%2Fggml%2Fsources%2Fllama.cpp

model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B (#12652)

* nit, DeepSeek V1 MoE is 16B

* base type on n_ff_exp instead
---

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index e09d59e2..04fccc97 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1606,8 +1606,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
 
-                switch (hparams.n_layer) {
-                    case 28: type = LLM_TYPE_20B; break;
+                switch (hparams.n_ff_exp) {
+                    case 1408: type = LLM_TYPE_16B; break;
+                    case 1792: type = LLM_TYPE_20B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;