git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
model : nit, DeepSeek V1 MoE is 16B and GigaChat is 20B (#12652)
author Sigbjørn Skjæret <redacted>
Tue, 9 Dec 2025 11:15:06 +0000 (12:15 +0100)
committer GitHub <redacted>
Tue, 9 Dec 2025 11:15:06 +0000 (12:15 +0100)
* nit, DeepSeek V1 MoE is 16B

* base type on n_ff_exp instead

src/llama-model.cpp

index e09d59e2c149e104fbb94ecd24f6e633058ecfd6..04fccc9793b12e799190af09ac22ee57c96316b3 100644 (file)
@@ -1606,8 +1606,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT,         hparams.n_expert_shared);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,        hparams.expert_weights_scale);
 
-                switch (hparams.n_layer) {
-                    case 28: type = LLM_TYPE_20B; break;
+                switch (hparams.n_ff_exp) {
+                    case 1408: type = LLM_TYPE_16B; break;
+                    case 1792: type = LLM_TYPE_20B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;