model : detect GigaChat3-10B-A1.8B as deepseek lite (#17420)

author ubergarm <redacted>

Fri, 21 Nov 2025 13:51:38 +0000 (08:51 -0500)

committer GitHub <redacted>

Fri, 21 Nov 2025 13:51:38 +0000 (14:51 +0100)
author ubergarm <redacted>
Fri, 21 Nov 2025 13:51:38 +0000 (08:51 -0500)
committer GitHub <redacted>
Fri, 21 Nov 2025 13:51:38 +0000 (14:51 +0100)
diff --git a/src/llama-model.cpp b/src/llama-model.cpp

index e703181a19804d12ec615c88f6e2cae2ef8b38a9..175549a9e30f1b44ef94531b8ab096c87427ac52 100644 (file)
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
              } break;
          case LLM_ARCH_DEEPSEEK2:
              {
-                bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                  ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                  ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
                  if (!is_lite) {
@@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                  } break;
              case LLM_ARCH_DEEPSEEK2:
                  {
-                    const bool is_lite = (hparams.n_layer == 27);
+                    // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                    const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
  
                      const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
  
diff --git a/src/models/deepseek2.cpp b/src/models/deepseek2.cpp

index 68f72f72bb643157f7da1817a7493cc7469dada7..0b41f7ba8eb37963f61e24eab3c2c0ef91beefc7 100644 (file)
--- a/src/models/deepseek2.cpp
+++ b/src/models/deepseek2.cpp
@@ -4,7 +4,8 @@
  
  llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
      llm_graph_context(params) {
-    bool is_lite = (hparams.n_layer == 27);
+    // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+    bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
  
      const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
author	ubergarm <redacted>
	Fri, 21 Nov 2025 13:51:38 +0000 (08:51 -0500)
committer	GitHub <redacted>
	Fri, 21 Nov 2025 13:51:38 +0000 (14:51 +0100)
src/llama-model.cpp		patch \| blob \| history
src/models/deepseek2.cpp		patch \| blob \| history