* Detect GigaChat3-10B-A1.8B as deepseek lite
Hardcodes a check on the number of layers to detect whether the model is a lite variant of deepseek.
* Add comment identifying deepseek lite variants
Deepseek lite variants include DeepSeek-V2-Lite and GigaChat3-10B-A1.8B.
} break;
case LLM_ARCH_DEEPSEEK2:
{
- bool is_lite = (hparams.n_layer == 27);
+ // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+ bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
if (!is_lite) {
} break;
case LLM_ARCH_DEEPSEEK2:
{
- const bool is_lite = (hparams.n_layer == 27);
+ // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+ const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
llm_graph_context(params) {
- bool is_lite = (hparams.n_layer == 27);
+ // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+ bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);