case LLM_TYPE_1_7B: return "1.7B";
case LLM_TYPE_1_8B: return "1.8B";
case LLM_TYPE_2B: return "2B";
+ case LLM_TYPE_2_6B: return "2.6B";
case LLM_TYPE_2_8B: return "2.8B";
case LLM_TYPE_2_9B: return "2.9B";
case LLM_TYPE_3B: return "3B";
for (uint32_t il = 0; il < hparams.n_layer; ++il) {
hparams.recurrent_layer_arr[il] = hparams.n_head_kv(il) == 0;
}
- switch (hparams.n_embd) {
- case 1024: type = LLM_TYPE_350M; break;
- case 1536: type = LLM_TYPE_700M; break;
- case 2048: type = LLM_TYPE_1_2B; break;
+ switch (hparams.n_ff()) {
+ case 4608: type = LLM_TYPE_350M; break;
+ case 6912: type = LLM_TYPE_700M; break;
+ case 8192: type = LLM_TYPE_1_2B; break;
+ case 10752: type = LLM_TYPE_2_6B; break;
default: type = LLM_TYPE_UNKNOWN;
}
} break;