// available llama models
enum e_model {
    MODEL_UNKNOWN,
+   MODEL_3B,
    MODEL_7B,
    MODEL_13B,
    MODEL_30B,
    // ...
};
// size of the first scratch buffer, per model type
static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0()
{
    static std::map<e_model, size_t> k_sizes = {
+       { MODEL_3B,  128ull * MB },
        { MODEL_7B,  512ull * MB },
        { MODEL_13B, 512ull * MB },
        { MODEL_30B, 512ull * MB },
        // ...
    };
    return k_sizes;
}
// size of the second scratch buffer, per model type
static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
{
    static std::map<e_model, size_t> k_sizes = {
+       { MODEL_3B,  128ull * MB },
        { MODEL_7B,  512ull * MB },
        { MODEL_13B, 512ull * MB },
        { MODEL_30B, 512ull * MB },
        // ...
    };
    return k_sizes;
}
// memory needed for the KV self-attention cache, per model type
static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
{
    static std::map<e_model, size_t> k_sizes = {
+       { MODEL_3B,   682ull * MB },
        { MODEL_7B,  1026ull * MB },
        { MODEL_13B, 1608ull * MB },
        { MODEL_30B, 3124ull * MB },
        // ...
    };
    return k_sizes;
}
// memory needed while evaluating the graph, per model type
static const std::map<e_model, size_t> & MEM_REQ_EVAL()
{
    static std::map<e_model, size_t> k_sizes = {
+       { MODEL_3B,   512ull * MB },
        { MODEL_7B,   768ull * MB },
        { MODEL_13B, 1024ull * MB },
        { MODEL_30B, 1280ull * MB },
        // ...
    };
    return k_sizes;
}
static const char *llama_model_type_name(e_model type) {
    switch (type) {
+       case MODEL_3B:  return "3B";
        case MODEL_7B:  return "7B";
        case MODEL_13B: return "13B";
        case MODEL_30B: return "30B";
        // ...
    }
}
// determine the model type from the number of layers in the loaded hparams
    {
        switch (hparams.n_layer) {
+           case 26: model.type = e_model::MODEL_3B; break;
            case 32: model.type = e_model::MODEL_7B; break;
            case 40: model.type = e_model::MODEL_13B; break;
            case 60: model.type = e_model::MODEL_30B; break;
            // ...
        }
    }
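
Taken together, the hunks above wire the new 3B size into two places: the layer-count switch that detects the model type at load time, and the per-model tables that size the scratch, KV, and eval buffers. The standalone sketch below (not the actual llama.cpp call sites; the names mem_req_scratch0, model_type_from_n_layer, the local MB constant, and main are illustrative assumptions, and the table is trimmed to the entries shown in the diff) shows how the two pieces combine: n_layer selects the e_model, and the e_model keys the memory lookup.

#include <cstdint>
#include <cstdio>
#include <map>

enum e_model { MODEL_UNKNOWN, MODEL_3B, MODEL_7B, MODEL_13B, MODEL_30B };

static const size_t MB = 1024ull * 1024ull;

// same shape as MEM_REQ_SCRATCH0() above, trimmed to the sizes shown in the diff
static const std::map<e_model, size_t> & mem_req_scratch0() {
    static std::map<e_model, size_t> k_sizes = {
        { MODEL_3B,  128ull * MB },
        { MODEL_7B,  512ull * MB },
        { MODEL_13B, 512ull * MB },
        { MODEL_30B, 512ull * MB },
    };
    return k_sizes;
}

// mirrors the n_layer switch: 26 layers -> 3B, 32 -> 7B, 40 -> 13B, 60 -> 30B
static e_model model_type_from_n_layer(uint32_t n_layer) {
    switch (n_layer) {
        case 26: return MODEL_3B;
        case 32: return MODEL_7B;
        case 40: return MODEL_13B;
        case 60: return MODEL_30B;
        default: return MODEL_UNKNOWN;
    }
}

int main() {
    const e_model type = model_type_from_n_layer(26);  // a 26-layer model is detected as 3B
    if (type == MODEL_UNKNOWN) {
        return 1;
    }
    std::printf("scratch0: %zu MB\n", mem_req_scratch0().at(type) / MB);
    return 0;
}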