"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
"transformer.word_embeddings", # falcon
"word_embeddings", # bloom
- "model.embed_tokens", # llama-hf nemotron olmoe olmo_1124
+ "model.embed_tokens", # llama-hf nemotron olmoe olmo2
"tok_embeddings", # llama-pth
"embeddings.word_embeddings", # bert nomic-bert
"language_model.embedding.word_embeddings", # persimmon
# Output
MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo_1124
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
"output", # llama-pth bloom internlm2
"word_embeddings_for_head", # persimmon
"lm_head.linear", # phi2
MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
- "model.norm", # llama-hf baichuan internlm2 olmoe olmo_1124
+ "model.norm", # llama-hf baichuan internlm2 olmoe olmo2
"norm", # llama-pth
"transformer.norm_f", # mpt dbrx
"ln_f", # refact bloom qwen gpt2
# Attention query
MODEL_TENSOR.ATTN_Q: (
- "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo_1124
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
"layers.{bid}.attention.wq", # llama-pth
"encoder.layer.{bid}.attention.self.query", # bert
"transformer.h.{bid}.attn.q_proj", # gpt-j
# Attention key
MODEL_TENSOR.ATTN_K: (
- "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo_1124
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
"layers.{bid}.attention.wk", # llama-pth
"encoder.layer.{bid}.attention.self.key", # bert
"transformer.h.{bid}.attn.k_proj", # gpt-j
# Attention value
MODEL_TENSOR.ATTN_V: (
- "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo_1124
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
"layers.{bid}.attention.wv", # llama-pth
"encoder.layer.{bid}.attention.self.value", # bert
"transformer.h.{bid}.attn.v_proj", # gpt-j
"transformer.blocks.{bid}.attn.out_proj", # mpt
"transformer.h.{bid}.self_attention.dense", # falcon
"h.{bid}.self_attention.dense", # bloom
- "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo_1124
+ "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
"layers.{bid}.attention.wo", # llama-pth
"encoder.layer.{bid}.attention.output.dense", # bert
"transformer.h.{bid}.attn.out_proj", # gpt-j
),
MODEL_TENSOR.ATTN_POST_NORM: (
- "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo_1124
+ "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
),
# Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: (
- "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo_1124
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
),
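These post-sublayer norm tensors (ATTN_POST_NORM and FFN_POST_NORM, tagged gemma2 and olmo2 above) normalize a sub-layer's output inside the residual add; in OLMo2 this placement replaces the usual pre-norm. A minimal Python sketch of that ordering, where attention, mlp, and rms_norm are hypothetical stand-ins rather than llama.cpp or gguf-py APIs:

# Illustrative only: residual block with post-sublayer norms, as implied by
# the ATTN_POST_NORM / FFN_POST_NORM tensors above.
def block(x, attention, mlp, rms_norm):
    # the norm is applied to the sub-layer *output*, inside the residual add
    h = x + rms_norm(attention(x))  # model.layers.{bid}.post_attention_layernorm
    return h + rms_norm(mlp(h))     # model.layers.{bid}.post_feedforward_layernorm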
# Feed-forward up
MODEL_TENSOR.FFN_UP: (
"transformer.blocks.{bid}.ffn.up_proj", # mpt
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
"h.{bid}.mlp.dense_h_to_4h", # bloom
- "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo_1124
+ "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
"layers.{bid}.feed_forward.w3", # llama-pth
"encoder.layer.{bid}.intermediate.dense", # bert
"transformer.h.{bid}.mlp.fc_in", # gpt-j
# Feed-forward gate
MODEL_TENSOR.FFN_GATE: (
- "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo_1124
+ "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
"layers.{bid}.feed_forward.w1", # llama-pth
"transformer.h.{bid}.mlp.w2", # qwen
"transformer.h.{bid}.mlp.c_fc2", # jais
"transformer.blocks.{bid}.ffn.down_proj", # mpt
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
"h.{bid}.mlp.dense_4h_to_h", # bloom
- "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo_1124
+ "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
"layers.{bid}.feed_forward.w2", # llama-pth
"encoder.layer.{bid}.output.dense", # bert
"transformer.h.{bid}.mlp.fc_out", # gpt-j
MODEL_TENSOR.ATTN_Q_NORM: (
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
- "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo_1124
+ "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
"transformer.layers.{bid}.attn.q_norm", # openelm
MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
- "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo_1124
+ "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
"transformer.layers.{bid}.attn.k_norm", # openelm
LLM_ARCH_COMMAND_R,
LLM_ARCH_DBRX,
LLM_ARCH_OLMO,
- LLM_ARCH_OLMO_1124,
+ LLM_ARCH_OLMO2,
LLM_ARCH_OLMOE,
LLM_ARCH_OPENELM,
LLM_ARCH_ARCTIC,
{ LLM_ARCH_COMMAND_R, "command-r" },
{ LLM_ARCH_DBRX, "dbrx" },
{ LLM_ARCH_OLMO, "olmo" },
- { LLM_ARCH_OLMO_1124, "olmo_1124" },
+ { LLM_ARCH_OLMO2, "olmo2" },
{ LLM_ARCH_OLMOE, "olmoe" },
{ LLM_ARCH_OPENELM, "openelm" },
{ LLM_ARCH_ARCTIC, "arctic" },
},
},
{
- LLM_ARCH_OLMO_1124,
+ LLM_ARCH_OLMO2,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
default: model.type = e_model::MODEL_UNKNOWN;
}
} break;
- case LLM_ARCH_OLMO_1124:
+ case LLM_ARCH_OLMO2:
{
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
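The get_key call above reads the RMS-norm epsilon from GGUF metadata; the key is arch-prefixed, so after this rename it becomes olmo2.attention.layer_norm_rms_epsilon rather than olmo_1124.attention.layer_norm_rms_epsilon. A hedged Python sketch with a plain dict standing in for a real GGUF reader (the epsilon value is illustrative):

# Stand-in for GGUF metadata; the key prefix comes from the arch name table.
metadata = {"olmo2.attention.layer_norm_rms_epsilon": 1e-6}  # illustrative value
f_norm_rms_eps = metadata["olmo2.attention.layer_norm_rms_epsilon"]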
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
}
} break;
- case LLM_ARCH_OLMO_1124:
+ case LLM_ARCH_OLMO2:
{
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
return gf;
}
- struct ggml_cgraph * build_olmo_1124() {
+ struct ggml_cgraph * build_olmo2() {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
// mutable variable, needed during the last layer of the computation to skip unused tokens
{
result = llm.build_olmo();
} break;
- case LLM_ARCH_OLMO_1124:
+ case LLM_ARCH_OLMO2:
{
- result = llm.build_olmo_1124();
+ result = llm.build_olmo2();
} break;
case LLM_ARCH_OLMOE:
{
case LLM_ARCH_QWEN:
case LLM_ARCH_QWEN2:
case LLM_ARCH_QWEN2MOE:
- case LLM_ARCH_OLMO_1124:
+ case LLM_ARCH_OLMO2:
case LLM_ARCH_OLMOE:
case LLM_ARCH_PHI2:
case LLM_ARCH_PHI3:
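The rename has to land consistently in three places: the LLM_ARCH_OLMO2 enum value, its "olmo2" entry in the name table, and the build_olmo2 dispatch case; a miss in any one of them falls through to the unknown/default path. A small Python registry illustrating the same pattern (build_olmo2 here is a placeholder, not the real graph builder):

# Illustrative registry mirroring the C++ switch: arch name -> graph builder.
def build_olmo2():
    return "gf"  # stands in for the ggml_cgraph returned above

BUILDERS = {
    "olmo": lambda: "gf",
    "olmo2": build_olmo2,
    "olmoe": lambda: "gf",
}

def build_graph(arch: str):
    try:
        return BUILDERS[arch]()
    except KeyError:
        # corresponds to the default/unknown-model case in the C++ switch
        raise ValueError(f"unknown architecture: {arch}")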