template <bool iswa>
struct llm_build_exaone4 : public llm_graph_context {
- llm_build_exaone4(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) {
+ llm_build_exaone4(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
const int64_t n_embd_head = hparams.n_embd_head_k;
GGML_ASSERT(n_embd_head == hparams.n_embd_head_v);
cb(Kcur, "Kcur", il);
cb(Vcur, "Vcur", il);
- cur = build_attn(inp_attn, gf,
+ cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
cb(cur, "attn_out", il);
case LLM_ARCH_EXAONE4:
{
if (hparams.swa_type == LLAMA_SWA_TYPE_STANDARD) {
- llm = std::make_unique<llm_build_exaone4<true>>(*this, params, gf);
+ llm = std::make_unique<llm_build_exaone4<true>>(*this, params);
} else {
- llm = std::make_unique<llm_build_exaone4<false>>(*this, params, gf);
+ llm = std::make_unique<llm_build_exaone4<false>>(*this, params);
}
} break;
case LLM_ARCH_RWKV6: