Signed-off-by: thxCode <redacted>
// create tensors for the weights
{
const int64_t n_embd = hparams.n_embd;
- const int64_t n_embd_head = n_embd / hparams.n_head;
+ const int64_t n_embd_head = (hparams.n_head == 0) ? 0 : n_embd / hparams.n_head;
const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
const int64_t n_embd_gqa = n_embd_v_gqa;