fix: divide-by-zero exception in mamba (#7932)

author Frank Mai <redacted>

Mon, 17 Jun 2024 14:11:08 +0000 (22:11 +0800)

committer GitHub <redacted>

Mon, 17 Jun 2024 14:11:08 +0000 (16:11 +0200)
author Frank Mai <redacted>
Mon, 17 Jun 2024 14:11:08 +0000 (22:11 +0800)
committer GitHub <redacted>
Mon, 17 Jun 2024 14:11:08 +0000 (16:11 +0200)
diff --git a/llama.cpp b/llama.cpp

index b324807f897b50c5a72b4278a7b947c6fa22c3d0..dd7020dc0eeabc5e1d788ea289bd3058f21ee072 100644 (file)
--- a/llama.cpp
+++ b/llama.cpp
@@ -5383,7 +5383,7 @@ static bool llm_load_tensors(
      // create tensors for the weights
      {
          const int64_t n_embd       = hparams.n_embd;
-        const int64_t n_embd_head  = n_embd / hparams.n_head;
+        const int64_t n_embd_head  = (hparams.n_head == 0) ? 0 : n_embd / hparams.n_head;
          const int64_t n_embd_k_gqa = hparams.n_embd_k_gqa();
          const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa();
          const int64_t n_embd_gqa   = n_embd_v_gqa;
author	Frank Mai <redacted>
	Mon, 17 Jun 2024 14:11:08 +0000 (22:11 +0800)
committer	GitHub <redacted>
	Mon, 17 Jun 2024 14:11:08 +0000 (16:11 +0200)