convert.py : fix baichuan7B support (#2870)

author jameswu2014 <redacted>

Tue, 29 Aug 2023 09:48:41 +0000 (17:48 +0800)

committer GitHub <redacted>

Tue, 29 Aug 2023 09:48:41 +0000 (12:48 +0300)
author jameswu2014 <redacted>
Tue, 29 Aug 2023 09:48:41 +0000 (17:48 +0800)
committer GitHub <redacted>
Tue, 29 Aug 2023 09:48:41 +0000 (12:48 +0300)
diff --git a/convert.py b/convert.py

index a15e6ccd2367e9eed76e291043ea06718ef75998..3f0a1c932d58f51528b75ef09ec07e4367c35572 100755 (executable)
--- a/convert.py
+++ b/convert.py
@@ -469,7 +469,7 @@ class UnquantizedTensor(Tensor):
  
      def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
          r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
+        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head))
  
      def part(self, n_part: int) -> 'UnquantizedTensor':
          r = self.ndarray.shape[0] // 3
@@ -952,9 +952,10 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
             #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
          elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
              print(f"Unpacking and permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
              tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+            del tmp[f"model.layers.{i}.self_attn.W_pack.weight"]
          else:
              break
author	jameswu2014 <redacted>
	Tue, 29 Aug 2023 09:48:41 +0000 (17:48 +0800)
committer	GitHub <redacted>
	Tue, 29 Aug 2023 09:48:41 +0000 (12:48 +0300)