convert : add support of baichuan-7b (#2055)

author Judd <redacted>

Sat, 1 Jul 2023 17:00:25 +0000 (01:00 +0800)

committer GitHub <redacted>

Sat, 1 Jul 2023 17:00:25 +0000 (20:00 +0300)
author Judd <redacted>
Sat, 1 Jul 2023 17:00:25 +0000 (01:00 +0800)
committer GitHub <redacted>
Sat, 1 Jul 2023 17:00:25 +0000 (20:00 +0300)
diff --git a/README.md b/README.md

index ee56988c7fbda19f4a9da52342ea9563b96d8fb8..e890dc9c22e75f891df44c3d2c662ab94f60f0d0 100644 (file)
--- a/README.md
+++ b/README.md
@@ -85,6 +85,7 @@ as the main playground for developing new features for the [ggml](https://github
  - [X] [OpenBuddy 🐶 (Multilingual)](https://github.com/OpenBuddy/OpenBuddy)
  - [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
  - [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
+- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)
  
  **Bindings:**
  
diff --git a/convert.py b/convert.py

index e340d2273f378ab0d81ef3ba20e23408c30a78dd..14269277627b13b29f735196878a76eb6f418a03 100644 (file)
--- a/convert.py
+++ b/convert.py
@@ -136,7 +136,7 @@ def find_n_mult(n_ff: int, n_embd: int) -> int:
          calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
          if calc_ff == n_ff:
              return n_mult
-    return 1
+    raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
  
  @dataclass
  class Params:
@@ -321,6 +321,10 @@ class Tensor(metaclass=ABCMeta):
      @abstractmethod
      def permute(self, n_head: int) -> 'Tensor': ...
      @abstractmethod
+    def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
+    @abstractmethod
+    def part(self, n_part: int) -> 'UnquantizedTensor': ...
+    @abstractmethod
      def to_ggml(self) -> 'GGMLCompatibleTensor': ...
  
  
@@ -345,6 +349,14 @@ class UnquantizedTensor(Tensor):
      def to_ggml(self) -> 'UnquantizedTensor':
          return self
  
+    def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
+        r = self.ndarray.shape[0] // 3
+        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
+
+    def part(self, n_part: int) -> 'UnquantizedTensor':
+        r = self.ndarray.shape[0] // 3
+        return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
+
      def permute(self, n_head: int) -> 'UnquantizedTensor':
          return UnquantizedTensor(permute(self.ndarray, n_head))
  
@@ -642,6 +654,19 @@ def permute_lazy(lazy_tensor: LazyTensor, n_head: int) -> LazyTensor:
          return lazy_tensor.load().permute(n_head)
      return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
  
+def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
+    def load() -> Tensor:
+        return lazy_tensor.load().permute_part(n_part, n_head)
+    s = lazy_tensor.shape.copy()
+    s[0] = s[0] // 3
+    return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
+
+def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
+    def load() -> Tensor:
+        return lazy_tensor.load().part(n_part)
+    s = lazy_tensor.shape.copy()
+    s[0] = s[0] // 3
+    return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)
  
  def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
      out: LazyModel = {}
@@ -650,11 +675,17 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
      out["output.weight"] = model["lm_head.weight"]
  
      for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
+        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
+            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
+            out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
+            out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
+        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
+            out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
+            out[f"layers.{i}.attention.wk.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
+            out[f"layers.{i}.attention.wv.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+        else:
              break
-        out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
-        out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
-        out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
+
          out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
  
          out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"]
author	Judd <redacted>
	Sat, 1 Jul 2023 17:00:25 +0000 (01:00 +0800)
committer	GitHub <redacted>
	Sat, 1 Jul 2023 17:00:25 +0000 (20:00 +0300)
README.md		patch \| blob \| history
convert.py		patch \| blob \| history