gguf : track writer state, free unneeded tensors, cleanup (#3871)
author    Jared Van Bortel <redacted>
          Tue, 7 Nov 2023 17:43:04 +0000 (12:43 -0500)
committer GitHub <redacted>
          Tue, 7 Nov 2023 17:43:04 +0000 (12:43 -0500)
gguf-py/gguf/gguf.py
gguf-py/pyproject.toml

diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index a2271d225d0012ff64ed22f819e54d0385b0c6c4..7e495cb19638d1c952fbad34b7217e36110e1553 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -646,18 +646,17 @@ class GGUFValueType(IntEnum):
             sys.exit()
 
 
+class WriterState(Enum):
+    EMPTY   = auto()
+    HEADER  = auto()
+    KV_DATA = auto()
+    TI_DATA = auto()
+
+
 class GGUFWriter:
     fout: BufferedWriter
-    arch: str
-    offset_tensor = 0
-    data_alignment = GGUF_DEFAULT_ALIGNMENT
-    kv_data = b""
-    kv_data_count = 0
-    ti_data = b""
-    ti_data_count = 0
-    use_temp_file: bool
-    temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
-    tensors: list[tuple[np.ndarray[Any, Any], int]]
+    temp_file: tempfile.SpooledTemporaryFile[bytes] | None
+    tensors: list[np.ndarray[Any, Any]]
 
     @property
     def pack_prefix(self):
@@ -683,27 +682,47 @@ class GGUFWriter:
             GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
             GGUFValueType.BOOL:    "?" ,
         }
-        self.add_architecture()
+        self.offset_tensor = 0
+        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
+        self.kv_data = b""
+        self.kv_data_count = 0
+        self.ti_data = b""
+        self.ti_data_count = 0
         self.use_temp_file = use_temp_file
+        self.temp_file = None
         self.tensors = []
         endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
         print(f"This gguf file is for {endianess_str} only")
+        self.state = WriterState.EMPTY
+
+        self.add_architecture()
 
     def write_header_to_file(self):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
         self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
         self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
         self.flush()
-#        print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
+        self.state = WriterState.HEADER
 
     def write_kv_data_to_file(self):
+        if self.state is not WriterState.HEADER:
+            raise ValueError(f'Expected output file to contain the header, got {self.state}')
+
         self.fout.write(self.kv_data)
         self.flush()
+        self.state = WriterState.KV_DATA
 
     def write_ti_data_to_file(self):
+        if self.state is not WriterState.KV_DATA:
+            raise ValueError(f'Expected output file to contain KV data, got {self.state}')
+
         self.fout.write(self.ti_data)
         self.flush()
+        self.state = WriterState.TI_DATA
 
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
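
The checks above pin down the on-disk GGUF layout: the header (which embeds `kv_data_count` and `ti_data_count`) must be written first, then the buffered KV metadata, then the tensor info. Since `add_tensor_info` requires `WriterState.EMPTY`, all metadata and tensors have to be registered before the first write. A minimal sketch of the resulting call order, assuming the gguf-py API at this revision (the output path and tensor contents are illustrative):

```python
import numpy as np
from gguf import GGUFWriter

writer = GGUFWriter("example.gguf", arch="llama")   # hypothetical output path
writer.add_uint32("example.block_count", 32)        # buffered into kv_data

data = np.zeros((4, 4), dtype=np.float32)           # dummy tensor
writer.add_tensor("blk.0.attn_norm.weight", data)   # allowed only while EMPTY

writer.write_header_to_file()    # EMPTY   -> HEADER
writer.write_kv_data_to_file()   # HEADER  -> KV_DATA
writer.write_tensors_to_file()   # KV_DATA -> TI_DATA, then tensor data
writer.close()

# Out-of-order calls now fail fast, e.g. calling write_kv_data_to_file()
# before the header raises:
#   ValueError: Expected output file to contain the header, got WriterState.EMPTY
```
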
@@ -796,6 +815,9 @@ class GGUFWriter:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
+        if self.state is not WriterState.EMPTY:
+            raise ValueError(f'Expected output file to be empty, got {self.state}')
+
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
 
         encoded_name = name.encode("utf8")
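
For reference, `ggml_pad` (unchanged above) rounds a size up to the next multiple of the alignment with `((x + n - 1) // n) * n`, and `write_padding` emits the difference as zero bytes. A small worked example of the arithmetic, with illustrative values:

```python
def ggml_pad(x: int, n: int) -> int:
    # round x up to the next multiple of n
    return ((x + n - 1) // n) * n

align = 32                         # GGUF_DEFAULT_ALIGNMENT
nbytes = 100                       # illustrative tensor size in bytes
padded = ggml_pad(nbytes, align)   # 128
pad = padded - nbytes              # 28 zero bytes follow the tensor data
```
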
@@ -825,23 +847,22 @@ class GGUFWriter:
         shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
         self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
 
-        pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
-
-        if  self.temp_file is None:
-            self.tensors.append((tensor, pad))
+        if self.temp_file is None:
+            self.tensors.append(tensor)
             return
 
         tensor.tofile(self.temp_file)
+        self.write_padding(self.temp_file, tensor.nbytes)
 
-        if pad != 0:
-            self.temp_file.write(bytes([0] * pad))
-
-    def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
+    def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
         pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
         if pad != 0:
             fp.write(bytes([0] * pad))
 
     def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
+        if self.state is not WriterState.TI_DATA:
+            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
+
         if self.endianess==GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
@@ -854,10 +875,13 @@ class GGUFWriter:
         self.write_padding(self.fout, self.fout.tell())
 
         if self.temp_file is None:
-            for (currtensor, currpad) in self.tensors:
-                currtensor.tofile(self.fout)
-                if currpad != 0:
-                    self.fout.write(bytes([0] * currpad))
+            while True:
+                try:
+                    tensor = self.tensors.pop(0)
+                except IndexError:
+                    break
+                tensor.tofile(self.fout)
+                self.write_padding(self.fout, tensor.nbytes)
             return
 
         self.temp_file.seek(0)
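
Draining the list with `pop(0)` rather than iterating is what lets the writer free tensors as it goes: each array's last reference is dropped as soon as it has been written, instead of the whole list staying alive until the loop ends. The same pattern in isolation (a minimal sketch; `io.BytesIO` and `tobytes()` stand in for the real file and `tofile()`):

```python
import io
import numpy as np

fout = io.BytesIO()  # stand-in for the output file
tensors = [np.zeros(1024, dtype=np.float32) for _ in range(4)]

# `for t in tensors:` would keep every array reachable for the whole loop;
# popping removes the list's reference, so each buffer becomes collectable
# right after it is written.
while True:
    try:
        tensor = tensors.pop(0)
    except IndexError:
        break
    fout.write(tensor.tobytes())
```
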
@@ -1002,11 +1026,8 @@ class GGUFWriter:
 
 
 class SpecialVocab:
-    load_merges: bool = False
-    merges: list[str] = []
-    special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
-    special_token_ids: dict[str, int] = {}
-    n_vocab: int | None = None
+    merges: list[str]
+    special_token_ids: dict[str, int]
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -1016,8 +1037,11 @@ class SpecialVocab:
         self.special_token_ids = {}
         self.n_vocab = n_vocab
         self.load_merges = load_merges
+        self.merges = []
         if special_token_types is not None:
             self.special_token_types = special_token_types
+        else:
+            self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
         self._load(Path(path))
 
     def _load(self, path: Path) -> None:
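
Moving `merges` and `special_token_ids` from class-level defaults into `__init__` fixes a classic Python pitfall: a mutable class attribute is one object shared by every instance. A minimal reproduction of the hazard, with illustrative class names:

```python
class Shared:
    merges: list = []           # one list, shared across all instances

class PerInstance:
    def __init__(self):
        self.merges = []        # a fresh list per instance

a, b = Shared(), Shared()
a.merges.append("t h")
print(b.merges)                 # ['t h'] -- b observes a's mutation

c, d = PerInstance(), PerInstance()
c.merges.append("t h")
print(d.merges)                 # [] -- instances stay independent
```
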
diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index f0741a7c23e034061470dd87dcb7e40c43310ff4..c6cb2c37a0e0a1587a18fac5b8b4e0d117318306 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.4.5"
+version = "0.4.6"
 description = "Write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [