sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf
+
class GGMLFormat(IntEnum):
GGML = 0
GGMF = 1
GGJT = 2
+
class GGMLFType(IntEnum):
ALL_F32 = 0
MOSTLY_F16 = 1
MOSTLY_Q5_K_M = 17
MOSTLY_Q6_K = 18
+
class Hyperparameters:
def __init__(self):
self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
def __str__(self):
return f'<Hyperparameters: n_vocab={self.n_vocab}, n_embd={self.n_embd}, n_mult={self.n_mult}, n_head={self.n_head}, n_layer={self.n_layer}, n_rot={self.n_rot}, n_ff={self.n_ff}, ftype={self.ftype.name}>'
+
class Vocab:
def __init__(self, load_scores = True):
self.items = []
self.items.append((item_text, item_score))
return offset - orig_offset
+
class Tensor:
def __init__(self, use_padding = True):
self.name = None
# print(n_dims, name_len, dtype, self.dims, self.name, pad)
return offset - orig_offset
+
class GGMLModel:
def __init__(self):
self.hyperparameters = None
if ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
elif (self.file_format == GGMLFormat.GGJT and self.format_version == 2):
- if ftype in ( GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
- GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
+ if ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
+ GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
err = 'Q4 and Q8 quantizations changed in GGJTv3.'
if len(err) > 0:
raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
hp.set_n_ff(self)
return offset
+
class GGMLToGGUF:
def __init__(self, ggml_model, data, cfg, params_override = None, vocab_override = None, special_vocab = None):
hp = ggml_model.hyperparameters
gguf_writer = gguf.GGUFWriter(
self.cfg.output,
gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
- use_temp_file = False )
+ use_temp_file = False)
self.add_params(gguf_writer)
self.add_vocab(gguf_writer)
if self.special_vocab is not None:
mapped_name,
data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
raw_shape = tempdims,
- raw_dtype = tensor.dtype )
+ raw_dtype = tensor.dtype)
+
def handle_metadata(cfg, hp):
import convert
raise ValueError('Unable to load metadata')
vocab = convert.load_vocab(
cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
- cfg.vocabtype )
+ cfg.vocabtype)
# FIXME: Respect cfg.vocab_dir?
svocab = gguf.SpecialVocab(cfg.model_metadata_dir,
- load_merges = cfg.vocabtype == 'bpe',
- n_vocab = vocab.vocab_size)
+ load_merges = cfg.vocabtype == 'bpe',
+ n_vocab = vocab.vocab_size)
convert.check_vocab_size(params, vocab)
return (params, vocab, svocab)
+
def handle_args():
parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
parser.add_argument('--input', '-i', type = Path, required = True,
- help = 'Input GGMLv3 filename')
+ help = 'Input GGMLv3 filename')
parser.add_argument('--output', '-o', type = Path, required = True,
- help ='Output GGUF filename')
+ help = 'Output GGUF filename')
parser.add_argument('--name',
- help = 'Set model name')
+ help = 'Set model name')
parser.add_argument('--desc',
- help = 'Set model description')
+ help = 'Set model description')
parser.add_argument('--gqa', type = int, default = 1,
- help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
+ help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
parser.add_argument('--eps', default = '5.0e-06',
- help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
+ help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
parser.add_argument('--context-length', '-c', type=int, default = 2048,
- help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+ help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
parser.add_argument('--model-metadata-dir', '-m', type = Path,
- help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
+ help = 'Load HuggingFace/.pth vocab and metadata from the specified directory')
parser.add_argument("--vocab-dir", type=Path,
- help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+ help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
parser.add_argument("--vocabtype", choices=["spm", "bpe"], default="spm",
- help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
+ help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
return parser.parse_args()
+
def main():
cfg = handle_args()
print(f'* Using config: {cfg}')
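# Memory-map the input file so large models are not read fully into RAM up front.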
data = np.memmap(cfg.input, mode = 'r')
model = GGMLModel()
print('* Scanning GGML input file')
- offset = model.load(data, 0)
+ offset = model.load(data, 0) # noqa
print(f'* GGML model hyperparameters: {model.hyperparameters}')
vocab_override = None
params_override = None
print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
if model.file_format == GGMLFormat.GGML:
print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
- converter = GGMLToGGUF(model, data, cfg,
+ converter = GGMLToGGUF(
+ model, data, cfg,
params_override = params_override,
vocab_override = vocab_override,
- special_vocab = special_vocab )
+ special_vocab = special_vocab
+ )
converter.save()
print(f'* Successful completion. Output saved to: {cfg.output}')
+
if __name__ == '__main__':
main()
# data types
#
+
@dataclass(frozen=True)
class DataType:
name: str
def elements_to_bytes(self, n_elements: int) -> int:
return n_elements * self.dtype.itemsize
+
@dataclass(frozen=True)
class UnquantizedDataType(DataType):
pass
+
DT_F16 = UnquantizedDataType('F16', dtype = np.dtype(np.float16), valid_conversions = ['F32', 'Q8_0'])
DT_F32 = UnquantizedDataType('F32', dtype = np.dtype(np.float32), valid_conversions = ['F16', 'Q8_0'])
DT_I32 = UnquantizedDataType('I32', dtype = np.dtype(np.int16), valid_conversions = [])
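# numpy has no native bfloat16 dtype, so BF16 tensors are carried as their raw uint16 bit patterns.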
DT_BF16 = UnquantizedDataType('BF16', dtype = np.dtype(np.uint16), valid_conversions = ['F32', 'F16', 'Q8_0'])
+
@dataclass(frozen=True)
class QuantizedDataType(DataType):
block_size: int
assert n_elements % self.block_size == 0, f'Invalid number of elements {n_elements} for {self.name} with block size {self.block_size}'
return self.quantized_dtype.itemsize * (n_elements // self.block_size)
+
@dataclass(frozen=True)
class Q8_0QuantizedDataType(QuantizedDataType):
# Mini Q8_0 quantization in Python!
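# Each 32-element block of float32 values is stored as one f16 scale 'd' plus 32 int8 values 'qs' (see quantized_dtype below).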
n_blocks = arr.size // self.block_size
blocks = arr.reshape((n_blocks, self.block_size))
# Much faster implementation of block quantization contributed by @Cebtenzzre
+
def quantize_blocks_q8_0(blocks: NDArray) -> Iterable[tuple[Any, Any]]:
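# Per-block scale: chosen so the largest-magnitude value in each block maps to the int8 limit of 127.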
d = abs(blocks).max(axis = 1) / np.float32(127)
with np.errstate(divide = 'ignore'):
yield from zip(d, qs)
return np.fromiter(quantize_blocks_q8_0(blocks), count = n_blocks, dtype = self.quantized_dtype)
+
DT_Q8_0 = Q8_0QuantizedDataType('Q8_0',
- dtype = np.dtype(np.float32), valid_conversions = [],
- ggml_type = gguf.GGMLQuantizationType.Q8_0, block_size = 32,
- quantized_dtype = np.dtype([('d', '<f2'), ('qs', 'i1', (32,))]))
+ dtype = np.dtype(np.float32), valid_conversions = [],
+ ggml_type = gguf.GGMLQuantizationType.Q8_0, block_size = 32,
+ quantized_dtype = np.dtype([('d', '<f2'), ('qs', 'i1', (32,))]))
# Quantized types skipped here because they may also map to np.float32
NUMPY_TYPE_TO_DATA_TYPE: dict[np.dtype[Any], DataType] = {}
# TODO: match this with `llama_ftype`
# TODO: rename to LLAMAFileType
# TODO: move to `gguf.py`
+
+
class GGMLFileType(enum.IntEnum):
AllF32 = 0
MostlyF16 = 1 # except 1d tensors
# 1D tensors are always F32.
return dt if len(tensor.shape) > 1 else DT_F32
+
GGML_FILE_TYPE_TO_DATA_TYPE: dict[GGMLFileType, DataType] = {
GGMLFileType.AllF32 : DT_F32,
GGMLFileType.MostlyF16 : DT_F16,
# hparams loading
#
+
@dataclass
class Params:
n_vocab: int
# try transformer naming first
if "model.layers.0.self_attn.q_proj.weight" in model:
- n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
+ n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming
- n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
+ n_layer = next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
else:
- n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
+ n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
if n_layer < 1:
raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
(item['content'], item['id'])
for item in tokenizer_json.get('added_tokens', [])
# Added tokens here can be duplicates of the main vocabulary.
- if item['content'] not in self.bpe_tokenizer )
+ if item['content'] not in self.bpe_tokenizer)
vocab_size: int = len(self.bpe_tokenizer)
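# Added tokens are expected to occupy the ids immediately following the base vocabulary.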
expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
tokenizer = self.bpe_tokenizer
- from transformers.models.gpt2 import tokenization_gpt2
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.items()}
for i, _ in enumerate(tokenizer):
def __repr__(self) -> str:
return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
+
Vocab: TypeAlias = 'BpeVocab | SentencePieceVocab'
#
# TODO: reuse (probably move to gguf.py?)
#
+
def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
- #print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
+ # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
if n_head_kv is not None and n_head != n_head_kv:
n_head = n_head_kv
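# For grouped-query attention the K projection has only n_head_kv heads, so permute over that count.
# The reshape/swapaxes below undoes the head-wise rotary permutation applied by Hugging Face LLaMA checkpoints.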
return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
- .swapaxes(1, 2)
- .reshape(weights.shape))
+ .swapaxes(1, 2)
+ .reshape(weights.shape))
class Tensor(metaclass=ABCMeta):
ret = self._load()
# Should be okay if it maps to the same numpy type?
assert ret.data_type == self.data_type or (self.data_type.dtype == ret.data_type.dtype), \
- (self.data_type, ret.data_type, self.description)
+ (self.data_type, ret.data_type, self.description)
return ret
def astype(self, data_type: DataType) -> LazyTensor:
return lazy_tensor.load().permute(n_head, n_head_kv)
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_head_kv}) ' + lazy_tensor.description)
+
def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int, n_head_kv: int) -> LazyTensor:
def load() -> Tensor:
return lazy_tensor.load().permute_part(n_part, n_head, n_head_kv)
s[0] = s[0] // 3
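# W_pack stores Q, K and V stacked along the first dimension, so each part is one third of it.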
return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}, {n_head_kv}) ' + lazy_tensor.description)
+
def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
def load() -> Tensor:
return lazy_tensor.load().part(n_part)
In = TypeVar('In')
Out = TypeVar('Out')
+
def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int, max_workers: int | None = None, use_processpool_executor: bool = False) -> Iterable[Out]:
'''Parallel map, but with backpressure. If the caller doesn't call `next`
fast enough, this will stop calling `func` at some point rather than
break
yield result
+
def check_vocab_size(params: Params, vocab: Vocab) -> None:
if params.n_vocab != vocab.vocab_size:
assert isinstance(vocab, BpeVocab) or isinstance(vocab, SentencePieceVocab)
class OutputFile:
- def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
+ def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
def add_meta_arch(self, params: Params) -> None:
self.gguf.close()
@staticmethod
- def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian=gguf.GGUFEndian.LITTLE) -> None:
+ def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)
of = OutputFile(fname_out, endianess=endianess)
of.close()
+
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
- wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
+ wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight"].data_type
if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
return GGMLFileType.AllF32
raise Exception(f"Unexpected combination of types: {name_to_type}")
+
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
for (name, tensor) in model.items()}
+
def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
tmap = gguf.TensorNameMap(ARCH, params.n_layer)
should_skip: set[gguf.MODEL_TENSOR] = set(gguf.MODEL_TENSOR_SKIP.get(ARCH, []))
print(f"Permuting layer {i}")
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
- #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
+ # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
print(f"Unpacking and permuting layer {i}")
tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
return out
+
def nth_multifile_path(path: Path, n: int) -> Path | None:
'''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
the nth path in the model.
# FIXME: Try to respect vocab_dir somehow?
vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
- load_merges = args.vocabtype == 'bpe',
- n_vocab = vocab.vocab_size)
+ load_merges = args.vocabtype == 'bpe',
+ n_vocab = vocab.vocab_size)
outfile = args.outfile
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab)
print(f"Wrote {outfile}")
vocab = load_vocab(vocab_dir, args.vocabtype)
# FIXME: Try to respect vocab_dir somehow?
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
- load_merges = args.vocabtype == 'bpe',
- n_vocab = vocab.vocab_size)
+ load_merges = args.vocabtype == 'bpe',
+ n_vocab = vocab.vocab_size)
model = model_plus.model
model = convert_model_names(model, params)