-import sys, struct, math, argparse
+import sys, struct, math, argparse, warnings
from pathlib import Path
import numpy as np
import gguf
+warnings.filterwarnings('error')
+
# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
if self.vocab_override is not None:
vo = self.vocab_override
print('* Adding vocab item(s)')
- for (idx, vitem) in enumerate(vo.all_tokens()):
- if len(vitem) == 3:
- tokens.append(vitem[0])
- scores.append(vitem[1])
- toktypes.append(vitem[2])
- else:
- # Maybe try to guess the token type here?
- tokens.append(vitem[0])
- scores.append(vitem[1])
+ for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
+ tokens.append(vbytes)
+ scores.append(score)
+ toktypes.append(ttype)
assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)
return
print(f'* Adding {hp.n_vocab} vocab item(s)')
+ assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
tt = 1 # Normal
- if len(vbytes) == 0:
+ # Special handling for UNK, BOS, EOS tokens (token ids 0, 1 and 2): override their text and token type.
+ if tokid <= 2:
+ if tokid == 0:
+ vbytes = b'<unk>'
+ tt = 2
+ elif tokid == 1:
+ vbytes = b'<s>'
+ tt = 3
+ else:
+ vbytes = b'</s>'
+ tt = 3
+ elif len(vbytes) == 0:
tt = 3 # Control
elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
vbytes = bytes(f'<0x{vbytes[0]:02X}>', encoding = 'UTF-8')
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)
+ gguf_writer.add_unk_token_id(0)
+ gguf_writer.add_bos_token_id(1)
+ gguf_writer.add_eos_token_id(2)
def add_tensors(self, gguf_writer):
nm = self.name_map
data = np.memmap(cfg.input, mode = 'r')
model = GGMLV3Model()
print('* Scanning GGML input file')
- offset = model.load(data, 0)
+ try:
+ offset = model.load(data, 0)
+ except OverflowError:
+ print(f'!!! Caught overflow loading tensors. The most likely issue is running on Windows but not in WSL. Try running in WSL if possible.', file = sys.stderr)
+ raise
print(f'* GGML model hyperparameters: {model.hyperparameters}')
vocab_override = None
params_override = None
converter.save()
print(f'* Successful completion. Output saved to: {cfg.output}')
-main()
+if __name__ == '__main__':
+ main()