git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
fix missing token issue
author RyanChang <redacted>
Mon, 17 Oct 2022 13:19:45 +0000 (21:19 +0800)
committer RyanChang <redacted>
Mon, 17 Oct 2022 13:19:45 +0000 (21:19 +0800)
convert-pt-to-ggml.py

index 22bd12e5c84320c55f8cfa27cbe22ab7239c0de7..9e9b2dcebefb92b0e38a600d3c0fee78e6fe8717 100644 (file)
@@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()}
 fout.write(struct.pack("i", len(tokens)))
 
 for key in tokens:
-    text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8')
+    text = bytearray([byte_decoder[c] for c in key])
     fout.write(struct.pack("i", len(text)))
     fout.write(text)