From: RyanChang
Date: Mon, 17 Oct 2022 13:19:45 +0000 (+0800)
Subject: fix missing token issue
X-Git-Tag: upstream/1.7.4~1956^2
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=949f97a8b449b187422da982d3591139dea05b6d;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

fix missing token issue
---

diff --git a/convert-pt-to-ggml.py b/convert-pt-to-ggml.py
index 22bd12e5..9e9b2dce 100644
--- a/convert-pt-to-ggml.py
+++ b/convert-pt-to-ggml.py
@@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()}
 
 fout.write(struct.pack("i", len(tokens)))
 for key in tokens:
-    text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8')
+    text = bytearray([byte_decoder[c] for c in key])
     fout.write(struct.pack("i", len(text)))
     fout.write(text)
 