From: Joonas Pihlajamaa Date: Sun, 23 Oct 2022 08:55:01 +0000 (+0300) Subject: Add encoding parameter to vocab.json opening to fix errors X-Git-Tag: upstream/1.7.4~1914^2 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=4e887dc350a17ab8077e8b58e9555b084b48bc8e;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp Add encoding parameter to vocab.json opening to fix errors --- diff --git a/models/convert-pt-to-ggml.py b/models/convert-pt-to-ggml.py index 9e9b2dce..04792d7b 100644 --- a/models/convert-pt-to-ggml.py +++ b/models/convert-pt-to-ggml.py @@ -234,7 +234,7 @@ dir_tokenizer = tokenizer.name_or_path # output in the same directory as the model fname_out = dir_out + "/ggml-model.bin" -with open(dir_tokenizer + "/vocab.json", "r") as f: +with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f: tokens = json.load(f) # use 16-bit or 32-bit floats