* Fixed quantization of f16 models not working: the f16 tables were not initialized, so the f16-to-f32 conversion was failing.
* In some situations, the script fails with the error: UnicodeDecodeError: 'charmap' codec can't decode byte (byte) in position (number): character maps to <undefined>
This is probably because the JSON files are being read with an incorrect encoding.
Explicitly specifying the encoding as UTF-8 when opening them seems to resolve the issue and allows the conversion to complete correctly.
---------
Co-authored-by: Georgi Gerganov <redacted>
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model-f16.bin"
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
# use 16-bit or 32-bit floats
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
-with open(dir_model + "/encoder.json", "r") as f:
+with open(dir_model + "/encoder.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
-with open(dir_model + "/hparams.json", "r") as f:
+with open(dir_model + "/hparams.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
# possible data types
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
-with open(dir_model + "/added_tokens.json", "r") as f:
+with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
encoder_added = json.load(f)
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
# use 16-bit or 32-bit floats
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
-with open(dir_model + "/added_tokens.json", "r") as f:
+with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
encoder_added = json.load(f)
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
# possible data types