gpt : fix pytorch converter text encodings (#78)

author LostRuins <redacted>

Thu, 13 Apr 2023 12:27:56 +0000 (20:27 +0800)

committer GitHub <redacted>

Thu, 13 Apr 2023 12:27:56 +0000 (15:27 +0300)
author LostRuins <redacted>
Thu, 13 Apr 2023 12:27:56 +0000 (20:27 +0800)
committer GitHub <redacted>
Thu, 13 Apr 2023 12:27:56 +0000 (15:27 +0300)
diff --git a/examples/gpt-2/convert-cerebras-to-ggml.py b/examples/gpt-2/convert-cerebras-to-ggml.py

index 6f20a5424e6992ac42994ae885f0b369d2908c4d..7fba7cde7b6959cea7bbc3d5a6e7e98eefd355ee 100644 (file)
--- a/examples/gpt-2/convert-cerebras-to-ggml.py
+++ b/examples/gpt-2/convert-cerebras-to-ggml.py
@@ -42,10 +42,10 @@ if len(sys.argv) < 2:
  dir_model = sys.argv[1]
  fname_out = sys.argv[1] + "/ggml-model-f16.bin"
  
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
      encoder = json.load(f)
  
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
      hparams = json.load(f)
  
  # use 16-bit or 32-bit floats
diff --git a/examples/gpt-2/convert-ckpt-to-ggml.py b/examples/gpt-2/convert-ckpt-to-ggml.py

index 60cd963d21e27000f971f672f0ead3c96293953a..9113141f61f018ac6592d1557255b002b5aea572 100644 (file)
--- a/examples/gpt-2/convert-ckpt-to-ggml.py
+++ b/examples/gpt-2/convert-ckpt-to-ggml.py
@@ -63,10 +63,10 @@ if len(sys.argv) < 3:
  dir_model = sys.argv[1]
  fname_out = sys.argv[1] + "/ggml-model.bin"
  
-with open(dir_model + "/encoder.json", "r") as f:
+with open(dir_model + "/encoder.json", "r", encoding="utf-8") as f:
      encoder = json.load(f)
  
-with open(dir_model + "/hparams.json", "r") as f:
+with open(dir_model + "/hparams.json", "r", encoding="utf-8") as f:
      hparams = json.load(f)
  
  # possible data types
diff --git a/examples/gpt-2/convert-h5-to-ggml.py b/examples/gpt-2/convert-h5-to-ggml.py

index 4e86ce27e65db60d903b6a4a8e45b400b1326d45..6a2b865411d7d3ae93fcf78712ec3a9e9c957fc6 100644 (file)
--- a/examples/gpt-2/convert-h5-to-ggml.py
+++ b/examples/gpt-2/convert-h5-to-ggml.py
@@ -55,13 +55,13 @@ if len(sys.argv) < 2:
  dir_model = sys.argv[1]
  fname_out = sys.argv[1] + "/ggml-model.bin"
  
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
      encoder = json.load(f)
  
-with open(dir_model + "/added_tokens.json", "r") as f:
+with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
      encoder_added = json.load(f)
  
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
      hparams = json.load(f)
  
  # use 16-bit or 32-bit floats
diff --git a/examples/gpt-j/convert-h5-to-ggml.py b/examples/gpt-j/convert-h5-to-ggml.py

index e254f2cc3d50493e9376d1c01a3b28645a6ae5f1..cb7731720e8625c18a78fc20de4c9d11bc22a6f0 100644 (file)
--- a/examples/gpt-j/convert-h5-to-ggml.py
+++ b/examples/gpt-j/convert-h5-to-ggml.py
@@ -57,13 +57,13 @@ if len(sys.argv) < 3:
  dir_model = sys.argv[1]
  fname_out = sys.argv[1] + "/ggml-model.bin"
  
-with open(dir_model + "/vocab.json", "r") as f:
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
      encoder = json.load(f)
  
-with open(dir_model + "/added_tokens.json", "r") as f:
+with open(dir_model + "/added_tokens.json", "r", encoding="utf-8") as f:
      encoder_added = json.load(f)
  
-with open(dir_model + "/config.json", "r") as f:
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
      hparams = json.load(f)
  
  # possible data types
author	LostRuins <redacted>
	Thu, 13 Apr 2023 12:27:56 +0000 (20:27 +0800)
committer	GitHub <redacted>
	Thu, 13 Apr 2023 12:27:56 +0000 (15:27 +0300)
examples/gpt-2/convert-cerebras-to-ggml.py		patch | blob | history
examples/gpt-2/convert-ckpt-to-ggml.py		patch | blob | history
examples/gpt-2/convert-h5-to-ggml.py		patch | blob | history
examples/gpt-j/convert-h5-to-ggml.py		patch | blob | history