git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
Fixed tokenizer.model not found error when model dir is symlink (#325)
author: Qingyou Meng <redacted>
Mon, 20 Mar 2023 19:33:10 +0000 (03:33 +0800)
committer: GitHub <redacted>
Mon, 20 Mar 2023 19:33:10 +0000 (19:33 +0000)
convert-pth-to-ggml.py

index 42f537769cf1102b746e51a17128e9c1b4c3fcc0..108eb1fccfe634b26ff2e9cba98c535c7e28205b 100644 (file)
@@ -17,6 +17,7 @@
 # and vocabulary.
 #
 import argparse
+import os
 import sys
 import json
 import struct
@@ -44,8 +45,14 @@ def get_n_parts(dim):
 
 def load_hparams_and_tokenizer(dir_model):
 
+    # `dir_model` is something like `models/7B` or `models/7B/`.
+    # "tokenizer.model" is expected under model's parent dir.
+    # When `dir_model` is a symlink, f"{dir_model}/../tokenizer.model" would not be found.
+    # Let's use the model's parent dir directly.
+    model_parent_dir = os.path.dirname(os.path.normpath(dir_model))
+
     fname_hparams = f"{dir_model}/params.json"
-    fname_tokenizer = f"{dir_model}/../tokenizer.model"
+    fname_tokenizer = f"{model_parent_dir}/tokenizer.model"
 
     with open(fname_hparams, "r") as f:
         hparams = json.load(f)