From: daminho
Date: Thu, 12 Sep 2024 11:28:20 +0000 (+0900)
Subject: py : add Phi-1.5/Phi-2 tokenizer (#9361)
X-Git-Tag: upstream/0.0.4488~745
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c837981bba7cf6839b69d32b25552ce685936b14;p=pkg%2Fggml%2Fsources%2Fllama.cpp

py : add Phi-1.5/Phi-2 tokenizer (#9361)

* add phi2 tokenizer

* add phi name to convert_hf_to_gguf_update.py

* make tokenizer_pre consistent; llama.cpp work
---

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index f02c6502..01a8a50a 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -626,6 +626,9 @@ class Model:
         if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
             res = "exaone"
+        if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
+            # ref: https://huggingface.co/microsoft/phi-2
+            res = "phi-2"
 
         if res is None:
             logger.warning("\n")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 59a0b81a..021f65ab 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -98,6 +98,7 @@ models = [
     {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
     {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
 ]