llama : support InfiniAI Megrez 3b (#10893)
author Yun Dou <redacted>
Mon, 23 Dec 2024 00:35:44 +0000 (08:35 +0800)
committer GitHub <redacted>
Mon, 23 Dec 2024 00:35:44 +0000 (01:35 +0100)
* Support InfiniAI Megrez 3b

* Fix tokenizer_clean_spaces for megrez

convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
src/llama.cpp
tests/test-chat-template.cpp

convert_hf_to_gguf.py
index d95fb12967eaaa570105fe665ad7f01b688ebdac..b6c15da94ec1dd4db5ab2f1210e6b676cc430499 100755 (executable)
@@ -684,6 +684,9 @@ class Model:
         if chkhsh == "ad851be1dba641f2e3711822f816db2c265f788b37c63b4e1aeacb9ee92de8eb":
             # ref: https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct
             res = "gigachat"
+        if chkhsh == "d4c8f286ea6b520b3d495c4455483cfa2302c0cfcd4be05d781b6a8a0a7cdaf1":
+            # ref: https://huggingface.co/Infinigence/Megrez-3B-Instruct
+            res = "megrez"
 
         if res is None:
             logger.warning("\n")
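
For context, the chkhsh value matched above is a fingerprint of the tokenizer: convert_hf_to_gguf.py encodes a fixed reference text (chktxt, defined in the script) with the Hugging Face tokenizer and hashes the resulting token IDs. A minimal sketch of that fingerprinting, assuming the transformers package is installed; the reference text below is a stand-in, not the script's real chktxt:

    from hashlib import sha256
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Infinigence/Megrez-3B-Instruct")
    chktxt = "..."  # stand-in; the real reference text is defined in convert_hf_to_gguf.py
    chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
    print(chkhsh)  # matches "d4c8f286..." only when run with the script's real chktxt

convert_hf_to_gguf_update.py regenerates these hashes for every entry in its models list, which is why the same repo is registered there as well (next hunk).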
convert_hf_to_gguf_update.py
index 2ba346640b35283b0849abd8501aa34f811fa030..fea23ddb4ae48d0884ac57f22ec82e8620d61d36 100755 (executable)
@@ -106,6 +106,7 @@ models = [
     {"name": "minerva-7b",     "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
     {"name": "roberta-bpe",    "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
     {"name": "gigachat",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
+    {"name": "megrez",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
 ]
 
 
src/llama.cpp
index c1524d06bb04d7d86f0aef34409135cc95b60408..4d41602fe2010f66a100ffec1ba7264f9714a943 100644 (file)
@@ -1720,6 +1720,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_RWKV_WORLD,
     LLM_CHAT_TEMPLATE_GRANITE,
     LLM_CHAT_TEMPLATE_GIGACHAT,
+    LLM_CHAT_TEMPLATE_MEGREZ,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
 
@@ -1753,6 +1754,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
     { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
     { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
+    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
 };
 
 static llm_arch llm_arch_from_string(const std::string & name) {
@@ -6703,6 +6705,9 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "minerva-7b") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
+            } else if (
+                tokenizer_pre == "megrez") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
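
Rather than introducing a new pre-tokenizer, megrez is mapped onto LLAMA_VOCAB_PRE_TYPE_QWEN2, i.e. the GPT-2-style split llama.cpp already applies to Qwen2-family BPE tokenizers. A sketch of that split in Python, assuming the third-party regex module (the stdlib re lacks \p{...} classes); the pattern mirrors the one in llama.cpp's Qwen2 branch but should be treated as illustrative, not byte-exact:

    import regex  # third-party module with \p{...} support

    QWEN2_SPLIT = (
        r"(?i:'s|'t|'re|'ve|'m|'ll|'d)"
        r"|[^\r\n\p{L}\p{N}]?\p{L}+"
        r"|\p{N}"
        r"| ?[^\s\p{L}\p{N}]+[\r\n]*"
        r"|\s*[\r\n]+"
        r"|\s+(?!\S)"
        r"|\s+"
    )

    print(regex.findall(QWEN2_SPLIT, "Hello, world! 123"))
    # ['Hello', ',', ' world', '!', ' ', '1', '2', '3']  -- digits split one at a time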
@@ -22931,6 +22936,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_GRANITE;
     } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
         return LLM_CHAT_TEMPLATE_GIGACHAT;
+    } else if (tmpl_contains("<|role_start|>")) {
+        return LLM_CHAT_TEMPLATE_MEGREZ;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
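
Detection is a plain substring test: any Jinja template containing the <|role_start|> marker is treated as Megrez. The equivalent logic as a small Python sketch (the function name is illustrative):

    def detect_chat_template(tmpl: str) -> str:
        # mirrors llama_chat_detect_template: first matching marker wins
        if "<|role_start|>" in tmpl:
            return "megrez"
        return "unknown"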
@@ -23289,6 +23296,16 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "assistant<|role_sep|>";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
+        // Megrez template
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
+        }
+
+        if (add_ass) {
+            ss << "<|role_start|>assistant<|role_end|>";
+        }
     } else {
         // template not supported
         return -1;
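
The new branch concatenates each turn as <|role_start|>{role}<|role_end|>{content}<|turn_end|> and, when add_ass is set, appends an open assistant turn. A Python sketch of the same formatting (function name illustrative), reproducing the expected string pinned in tests/test-chat-template.cpp:

    def megrez_apply_template(chat: list[dict], add_ass: bool = True) -> str:
        out = []
        for msg in chat:
            out.append(f"<|role_start|>{msg['role']}<|role_end|>{msg['content']}<|turn_end|>")
        if add_ass:
            # open an assistant turn so generation continues from here
            out.append("<|role_start|>assistant<|role_end|>")
        return "".join(out)

    print(megrez_apply_template([
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Hello"},
    ]))
    # <|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>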
tests/test-chat-template.cpp
index 30a910ad5c55df1a223380dea0aff15acdf199d7..51bfb155b47ef14fbddebd0b2506c5e41042c547 100644 (file)
@@ -77,6 +77,8 @@ int main(void) {
         "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
         // ai-sage/GigaChat-20B-A3B-instruct
         "{% if messages[0]['role'] == 'system' -%}\n    {%- set loop_messages = messages[1:] -%}\n    {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n    {%- set loop_messages = messages -%}\n    {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n    {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n    {% endif %}\n    \n    {%- if loop.index0 == 0 -%}\n        {{ system_message -}}\n    {%- endif -%}\n    {%- if message['role'] == 'user' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n        {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3]  + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if message['role'] == 'assistant' -%}\n        {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n    {%- endif -%}\n    {%- if loop.last and add_generation_prompt -%}\n        {{ 'assistant' + additional_special_tokens[0] -}}\n    {%- endif -%}\n{%- endfor %}",
+        // Infinigence/Megrez-3B-Instruct
+        u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -133,6 +135,8 @@ int main(void) {
         "[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST]    I am an assistant   </s>[INST] Another question[/INST]",
         // ai-sage/GigaChat-20B-A3B-instruct
         "<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>   I am an assistant   <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
+        // Infinigence/Megrez-3B-Instruct
+        "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|>   I am an assistant   <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
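
The test pins the C++ formatter's output against the model's own Jinja template. The same agreement can be checked outside the test suite with jinja2 (assumes the jinja2 package). The sketch below drops the template's built-in default system prompt (in Chinese; roughly "You are Megrez-3B-Instruct, and will give detailed, positive answers to the user's questions."), since that branch only fires when no system message is supplied:

    import jinja2

    # simplified Megrez template: the default-system-prompt branch is omitted
    MEGREZ_TMPL = (
        "{% for message in messages %}"
        "{{ '<|role_start|>' + message['role'] + '<|role_end|>'"
        " + message['content'] + '<|turn_end|>' }}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Hello"},
    ]
    expected = (
        "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|>"
        "<|role_start|>user<|role_end|>Hello<|turn_end|>"
        "<|role_start|>assistant<|role_end|>"
    )
    assert jinja2.Template(MEGREZ_TMPL).render(
        messages=messages, add_generation_prompt=True
    ) == expected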