llama : add Falcon3 support (#10864)
author Billel Mokeddem <redacted>
Tue, 17 Dec 2024 15:24:56 +0000 (19:24 +0400)
committer GitHub <redacted>
Tue, 17 Dec 2024 15:24:56 +0000 (17:24 +0200)
convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
src/llama.cpp

convert_hf_to_gguf.py
index 9dc1673bc2c06cddaf1a24b38a14985f8162c23d..66e268af614195a04dc96893078e5a2c1435ba10 100755 (executable)
@@ -525,6 +525,9 @@ class Model:
             else:
                 token: str = reverse_vocab[i]
                 if token in added_vocab:
+                    # We need to manually encode and decode the added tokens in case special characters
+                    # used for `\n` / `\t` have been manually added in the added tokens
+                    token = tokenizer.decode(tokenizer.encode(token))
                     if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
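
The encode/decode round trip added above is easiest to see with a concrete token. A minimal sketch, assuming a Hugging Face tokenizer loaded for tiiuae/Falcon3-7B-Base; the token string is purely illustrative, not an actual Falcon3 added token:

    # Sketch of the round trip from the hunk above (illustrative token only).
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-7B-Base")

    token = ">>TITLE<<"  # hypothetical added token
    token = tokenizer.decode(tokenizer.encode(token))
    # If the added token was registered with stand-in characters for "\n" / "\t",
    # the round trip restores the real characters before the special/CONTROL check.
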
@@ -571,6 +574,9 @@ class Model:
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
+        if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
+            # ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
+            res = "falcon3"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
convert_hf_to_gguf_update.py
index 88058442f6dc4ff1416e734df231a2b7befce42f..2ba346640b35283b0849abd8501aa34f811fa030 100755 (executable)
@@ -72,6 +72,7 @@ models = [
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge",       "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
     {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
src/llama.cpp
index 8b799e0ebeda72e86c2cf6b296f23375756ce338..1cc8a93323b4a945ff51fab3cd2ef3b9eac93cda 100644 (file)
@@ -1612,6 +1612,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
     LLM_CHAT_TEMPLATE_PHI_3,
+    LLM_CHAT_TEMPLATE_FALCON_3,
     LLM_CHAT_TEMPLATE_ZEPHYR,
     LLM_CHAT_TEMPLATE_MONARCH,
     LLM_CHAT_TEMPLATE_GEMMA,
@@ -1644,6 +1645,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
     { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
+    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
     { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
     { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
     { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
@@ -6473,6 +6475,11 @@ static void llm_load_vocab(
             } else if (
                     tokenizer_pre == "falcon") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
+            } else if (
+                    tokenizer_pre == "falcon3") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
+                vocab.tokenizer_ignore_merges = true;
+                vocab.tokenizer_add_bos = true;
             } else if (
                     tokenizer_pre == "mpt") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
@@ -22219,6 +22226,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
         }
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
+    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
+        return LLM_CHAT_TEMPLATE_FALCON_3;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -22371,6 +22380,15 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
+        // Falcon 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
         // zephyr template
         for (auto message : chat) {
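
To make the new formatting concrete, here is a hedged Python mirror of the LLM_CHAT_TEMPLATE_FALCON_3 branch above and the prompt it renders; the conversation content is illustrative:

    def falcon3_prompt(messages, add_assistant=True):
        # Same formatting as the Falcon 3 branch above.
        out = ""
        for m in messages:
            out += f"<|{m['role']}|>\n{m['content']}\n"
        if add_assistant:
            out += "<|assistant|>\n"
        return out

    print(falcon3_prompt([
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
    ]))
    # <|system|>
    # You are a helpful assistant.
    # <|user|>
    # Hello
    # <|assistant|>
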