model : add skt/A.X-4.0 model vocabulary (#14589)
author    Dowon <redacted>
Wed, 9 Jul 2025 08:22:31 +0000 (17:22 +0900)
committer GitHub <redacted>
Wed, 9 Jul 2025 08:22:31 +0000 (11:22 +0300)
convert_hf_to_gguf.py
convert_hf_to_gguf_update.py
src/llama-vocab.cpp

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 4dedc020b61e158bd768bd5477184addac0de832..702827f4d5d2e3d07a77e3a1662066ec808828d3 100755 (executable)
@@ -818,6 +818,9 @@ class TextModel(ModelBase):
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
+            # ref: https://huggingface.co/skt/A.X-4.0
+            res = "a.x-4.0"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 15a326e695dd5ec56c6fb125cdcbea3fb502bdb6..b8cb6027d6de55f41c9b2ac369483b3c0b2f3c93 100755 (executable)
@@ -128,6 +128,7 @@ models = [
     {"name": "llama4",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
     {"name": "pixtral",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
     {"name": "seed-coder",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
+    {"name": "a.x-4.0",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
 ]
 
 # some models are known to be broken upstream, so we will skip them as exceptions
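The registry entry added above drives regeneration of the fingerprint dispatch shown in the first file: the update script downloads each listed tokenizer, computes its hash, and emits the corresponding "if chkhsh == ...:" block. A rough sketch of that generation step, assuming a fingerprint callable like the one sketched earlier (names and details are illustrative, not the script's actual internals):

# Sketch only: emit the dispatch block that the first diff above adds to
# convert_hf_to_gguf.py, from registry entries like the one added here.
# fingerprint() is any callable mapping a repo URL to the tokenizer's hash,
# e.g. a wrapper around the tokenizer_fingerprint() sketched earlier.
def generate_dispatch(models: list[dict], fingerprint) -> str:
    lines = []
    for model in models:
        lines.append(f'        if chkhsh == "{fingerprint(model["repo"])}":')
        lines.append(f'            # ref: {model["repo"]}')
        lines.append(f'            res = "{model["name"]}"')
    return "\n".join(lines)

In practice the registry is consumed by running the update script itself (roughly: python3 convert_hf_to_gguf_update.py <hf_token>; the exact invocation may differ between versions), which rewrites the dispatch in convert_hf_to_gguf.py and downloads the tokenizer files used for testing.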
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index b7f14dc07b60936d748c01f294ad51e2852c73c5..6aa1d901c5e36933d5074f166bc0dde874ef3fab 100644 (file)
@@ -1556,7 +1556,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     tokenizer_pre == "jina-de" ||
                     tokenizer_pre == "gigachat"   ||
                     tokenizer_pre == "jina-v2-es" ||
-                    tokenizer_pre == "jina-v2-de") {
+                    tokenizer_pre == "jina-v2-de" ||
+                    tokenizer_pre == "a.x-4.0") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
             } else if (
                     tokenizer_pre == "jina-v1-en" ||