model: Add support for Tiny Aya Models (#19611)

author Saurabh Dash <redacted>

Mon, 16 Feb 2026 15:28:46 +0000 (10:28 -0500)

committer GitHub <redacted>

Mon, 16 Feb 2026 15:28:46 +0000 (16:28 +0100)
author Saurabh Dash <redacted>
Mon, 16 Feb 2026 15:28:46 +0000 (10:28 -0500)
committer GitHub <redacted>
Mon, 16 Feb 2026 15:28:46 +0000 (16:28 +0100)
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py

index 0f614e4df3d1832110f5807b2ba93f922975be84..d7141f01cf02225108dd3c614169d345716b14a8 100755 (executable)
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1124,6 +1124,9 @@ class TextModel(ModelBase):
          if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
              # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
              res = "command-r"
+        if chkhsh == "d772b220ace2baec124bed8cfafce0ead7d6c38a4b65ef11261cf9d5d62246d1":
+            # ref: https://huggingface.co/CohereLabs/tiny-aya-base
+            res = "tiny_aya"
          if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
              # ref: https://huggingface.co/Qwen/Qwen1.5-7B
              res = "qwen2"
@@ -7360,6 +7363,17 @@ class Cohere2Model(TextModel):
          self.gguf_writer.add_rope_dimension_count(int(rotary_pct * (hidden_size // num_attention_heads)))
          self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
  
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Cohere2 runtime in llama.cpp expects no bias tensors;
+        # the checkpoint's bias tensors contain only zeros (verified below), so we can skip them
+        if name.endswith(".bias"):
+            if torch.any(data_torch != 0):
+                raise ValueError(f"Bias tensor {name!r} is not zero.")
+            logger.debug(f"Skipping bias tensor {name!r} for Cohere2 conversion.")
+            return
+
+        yield from super().modify_tensors(data_torch, name, bid)
+
  
  @ModelBase.register("OlmoForCausalLM")
  @ModelBase.register("OLMoForCausalLM")
diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py

index a6834515080df70c7bae1b8cf772cd1c6d1aff49..8bd24dbe9171755a5cdbd9d55ad53ba63257484b 100755 (executable)
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -99,6 +99,7 @@ models = [
      {"name": "stablelm2",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
      {"name": "refact",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
      {"name": "command-r",        "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+    {"name": "tiny_aya",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereLabs/tiny-aya-base", },
      {"name": "qwen2",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
      {"name": "olmo",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
      {"name": "dbrx",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp

index 62e137fb84279914cc45dc9fc0bc661ae09e3243..b35cb02ce4d8e132fea3b2e9055dd88483344dc5 100644 (file)
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -422,6 +422,14 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                      "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                  };
                  break;
+            case LLAMA_VOCAB_PRE_TYPE_TINY_AYA:
+                regex_exprs = {
+                    // original regex from tokenizer.json: "\\d{1,3}(?=(?:\\d{3})*\\b)"
+                    "\\d{1,3}(?=(?:\\d{3})*\\b)",
+                    // original regex from tokenizer.json: "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+                    "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
              case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
                  regex_exprs = {
                      // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
@@ -2005,10 +2013,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                  tokenizer_pre == "megrez") {
                  pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
              } else if (
-                    tokenizer_pre == "gpt-4o" ||
-                    tokenizer_pre == "llama4") {
+                tokenizer_pre == "gpt-4o" ||
+                tokenizer_pre == "llama4") {
                  pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
                  clean_spaces = false;
+            } else if (
+                tokenizer_pre == "tiny_aya") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_TINY_AYA;
+                clean_spaces = false;
              } else if (
                  tokenizer_pre == "superbpe") {
                  pre_type = LLAMA_VOCAB_PRE_TYPE_SUPERBPE;
diff --git a/src/llama-vocab.h b/src/llama-vocab.h

index 718238fb866f552d75ccae29ea7baa5d7dd59c4f..1312a877ab029b37f7c983e593f4f91b3839cfcc 100644 (file)
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -55,6 +55,7 @@ enum llama_vocab_pre_type {
      LLAMA_VOCAB_PRE_TYPE_YOUTU           = 44,
      LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE      = 45,
      LLAMA_VOCAB_PRE_TYPE_QWEN35          = 46,
+    LLAMA_VOCAB_PRE_TYPE_TINY_AYA        = 47,
  };
  
  struct LLM_KV;
diff --git a/src/unicode.cpp b/src/unicode.cpp

index b88d953bd2714014c363c3a6c141f86ad1fc7b0f..1475b53b6597400235204dda6c01b3f07aa494ec 100644 (file)
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -769,6 +769,12 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
      } else if (regex_expr == "\\p{AFMoE_digits}") {
          // AFMOE digit pattern - use custom implementation for proper splitting
          bpe_offsets = unicode_regex_split_custom_afmoe(text, offsets);
+    } else if (regex_expr == "\\d{1,3}(?=(?:\\d{3})*\\b)") {
+        // tiny_aya digit grouping pattern from tokenizer.json:
+        //   {"type": "Split", "pattern": {"Regex": "\\d{1,3}(?=(?:\\d{3})*\\b)"}, "behavior": "Isolated"}
+        // Splits digits into groups of 3 from the right (e.g., 1234567 -> 1, 234, 567)
+        // TODO: Revisit this regex in case there are any subtle tokenization differences from the original regex.
+        bpe_offsets = unicode_regex_split_custom_afmoe(text, offsets);
      }
  
      return bpe_offsets;
author	Saurabh Dash <redacted>
	Mon, 16 Feb 2026 15:28:46 +0000 (10:28 -0500)
committer	GitHub <redacted>
	Mon, 16 Feb 2026 15:28:46 +0000 (16:28 +0100)
convert_hf_to_gguf.py		patch \| blob \| history
convert_hf_to_gguf_update.py		patch \| blob \| history
src/llama-vocab.cpp		patch \| blob \| history
src/llama-vocab.h		patch \| blob \| history
src/unicode.cpp		patch \| blob \| history