vocab : add byte token handling to BPE detokenizer for Gemma4 (#21488)

author Aldehir Rojas <redacted>

Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)

committer GitHub <redacted>

Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)
author Aldehir Rojas <redacted>
Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)
committer GitHub <redacted>
Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp

index 75dbaa91ee43fa36654ad5b1767ab7d63c590bc6..de9a9466bc7168ef36fba5e1adaca4a3e79a63d7 100644 (file)
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2813,7 +2813,9 @@ uint8_t llama_vocab::impl::token_to_byte(llama_token id) const {
              return strtol(buf.c_str(), NULL, 16);
          }
          case LLAMA_VOCAB_TYPE_BPE: {
-            GGML_ABORT("fatal error");
+            // Gemma4 uses BPE with SPM-style byte-fallback tokens of the form <0xXX>; the two hex digits sit at offsets 3-4
+            auto buf = token_data.text.substr(3, 2);
+            return strtol(buf.c_str(), NULL, 16);
          }
          case LLAMA_VOCAB_TYPE_WPM: {
              GGML_ABORT("fatal error");
@@ -3294,6 +3296,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
                      std::string result = llama_decode_text(token_text);
                      return _try_copy(result.data(), result.size());
                  }
+                if (attr & LLAMA_TOKEN_ATTR_BYTE) {
+                    char byte = (char) token_to_byte(token);
+                    return _try_copy((char*) &byte, 1);
+                }
                  break;
              }
              case LLAMA_VOCAB_TYPE_RWKV: {
author	Aldehir Rojas <redacted>
	Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)
committer	GitHub <redacted>
	Mon, 6 Apr 2026 14:08:37 +0000 (09:08 -0500)