mtmd : fix glm-edge redundant token count (#13139)

author Xuan-Son Nguyen <redacted>

Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)

committer GitHub <redacted>

Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)
author Xuan-Son Nguyen <redacted>
Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)
committer GitHub <redacted>
Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)
diff --git a/examples/llava/mtmd.cpp b/examples/llava/mtmd.cpp

index a994ef0166e6a607ac1092dbb56fdeac0f39ed71..f95f0503569f97ca636f0589415788fc30548530 100644 (file)
--- a/examples/llava/mtmd.cpp
+++ b/examples/llava/mtmd.cpp
@@ -203,9 +203,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
      }
  
      // llava-1.5, llava-1.6, Yi-VL, Yi-34B, granite: don't need to add prefix and suffix
-    // for glm-edge, we don't need to add because the tokens are already in the returned embeddings
-
-    // TODO @ngxson : glm-edge : remove BOI / EOI tokens embeddings, decode them as normal tokens
  
      std::vector<std::string> parts = string_split_str(prompt_modified, ctx->image_marker);
      output.clear();
@@ -246,7 +243,7 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
      };
  
      for (const auto & part : parts) {
-        //printf("tokenizing part: %s\n", part.c_str());
+        // printf("tokenizing part: %s\n", part.c_str());
          bool add_bos = &parts.front() == &part;
          auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special);
          if (tokens.empty()) {
@@ -338,11 +335,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
                  LOG_DBG("image_tokens->ny = %d\n", image_tokens->ny);
                  LOG_DBG("batch_f32 size = %d\n", (int)image_tokens->batch_f32.entries.size());
  
-                if (clip_is_glm(ctx->ctx_clip)) {
-                    // glm-edge
-                    image_tokens->nx += 2; // add 2 for the begin_of_image and end_of_image token embeddings
-                }
-
                  mtmd_input_chunk chunk{
                      MTMD_INPUT_CHUNK_TYPE_IMAGE,
                      {},
diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp

index af5e2003198d8080d028340dc4b9f74616808e02..735d2619c928fcfb7ba415ded0663318dba8eae1 100644 (file)
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -447,7 +447,7 @@ int32_t llm_chat_apply_template(
          if (add_ass) {
              ss << "<|assistant|>";
          }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4 || tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
          ss << "[gMASK]" << "<sop>";
          for (auto message : chat) {
              std::string role(message->role);
@@ -456,14 +456,6 @@ int32_t llm_chat_apply_template(
          if (add_ass) {
              ss << "<|assistant|>";
          }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
-        for (auto message : chat) {
-            std::string role(message->role);
-            ss << "<|" << role << "|>" << "\n" << message->content;
-        }
-        if (add_ass) {
-            ss << "<|assistant|>";
-        }
      } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
          // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
          for (auto message : chat) {
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp

index be1a640068dc7033b6f05266371db0158d78ff2f..85d89843d6d96f647c0e0de5e0b39c15ef2a980b 100644 (file)
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -187,14 +187,15 @@ int main(void) {
              /* .bos_token= */ "",
              /* .eos_token= */ "",
          },
-        {
-            /* .name= */ "GLMEdge",
-            /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
-            /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
-            /* .bos_token= */ "",
-            /* .eos_token= */ "",
-        },
+        // TODO @ngxson : GLMEdge produces poor result without `[gMASK]<sop>`, so we're temporarily using GLM4 template for it. We should fix this in the future.
+        // {
+        //     /* .name= */ "GLMEdge",
+        //     /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
+        //     /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
+        //     /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n   I am an assistant   <|user|>\nAnother question<|assistant|>",
+        //     /* .bos_token= */ "",
+        //     /* .eos_token= */ "",
+        // },
          {
              /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
              /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
author	Xuan-Son Nguyen <redacted>
	Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)
committer	GitHub <redacted>
	Mon, 28 Apr 2025 14:12:56 +0000 (16:12 +0200)
examples/llava/mtmd.cpp		patch \| blob \| history
src/llama-chat.cpp		patch \| blob \| history
tests/test-chat-template.cpp		patch \| blob \| history