mtmd : fix batch_view for m-rope (#13397)

author Xuan-Son Nguyen <redacted>

Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)

committer GitHub <redacted>

Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)
author Xuan-Son Nguyen <redacted>
Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)
committer GitHub <redacted>
Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)
diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp

index 5d18e8929b31f9a76b9fa0053d226e40bfcbd88d..2fecf08a44e94b7ed1dd53c7547cf6e670232b8c 100644 (file)
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -554,14 +554,19 @@ struct decode_embd_batch {
      llama_batch get_view(int offset, int n_tokens) {
          llama_pos * pos_ptr;
          pos_view.clear();
-        pos_view.resize(n_tokens * n_pos_per_embd);
+        pos_view.reserve(n_tokens * n_pos_per_embd);
          if (n_pos_per_embd > 1) {
              // mrope
              // for example, with layout of src: 1234...1234...1234...1234...
              //       offset 2 will give us dst: 34...34...34...34...
              for (int i = 0; i < n_pos_per_embd; i++) {
-                auto src = pos.begin() + i * batch.n_tokens + offset;
-                pos_view.insert(pos_view.end(), src, src + n_tokens);
+                // assume n_tokens is less than or equal to batch.n_tokens
+                // batch.n_tokens is the **total** number of tokens in the batch
+                // n_tokens is the number of tokens in the view
+                size_t src_idx = i * batch.n_tokens + offset;
+                pos_view.insert(pos_view.end(),
+                    pos.data() + src_idx,
+                    pos.data() + src_idx + n_tokens);
              }
              pos_ptr = pos_view.data();
          } else {
author	Xuan-Son Nguyen <redacted>
	Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)
committer	GitHub <redacted>
	Fri, 9 May 2025 09:18:02 +0000 (11:18 +0200)