mtmd : fix memory leak in mtmd_helper_eval_chunk_single (#13961)

author Xuan-Son Nguyen <redacted>

Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)

committer GitHub <redacted>

Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)
author Xuan-Son Nguyen <redacted>
Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)
committer GitHub <redacted>
Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)
diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp

index 508a64c586de1aea5d54b1d58fb5a3664b7165d5..40deab5ab00a840361d59f7e5aed8f2e51eecabd 100644 (file)
--- a/tools/mtmd/mtmd-cli.cpp
+++ b/tools/mtmd/mtmd-cli.cpp
@@ -70,6 +70,7 @@ struct mtmd_cli_context {
      llama_model       * model;
      llama_context     * lctx;
      const llama_vocab * vocab;
+    common_sampler    * smpl;
      llama_batch         batch;
      int                 n_batch;
  
@@ -89,8 +90,9 @@ struct mtmd_cli_context {
          model = llama_init.model.get();
          lctx = llama_init.context.get();
          vocab = llama_model_get_vocab(model);
+        smpl = common_sampler_init(model, params.sampling);
          n_threads = params.cpuparams.n_threads;
-        batch = llama_batch_init(params.n_batch, 0, 1);
+        batch = llama_batch_init(1, 0, 1); // batch for next token generation
          n_batch = params.n_batch;
  
          if (!model || !lctx) {
@@ -118,6 +120,11 @@ struct mtmd_cli_context {
          }
      }
  
+    ~mtmd_cli_context() {
+        llama_batch_free(batch);
+        common_sampler_free(smpl);
+    }
+
      void init_vision_context(common_params & params) {
          const char * clip_path = params.mmproj.path.c_str();
          mtmd_context_params mparams = mtmd_context_params_default();
@@ -153,7 +160,7 @@ struct mtmd_cli_context {
      }
  };
  
-static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int n_predict) {
+static int generate_response(mtmd_cli_context & ctx, int n_predict) {
      llama_tokens generated_tokens;
      for (int i = 0; i < n_predict; i++) {
          if (i > n_predict || !g_is_generating || g_is_interrupted) {
@@ -161,9 +168,9 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
              break;
          }
  
-        llama_token token_id = common_sampler_sample(smpl, ctx.lctx, -1);
+        llama_token token_id = common_sampler_sample(ctx.smpl, ctx.lctx, -1);
          generated_tokens.push_back(token_id);
-        common_sampler_accept(smpl, token_id, true);
+        common_sampler_accept(ctx.smpl, token_id, true);
  
          if (llama_vocab_is_eog(ctx.vocab, token_id) || ctx.check_antiprompt(generated_tokens)) {
              LOG("\n");
@@ -261,7 +268,6 @@ int main(int argc, char ** argv) {
  
      bool is_single_turn = !params.prompt.empty() && !params.image.empty();
  
-    struct common_sampler * smpl = common_sampler_init(ctx.model, params.sampling);
      int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
  
      // Ctrl+C handling
@@ -300,7 +306,7 @@ int main(int argc, char ** argv) {
          if (eval_message(ctx, msg, true)) {
              return 1;
          }
-        if (!g_is_interrupted && generate_response(ctx, smpl, n_predict)) {
+        if (!g_is_interrupted && generate_response(ctx, n_predict)) {
              return 1;
          }
  
@@ -366,7 +372,7 @@ int main(int argc, char ** argv) {
                  return 1;
              }
              if (g_is_interrupted) break;
-            if (generate_response(ctx, smpl, n_predict)) {
+            if (generate_response(ctx, n_predict)) {
                  return 1;
              }
              content.clear();
diff --git a/tools/mtmd/mtmd-helper.cpp b/tools/mtmd/mtmd-helper.cpp

index 64f03fd1e7eb2dade6e688213a230c2d501b1b0a..686f42f3960fe0d734d385542fef97e1b282484b 100644 (file)
--- a/tools/mtmd/mtmd-helper.cpp
+++ b/tools/mtmd/mtmd-helper.cpp
@@ -311,6 +311,7 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
          GGML_ABORT("chunk type not supported");
      }
  
+    llama_batch_free(text_batch);
      return 0;
  }
author	Xuan-Son Nguyen <redacted>
	Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)
committer	GitHub <redacted>
	Mon, 2 Jun 2025 14:29:28 +0000 (16:29 +0200)
tools/mtmd/mtmd-cli.cpp		patch | blob | history
tools/mtmd/mtmd-helper.cpp		patch | blob | history