git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
mtmd: fix patch_size initialized to random value in audio models (#17128)
author: Xuan-Son Nguyen <redacted>
Mon, 10 Nov 2025 10:41:05 +0000 (11:41 +0100)
committer: GitHub <redacted>
Mon, 10 Nov 2025 10:41:05 +0000 (11:41 +0100)
* mtmd: fix patch_size initialized to random value in audio models

* add default hparams

tools/mtmd/clip.cpp

index d1423b67f98650d8e544b42fed8f42f2bd38b190..1d78f5954ed6604007da5098bbc86b7d3508dc5a 100644 (file)
@@ -160,13 +160,13 @@ enum patch_merge_type {
 };
 
 struct clip_hparams {
-    int32_t image_size;
-    int32_t patch_size;
-    int32_t n_embd;
-    int32_t n_ff;
-    int32_t projection_dim;
-    int32_t n_head;
-    int32_t n_layer;
+    int32_t image_size = 0;
+    int32_t patch_size = 0;
+    int32_t n_embd = 0;
+    int32_t n_ff = 0;
+    int32_t projection_dim = 0;
+    int32_t n_head = 0;
+    int32_t n_layer = 0;
     // idefics3
     int32_t image_longest_edge = 0;
     int32_t image_min_pixels = -1;
@@ -2683,6 +2683,9 @@ struct clip_model_loader {
                 }
             } else if (is_audio) {
                 get_u32(KEY_A_NUM_MEL_BINS, hparams.n_mel_bins);
+                // some hparams are unused, but still need to be set to avoid issues
+                hparams.image_size = 0;
+                hparams.patch_size = 1;
 
             } else {
                 GGML_ASSERT(false && "unknown modality");