llava : change API to pure C style for Rust FFI bindgen (#6079)

author Ting Lou <redacted>

Fri, 15 Mar 2024 14:31:05 +0000 (22:31 +0800)

committer GitHub <redacted>

Fri, 15 Mar 2024 14:31:05 +0000 (16:31 +0200)
author Ting Lou <redacted>
Fri, 15 Mar 2024 14:31:05 +0000 (22:31 +0800)
committer GitHub <redacted>
Fri, 15 Mar 2024 14:31:05 +0000 (16:31 +0200)
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp

index 2035554ea87414089009edec2aed35ca5acfdb80..a0ed82d7e2328ddd1aa8c4dbfb84565e40fe0c62 100644 (file)
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1235,16 +1235,16 @@ struct clip_image_f32 * clip_image_f32_init() {
  
  void clip_image_u8_free(struct clip_image_u8  * img) { delete img; }
  void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
-void clip_image_u8_batch_free(struct clip_image_u8_batch  & batch) {
-    if (batch.size > 0) {
-        delete[] batch.data;
-        batch.size = 0;
+void clip_image_u8_batch_free(struct clip_image_u8_batch  * batch) {
+    if (batch->size > 0) {
+        delete[] batch->data;
+        batch->size = 0;
      }
  }
-void clip_image_f32_batch_free(struct clip_image_f32_batch  & batch) {
-    if (batch.size > 0) {
-        delete[] batch.data;
-        batch.size = 0;
+void clip_image_f32_batch_free(struct clip_image_f32_batch  * batch) {
+    if (batch->size > 0) {
+        delete[] batch->data;
+        batch->size = 0;
      }
  }
  
@@ -1497,7 +1497,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
  
  // returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
  // res_imgs memory is being allocated here, previous allocations will be freed if found
-bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
+bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
      bool pad_to_square = true;
      if (!ctx->has_vision_encoder) {
          printf("This gguf file seems to have no vision encoder\n");
@@ -1509,11 +1509,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
          pad_to_square = false;
      }
      // free the previous res_imgs if any set
-    if (res_imgs.size > 0) {
+    if (res_imgs->size > 0) {
          clip_image_f32_batch_free(res_imgs);
      }
-    res_imgs.data = nullptr;
-    res_imgs.size = 0;
+    res_imgs->data = nullptr;
+    res_imgs->size = 0;
  
      // the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
      // see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
@@ -1568,11 +1568,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
              bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
              patches.insert(patches.begin(), image_original_resize);
              // clip_image_f32_batch_init(patches.size());
-            res_imgs.size = patches.size();
-            res_imgs.data = new clip_image_f32[res_imgs.size];
+            res_imgs->size = patches.size();
+            res_imgs->data = new clip_image_f32[res_imgs->size];
              int num=0;
              for (auto& patch : patches) {
-                normalize_image_u8_to_f32(patch, &res_imgs.data[num], ctx->image_mean, ctx->image_std);
+                normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std);
                  num++;
              }
  
@@ -1660,9 +1660,9 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
      // }
      // res_imgs.push_back(res);
  
-    res_imgs.size = 1;
-    res_imgs.data = new clip_image_f32[res_imgs.size];
-    res_imgs.data[0] = *res;
+    res_imgs->size = 1;
+    res_imgs->data = new clip_image_f32[res_imgs->size];
+    res_imgs->data[0] = *res;
      clip_image_f32_free(res);
  
      return true;
diff --git a/examples/llava/clip.h b/examples/llava/clip.h

index e5bd54924a9c82a06d7a3389fb040e0db6a4beca..45bdad68976585c9d7783bfeb31eff975b90e055 100644 (file)
--- a/examples/llava/clip.h
+++ b/examples/llava/clip.h
@@ -60,8 +60,8 @@ CLIP_API struct clip_image_f32 * clip_image_f32_init();
  
  CLIP_API void clip_image_u8_free (struct clip_image_u8  * img);
  CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
-CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch  & batch);
-CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch & batch);
+CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch  * batch);
+CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
  
  CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
  
@@ -69,7 +69,7 @@ CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8
  CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
  
  /** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
-CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs );
+CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
  
  CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
  
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp

index 9801281661b250defe571c6c6f4acfeca9a74848..29764757aab5d9f37e05ad7234371666d8d588d7 100644 (file)
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -223,7 +223,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
      clip_image_f32_batch img_res_v;
      img_res_v.size = 0;
      img_res_v.data = nullptr;
-    if (!clip_image_preprocess(ctx_clip, img, img_res_v)) {
+    if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
          fprintf(stderr, "%s: unable to preprocess image\n", __func__);
          delete[] img_res_v.data;
          return false;
diff --git a/examples/llava/llava.h b/examples/llava/llava.h

index 2d40f3f1d5f8425535b493795ca2add2c10c0b7f..19212f6e9e9c5cfd53767dd1097d437ba039dfb9 100644 (file)
--- a/examples/llava/llava.h
+++ b/examples/llava/llava.h
@@ -29,9 +29,9 @@ struct llava_image_embed {
  };
  
  /** sanity check for clip <-> llava embed size match */
-LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
+LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip);
  
-LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
+LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
  
  /** build an image embed from image file bytes */
  LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
author	Ting Lou <redacted>
	Fri, 15 Mar 2024 14:31:05 +0000 (22:31 +0800)
committer	GitHub <redacted>
	Fri, 15 Mar 2024 14:31:05 +0000 (16:31 +0200)
examples/llava/clip.cpp		patch \| blob \| history
examples/llava/clip.h		patch \| blob \| history
examples/llava/llava.cpp		patch \| blob \| history
examples/llava/llava.h		patch \| blob \| history