server : support llava 1.6 (#5553)
author CJ Pais <redacted>
Tue, 20 Feb 2024 19:07:22 +0000 (11:07 -0800)
committer GitHub <redacted>
Tue, 20 Feb 2024 19:07:22 +0000 (21:07 +0200)
* server: init working 1.6

* move clip_image to header

* remove commented code

* remove c++ style from header

* remove todo

* expose llava_image_embed_make_with_clip_img

* fix zig build

Makefile
build.zig
examples/llava/llava.cpp
examples/llava/llava.h
examples/server/server.cpp
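
In short, the server now reuses the llava helper to build image embeddings, which is what brings in LLaVA 1.6's multi-patch preprocessing. Below is a minimal sketch of the newly exported entry point, using only the signature and allocation behaviour visible in the hunks further down; how ctx_clip and the decoded clip_image_u8 are obtained is assumed and not part of this commit.

// Sketch only, not server code: build an image embedding from an
// already-decoded clip_image_u8 via the API this commit exports.
// Include paths depend on the build setup (assumption).
#include "clip.h"
#include "llava.h"

#include <cstdio>
#include <cstdlib>

static bool embed_one_image(clip_ctx * ctx_clip, const clip_image_u8 * img, int n_threads) {
    float * image_embd = nullptr;  // allocated inside llava.cpp, owned by the caller
    int     n_img_pos  = 0;        // number of image tokens produced

    if (!llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embd, &n_img_pos)) {
        fprintf(stderr, "failed to build image embedding\n");
        return false;
    }

    fprintf(stderr, "image embedded as %d tokens\n", n_img_pos);

    // llava.cpp malloc's the buffer (6x clip_embd_nbytes, sized for the 1.6 grid),
    // so a plain free() releases it
    free(image_embd);
    return true;
}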

index 41c79c135467a630005f5390c8d533267ed216f8..f03faf6eda0fb69e97325104a7d28373b4ae0a89 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -719,7 +719,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
        $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
        $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
-server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
        $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
        $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
        $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
index 699738f3dd509242e7bb461746acc56e688c506b..c0af454dc9e9225303a6db76471699fc45518dae 100644 (file)
--- a/build.zig
+++ b/build.zig
@@ -123,6 +123,7 @@ pub fn build(b: *std.build.Builder) !void {
     const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
     const train = make.obj("train", "common/train.cpp");
     const clip = make.obj("clip", "examples/llava/clip.cpp");
+    const llava = make.obj("llava", "examples/llava/llava.cpp");
 
     _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser });
     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
@@ -131,7 +132,7 @@ pub fn build(b: *std.build.Builder) !void {
     _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
     _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
 
-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }
index 4cb65a07b67405ad36a25a8cbe7e52b00d4d02df..1a1cf7c78bf34ca7a64813fa417a1a3761bf254a 100644 (file)
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -311,7 +311,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
     return true;
 }
 
-static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
+bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
     float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
     if (!image_embd) {
         fprintf(stderr, "Unable to allocate memory for image embeddings\n");
index 9e9466a5d1726123dd7cef57fc169025b15e37d6..2d40f3f1d5f8425535b493795ca2add2c10c0b7f 100644 (file)
--- a/examples/llava/llava.h
+++ b/examples/llava/llava.h
@@ -31,6 +31,8 @@ struct llava_image_embed {
 /** sanity check for clip <-> llava embed size match */
 LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
 
+LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
+
 /** build an image embed from image file bytes */
 LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
 /** build an image embed from a path to an image filename */
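
The new export sits next to the existing bytes- and path-based helpers, which remain the simpler route when only encoded file bytes are at hand. A rough sketch for contrast, assuming llava_image_embed_free() and the llava_image_embed fields from the parts of llava.h not shown in this hunk:

// Sketch only: the pre-existing bytes-based helper, for contrast with the
// newly exported clip_image_u8 variant above. Include paths are assumed.
#include "clip.h"
#include "llava.h"

static void embed_from_bytes(clip_ctx * ctx_clip, const unsigned char * buf, int len, int n_threads) {
    llava_image_embed * e = llava_image_embed_make_with_bytes(ctx_clip, n_threads, buf, len);
    if (e == nullptr) {
        return;
    }
    // e->embed holds the floats, e->n_image_pos the token count (assumed field names)
    llava_image_embed_free(e);
}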
index c7821eca68cba397b869ccbcde877fd9b84af9f1..eb01729fa7a8a2096c0b1a4864f8cb599208befa 100644 (file)
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -5,6 +5,7 @@
 #include "oai.hpp"
 
 #include "../llava/clip.h"
+#include "../llava/llava.h"
 
 #include "stb_image.h"
 
@@ -997,43 +998,12 @@ struct llama_server_context
             {
                 continue;
             }
-            clip_image_f32_batch img_res_v;
-            img_res_v.size = 0;
-            img_res_v.data = nullptr;
-            if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v))
-            {
-                LOG_TEE("Error processing the given image");
-                clip_free(clp_ctx);
-                clip_image_f32_batch_free(img_res_v);
-                return false;
-            }
-            if (img_res_v.size == 0)
-            {
-                LOG_TEE("Error processing the given image");
-                return false;
-            }
-
-            // note: assumes only one image was returned by clip_image_preprocess
-            clip_image_f32 * img_res = img_res_v.data;
 
-            img.image_tokens = clip_n_patches(clp_ctx);
-            img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx));
-            if (!img.image_embedding)
-            {
-                LOG_TEE("Unable to allocate memory for image embeddings\n");
-                clip_image_f32_batch_free(img_res_v);
-                clip_free(clp_ctx);
-                return false;
-            }
-            LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id);
-            if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
-            {
-                LOG_TEE("Unable to encode image\n");
-                clip_image_f32_batch_free(img_res_v);
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+                LOG_TEE("Error processing the given image");
                 return false;
             }
 
-            clip_image_f32_batch_free(img_res_v);
 
             img.request_encode_image = false;
         }