From: Diego Devesa
Date: Sun, 4 May 2025 15:05:20 +0000 (+0200)
Subject: llava/mtmd : fixes to fully support dl backends (#13303)
X-Git-Tag: upstream/0.0.5318~41
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=86bd60d3fe4a08b8c9d920e6defbc2412d803569;p=pkg%2Fggml%2Fsources%2Fllama.cpp

llava/mtmd : fixes to fully support dl backends (#13303)
---

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 119d7c50..383e94ac 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -27,11 +27,11 @@ else()
     add_subdirectory(run)
     add_subdirectory(tokenize)
     add_subdirectory(tts)
+    add_subdirectory(llava)
     if (NOT GGML_BACKEND_DL)
         # these examples use the backends directly and cannot be built with dynamic loading
         add_subdirectory(cvector-generator)
         add_subdirectory(export-lora)
-        add_subdirectory(llava)
         if (GGML_RPC)
             add_subdirectory(rpc)
         endif()
diff --git a/tools/llava/clip.cpp b/tools/llava/clip.cpp
index 3b60a526..cc03bf88 100644
--- a/tools/llava/clip.cpp
+++ b/tools/llava/clip.cpp
@@ -3382,7 +3382,15 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
             GGML_ABORT("Unknown projector type");
     }
 
-    ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);
+    // ggml_backend_cpu_set_n_threads(ctx->backend_cpu, n_threads);
+    ggml_backend_dev_t dev = ggml_backend_get_device(ctx->backend_cpu);
+    ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
+    if (reg) {
+        auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
+        if (ggml_backend_set_n_threads_fn) {
+            ggml_backend_set_n_threads_fn(ctx->backend_cpu, n_threads);
+        }
+    }
 
     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
     if (status != GGML_STATUS_SUCCESS) {
diff --git a/tools/llava/llava.cpp b/tools/llava/llava.cpp
index c00d16ae..b85ab112 100644
--- a/tools/llava/llava.cpp
+++ b/tools/llava/llava.cpp
@@ -2,6 +2,7 @@
 #include "llava.h"
 
 #include "llama.h"
+#include "ggml-cpp.h"
 
 #include <algorithm>
 #include <cerrno>
@@ -209,7 +210,10 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
         struct ggml_tensor *flatten = ggml_view_2d(model.ctx, permuted_cont, clip_n_mmproj_embd(ctx_clip), num_patches_height * num_patches_width * num_patches_per_side * num_patches_per_side, size_ele * clip_n_mmproj_embd(ctx_clip), 0);
         // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
         ggml_build_forward_expand(gf, flatten);
-        ggml_graph_compute_with_ctx(model.ctx, gf, 1);
+
+        ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
+        ggml_backend_graph_compute(backend.get(), gf);
+
         struct ggml_tensor* result = ggml_graph_node(gf, -1);
 
         memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip));   // main image as global context
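
Note on the clip.cpp hunk: with GGML_BACKEND_DL the CPU backend may live in a
dynamically loaded module, so clip.cpp can no longer link against
ggml_backend_cpu_set_n_threads() directly; instead it resolves the optional
"ggml_backend_set_n_threads" entry point through the backend registry at
runtime. A minimal standalone sketch of the same pattern follows;
set_backend_n_threads is a hypothetical helper name, not part of this commit:

    #include "ggml-backend.h"

    // Sketch: look up and call the optional "ggml_backend_set_n_threads"
    // entry point of whatever backend (possibly dynamically loaded) owns
    // this ggml_backend_t. Hypothetical helper, not part of the commit.
    static void set_backend_n_threads(ggml_backend_t backend, int n_threads) {
        ggml_backend_dev_t dev = ggml_backend_get_device(backend);
        ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
        if (!reg) {
            return; // backend has no registry entry to query
        }
        auto set_n_threads_fn = (ggml_backend_set_n_threads_t)
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
        if (set_n_threads_fn) {
            set_n_threads_fn(backend, n_threads); // skipped if the backend has no thread count
        }
    }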
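
Note on the llava.cpp hunk: ggml_graph_compute_with_ctx() executes the graph
with the statically linked CPU backend, which is unavailable when backends are
loaded dynamically, so the code now asks the registry for a CPU-type backend
and runs the graph through the generic ggml_backend_graph_compute() API. A
sketch under the same assumptions (ggml_backend_ptr is the RAII wrapper from
ggml-cpp.h; compute_graph_on_cpu is a hypothetical name):

    #include "ggml.h"
    #include "ggml-backend.h"
    #include "ggml-cpp.h" // ggml_backend_ptr (unique_ptr over ggml_backend_t)

    // Sketch: run an already-built cgraph on whichever CPU backend is
    // registered, including one loaded at runtime via GGML_BACKEND_DL.
    static enum ggml_status compute_graph_on_cpu(struct ggml_cgraph * gf) {
        ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
        if (!backend) {
            return GGML_STATUS_FAILED; // no CPU backend available
        }
        return ggml_backend_graph_compute(backend.get(), gf);
    }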