ggml-virtgpu: make the code thread safe (llama/19204)

author Kevin Pouget <redacted>

Wed, 4 Feb 2026 02:46:18 +0000 (03:46 +0100)

committer Georgi Gerganov <redacted>

Sat, 7 Feb 2026 08:37:38 +0000 (10:37 +0200)
author Kevin Pouget <redacted>
Wed, 4 Feb 2026 02:46:18 +0000 (03:46 +0100)
committer Georgi Gerganov <redacted>
Sat, 7 Feb 2026 08:37:38 +0000 (10:37 +0200)
diff --git a/include/ggml-virtgpu.h b/include/ggml-virtgpu.h

index 1cb4bd7a038410320c4acd91f4ebb3d32c414974..faaba8f246d1f97237f45378982ad46c96796a77 100644 (file)
--- a/include/ggml-virtgpu.h
+++ b/include/ggml-virtgpu.h
@@ -7,8 +7,6 @@
  extern "C" {
  #endif
  
-#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"
-
  GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();
  
  #ifdef  __cplusplus
diff --git a/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp b/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp

index f60ae3556caa24265bbe567341a7dfbfbb674f7e..d2e87330a63d0ccbb593e6919dad048eb959c7d1 100644 (file)
--- a/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp
+++ b/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp
@@ -36,7 +36,7 @@ apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
      result.data      = reinterpret_cast<uint64_t>(tensor->data);
      if (tensor->data) {
          if (!tensor->buffer) {
-            GGML_ABORT("tensor has data but not buffer");
+            GGML_ABORT("%s: tensor has data but not buffer", __func__);
          }
          // tensor->data is serialized as an offset to the buffer base address
          result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
diff --git a/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp b/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp

index 77b4ee71e121e7ec1bf4652a9361f76e17a0005f..cc879e51d0427a63016dc1913131f14a362b1d67 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp
+++ b/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp
@@ -27,7 +27,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
  
      const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
      if (!shmem_data) {
-        GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
          apir_decoder_set_fatal(dec);
          return 1;
      }
@@ -45,7 +45,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v
          if (dev->iface.supports_op(dev, op)) {
              continue;
          }
-        GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
  
          status = GGML_STATUS_ABORTED;
          apir_encode_ggml_status(enc, &status);
diff --git a/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp b/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp

index 8ea1bb4fb4993033f440b67664924bb63ea788f7..d55eec27610a1d9dcd52b9cf6c9c1ea3a6d595d4 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp
+++ b/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp
@@ -36,18 +36,22 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec
      ggml_backend_buffer_type_t buft;
      buft = apir_decode_ggml_buffer_type(dec);
  
-    size_t value = buft->iface.get_max_size(buft);
+    size_t value = SIZE_MAX;
+    if (buft->iface.get_max_size) {
+        value = buft->iface.get_max_size(buft);
+    }
+
      apir_encode_size_t(enc, &value);
  
      return 0;
  }
  
+/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
  uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
      GGML_UNUSED(ctx);
-    ggml_backend_buffer_type_t buft;
-    buft = apir_decode_ggml_buffer_type(dec);
+    GGML_UNUSED(dec);
+    const bool is_host = false;
  
-    bool is_host = buft->iface.is_host(buft);
      apir_encode_bool_t(enc, &is_host);
  
      return 0;
diff --git a/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp b/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp

index cf81888e9892b5bcd2488e63393b69cb164d5ced..8cc063ff0a6727c22a47bf63daf9c5c20acd56d7 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp
+++ b/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp
@@ -40,7 +40,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl
      void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
  
      if (!shmem_data) {
-        GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
          return 1;
      }
  
@@ -71,7 +71,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl
  
      void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
      if (!shmem_data) {
-        GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
          return 1;
      }
  
@@ -121,7 +121,7 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg
      buffer = apir_decode_ggml_buffer(dec);
  
      if (!apir_untrack_backend_buffer(buffer)) {
-        GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer);
+        GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer);
          return 1;
      }
  
diff --git a/src/ggml-virtgpu/backend/backend-dispatched-device.cpp b/src/ggml-virtgpu/backend/backend-dispatched-device.cpp

index 497f737a8815414c3f0097b073092feabaaeb950..c7acb8b51ce742601e5243bd2a60beb23e28d2ae 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched-device.cpp
+++ b/src/ggml-virtgpu/backend/backend-dispatched-device.cpp
@@ -124,7 +124,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec,
  
      void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
      if (!shmem_ptr) {
-        GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
          apir_decoder_set_fatal(dec);
          return 1;
      }
diff --git a/src/ggml-virtgpu/backend/backend-dispatched.cpp b/src/ggml-virtgpu/backend/backend-dispatched.cpp

index 51d445725f085229638339bdf5bca2799991ff99..64152eef0d8b461ab0e5f01384751af0e50a6025 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched.cpp
+++ b/src/ggml-virtgpu/backend/backend-dispatched.cpp
@@ -17,26 +17,26 @@ uint64_t timer_count = 0;
  
  uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
      if (reg != NULL) {
-        GGML_LOG_WARN("%s: already initialized\n", __func__);
+        GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: already initialized\n", __func__);
          return APIR_BACKEND_INITIALIZE_ALREADY_INITED;
      }
      ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p;
  
      reg = ggml_backend_reg_fct();
      if (reg == NULL) {
-        GGML_LOG_ERROR("%s: backend registration failed\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend registration failed\n", __func__);
          return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
      }
  
      if (!reg->iface.get_device_count(reg)) {
-        GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__);
          return APIR_BACKEND_INITIALIZE_NO_DEVICE;
      }
  
      dev = reg->iface.get_device(reg, 0);
  
      if (!dev) {
-        GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__);
          return APIR_BACKEND_INITIALIZE_NO_DEVICE;
      }
  
diff --git a/src/ggml-virtgpu/backend/backend-dispatched.gen.h b/src/ggml-virtgpu/backend/backend-dispatched.gen.h

index b81fd5039bde6c4ad2f13bc69bfe02d695366b17..481d7f3150db065c0ed31cd39f608551eb93eb05 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched.gen.h
+++ b/src/ggml-virtgpu/backend/backend-dispatched.gen.h
@@ -16,6 +16,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec,
  uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
  uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
  uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
+/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
  uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
  uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
  uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
@@ -62,7 +63,7 @@ static inline const char * backend_dispatch_command_name(ApirBackendCommandType
          case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
              return "backend_buffer_type_get_max_size";
          case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
-            return "backend_buffer_type_is_host";
+            return "backend_buffer_type_is_host (DEPRECATED)";
          case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
              return "backend_buffer_type_alloc_buffer";
          case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
@@ -110,7 +111,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC
      /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME  = */ backend_buffer_type_get_name,
      /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT  = */ backend_buffer_type_get_alignment,
      /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE  = */ backend_buffer_type_get_max_size,
-    /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST  = */ backend_buffer_type_is_host,
+    /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST  = */ backend_buffer_type_is_host /* DEPRECATED */,
      /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER  = */ backend_buffer_type_alloc_buffer,
      /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE  = */ backend_buffer_type_get_alloc_size,
  
diff --git a/src/ggml-virtgpu/backend/backend-dispatched.h b/src/ggml-virtgpu/backend/backend-dispatched.h

index 6ccbecf078d82d17dd0f921e1f62feb5694e8ad8..10311631d4fb870002b063173a378009fbc57050 100644 (file)
--- a/src/ggml-virtgpu/backend/backend-dispatched.h
+++ b/src/ggml-virtgpu/backend/backend-dispatched.h
@@ -11,6 +11,8 @@
  #include "shared/apir_cs.h"
  #include "shared/apir_cs_ggml.h"
  
+#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: "
+
  struct virgl_apir_context {
      uint32_t               ctx_id;
      virgl_apir_callbacks * iface;
diff --git a/src/ggml-virtgpu/backend/backend.cpp b/src/ggml-virtgpu/backend/backend.cpp

index 95d602ed60380adf808a9cd8ab687de697ed2285..d93414a078bad720e70e822f562596a8a4bc6043 100644 (file)
--- a/src/ggml-virtgpu/backend/backend.cpp
+++ b/src/ggml-virtgpu/backend/backend.cpp
@@ -35,14 +35,8 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) {
          buffer->iface.free_buffer(buffer);
      }
  
-    if (dev) {
-        size_t free, total;
-        dev->iface.get_memory(dev, &free, &total);
-        GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024);
-    }
-
      if (backend_library_handle) {
-        GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__);
+        GGML_LOG_INFO(GGML_VIRTGPU_BCK "The GGML backend library was loaded. Unloading it.\n");
          dlclose(backend_library_handle);
          backend_library_handle = NULL;
      }
@@ -65,7 +59,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
          if (apir_logfile) {
              ggml_log_set(log_to_file_callback, apir_logfile);
          } else {
-            GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file);
+            GGML_LOG_INFO(GGML_VIRTGPU_BCK "Could not open the log file at '%s'\n", apir_log_to_file);
          }
      }
  
@@ -74,7 +68,10 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
      const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
  
      if (!library_name) {
-        GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+                       "%s: cannot open the GGML library: env var '%s' not defined\n",
+                       __func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
+
  
          return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
      }
@@ -82,13 +79,16 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
      backend_library_handle = dlopen(library_name, RTLD_LAZY);
  
      if (!backend_library_handle) {
-        GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror());
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+                       "%s: cannot open the GGML library: %s\n", __func__, dlerror());
  
          return APIR_LOAD_LIBRARY_CANNOT_OPEN;
      }
  
      if (!library_reg) {
-        GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+                       "%s: cannot register the GGML library: env var '%s' not defined\n",
+                       __func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
  
          return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
      }
@@ -96,8 +96,10 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct
      void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
      dlsym_error                 = dlerror();
      if (dlsym_error) {
-        GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg,
-              APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+                       "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
+                       __func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
+
  
          return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
      }
@@ -134,7 +136,9 @@ uint32_t apir_backend_dispatcher(uint32_t               virgl_ctx_id,
      };
  
      if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
-        GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
+        GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+                       "%s: Received an invalid dispatch index (%d >= %d)\n",
+                        __func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
          return APIR_BACKEND_FORWARD_INDEX_INVALID;
      }
  
diff --git a/src/ggml-virtgpu/backend/shared/apir_cs.h b/src/ggml-virtgpu/backend/shared/apir_cs.h

index 27a61091ffd99060649a8363c161cc19708077e5..1bc3a5f685b9c454d18d22757753990801bcc2b8 100644 (file)
--- a/src/ggml-virtgpu/backend/shared/apir_cs.h
+++ b/src/ggml-virtgpu/backend/shared/apir_cs.h
@@ -86,7 +86,7 @@ static inline bool apir_decoder_peek_internal(apir_decoder * dec,
      assert(val_size <= size);
  
      if (unlikely(size > (size_t) (dec->end - dec->cur))) {
-        GGML_LOG_ERROR("reading too much from the decoder ...\n");
+        GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
          apir_decoder_set_fatal(dec);
          memset(val, 0, val_size);
          return false;
@@ -103,7 +103,7 @@ static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val
  
  static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
      if (unlikely(size > (size_t) (dec->end - dec->cur))) {
-        GGML_LOG_ERROR("reading too much from the decoder ...\n");
+        GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
          apir_decoder_set_fatal(dec);
          return NULL;
      }
@@ -221,7 +221,7 @@ static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expec
      uint64_t size;
      apir_decode_uint64_t(dec, &size);
      if (size != expected_size) {
-        GGML_LOG_ERROR("Couldn't decode array from the decoder\n");
+        GGML_LOG_ERROR("%s: Couldn't decode array from the decoder\n", __func__);
          apir_decoder_set_fatal(dec);
          size = 0;
      }
@@ -322,7 +322,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t
      if (size) {
          val[size - 1] = '\0';
      } else {
-        GGML_LOG_ERROR("Couldn't decode the blog array\n");
+        GGML_LOG_ERROR("%s: Couldn't decode the blog array\n", __func__);
          apir_decoder_set_fatal(dec);
      }
  }
@@ -332,7 +332,8 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t
  static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
      size_t alloc_size;
      if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
-        GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count);
+        GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n",
+                       __func__, size, count);
          return NULL;
      }
  
diff --git a/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h b/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h

index 070c3b25fb1b67fa4ffdf61df01a8bcadcb770e0..289f4b77d74ffeafc7f7fa62e032f6ebc10ed2ea 100644 (file)
--- a/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h
+++ b/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h
@@ -39,11 +39,17 @@ static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor
  
  static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
      const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec);
+
+    if (!apir_rpc_tensor) {
+        return NULL;
+    }
+
      ggml_init_params params{
          /*.mem_size   =*/ ggml_tensor_overhead(),
          /*.mem_buffer =*/ NULL,
          /*.no_alloc   =*/ true,
      };
+
      ggml_context * ctx = ggml_init(params);
  
      const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor);
@@ -71,6 +77,10 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decod
      return (ggml_backend_buffer_type_t) handle;
  }
  
+static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) {
+    apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
+}
+
  static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
      apir_buffer_type_host_handle_t handle;
  
@@ -154,13 +164,13 @@ static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml
      size_t tensor_size = sizeof(*tensor);
  
      if (tensor->extra) {
-        GGML_ABORT("Cannot pass tensors with extra");
+        GGML_ABORT("%s: Cannot pass tensors with extra", __func__);
      }
  
      if (tensor->src[0] && tensor->buffer) {
          static int first = 1;
          if (first) {
-            GGML_LOG_WARN("Cannot pass tensors with src and buffer\n");
+            GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__);
              first = 0;
          }
      }
diff --git a/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/src/ggml-virtgpu/ggml-backend-buffer-type.cpp

index 7f650659b8a493ddd691b79c3879d0ef959dfc9d..c493a8e2ae376b4c589dd6f058e64622f2676a77 100644 (file)
--- a/src/ggml-virtgpu/ggml-backend-buffer-type.cpp
+++ b/src/ggml-virtgpu/ggml-backend-buffer-type.cpp
@@ -6,7 +6,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml
  
      ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
      if (!context) {
-        GGML_ABORT("Couldn't allocate the buffer context ...");
+        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
      }
  
      context->gpu = gpu;
@@ -20,7 +20,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml
          context->base         = context->apir_context.shmem.mmap_ptr;
          context->is_from_ptr  = true;
      } else {
-        context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size);
+        context->apir_context = apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size);
          context->is_from_ptr  = false;
          context->base         = NULL;
      }
@@ -34,36 +34,19 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml
  static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
      virtgpu * gpu = BUFT_TO_GPU(buft);
  
-    return apir_buffer_type_get_name(gpu, buft);
+    return gpu->cached_buffer_type.name;
  }
  
  static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
      virtgpu * gpu = BUFT_TO_GPU(buft);
  
-    static size_t align = 0;
-
-    if (align == 0) {
-        align = apir_buffer_type_get_alignment(gpu, buft);
-    }
-
-    return align;
+    return gpu->cached_buffer_type.alignment;
  }
  
  static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
      virtgpu * gpu = BUFT_TO_GPU(buft);
  
-    static size_t max_size = 0;
-    if (max_size == 0) {
-        max_size = apir_buffer_type_get_max_size(gpu, buft);
-    }
-
-    return max_size;
-}
-
-static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
-    virtgpu * gpu = BUFT_TO_GPU(buft);
-
-    return apir_buffer_type_is_host(gpu, buft);
+    return gpu->cached_buffer_type.max_size;
  }
  
  static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
@@ -76,7 +59,7 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff
          return ggml_nbytes(tensor);
      }
  
-    return apir_buffer_type_get_alloc_size(gpu, buft, tensor);
+    return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor);
  }
  
  const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
diff --git a/src/ggml-virtgpu/ggml-backend-device.cpp b/src/ggml-virtgpu/ggml-backend-device.cpp

index 579eb99078147f13dea50233cbb3bba25092f54f..c7d2881058b429864c952469853b147db7e6406b 100644 (file)
--- a/src/ggml-virtgpu/ggml-backend-device.cpp
+++ b/src/ggml-virtgpu/ggml-backend-device.cpp
@@ -3,32 +3,27 @@
  static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    return apir_device_get_name(gpu);
+    return gpu->cached_device_info.name;
  }
  
  static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    return apir_device_get_description(gpu);
+    // Return the pre-cached description from the virtgpu structure
+    return gpu->cached_device_info.description;
  }
  
  static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    static enum ggml_backend_dev_type type;
-    static bool                       has_type = false;
-    if (!has_type) {
-        has_type = true;
-        type     = (enum ggml_backend_dev_type) apir_device_get_type(gpu);
-    }
-
-    return type;
+    return (enum ggml_backend_dev_type) gpu->cached_device_info.type;
  }
  
  static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    return apir_device_get_memory(gpu, free, total);
+    *free = gpu->cached_device_info.memory_free;
+    *total = gpu->cached_device_info.memory_total;
  }
  
  static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
@@ -77,13 +72,22 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_
  ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
-
-    static ggml_backend_buffer_type buft{
-        /* .iface    = */ ggml_backend_remoting_buffer_type_interface,
-        /* .device   = */ dev,
-        /* .context  = */ (void *) ctx,
-    };
+    static std::atomic<bool> initialized = false;
+    static ggml_backend_buffer_type buft;
+
+    if (!initialized) {
+        static std::mutex           mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+
+        if (!initialized) {
+            buft = {
+                /* .iface    = */ ggml_backend_remoting_buffer_type_interface,
+                /* .device   = */ dev,
+                /* .context  = */ (void *) gpu->cached_buffer_type.host_handle,
+            };
+            initialized = true;
+        }
+    }
  
      return &buft;
  }
@@ -91,13 +95,22 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac
  static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
      virtgpu * gpu = DEV_TO_GPU(dev);
  
-    apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
-
-    static ggml_backend_buffer_type buft{
-        /* .iface    = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
-        /* .device   = */ dev,
-        /* .context  = */ (void *) ctx,
-    };
+    static std::atomic<bool> initialized = false;
+    static ggml_backend_buffer_type buft;
+
+    if (!initialized) {
+        static std::mutex           mutex;
+        std::lock_guard<std::mutex> lock(mutex);
+
+        if (!initialized) {
+            buft = {
+                /* .iface    = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
+                /* .device   = */ dev,
+                /* .context  = */ (void *) gpu->cached_buffer_type.host_handle,
+            };
+            initialized = true;
+        }
+    }
  
      return &buft;
  }
@@ -110,7 +123,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_b
  
      ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
      if (!context) {
-        GGML_ABORT("Couldn't allocate the buffer context ...");
+        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
      }
  
      context->gpu          = gpu;
diff --git a/src/ggml-virtgpu/ggml-backend-reg.cpp b/src/ggml-virtgpu/ggml-backend-reg.cpp

index c46cf51c0221a820699db18a0b4c9abd9ad795c5..2d02cfec1d3e744882bb0b25ef87843f11e4098c 100644 (file)
--- a/src/ggml-virtgpu/ggml-backend-reg.cpp
+++ b/src/ggml-virtgpu/ggml-backend-reg.cpp
@@ -4,37 +4,70 @@
  #include <iostream>
  #include <mutex>
  
+void ggml_virtgpu_cleanup(virtgpu * gpu);
+
  static virtgpu * apir_initialize() {
-    static virtgpu * apir_gpu_instance = NULL;
-    static bool      apir_initialized  = false;
+    static virtgpu *         gpu          = NULL;
+    static std::atomic<bool> initialized  = false;
+
+    if (initialized) {
+        // fast track
+        return gpu;
+    }
  
      {
          static std::mutex           mutex;
          std::lock_guard<std::mutex> lock(mutex);
  
-        if (apir_initialized) {
-            return apir_gpu_instance;
+        if (initialized) {
+            // thread safe
+            return gpu;
+        }
+
+        gpu = create_virtgpu();
+        if (!gpu) {
+            initialized = true;
+            return NULL;
          }
  
-        apir_gpu_instance = create_virtgpu();
-        if (!apir_gpu_instance) {
-            GGML_ABORT("failed to initialize the virtgpu");
+        // Pre-fetch and cache all device information, it will not change
+        gpu->cached_device_info.description  = apir_device_get_description(gpu);
+        if (!gpu->cached_device_info.description) {
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__);
+        }
+        gpu->cached_device_info.name         = apir_device_get_name(gpu);
+        if (!gpu->cached_device_info.name) {
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__);
          }
+        gpu->cached_device_info.device_count = apir_device_get_count(gpu);
+        gpu->cached_device_info.type         = apir_device_get_type(gpu);
+
+        apir_device_get_memory(gpu,
+                              &gpu->cached_device_info.memory_free,
+                              &gpu->cached_device_info.memory_total);
+
+        apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu);
+        gpu->cached_buffer_type.host_handle             = buft_host_handle;
+        gpu->cached_buffer_type.name                    = apir_buffer_type_get_name(gpu, buft_host_handle);
+        if (!gpu->cached_buffer_type.name) {
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__);
+        }
+        gpu->cached_buffer_type.alignment               = apir_buffer_type_get_alignment(gpu, buft_host_handle);
+        gpu->cached_buffer_type.max_size                = apir_buffer_type_get_max_size(gpu, buft_host_handle);
  
-        apir_initialized = true;
+        initialized = true;
      }
  
-    return apir_gpu_instance;
+    return gpu;
  }
  
  static int ggml_backend_remoting_get_device_count() {
      virtgpu * gpu = apir_initialize();
      if (!gpu) {
-        GGML_LOG_WARN("apir_initialize failed\n");
          return 0;
      }
  
-    return apir_device_get_count(gpu);
+    return gpu->cached_device_info.device_count;
  }
  
  static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) {
@@ -52,17 +85,21 @@ ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) {
  
  static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
      if (devices.size() > 0) {
-        GGML_LOG_INFO("%s: already initialized\n", __func__);
+        GGML_LOG_INFO(GGML_VIRTGPU "%s: already initialized\n", __func__);
          return;
      }
  
      virtgpu * gpu = apir_initialize();
      if (!gpu) {
-        GGML_LOG_ERROR("apir_initialize failed\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: apir_initialize failed\n", __func__);
          return;
      }
  
-    static bool initialized = false;
+    static std::atomic<bool> initialized = false;
+
+    if (initialized) {
+        return; // fast track
+    }
  
      {
          static std::mutex           mutex;
@@ -70,10 +107,10 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
          if (!initialized) {
              for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) {
                  ggml_backend_remoting_device_context * ctx       = new ggml_backend_remoting_device_context;
-                char                                   desc[256] = "API Remoting device";
+                char                                   desc[256] = "ggml-virtgpu API Remoting device";
  
                  ctx->device      = i;
-                ctx->name        = GGML_REMOTING_FRONTEND_NAME + std::to_string(i);
+                ctx->name        = GGML_VIRTGPU_NAME + std::to_string(i);
                  ctx->description = desc;
                  ctx->gpu         = gpu;
  
@@ -98,7 +135,7 @@ static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_
  static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) {
      UNUSED(reg);
  
-    return GGML_REMOTING_FRONTEND_NAME;
+    return GGML_VIRTGPU_NAME;
  }
  
  static const ggml_backend_reg_i ggml_backend_remoting_reg_i = {
@@ -111,8 +148,7 @@ static const ggml_backend_reg_i ggml_backend_remoting_reg_i = {
  ggml_backend_reg_t ggml_backend_virtgpu_reg() {
      virtgpu * gpu = apir_initialize();
      if (!gpu) {
-        GGML_LOG_ERROR("virtgpu_apir_initialize failed\n");
-        return NULL;
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_apir_initialize failed\n", __func__);
      }
  
      static ggml_backend_reg reg = {
@@ -129,9 +165,25 @@ ggml_backend_reg_t ggml_backend_virtgpu_reg() {
  
      ggml_backend_remoting_reg_init_devices(&reg);
  
-    GGML_LOG_INFO("%s: initialized\n", __func__);
-
      return &reg;
  }
  
+// public function, not exposed in the GGML interface at the moment
+void ggml_virtgpu_cleanup(virtgpu * gpu) {
+    if (gpu->cached_device_info.name) {
+        free(gpu->cached_device_info.name);
+        gpu->cached_device_info.name = NULL;
+    }
+    if (gpu->cached_device_info.description) {
+        free(gpu->cached_device_info.description);
+        gpu->cached_device_info.description = NULL;
+    }
+    if (gpu->cached_buffer_type.name) {
+        free(gpu->cached_buffer_type.name);
+        gpu->cached_buffer_type.name = NULL;
+    }
+
+    mtx_destroy(&gpu->data_shmem_mutex);
+}
+
  GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg)
diff --git a/src/ggml-virtgpu/ggml-remoting.h b/src/ggml-virtgpu/ggml-remoting.h

index 36fc6b2a7bd9ac6d52244fd517e299c903f280b6..087664086765f4bdc2c2bfb8eef317537d09c937 100644 (file)
--- a/src/ggml-virtgpu/ggml-remoting.h
+++ b/src/ggml-virtgpu/ggml-remoting.h
@@ -8,6 +8,9 @@
  #include <memory>
  #include <string>
  
+#define GGML_VIRTGPU_NAME "ggml-virtgpu"
+#define GGML_VIRTGPU "ggml-virtgpu: "
+
  // USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes
  
  #define USE_ALWAYS_TRUE_SUPPORTS_OP 1
@@ -62,7 +65,7 @@ static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggm
  
  static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
      if (!buffer->context) {
-        GGML_ABORT("%s: no context available :/", __func__);
+        GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__);
      }
      return BUFFER_TO_HOST_HANDLE(buffer);
  }
diff --git a/src/ggml-virtgpu/ggmlremoting_functions.yaml b/src/ggml-virtgpu/ggmlremoting_functions.yaml

index 0b7cccfe9cf8656c9583f91e296cb8df29b2d6a8..14ef2433e46c5d763c94b7fb88b71adf3b8903af 100644 (file)
--- a/src/ggml-virtgpu/ggmlremoting_functions.yaml
+++ b/src/ggml-virtgpu/ggmlremoting_functions.yaml
@@ -24,10 +24,10 @@ functions:
          frontend_return: "int"
  
        get_name:
-        frontend_return: "const char *"
+        frontend_return: "char *"
  
        get_description:
-        frontend_return: "const char *"
+        frontend_return: "char *"
  
        get_type:
          frontend_return: "uint32_t"
@@ -64,35 +64,33 @@ functions:
      group_description: "buffer-type"
      functions:
        get_name:
-        frontend_return: "const char *"
+        frontend_return: "char *"
          frontend_extra_params:
-        - "ggml_backend_buffer_type_t buft"
+        - "apir_buffer_type_host_handle_t host_handle"
  
        get_alignment:
          frontend_return: "size_t"
          frontend_extra_params:
-        - "ggml_backend_buffer_type_t buft"
+        - "apir_buffer_type_host_handle_t host_handle"
  
        get_max_size:
          frontend_return: "size_t"
          frontend_extra_params:
-        - "ggml_backend_buffer_type_t buft"
+        - "apir_buffer_type_host_handle_t host_handle"
  
        is_host:
-        frontend_return: "bool"
-        frontend_extra_params:
-        - "ggml_backend_buffer_type_t buft"
+        deprecated: true
  
        alloc_buffer:
          frontend_return: "apir_buffer_context_t"
          frontend_extra_params:
-        - "ggml_backend_buffer_type_t buffer_buft"
+        - "apir_buffer_type_host_handle_t host_handle"
          - "size_t size"
  
        get_alloc_size:
          frontend_return: "size_t"
          frontend_extra_params:
-        - "ggml_backend_buffer_type_t buft"
+        - "apir_buffer_type_host_handle_t host_handle"
          - "const ggml_tensor *op"
  
    buffer:
diff --git a/src/ggml-virtgpu/regenerate_remoting.py b/src/ggml-virtgpu/regenerate_remoting.py

index 4174a24327f8005a345c18394f3e939263906060..aeb48a4087e7282b1685032d2d3f2e9ab80905aa 100755 (executable)
--- a/src/ggml-virtgpu/regenerate_remoting.py
+++ b/src/ggml-virtgpu/regenerate_remoting.py
@@ -116,7 +116,7 @@ class RemotingCodebaseGenerator:
                          'frontend_return': func_metadata.get('frontend_return', 'void'),
                          'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
                          'group_description': group_description,
-                        'newly_added': func_metadata.get('newly_added', False)
+                        'deprecated': func_metadata.get('deprecated', False),
                      })
                      enum_value += 1
  
@@ -165,6 +165,9 @@ class RemotingCodebaseGenerator:
  
              signature = "uint32_t"
              params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"
+            if func['deprecated']:
+                decl_lines.append(f"/* {func['enum_name']} is deprecated. Keeping the handler for backward compatibility. */")
+
              decl_lines.append(f"{signature} {func['backend_function']}({params});")
  
          # Switch cases
@@ -176,7 +179,9 @@ class RemotingCodebaseGenerator:
                  switch_lines.append(f"  /* {func['group_description']} */")
                  current_group = func['group_name']
  
-            switch_lines.append(f"  case {func['enum_name']}: return \"{func['backend_function']}\";")
+            deprecated = " (DEPRECATED)" if func['deprecated'] else ""
+
+            switch_lines.append(f"  case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";")
  
          # Dispatch table
          table_lines = []
@@ -188,7 +193,8 @@ class RemotingCodebaseGenerator:
                  table_lines.append("")
                  current_group = func['group_name']
  
-            table_lines.append(f"  /* {func['enum_name']}  = */ {func['backend_function']},")
+            deprecated = " /* DEPRECATED */" if func['deprecated'] else ""
+            table_lines.append(f"  /* {func['enum_name']}  = */ {func['backend_function']}{deprecated},")
  
          header_content = f'''\
  #pragma once
@@ -225,6 +231,10 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC
                  decl_lines.append(f"/* {func['group_description']} */")
                  current_group = func['group_name']
  
+            if func['deprecated']:
+                decl_lines.append(f"/* {func['frontend_function']} is deprecated. */")
+                continue
+
              # Build parameter list
              params = [self.naming_patterns['frontend_base_param']]
              params.extend(func['frontend_extra_params'])
@@ -287,7 +297,7 @@ static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATC
          generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path]
  
          if not self.clang_format_available:
-            logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted."
+            logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n"
                              "   Install clang-format to enable automatic code formatting.")
          else:
              logging.info("\n🎨 Formatting files with clang-format...")
diff --git a/src/ggml-virtgpu/virtgpu-forward-backend.cpp b/src/ggml-virtgpu/virtgpu-forward-backend.cpp

index bf3c41011ac9b6d1d8aa5f483b390b20178a00e9..07d9a6684964edfa831b3797004cad313c82d06d 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward-backend.cpp
+++ b/src/ggml-virtgpu/virtgpu-forward-backend.cpp
@@ -18,12 +18,17 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
  
      virtgpu_shmem   temp_shmem;  // Local storage for large buffers
      virtgpu_shmem * shmem = &temp_shmem;
+    bool using_shared_shmem = false;
  
      if (cgraph_size <= gpu->data_shmem.mmap_size) {
-        // prefer the init-time allocated page, if large enough
+        // Lock mutex before using shared data_shmem buffer
+        if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+            GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+        }
+        using_shared_shmem = true;
          shmem = &gpu->data_shmem;
      } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
-        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
      }
  
      apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
@@ -42,7 +47,10 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
  
      remote_call_finish(gpu, encoder, decoder);
  
-    if (shmem != &gpu->data_shmem) {
+    // Unlock mutex before cleanup
+    if (using_shared_shmem) {
+        mtx_unlock(&gpu->data_shmem_mutex);
+    } else {
          virtgpu_shmem_destroy(gpu, shmem);
      }
  
diff --git a/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp b/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp

index 03cb09e0643a9f8579cc9e8ffa52c25da37be79a..cab74fd1707da8049d42dd5028d6be43c63a6ca8 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp
+++ b/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp
@@ -1,20 +1,20 @@
  #include "virtgpu-forward-impl.h"
  
-const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
  
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME);
  
-    apir_encode_ggml_buffer_type(encoder, buft);
+    apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
  
      REMOTE_CALL(gpu, encoder, decoder, ret);
  
      const size_t string_size = apir_decode_array_size_unchecked(decoder);
      char *       string      = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
      if (!string) {
-        GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
          apir_decoder_set_fatal(decoder);
      }
      apir_decode_char_array(decoder, string, string_size);
@@ -24,14 +24,14 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t
      return string;
  }
  
-size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
  
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT);
  
-    apir_encode_ggml_buffer_type(encoder, buft);
+    apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
  
      REMOTE_CALL(gpu, encoder, decoder, ret);
  
@@ -43,14 +43,14 @@ size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t
      return alignment;
  }
  
-size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
  
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE);
  
-    apir_encode_ggml_buffer_type(encoder, buft);
+    apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
  
      REMOTE_CALL(gpu, encoder, decoder, ret);
  
@@ -62,26 +62,7 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b
      return max_size;
  }
  
-bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
-    apir_encoder *        encoder;
-    apir_decoder *        decoder;
-    ApirForwardReturnCode ret;
-
-    REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST);
-
-    apir_encode_ggml_buffer_type(encoder, buft);
-
-    REMOTE_CALL(gpu, encoder, decoder, ret);
-
-    bool is_host;
-    apir_decode_bool_t(decoder, &is_host);
-
-    remote_call_finish(gpu, encoder, decoder);
-
-    return is_host;
-}
-
-apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) {
+apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
@@ -90,7 +71,7 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_
  
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER);
  
-    apir_encode_ggml_buffer_type(encoder, buft);
+    apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
  
      apir_encode_size_t(encoder, &size);
  
@@ -103,14 +84,14 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_
      return buffer_context;
  }
  
-size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) {
+size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
  
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE);
  
-    apir_encode_ggml_buffer_type(encoder, buft);
+    apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
  
      apir_encode_ggml_tensor_inline(encoder, op);
  
diff --git a/src/ggml-virtgpu/virtgpu-forward-buffer.cpp b/src/ggml-virtgpu/virtgpu-forward-buffer.cpp

index 3181e394407a8eac19044aec832cdf2862cc6deb..86eee358cf4ae74bba80971fce6b06796b7aa3ab 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward-buffer.cpp
+++ b/src/ggml-virtgpu/virtgpu-forward-buffer.cpp
@@ -36,13 +36,18 @@ void apir_buffer_set_tensor(virtgpu *               gpu,
  
      virtgpu_shmem   temp_shmem;  // Local storage for large buffers
      virtgpu_shmem * shmem = &temp_shmem;
+    bool using_shared_shmem = false;
  
      if (size <= gpu->data_shmem.mmap_size) {
-        // prefer the init-time allocated page, if large enough
+        // Lock mutex before using shared data_shmem buffer
+        if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+            GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+        }
+        using_shared_shmem = true;
          shmem = &gpu->data_shmem;
  
      } else if (virtgpu_shmem_create(gpu, size, shmem)) {
-        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
      }
  
      memcpy(shmem->mmap_ptr, data, size);
@@ -55,7 +60,10 @@ void apir_buffer_set_tensor(virtgpu *               gpu,
  
      remote_call_finish(gpu, encoder, decoder);
  
-    if (shmem != &gpu->data_shmem) {
+    // Unlock mutex before cleanup
+    if (using_shared_shmem) {
+        mtx_unlock(&gpu->data_shmem_mutex);
+    } else {
          virtgpu_shmem_destroy(gpu, shmem);
      }
  
@@ -79,13 +87,18 @@ void apir_buffer_get_tensor(virtgpu *               gpu,
  
      virtgpu_shmem   temp_shmem;  // Local storage for large buffers
      virtgpu_shmem * shmem = &temp_shmem;
+    bool using_shared_shmem = false;
  
      if (size <= gpu->data_shmem.mmap_size) {
-        // prefer the init-time allocated page, if large enough
+        // Lock mutex before using shared data_shmem buffer
+        if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+            GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+        }
+        using_shared_shmem = true;
          shmem = &gpu->data_shmem;
  
      } else if (virtgpu_shmem_create(gpu, size, shmem)) {
-        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
      }
  
      apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
@@ -98,7 +111,10 @@ void apir_buffer_get_tensor(virtgpu *               gpu,
  
      remote_call_finish(gpu, encoder, decoder);
  
-    if (shmem != &gpu->data_shmem) {
+    // Unlock mutex before cleanup
+    if (using_shared_shmem) {
+        mtx_unlock(&gpu->data_shmem_mutex);
+    } else {
          virtgpu_shmem_destroy(gpu, shmem);
      }
  }
diff --git a/src/ggml-virtgpu/virtgpu-forward-device.cpp b/src/ggml-virtgpu/virtgpu-forward-device.cpp

index 3e45e55bdcb5c6e59f24885a3e3ff0f884af20eb..4b6b8f527be544409aaa577b2374afa794590a6b 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward-device.cpp
+++ b/src/ggml-virtgpu/virtgpu-forward-device.cpp
@@ -2,11 +2,6 @@
  #include "virtgpu-shm.h"
  
  int apir_device_get_count(virtgpu * gpu) {
-    static int32_t dev_count = -1;
-    if (dev_count != -1) {
-        return dev_count;
-    }
-
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
@@ -14,6 +9,7 @@ int apir_device_get_count(virtgpu * gpu) {
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
      REMOTE_CALL(gpu, encoder, decoder, ret);
  
+    int32_t dev_count = -1;
      apir_decode_int32_t(decoder, &dev_count);
  
      remote_call_finish(gpu, encoder, decoder);
@@ -21,11 +17,7 @@ int apir_device_get_count(virtgpu * gpu) {
      return dev_count;
  }
  
-const char * apir_device_get_name(virtgpu * gpu) {
-    static char * string = nullptr;
-    if (string) {
-        return string;
-    }
+char * apir_device_get_name(virtgpu * gpu) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
@@ -34,9 +26,9 @@ const char * apir_device_get_name(virtgpu * gpu) {
      REMOTE_CALL(gpu, encoder, decoder, ret);
  
      const size_t string_size = apir_decode_array_size_unchecked(decoder);
-    string                   = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
+    char            * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
      if (!string) {
-        GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
          return NULL;
      }
      apir_decode_char_array(decoder, string, string_size);
@@ -46,7 +38,7 @@ const char * apir_device_get_name(virtgpu * gpu) {
      return string;
  }
  
-const char * apir_device_get_description(virtgpu * gpu) {
+char * apir_device_get_description(virtgpu * gpu) {
      apir_encoder *        encoder;
      apir_decoder *        decoder;
      ApirForwardReturnCode ret;
@@ -58,7 +50,7 @@ const char * apir_device_get_description(virtgpu * gpu) {
      const size_t string_size = apir_decode_array_size_unchecked(decoder);
      char *       string      = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
      if (!string) {
-        GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device description buffer\n", __func__);
  
          return NULL;
      }
@@ -181,7 +173,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, si
      REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
  
      if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
-        GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+        GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer");
      }
  
      apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
diff --git a/src/ggml-virtgpu/virtgpu-forward-impl.h b/src/ggml-virtgpu/virtgpu-forward-impl.h

index eea3e7e5a9bb5b6e1692d46d4505b3c2b4160385..f23c75bb9686be2ab5c155a4b6610f66d15dfe95 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward-impl.h
+++ b/src/ggml-virtgpu/virtgpu-forward-impl.h
@@ -11,7 +11,7 @@
          int32_t forward_flag = (int32_t) apir_command_type__;                                              \
          encoder_name         = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \
          if (!encoder_name) {                                                                               \
-            GGML_ABORT("%s: failed to prepare the remote call encoder", __func__);                       \
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__);                       \
          }                                                                                                  \
      } while (0)
  
@@ -19,10 +19,10 @@
      do {                                                                                                          \
          ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL);       \
          if (!decoder_name) {                                                                                      \
-            GGML_ABORT("%s: failed to kick the remote call", __func__);                                         \
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__);                                         \
          }                                                                                                         \
          if (ret_name < APIR_FORWARD_BASE_INDEX) {                                                                 \
-            GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__,                             \
+            GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__,                             \
                         apir_forward_error(ret_name), ret_name);                                                   \
          }                                                                                                         \
          ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX);                                  \
diff --git a/src/ggml-virtgpu/virtgpu-forward.gen.h b/src/ggml-virtgpu/virtgpu-forward.gen.h

index c27c07f0865f7f59d5b3ad125641fcb7f53c0268..fe4cae2025399645fbdf13228b977d36b2db5130 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-forward.gen.h
+++ b/src/ggml-virtgpu/virtgpu-forward.gen.h
@@ -3,8 +3,8 @@
  /* device */
  void                           apir_device_get_device_count(struct virtgpu * gpu);
  int                            apir_device_get_count(struct virtgpu * gpu);
-const char *                   apir_device_get_name(struct virtgpu * gpu);
-const char *                   apir_device_get_description(struct virtgpu * gpu);
+char *                         apir_device_get_name(struct virtgpu * gpu);
+char *                         apir_device_get_description(struct virtgpu * gpu);
  uint32_t                       apir_device_get_type(struct virtgpu * gpu);
  void                           apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total);
  bool                           apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op);
@@ -17,14 +17,15 @@ void                           apir_device_get_props(struct virtgpu * gpu,
  apir_buffer_context_t          apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size);
  
  /* buffer-type */
-const char *          apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-size_t                apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-size_t                apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-bool                  apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu *           gpu,
-                                                    ggml_backend_buffer_type_t buffer_buft,
-                                                    size_t                     size);
-size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op);
+char *                apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+size_t                apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+size_t                apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu *               gpu,
+                                                    apir_buffer_type_host_handle_t host_handle,
+                                                    size_t                         size);
+size_t                apir_buffer_type_get_alloc_size(struct virtgpu *               gpu,
+                                                      apir_buffer_type_host_handle_t host_handle,
+                                                      const ggml_tensor *            op);
  
  /* buffer */
  void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
diff --git a/src/ggml-virtgpu/virtgpu-shm.cpp b/src/ggml-virtgpu/virtgpu-shm.cpp

index 4def405a62b753a1aa655ddf16da26ff728f7b79..ce6b3b3e607f6ad616ab7787aaf3042efb88d810 100644 (file)
--- a/src/ggml-virtgpu/virtgpu-shm.cpp
+++ b/src/ggml-virtgpu/virtgpu-shm.cpp
@@ -85,8 +85,7 @@ int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) {
      void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
      if (!ptr) {
          virtgpu_ioctl_gem_close(gpu, gem_handle);
-        GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n");
-        exit(1);
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_ioctl_map failed\n", __func__);
          return 1;
      }
  
diff --git a/src/ggml-virtgpu/virtgpu.cpp b/src/ggml-virtgpu/virtgpu.cpp

index 005c8e21db8797bbf31f6abd4818d7fcd1603427..1e650dc65b2b883c8bfcb0cff31676ed8705e211 100644 (file)
--- a/src/ggml-virtgpu/virtgpu.cpp
+++ b/src/ggml-virtgpu/virtgpu.cpp
@@ -33,7 +33,7 @@ static int virtgpu_handshake(virtgpu * gpu) {
  
      encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0);
      if (!encoder) {
-        GGML_ABORT("%s: failed to prepare the remote call encoder", __func__);
+        GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__);
          return 1;
      }
  
@@ -52,7 +52,7 @@ static int virtgpu_handshake(virtgpu * gpu) {
      log_call_duration(call_duration_ns, "API Remoting handshake");
  
      if (!decoder) {
-        GGML_ABORT(
+        GGML_ABORT(GGML_VIRTGPU
              "%s: failed to initiate the communication with the virglrenderer library. "
              "Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
              __func__);
@@ -65,7 +65,8 @@ static int virtgpu_handshake(virtgpu * gpu) {
      uint32_t host_minor;
  
      if (ret_magic != APIR_HANDSHAKE_MAGIC) {
-        GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
                     apir_backend_initialize_error(ret_magic));
      } else {
          apir_decode_uint32_t(decoder, &host_major);
@@ -78,13 +79,13 @@ static int virtgpu_handshake(virtgpu * gpu) {
          return 1;
      }
  
-    GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
-    GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
+    GGML_LOG_INFO(GGML_VIRTGPU "%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
+    GGML_LOG_INFO(GGML_VIRTGPU "%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
  
      if (guest_major != host_major) {
-        GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
+        GGML_LOG_ERROR(GGML_VIRTGPU "Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
      } else if (guest_minor != host_minor) {
-        GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
+        GGML_LOG_WARN(GGML_VIRTGPU "Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
      }
  
      return 0;
@@ -97,7 +98,7 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
  
      encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0);
      if (!encoder) {
-        GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__);
+        GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to prepare the API Remoting command encoder", __func__);
          return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
      }
  
@@ -108,36 +109,67 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) {
      log_call_duration(call_duration_ns, "API Remoting LoadLibrary");
  
      if (!decoder) {
-        GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__);
+        GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to trigger the API Remoting hypercall.\n", __func__);
          return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
      }
  
      remote_call_finish(gpu, encoder, decoder);
  
      if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
-        GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__);
+        GGML_LOG_INFO(GGML_VIRTGPU "The API Remoting backend was successfully loaded and initialized\n");
  
          return ret;
      }
  
      // something wrong happened, find out what.
-
      if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
-        GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__,
-                   apir_load_library_error(ret), ret);
+        if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: virglrenderer could not open the API Remoting backend library, "
+                       "some environment variables are missing. "
+                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+                       __func__, apir_load_library_error(ret));
+        } else if (ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: virglrenderer could not open the API Remoting backend library. "
+                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+                       __func__, apir_load_library_error(ret));
+        } else if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: could not load the backend library, some symbols are missing. "
+                       "Make sure virglrenderer is correctly configured by the hypervisor. (%s) ",
+                       __func__, apir_load_library_error(ret));
+        } else {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__,
+                       apir_load_library_error(ret), ret);
+        }
          return ret;
      }
  
-    GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__);
+    GGML_LOG_INFO(GGML_VIRTGPU
+                  "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
  
      ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
  
-    if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
-        GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)",
+    if (apir_ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: the API Remoting backend library couldn't load the GGML backend library. "
+                   "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+                   __func__, apir_load_library_error(apir_ret));
+    } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
+                   "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+                   __func__, apir_load_library_error(apir_ret));
+    } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)",
                     __func__, apir_ret, apir_load_library_error(apir_ret));
      } else {
          uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
-        GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
                     lib_ret);
      }
      return ret;
@@ -149,38 +181,58 @@ virtgpu * create_virtgpu() {
      gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr;
      util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024);
  
+    // Initialize mutex to protect shared data_shmem buffer
+    if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) {
+        delete gpu;
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to initialize data_shmem mutex", __func__);
+        return NULL;
+    }
+
      if (virtgpu_open(gpu) != APIR_SUCCESS) {
-        GGML_ABORT("%s: failed to open the virtgpu device", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU
+                       "%s: failed to open the virtgpu device\n", __func__);
          return NULL;
      }
  
      if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
-        GGML_ABORT("%s: failed to initialize the GPU capset", __func__);
+        if (gpu->use_apir_capset) {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__);
+        } else {
+            GGML_ABORT(GGML_VIRTGPU
+                       "%s: failed to initialize the virtgpu Venus capset", __func__);
+        }
          return NULL;
      }
  
      if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
-        GGML_ABORT("%s: failed to initialize the GPU context", __func__);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to initialize the GPU context", __func__);
          return NULL;
      }
  
      if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
-        GGML_ABORT("%s: failed to create the shared reply memory pages", __func__);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to create the shared reply memory pages", __func__);
          return NULL;
      }
  
      if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
-        GGML_ABORT("%s: failed to create the shared data memory pages", __func__);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to create the shared data memory pages", __func__);
          return NULL;
      }
  
      if (virtgpu_handshake(gpu)) {
-        GGML_ABORT("%s: failed to handshake with the virglrenderer library", __func__);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to handshake with the virglrenderer library", __func__);
          return NULL;
      }
  
      if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
-        GGML_ABORT("%s: failed to load the backend library", __func__);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to load the backend library", __func__);
          return NULL;
      }
  
@@ -191,7 +243,8 @@ static virt_gpu_result_t virtgpu_open(virtgpu * gpu) {
      drmDevicePtr devs[8];
      int          count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
      if (count < 0) {
-        GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", __func__);
+        GGML_LOG_ERROR(GGML_VIRTGPU
+                       "%s: failed to enumerate DRM devices\n", __func__);
          return APIR_ERROR_INITIALIZATION_FAILED;
      }
  
@@ -213,16 +266,19 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d
  
      int fd = open(node_path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
-        GGML_ABORT("failed to open %s", node_path);
+        GGML_ABORT(GGML_VIRTGPU
+                   "%s: failed to open %s", __func__, node_path);
          return APIR_ERROR_INITIALIZATION_FAILED;
      }
  
      drmVersionPtr version = drmGetVersion(fd);
      if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
          if (version) {
-            GGML_ABORT("unknown DRM driver %s version %d", version->name, version->version_major);
+            GGML_LOG_ERROR(GGML_VIRTGPU
+                           "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major);
          } else {
-            GGML_ABORT("failed to get DRM driver version");
+            GGML_LOG_ERROR(GGML_VIRTGPU
+                           "%s: failed to get DRM driver version\n", __func__);
          }
  
          if (version) {
@@ -236,7 +292,7 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d
  
      drmFreeVersion(version);
  
-    GGML_LOG_INFO("using DRM device %s\n", node_path);
+    GGML_LOG_INFO(GGML_VIRTGPU "using DRM device %s\n", node_path);
  
      return APIR_SUCCESS;
  }
@@ -245,7 +301,7 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
      assert(!gpu->capset.version);
      const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
      if (ret) {
-        GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno));
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to initialize context: %s\n", __func__, strerror(errno));
          return APIR_ERROR_INITIALIZATION_FAILED;
      }
  
@@ -254,10 +310,10 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) {
  
  static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
      if (gpu->use_apir_capset) {
-        GGML_LOG_INFO("Using the APIR capset\n");
+        GGML_LOG_INFO(GGML_VIRTGPU "Using the APIR capset\n");
          gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
      } else {
-        GGML_LOG_INFO("Using the Venus capset\n");
+        GGML_LOG_INFO(GGML_VIRTGPU "Using the Venus capset\n");
          gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
      }
      gpu->capset.version = 0;
@@ -266,7 +322,9 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
          virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
  
      if (ret) {
-        GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno));
+        GGML_LOG_ERROR(GGML_VIRTGPU
+                       "%s: failed to get APIR v%d capset: %s\n",
+                       __func__, gpu->capset.version, strerror(errno));
          return APIR_ERROR_INITIALIZATION_FAILED;
      }
  
@@ -333,9 +391,9 @@ apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type,
       * Prepare the command encoder and its buffer
       */
  
-    static char encoder_buffer[4096];
+    thread_local char encoder_buffer[4096];
  
-    static apir_encoder enc;
+    thread_local apir_encoder enc;
      enc = {
          .cur   = encoder_buffer,
          .start = encoder_buffer,
@@ -369,19 +427,19 @@ void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) {
      UNUSED(gpu);
  
      if (!enc) {
-        GGML_LOG_ERROR("Invalid (null) encoder\n");
+        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) encoder", __func__);
      }
  
      if (!dec) {
-        GGML_LOG_ERROR("Invalid (null) decoder\n");
+        GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) decoder", __func__);
      }
  
      if (apir_encoder_get_fatal(enc)) {
-        GGML_LOG_ERROR("Failed to encode the output parameters.\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to encode the output parameters.", __func__);
      }
  
      if (apir_decoder_get_fatal(dec)) {
-        GGML_LOG_ERROR("Failed to decode the input parameters.\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to decode the input parameters.", __func__);
      }
  }
  
@@ -423,7 +481,7 @@ uint32_t remote_call(virtgpu *       gpu,
      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
  
      if (ret != 0) {
-        GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
+        GGML_ABORT(GGML_VIRTGPU "%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
      }
  
      /*
@@ -467,7 +525,7 @@ uint32_t remote_call(virtgpu *       gpu,
      }
  
      if (max_wait_ms && timedout) {
-        GGML_LOG_ERROR("timed out waiting for the host answer...\n");
+        GGML_LOG_ERROR(GGML_VIRTGPU "%s: timed out waiting for the host answer...\n", __func__);
          return APIR_FORWARD_TIMEOUT;
      }
  
@@ -489,10 +547,13 @@ static void log_call_duration(long long call_duration_ns, const char * name) {
      double call_duration_s  = (double) call_duration_ns / 1e9;  // 1 second = 1e9 nanoseconds
  
      if (call_duration_s > 1) {
-        GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %.2fs for the %s host reply...\n", call_duration_s, name);
      } else if (call_duration_ms > 1) {
-        GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
      } else {
-        GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name);
+        GGML_LOG_INFO(GGML_VIRTGPU
+                      "waited %lldns for the %s host reply...\n", call_duration_ns, name);
      }
  }
diff --git a/src/ggml-virtgpu/virtgpu.h b/src/ggml-virtgpu/virtgpu.h

index d4bb42e20b2562e6362284ce516aa6f0bb796822..68e0f3a376e7c5206795f41ddadf81bb5f0c90f0 100644 (file)
--- a/src/ggml-virtgpu/virtgpu.h
+++ b/src/ggml-virtgpu/virtgpu.h
@@ -17,6 +17,8 @@
  
  #include <cstring>
  
+#include "ggml-remoting.h"
+
  #define VIRGL_RENDERER_UNSTABLE_APIS 1
  #include "apir_hw.h"
  #include <drm/virtgpu_drm.h>
@@ -73,6 +75,27 @@ struct virtgpu {
      /* APIR communication pages */
      virtgpu_shmem reply_shmem;
      virtgpu_shmem data_shmem;
+
+    /* Mutex to protect shared data_shmem buffer from concurrent access */
+    mtx_t data_shmem_mutex;
+
+    /* Cached device information to prevent memory leaks and race conditions */
+    struct {
+        char *   description;
+        char *   name;
+        int32_t  device_count;
+        uint32_t type;
+        size_t   memory_free;
+        size_t   memory_total;
+    } cached_device_info;
+
+    /* Cached buffer type information to prevent memory leaks and race conditions */
+    struct {
+        apir_buffer_type_host_handle_t host_handle;
+        char *                         name;
+        size_t                         alignment;
+        size_t                         max_size;
+    } cached_buffer_type;
  };
  
  static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {
author	Kevin Pouget <redacted>
	Wed, 4 Feb 2026 02:46:18 +0000 (03:46 +0100)
committer	Georgi Gerganov <redacted>
	Sat, 7 Feb 2026 08:37:38 +0000 (10:37 +0200)
include/ggml-virtgpu.h		patch \| blob \| history
src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched-backend.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched-device.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched.gen.h		patch \| blob \| history
src/ggml-virtgpu/backend/backend-dispatched.h		patch \| blob \| history
src/ggml-virtgpu/backend/backend.cpp		patch \| blob \| history
src/ggml-virtgpu/backend/shared/apir_cs.h		patch \| blob \| history
src/ggml-virtgpu/backend/shared/apir_cs_ggml.h		patch \| blob \| history
src/ggml-virtgpu/ggml-backend-buffer-type.cpp		patch \| blob \| history
src/ggml-virtgpu/ggml-backend-device.cpp		patch \| blob \| history
src/ggml-virtgpu/ggml-backend-reg.cpp		patch \| blob \| history
src/ggml-virtgpu/ggml-remoting.h		patch \| blob \| history
src/ggml-virtgpu/ggmlremoting_functions.yaml		patch \| blob \| history
src/ggml-virtgpu/regenerate_remoting.py		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward-backend.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward-buffer.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward-device.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward-impl.h		patch \| blob \| history
src/ggml-virtgpu/virtgpu-forward.gen.h		patch \| blob \| history
src/ggml-virtgpu/virtgpu-shm.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu.cpp		patch \| blob \| history
src/ggml-virtgpu/virtgpu.h		patch \| blob \| history