#include <cstdint>
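+// Reject obviously malformed graph requests before any decoding work happens.
+// Returns 0 when the metadata looks valid, non-zero when the caller must fail the command.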
+static uint32_t validate_graph_operation(size_t cgraph_size, uint32_t shmem_res_id, const char * operation) {
+ if (cgraph_size == 0) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Zero-size computation graph\n", operation);
+ return 1;
+ }
+
+ // placeholder: validate that the shmem region behind shmem_res_id is at least cgraph_size bytes;
+ // doing so requires a new method in the Virgl->APIR callback interface
+ GGML_UNUSED(shmem_res_id);
+
+ return 0; // Valid
+}
+
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
- GGML_UNUSED(enc);
static bool async_backend_initialized = false;
static bool async_backend;
size_t cgraph_size;
apir_decode_size_t(dec, &cgraph_size);
+ if (validate_graph_operation(cgraph_size, shmem_res_id, __func__) != 0) {
+ apir_decoder_set_fatal(dec);
+ return 1;
+ }
+
apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size);
ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size);
+ if (!cgraph || apir_decoder_get_fatal(&secondary_dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to deserialize computation graph\n", __func__);
+ return 1;
+ }
+
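+ // n_nodes/n_leafs were deserialized from guest-controlled memory, so reject
+ // corrupted values before walking the graph.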
+ if (cgraph->n_nodes < 0 || cgraph->n_leafs < 0) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid negative node/leaf count: nodes=%d leafs=%d\n", __func__,
+ cgraph->n_nodes, cgraph->n_leafs);
+ return 1;
+ }
+
ggml_status status;
#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1
for (int idx = 0; idx < cgraph->n_nodes; idx++) {
ggml_tensor * op = cgraph->nodes[idx];
if (dev->iface.supports_op(dev, op)) {
continue;
}
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx,
+ ggml_op_desc(op));
status = GGML_STATUS_ABORTED;
apir_encode_ggml_status(enc, &status);
return 0;
}
#endif
+
+ // Check if backend is properly initialized
+ if (!bck) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Backend not initialized (bck is null)\n", __func__);
+
+ return 1;
+ }
+
status = bck->iface.graph_compute(bck, cgraph);
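+ // iface.synchronize is optional in the ggml backend interface; only call it
+ // when the backend actually provides one.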
- if (async_backend) {
+ if (async_backend && bck->iface.synchronize) {
bck->iface.synchronize(bck);
}
const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec);
- size_t value = buft->iface.get_alloc_size(buft, op);
+ // Check for decode error
+ if (op == nullptr) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Failed to decode tensor\n", __func__);
+ apir_decoder_set_fatal(dec);
+ return 1;
+ }
+
+ size_t value;
+ if (buft->iface.get_alloc_size) {
+ value = buft->iface.get_alloc_size(buft, op);
+ } else {
+ value = ggml_nbytes(op); // Default fallback
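+ // (ggml-backend itself defaults to ggml_nbytes() when a buffer type does
+ // not implement get_alloc_size, so this mirrors the upstream behavior)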
+ }
apir_encode_size_t(enc, &value);
#include <cstdint>
+static uint32_t validate_buffer_operation(size_t offset, size_t size, const char * operation) {
+ // Only check for critical integer overflow - no arbitrary size limits
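+ // (a wrapped offset + size could otherwise defeat the downstream bounds checks)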
+ if (offset > SIZE_MAX - size) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Integer overflow in offset+size: %zu + %zu\n", operation, offset, size);
+ return 1;
+ }
+
+ return 0; // Valid
+}
+
uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer);
apir_encode_uintptr_t(enc, &base);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
ggml_tensor * tensor;
// safe to remove the const qualifier here
tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec);
size_t size;
apir_decode_size_t(dec, &size);
+ if (validate_buffer_operation(offset, size, __func__) != 0) {
+ return 1;
+ }
+
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
const ggml_tensor * tensor;
// safe to remove the const qualifier here
tensor = apir_decode_ggml_tensor(dec);
size_t size;
apir_decode_size_t(dec, &size);
+ if (validate_buffer_operation(offset, size, __func__) != 0) {
+ return 1;
+ }
+
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
const ggml_tensor * src;
// safe to remove the const qualifier here
src = apir_decode_ggml_tensor(dec);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
uint8_t value;
apir_decode_uint8_t(dec, &value);
ggml_backend_buffer_t buffer;
buffer = apir_decode_ggml_buffer(dec);
+ if (!buffer || apir_decoder_get_fatal(dec)) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Invalid buffer handle from guest\n", __func__);
+ return 1;
+ }
+
if (!apir_untrack_backend_buffer(buffer)) {
GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer);
return 1;
#include "backend-dispatched.h"
-#include "backend-virgl-apir.h"
+#include "backend-virgl-apir.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
}
- if (!reg->iface.get_device_count(reg)) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__);
+ size_t device_count = reg->iface.get_device_count(reg);
+ if (!device_count) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: no device found\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
dev = reg->iface.get_device(reg, 0);
if (!dev) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: failed to get device\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
bck = dev->iface.init_backend(dev, NULL);
+ if (!bck) {
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed\n", __func__);
+ return APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED;
+ }
return APIR_BACKEND_INITIALIZE_SUCCESS;
}
/* backend */
uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
-static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) {
- switch (type) {
- /* device */
- case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
- return "backend_device_get_device_count";
- case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
- return "backend_device_get_count";
- case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
- return "backend_device_get_name";
- case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
- return "backend_device_get_description";
- case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
- return "backend_device_get_type";
- case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
- return "backend_device_get_memory";
- case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
- return "backend_device_supports_op";
- case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
- return "backend_device_get_buffer_type";
- case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
- return "backend_device_get_props";
- case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
- return "backend_device_buffer_from_ptr";
- /* buffer-type */
- case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
- return "backend_buffer_type_get_name";
- case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
- return "backend_buffer_type_get_alignment";
- case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
- return "backend_buffer_type_get_max_size";
- case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
- return "backend_buffer_type_is_host (DEPRECATED)";
- case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
- return "backend_buffer_type_alloc_buffer";
- case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
- return "backend_buffer_type_get_alloc_size";
- /* buffer */
- case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
- return "backend_buffer_get_base";
- case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
- return "backend_buffer_set_tensor";
- case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
- return "backend_buffer_get_tensor";
- case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
- return "backend_buffer_cpy_tensor";
- case APIR_COMMAND_TYPE_BUFFER_CLEAR:
- return "backend_buffer_clear";
- case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
- return "backend_buffer_free_buffer";
- /* backend */
- case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
- return "backend_backend_graph_compute";
-
- default:
- return "unknown";
- }
-}
-
extern "C" {
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {
#pragma once
+// clang-format off
#include <cstdint>
#include <cstddef>
#include "shared/apir_backend.h"
#include "shared/apir_cs.h"
#include "shared/apir_cs_ggml.h"
+// clang-format on
#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: "
};
extern "C" {
-ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs);
+ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs);
void apir_backend_deinit(uint32_t virgl_ctx_id);
uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id,
virgl_apir_callbacks * virgl_cbs,
#include "backend-dispatched.h"
#include "backend-virgl-apir.h"
-
#include "shared/api_remoting.h"
#include "shared/apir_backend.h"
#include "shared/apir_cs.h"
#define GGML_DEFAULT_BACKEND_REG "ggml_backend_init"
static void * backend_library_handle = NULL;
-static FILE * apir_logfile = NULL;
+static FILE * apir_logfile = NULL;
static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) {
- FILE * logfile = (FILE *)user_data;
+ FILE * logfile = (FILE *) user_data;
fprintf(logfile, "[%d] %s", level, text);
fflush(logfile);
}
}
#define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path"
-#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg"
+#define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg"
-ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) {
+ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks * virgl_cbs) {
const char * dlsym_error;
const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV);
}
}
- const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY);
+ const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY);
const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY);
- const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
+ const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
if (!library_name) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK
- "%s: cannot open the GGML library: env var '%s' not defined\n",
- __func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
-
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: env var '%s' not defined\n", __func__,
+ APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
backend_library_handle = dlopen(library_name, RTLD_LAZY);
if (!backend_library_handle) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK
- "%s: cannot open the GGML library: %s\n", __func__, dlerror());
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot open the GGML library: %s\n", __func__, dlerror());
return APIR_LOAD_LIBRARY_CANNOT_OPEN;
}
if (!library_reg) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK
- "%s: cannot register the GGML library: env var '%s' not defined\n",
- __func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot register the GGML library: env var '%s' not defined\n", __func__,
+ APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
dlsym_error = dlerror();
if (dlsym_error) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK
- "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
__func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
-
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
}
virgl_apir_context ctx = {
.ctx_id = virgl_ctx_id,
- .iface = virgl_cbs,
+ .iface = virgl_cbs,
};
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
- GGML_LOG_ERROR(GGML_VIRTGPU_BCK
- "%s: Received an invalid dispatch index (%d >= %d)\n",
- __func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Received an invalid dispatch index (%d >= %d)\n", __func__, cmd_type,
+ APIR_BACKEND_DISPATCH_TABLE_COUNT);
return APIR_BACKEND_FORWARD_INDEX_INVALID;
}
APIR_COMMAND_TYPE_LOADLIBRARY = 1,
APIR_COMMAND_TYPE_FORWARD = 2,
- APIR_COMMAND_TYPE_LENGTH = 3,
+ APIR_COMMAND_TYPE_LENGTH = 3,
};
typedef uint64_t ApirCommandFlags;
enum ApirLoadLibraryReturnCode {
APIR_LOAD_LIBRARY_SUCCESS = 0,
+ // these error codes are returned by the Virglrenderer APIR component
APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1,
APIR_LOAD_LIBRARY_ALREADY_LOADED = 2,
APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3,
APIR_LOAD_LIBRARY_CANNOT_OPEN = 4,
APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5,
- APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code
+ // any value greater than this is an APIR *backend library* initialization return code
+ APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6,
};
enum ApirForwardReturnCode {
- APIR_FORWARD_SUCCESS = 0,
- APIR_FORWARD_NO_DISPATCH_FCT = 1,
- APIR_FORWARD_TIMEOUT = 2,
-
- APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code
-} ;
+ APIR_FORWARD_SUCCESS = 0,
+ // these error codes are returned by the Virglrenderer APIR component
+ APIR_FORWARD_NO_DISPATCH_FCT = 1,
+ APIR_FORWARD_TIMEOUT = 2,
+ APIR_FORWARD_FAILED_TO_SYNC_STREAMS = 3,
+ // any value greater than this is an APIR *backend library* forward return code
+ APIR_FORWARD_BASE_INDEX = 4,
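+ // NOTE: these values are shared between the guest driver and the host library;
+ // renumbering them (e.g. moving APIR_FORWARD_BASE_INDEX) is a wire-protocol break.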
+};
__attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) {
switch (type) {
APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS);
APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT);
APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT);
+ APIR_FORWARD_ERROR(APIR_FORWARD_FAILED_TO_SYNC_STREAMS);
APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX);
return "Unknown APIR_COMMAND_TYPE_FORWARD error";
// last command_type index + 1
APIR_BACKEND_DISPATCH_TABLE_COUNT = 23,
} ApirBackendCommandType;
+
+static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {
+ switch (type) {
+ /* device */
+ case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT:
+ return "device_get_device_count";
+ case APIR_COMMAND_TYPE_DEVICE_GET_COUNT:
+ return "device_get_count";
+ case APIR_COMMAND_TYPE_DEVICE_GET_NAME:
+ return "device_get_name";
+ case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION:
+ return "device_get_description";
+ case APIR_COMMAND_TYPE_DEVICE_GET_TYPE:
+ return "device_get_type";
+ case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY:
+ return "device_get_memory";
+ case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP:
+ return "device_supports_op";
+ case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE:
+ return "device_get_buffer_type";
+ case APIR_COMMAND_TYPE_DEVICE_GET_PROPS:
+ return "device_get_props";
+ case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR:
+ return "device_buffer_from_ptr";
+ /* buffer-type */
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME:
+ return "buffer_type_get_name";
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT:
+ return "buffer_type_get_alignment";
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
+ return "buffer_type_get_max_size";
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
+ return "buffer_type_is_host";
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
+ return "buffer_type_alloc_buffer";
+ case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
+ return "buffer_type_get_alloc_size";
+ /* buffer */
+ case APIR_COMMAND_TYPE_BUFFER_GET_BASE:
+ return "buffer_get_base";
+ case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR:
+ return "buffer_set_tensor";
+ case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR:
+ return "buffer_get_tensor";
+ case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR:
+ return "buffer_cpy_tensor";
+ case APIR_COMMAND_TYPE_BUFFER_CLEAR:
+ return "buffer_clear";
+ case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER:
+ return "buffer_free_buffer";
+ /* backend */
+ case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE:
+ return "backend_graph_compute";
+
+ default:
+ return "unknown";
+ }
+}
#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6
#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7
#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8
-
+#define APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED 9
// new entries here need to be added to the apir_backend_initialize_error function below
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS);
APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED);
+ APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED);
+ APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_ALREADY_INITED);
+ APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_NO_DEVICE);
+ APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_INIT_FAILED);
return "Unknown APIR_BACKEND_INITIALIZE error:/";
const char * start;
const char * end;
bool fatal;
-
};
struct apir_decoder {
static apir_decoder apir_new_decoder(const char * ptr, size_t size) {
apir_decoder dec = {
- .cur = ptr,
- .end = ptr + size,
+ .cur = ptr,
+ .end = ptr + size,
.fatal = false,
};
* encode peek
*/
-static inline bool apir_decoder_peek_internal(apir_decoder * dec,
- size_t size,
- void * val,
- size_t val_size) {
+static inline bool apir_decoder_peek_internal(apir_decoder * dec, size_t size, void * val, size_t val_size) {
assert(val_size <= size);
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
size_t alloc_size;
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
- GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n",
- __func__, size, count);
+ GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n", __func__, size, count);
return NULL;
}
/* apir_buffer_type_host_handle_t */
-static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc,
+static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc,
const apir_buffer_type_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}
-static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec,
+static inline void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec,
apir_buffer_type_host_handle_t * val) {
apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t));
}
/* apir_buffer_host_handle_t */
-static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc,
- const apir_buffer_host_handle_t * val) {
+static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, const apir_buffer_host_handle_t * val) {
apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t));
}
-#include "ggml-impl.h"
#include "apir_cs.h"
#include "apir_cs_rpc.h"
+#include "ggml-impl.h"
// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);
-static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc,
- const apir_buffer_host_handle_t * handle);
+static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle);
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}
-static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec,
- uint32_t n_tensors) {
+static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, uint32_t n_tensors) {
size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors;
return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size);
}
ggml_init_params params{
- /*.mem_size =*/ ggml_tensor_overhead(),
- /*.mem_buffer =*/ NULL,
- /*.no_alloc =*/ true,
+ /*.mem_size =*/ggml_tensor_overhead(),
+ /*.mem_buffer =*/NULL,
+ /*.no_alloc =*/true,
};
ggml_context * ctx = ggml_init(params);
apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size);
+ // SECURITY: Validate buffer handle against tracked buffers to prevent
+ // guest VM from providing arbitrary host memory addresses
+ if (buffer) {
+ extern std::unordered_set<ggml_backend_buffer_t> backend_buffers;
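+ // backend_buffers is the host-side set of live buffer handles, maintained by
+ // apir_untrack_backend_buffer (and, presumably, a matching tracking call at
+ // buffer-allocation time).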
+ if (backend_buffers.find(buffer) == backend_buffers.end()) {
+ GGML_LOG_WARN("ggml-virtgpu-backend: %s: Invalid buffer handle from guest: %p\n", __func__,
+ (void *) buffer);
+ // Set fatal flag to prevent further processing with invalid handle
+ apir_decoder_set_fatal(dec);
+ return NULL;
+ }
+ }
+
return buffer;
}
+#pragma once
+
+// clang-format off
#include "ggml.h"
#include "ggml-backend-impl.h"
#include <unordered_set>
#include <vector>
#include <cstdint>
+// clang-format on
// ggml_tensor is serialized into apir_rpc_tensor
struct apir_rpc_tensor {
static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
+ // Return the prefixed name that was built once during initialization
return gpu->cached_buffer_type.name;
}
const ggml_tensor * tensor) {
virtgpu * gpu = BUFT_TO_GPU(buft);
- if (tensor->buffer == NULL
- || !tensor->buffer->context
- || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
+ if (tensor->buffer == NULL || !tensor->buffer->context ||
+ !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) {
return ggml_nbytes(tensor);
}
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
+ // Return the prefixed name that was built once during initialization
return gpu->cached_device_info.name;
}
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
virtgpu * gpu = DEV_TO_GPU(dev);
- *free = gpu->cached_device_info.memory_free;
+ *free = gpu->cached_device_info.memory_free;
*total = gpu->cached_device_info.memory_total;
}
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- static std::atomic<bool> initialized = false;
+ static std::atomic<bool> initialized = false;
static ggml_backend_buffer_type buft;
if (!initialized) {
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- static std::atomic<bool> initialized = false;
+ static std::atomic<bool> initialized = false;
static ggml_backend_buffer_type buft;
if (!initialized) {
void ggml_virtgpu_cleanup(virtgpu * gpu);
static virtgpu * apir_initialize() {
- static virtgpu * gpu = NULL;
- static std::atomic<bool> initialized = false;
+ static virtgpu * gpu = NULL;
+ static std::atomic<bool> initialized = false;
if (initialized) {
// fast track
}
// Pre-fetch and cache all device information; it will not change
- gpu->cached_device_info.description = apir_device_get_description(gpu);
+ gpu->cached_device_info.description = apir_device_get_description(gpu);
if (!gpu->cached_device_info.description) {
GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__);
}
- gpu->cached_device_info.name = apir_device_get_name(gpu);
- if (!gpu->cached_device_info.name) {
- GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__);
- }
gpu->cached_device_info.device_count = apir_device_get_count(gpu);
gpu->cached_device_info.type = apir_device_get_type(gpu);
- apir_device_get_memory(gpu,
- &gpu->cached_device_info.memory_free,
- &gpu->cached_device_info.memory_total);
+ {
+ // Get the remote name and create prefixed version
+ char * rmt_device_name = apir_device_get_name(gpu);
+ if (!rmt_device_name) {
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu device name", __func__);
+ }
+
+ size_t device_name_len = strlen(rmt_device_name) + 11; // "[virtgpu] " + null terminator
+ gpu->cached_device_info.name = (char *) malloc(device_name_len);
+ if (!gpu->cached_device_info.name) {
+ free(rmt_device_name);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed device name", __func__);
+ }
+ snprintf(gpu->cached_device_info.name, device_name_len, "[virtgpu] %s", rmt_device_name);
+ free(rmt_device_name);
+ }
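+ // NOTE: the cached name strings are heap-allocated from here on;
+ // ggml_virtgpu_cleanup should free them to avoid a (one-time) leak.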
+
+ apir_device_get_memory(gpu, &gpu->cached_device_info.memory_free, &gpu->cached_device_info.memory_total);
apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu);
gpu->cached_buffer_type.host_handle = buft_host_handle;
- gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle);
- if (!gpu->cached_buffer_type.name) {
- GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__);
+ {
+ // Get the remote name and create prefixed version
+ char * rmt_name = apir_buffer_type_get_name(gpu, buft_host_handle);
+ if (!rmt_name) {
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to get the virtgpu buffer type name", __func__);
+ }
+
+ size_t prefixed_len = strlen(rmt_name) + 11; // "[virtgpu] " + null terminator
+ gpu->cached_buffer_type.name = (char *) malloc(prefixed_len);
+ if (!gpu->cached_buffer_type.name) {
+ free(rmt_name);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to allocate memory for prefixed buffer type name", __func__);
+ }
+ snprintf(gpu->cached_buffer_type.name, prefixed_len, "[virtgpu] %s", rmt_name);
+ free(rmt_name);
}
- gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle);
- gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle);
+
+ gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle);
+ gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle);
initialized = true;
}
static std::atomic<bool> initialized = false;
if (initialized) {
- return; // fast track
+ return; // fast track
}
{
-#include "ggml-remoting.h"
#include "../../include/ggml-virtgpu.h"
+#include "ggml-remoting.h"
static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) {
UNUSED(backend);
#include <string>
#define GGML_VIRTGPU_NAME "ggml-virtgpu"
-#define GGML_VIRTGPU "ggml-virtgpu: "
+#define GGML_VIRTGPU "ggml-virtgpu: "
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoids micro-benchmark crashes
#include <stdint.h>
struct virgl_renderer_capset_apir {
- uint32_t apir_version;
- uint32_t supports_blob_resources;
- uint32_t reserved[4]; // For future expansion
+ uint32_t apir_version;
+ uint32_t supports_blob_resources;
+ uint32_t reserved[4]; // For future expansion
};
enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},")
enum_lines.append("} ApirBackendCommandType;")
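+ # Emit the apir_dispatch_command_name() helper into the generated header so
+ # that both sides of the channel resolve command indices to the same names.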
+ # Generate function name mapping
+ func_lines = []
+ func_lines.append("static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {")
+ func_lines.append(" switch (type) {")
+
+ current_group = None
+ for func in functions:
+ # Add comment for new group
+ if func['group_name'] != current_group:
+ func_lines.append(f" /* {func['group_description']} */")
+ current_group = func['group_name']
+
+ # Generate clean function name without backend_ prefix
+ clean_name = f"{func['group_name']}_{func['function_name']}"
+ func_lines.append(f" case {func['enum_name']}:")
+ func_lines.append(f" return \"{clean_name}\";")
+
+ func_lines.append("")
+ func_lines.append(" default:")
+ func_lines.append(" return \"unknown\";")
+ func_lines.append(" }")
+ func_lines.append("}")
+
# Full header template
- header_content = NL.join(enum_lines) + "\n"
+ header_content = NL.join(enum_lines) + "\n\n" + NL.join(func_lines) + "\n"
return header_content
decl_lines.append(f"{signature} {func['backend_function']}({params});")
- # Switch cases
- switch_lines = []
- current_group = None
-
- for func in functions:
- if func['group_name'] != current_group:
- switch_lines.append(f" /* {func['group_description']} */")
- current_group = func['group_name']
-
- deprecated = " (DEPRECATED)" if func['deprecated'] else ""
-
- switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";")
-
# Dispatch table
table_lines = []
current_group = None
{NL.join(decl_lines)}
-static inline const char *backend_dispatch_command_name(ApirBackendCommandType type)
-{{
- switch (type) {{
-{NL.join(switch_lines)}
-
- default: return "unknown";
- }}
-}}
-
extern "C" {{
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{
{NL.join(table_lines)}
size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
virtgpu_shmem temp_shmem; // Local storage for large buffers
- virtgpu_shmem * shmem = &temp_shmem;
- bool using_shared_shmem = false;
+ virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
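+ // Small payloads reuse the pre-allocated data_shmem region (serialized by
+ // data_shmem_mutex); larger payloads get a dedicated one-shot shmem below.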
if (cgraph_size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
}
using_shared_shmem = true;
- shmem = &gpu->data_shmem;
+ shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
}
return max_size;
}
-apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) {
+apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu,
+ apir_buffer_type_host_handle_t host_handle,
+ size_t size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
return buffer_context;
}
-size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) {
+size_t apir_buffer_type_get_alloc_size(virtgpu * gpu,
+ apir_buffer_type_host_handle_t host_handle,
+ const ggml_tensor * op) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers
- virtgpu_shmem * shmem = &temp_shmem;
- bool using_shared_shmem = false;
+ virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
}
using_shared_shmem = true;
- shmem = &gpu->data_shmem;
+ shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
apir_encode_ggml_tensor(encoder, tensor);
virtgpu_shmem temp_shmem; // Local storage for large buffers
- virtgpu_shmem * shmem = &temp_shmem;
- bool using_shared_shmem = false;
+ virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) {
// Lock mutex before using shared data_shmem buffer
GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
}
using_shared_shmem = true;
- shmem = &gpu->data_shmem;
+ shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
- char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
+ char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
return NULL;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
- GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate %ldb of guest-host shared buffer", __func__, size);
}
apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
-#include "virtgpu.h"
+#pragma once
+// clang-format off
+#include "virtgpu.h"
#include "ggml-remoting.h"
#include "backend/shared/apir_backend.h"
#include "backend/shared/apir_cs_ggml.h"
-
#include "ggml-backend-impl.h"
+// clang-format on
-#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
- do { \
- int32_t forward_flag = (int32_t) apir_command_type__; \
- encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \
- if (!encoder_name) { \
- GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \
- } \
+#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
+ int32_t REMOTE_CALL_PREPARE_forward_flag = (int32_t) apir_command_type__; \
+ const char * REMOTE_CALL_PREPARE_command_name = apir_dispatch_command_name(apir_command_type__); \
+ do { \
+ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, REMOTE_CALL_PREPARE_forward_flag); \
+ if (!encoder_name) { \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \
+ } \
} while (0)
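+// NOTE: REMOTE_CALL_PREPARE deliberately declares REMOTE_CALL_PREPARE_command_name
+// outside its do-while so that the REMOTE_CALL() error path below can report the
+// name of the failed command.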
-#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
- do { \
- ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
- if (!decoder_name) { \
- GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \
- } \
- if (ret_name < APIR_FORWARD_BASE_INDEX) { \
- GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \
- apir_forward_error(ret_name), ret_name); \
- } \
- ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
+#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
+ do { \
+ ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
+ if (!decoder_name) { \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \
+ } \
+ if (ret_name < APIR_FORWARD_BASE_INDEX) { \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \
+ apir_forward_error(ret_name), ret_name); \
+ } \
+ ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
+ if (ret_name != 0) { \
+ GGML_ABORT(GGML_VIRTGPU "backend function '%s' failed (return code: %d)", \
+ REMOTE_CALL_PREPARE_command_name, ret_name); \
+ } \
} while (0)
char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+/* apir_buffer_type_is_host is deprecated. */
apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
apir_buffer_type_host_handle_t host_handle,
size_t size);
if (!decoder) {
GGML_ABORT(GGML_VIRTGPU
- "%s: failed to initiate the communication with the virglrenderer library. "
- "Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
- __func__);
+ "%s: failed to initiate the communication with the virglrenderer library. "
+ "Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
+ __func__);
return 1;
}
uint32_t host_minor;
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
+ GGML_ABORT(GGML_VIRTGPU "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
apir_backend_initialize_error(ret_magic));
} else {
apir_decode_uint32_t(decoder, &host_major);
"Make sure virglrenderer is correctly configured by the hypervisor. (%s) ",
__func__, apir_load_library_error(ret));
} else {
- GGML_ABORT(GGML_VIRTGPU
- "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__,
- apir_load_library_error(ret), ret);
+ GGML_ABORT(GGML_VIRTGPU "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)",
+ __func__, apir_load_library_error(ret), ret);
}
return ret;
}
- GGML_LOG_INFO(GGML_VIRTGPU
- "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
+ GGML_LOG_INFO(GGML_VIRTGPU "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
"Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
__func__, apir_load_library_error(apir_ret));
} else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
- "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
- __func__, apir_load_library_error(apir_ret));
+ GGML_ABORT(
+ GGML_VIRTGPU
+ "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(apir_ret));
} else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
GGML_ABORT(GGML_VIRTGPU
"%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)",
} else {
uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
GGML_ABORT(GGML_VIRTGPU
- "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
- lib_ret);
+ "%s: the API Remoting backend library failed to initialize its backend library: apir code=%d)",
+ __func__, lib_ret);
}
return ret;
}
// Initialize mutex to protect shared data_shmem buffer
if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) {
delete gpu;
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to initialize data_shmem mutex", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize data_shmem mutex", __func__);
return NULL;
}
if (virtgpu_open(gpu) != APIR_SUCCESS) {
- GGML_LOG_ERROR(GGML_VIRTGPU
- "%s: failed to open the virtgpu device\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to open the virtgpu device\n", __func__);
return NULL;
}
if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
if (gpu->use_apir_capset) {
GGML_ABORT(GGML_VIRTGPU
- "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__);
+ "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library "
+ "supports it.",
+ __func__);
} else {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to initialize the virtgpu Venus capset", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu Venus capset", __func__);
}
return NULL;
}
if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to initialize the GPU context", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the GPU context", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to create the shared reply memory pages", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared reply memory pages", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to create the shared data memory pages", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to create the shared data memory pages", __func__);
return NULL;
}
if (virtgpu_handshake(gpu)) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to handshake with the virglrenderer library", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to handshake with the virglrenderer library", __func__);
return NULL;
}
if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to load the backend library", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to load the backend library", __func__);
return NULL;
}
drmDevicePtr devs[8];
int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
if (count < 0) {
- GGML_LOG_ERROR(GGML_VIRTGPU
- "%s: failed to enumerate DRM devices\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to enumerate DRM devices\n", __func__);
return APIR_ERROR_INITIALIZATION_FAILED;
}
int fd = open(node_path, O_RDWR | O_CLOEXEC);
if (fd < 0) {
- GGML_ABORT(GGML_VIRTGPU
- "%s: failed to open %s", __func__, node_path);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to open %s", __func__, node_path);
return APIR_ERROR_INITIALIZATION_FAILED;
}
drmVersionPtr version = drmGetVersion(fd);
if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
if (version) {
- GGML_LOG_ERROR(GGML_VIRTGPU
- "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: unknown DRM driver %s version %d\n", __func__, version->name,
+ version->version_major);
} else {
- GGML_LOG_ERROR(GGML_VIRTGPU
- "%s: failed to get DRM driver version\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get DRM driver version\n", __func__);
}
if (version) {
virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
if (ret) {
- GGML_LOG_ERROR(GGML_VIRTGPU
- "%s: failed to get APIR v%d capset: %s\n",
- __func__, gpu->capset.version, strerror(errno));
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to get APIR v%d capset: %s\n", __func__, gpu->capset.version,
+ strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED;
}
double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds
if (call_duration_s > 1) {
- GGML_LOG_INFO(GGML_VIRTGPU
- "waited %.2fs for the %s host reply...\n", call_duration_s, name);
+ GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fs for the %s host reply...\n", call_duration_s, name);
} else if (call_duration_ms > 1) {
- GGML_LOG_INFO(GGML_VIRTGPU
- "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
+ GGML_LOG_INFO(GGML_VIRTGPU "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
} else {
- GGML_LOG_INFO(GGML_VIRTGPU
- "waited %lldns for the %s host reply...\n", call_duration_ns, name);
+ GGML_LOG_INFO(GGML_VIRTGPU "waited %lldns for the %s host reply...\n", call_duration_ns, name);
}
}
#pragma once
+// clang-format off
#include "virtgpu-utils.h"
#include "virtgpu-shm.h"
#include "virtgpu-apir.h"
#include "apir_hw.h"
#include <drm/virtgpu_drm.h>
#include "venus_hw.h"
+// clang-format on
#ifndef VIRTGPU_DRM_CAPSET_APIR
// Will be defined in include/drm/virtgpu_drm.h once
// https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs
// is merged
-#define VIRTGPU_DRM_CAPSET_APIR 10
+# define VIRTGPU_DRM_CAPSET_APIR 10
#endif
// Mesa/Virglrenderer Venus internal. Only necessary during the
// Venus->APIR transition in Virglrenderer
#define VENUS_COMMAND_TYPE_LENGTH 331
-#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16
-#define VIRTGPU_DRM_CAPSET_VENUS 4
+#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16
+# define VIRTGPU_DRM_CAPSET_VENUS 4
#endif
typedef uint32_t virgl_renderer_capset;