extern "C" {
#endif
-#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"
-
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();
#ifdef __cplusplus
result.data = reinterpret_cast<uint64_t>(tensor->data);
if (tensor->data) {
if (!tensor->buffer) {
- GGML_ABORT("tensor has data but not buffer");
+ GGML_ABORT("%s: tensor has data but not buffer", __func__);
}
// tensor->data is serialized as an offset to the buffer base address
result.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
- GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
apir_decoder_set_fatal(dec);
return 1;
}
if (dev->iface.supports_op(dev, op)) {
continue;
}
- GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op));
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Graph node %d (%s) not supported by the backend\n", __func__, idx, ggml_op_desc(op));
status = GGML_STATUS_ABORTED;
apir_encode_ggml_status(enc, &status);
ggml_backend_buffer_type_t buft;
buft = apir_decode_ggml_buffer_type(dec);
- size_t value = buft->iface.get_max_size(buft);
+ size_t value = SIZE_MAX;
+ if (buft->iface.get_max_size) {
+ value = buft->iface.get_max_size(buft);
+ }
+
apir_encode_size_t(enc, &value);
return 0;
}
+/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) {
GGML_UNUSED(ctx);
- ggml_backend_buffer_type_t buft;
- buft = apir_decode_ggml_buffer_type(dec);
+ GGML_UNUSED(dec);
+ const bool is_host = false;
- bool is_host = buft->iface.is_host(buft);
apir_encode_bool_t(enc, &is_host);
return 0;
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
- GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
return 1;
}
void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_data) {
- GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
return 1;
}
buffer = apir_decode_ggml_buffer(dec);
if (!apir_untrack_backend_buffer(buffer)) {
- GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, (void *) buffer);
+ GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: unknown buffer %p\n", __func__, (void *) buffer);
return 1;
}
void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id);
if (!shmem_ptr) {
- GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: Couldn't get the shmem addr from virgl\n", __func__);
apir_decoder_set_fatal(dec);
return 1;
}
uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) {
if (reg != NULL) {
- GGML_LOG_WARN("%s: already initialized\n", __func__);
+ GGML_LOG_WARN(GGML_VIRTGPU_BCK "%s: already initialized\n", __func__);
return APIR_BACKEND_INITIALIZE_ALREADY_INITED;
}
ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p;
reg = ggml_backend_reg_fct();
if (reg == NULL) {
- GGML_LOG_ERROR("%s: backend registration failed\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend registration failed\n", __func__);
return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED;
}
if (!reg->iface.get_device_count(reg)) {
- GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device found\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
dev = reg->iface.get_device(reg, 0);
if (!dev) {
- GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK "%s: backend initialization failed: no device received\n", __func__);
return APIR_BACKEND_INITIALIZE_NO_DEVICE;
}
uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
+/* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST is deprecated. Keeping the handler for backward compatibility. */
uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx);
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE:
return "backend_buffer_type_get_max_size";
case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST:
- return "backend_buffer_type_is_host";
+ return "backend_buffer_type_is_host (DEPRECATED)";
case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER:
return "backend_buffer_type_alloc_buffer";
case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE:
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size,
- /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host,
+ /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host /* DEPRECATED */,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer,
/* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size,
#include "shared/apir_cs.h"
#include "shared/apir_cs_ggml.h"
+#define GGML_VIRTGPU_BCK "ggml-virtgpu-backend: "
+
struct virgl_apir_context {
uint32_t ctx_id;
virgl_apir_callbacks * iface;
buffer->iface.free_buffer(buffer);
}
- if (dev) {
- size_t free, total;
- dev->iface.get_memory(dev, &free, &total);
- GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024);
- }
-
if (backend_library_handle) {
- GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__);
+ GGML_LOG_INFO(GGML_VIRTGPU_BCK "The GGML backend library was loaded. Unloading it.\n");
dlclose(backend_library_handle);
backend_library_handle = NULL;
}
if (apir_logfile) {
ggml_log_set(log_to_file_callback, apir_logfile);
} else {
- GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file);
+ GGML_LOG_INFO(GGML_VIRTGPU_BCK "Could not open the log file at '%s'\n", apir_log_to_file);
}
}
const char * library_reg = virgl_library_reg ? virgl_library_reg : GGML_DEFAULT_BACKEND_REG;
if (!library_name) {
- GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+ "%s: cannot open the GGML library: env var '%s' not defined\n",
+ __func__, APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV);
+
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
backend_library_handle = dlopen(library_name, RTLD_LAZY);
if (!backend_library_handle) {
- GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror());
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+ "%s: cannot open the GGML library: %s\n", __func__, dlerror());
return APIR_LOAD_LIBRARY_CANNOT_OPEN;
}
if (!library_reg) {
- GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+ "%s: cannot register the GGML library: env var '%s' not defined\n",
+ __func__, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV);
return APIR_LOAD_LIBRARY_ENV_VAR_MISSING;
}
void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg);
dlsym_error = dlerror();
if (dlsym_error) {
- GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg,
- APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+ "%s: cannot find the GGML backend registration symbol '%s' (from %s): %s\n",
+ __func__, library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error);
+
return APIR_LOAD_LIBRARY_SYMBOL_MISSING;
}
};
if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) {
- GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
+ GGML_LOG_ERROR(GGML_VIRTGPU_BCK
+ "%s: Received an invalid dispatch index (%d >= %d)\n",
+ __func__, cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT);
return APIR_BACKEND_FORWARD_INDEX_INVALID;
}
assert(val_size <= size);
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
- GGML_LOG_ERROR("reading too much from the decoder ...\n");
+ GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
apir_decoder_set_fatal(dec);
memset(val, 0, val_size);
return false;
static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) {
if (unlikely(size > (size_t) (dec->end - dec->cur))) {
- GGML_LOG_ERROR("reading too much from the decoder ...\n");
+ GGML_LOG_ERROR("%s: reading too much from the decoder ...\n", __func__);
apir_decoder_set_fatal(dec);
return NULL;
}
uint64_t size;
apir_decode_uint64_t(dec, &size);
if (size != expected_size) {
- GGML_LOG_ERROR("Couldn't decode array from the decoder\n");
+ GGML_LOG_ERROR("%s: Couldn't decode array from the decoder\n", __func__);
apir_decoder_set_fatal(dec);
size = 0;
}
if (size) {
val[size - 1] = '\0';
} else {
- GGML_LOG_ERROR("Couldn't decode the blog array\n");
+ GGML_LOG_ERROR("%s: Couldn't decode the blog array\n", __func__);
apir_decoder_set_fatal(dec);
}
}
static inline void * apir_decoder_alloc_array(size_t size, size_t count) {
size_t alloc_size;
if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) {
- GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count);
+ GGML_LOG_ERROR("%s: overflow in array allocation of %zu * %zu bytes\n",
+ __func__, size, count);
return NULL;
}
static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec);
+
+ if (!apir_rpc_tensor) {
+ return NULL;
+ }
+
ggml_init_params params{
/*.mem_size =*/ ggml_tensor_overhead(),
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ true,
};
+
ggml_context * ctx = ggml_init(params);
const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor);
return (ggml_backend_buffer_type_t) handle;
}
+static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) {
+ apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle));
+}
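Since this helper serializes the handle as raw bytes, guest and host must agree on its width. A compile-time guard could make that explicit (a sketch only, not part of this patch; the 64-bit assumption is inferred from the uint64_t casts used elsewhere in the serialization code):

#include <cstdint>

// Sketch: assumes the handle is meant to stay a fixed 64-bit value on the wire.
static_assert(sizeof(apir_buffer_type_host_handle_t) == sizeof(uint64_t),
              "apir_buffer_type_host_handle_t must stay 64-bit for guest/host wire compatibility");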
+
static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
apir_buffer_type_host_handle_t handle;
size_t tensor_size = sizeof(*tensor);
if (tensor->extra) {
- GGML_ABORT("Cannot pass tensors with extra");
+ GGML_ABORT("%s: Cannot pass tensors with extra", __func__);
}
if (tensor->src[0] && tensor->buffer) {
static int first = 1;
if (first) {
- GGML_LOG_WARN("Cannot pass tensors with src and buffer\n");
+ GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__);
first = 0;
}
}
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
if (!context) {
- GGML_ABORT("Couldn't allocate the buffer context ...");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
}
context->gpu = gpu;
context->base = context->apir_context.shmem.mmap_ptr;
context->is_from_ptr = true;
} else {
- context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size);
+ context->apir_context = apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size);
context->is_from_ptr = false;
context->base = NULL;
}
static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
- return apir_buffer_type_get_name(gpu, buft);
+ return gpu->cached_buffer_type.name;
}
static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
- static size_t align = 0;
-
- if (align == 0) {
- align = apir_buffer_type_get_alignment(gpu, buft);
- }
-
- return align;
+ return gpu->cached_buffer_type.alignment;
}
static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
virtgpu * gpu = BUFT_TO_GPU(buft);
- static size_t max_size = 0;
- if (max_size == 0) {
- max_size = apir_buffer_type_get_max_size(gpu, buft);
- }
-
- return max_size;
-}
-
-static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
- virtgpu * gpu = BUFT_TO_GPU(buft);
-
- return apir_buffer_type_is_host(gpu, buft);
+ return gpu->cached_buffer_type.max_size;
}
static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
return ggml_nbytes(tensor);
}
- return apir_buffer_type_get_alloc_size(gpu, buft, tensor);
+ return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor);
}
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- return apir_device_get_name(gpu);
+ return gpu->cached_device_info.name;
}
static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- return apir_device_get_description(gpu);
+ // Return the pre-cached description from the virtgpu structure
+ return gpu->cached_device_info.description;
}
static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- static enum ggml_backend_dev_type type;
- static bool has_type = false;
- if (!has_type) {
- has_type = true;
- type = (enum ggml_backend_dev_type) apir_device_get_type(gpu);
- }
-
- return type;
+ return (enum ggml_backend_dev_type) gpu->cached_device_info.type;
}
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
virtgpu * gpu = DEV_TO_GPU(dev);
- return apir_device_get_memory(gpu, free, total);
+ *free = gpu->cached_device_info.memory_free;
+ *total = gpu->cached_device_info.memory_total;
}
static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
-
- static ggml_backend_buffer_type buft{
- /* .iface = */ ggml_backend_remoting_buffer_type_interface,
- /* .device = */ dev,
- /* .context = */ (void *) ctx,
- };
+ static std::atomic<bool> initialized = false;
+ static ggml_backend_buffer_type buft;
+
+ if (!initialized) {
+ static std::mutex mutex;
+ std::lock_guard<std::mutex> lock(mutex);
+
+ if (!initialized) {
+ buft = {
+ /* .iface = */ ggml_backend_remoting_buffer_type_interface,
+ /* .device = */ dev,
+ /* .context = */ (void *) gpu->cached_buffer_type.host_handle,
+ };
+ initialized = true;
+ }
+ }
return &buft;
}
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
virtgpu * gpu = DEV_TO_GPU(dev);
- apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu);
-
- static ggml_backend_buffer_type buft{
- /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
- /* .device = */ dev,
- /* .context = */ (void *) ctx,
- };
+ static std::atomic<bool> initialized = false;
+ static ggml_backend_buffer_type buft;
+
+ if (!initialized) {
+ static std::mutex mutex;
+ std::lock_guard<std::mutex> lock(mutex);
+
+ if (!initialized) {
+ buft = {
+ /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
+ /* .device = */ dev,
+ /* .context = */ (void *) gpu->cached_buffer_type.host_handle,
+ };
+ initialized = true;
+ }
+ }
return &buft;
}
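For reference, both accessors above follow the classic double-checked locking idiom. A minimal standalone sketch of the same shape (names hypothetical, memory orders spelled out; the patch relies on the defaults, which are sequentially consistent and therefore also safe):

#include <atomic>
#include <mutex>

static std::atomic<bool> ready{false};
static std::mutex        init_mutex;
static int               singleton;  // stand-in for the buffer-type object

int & get_singleton() {
    if (!ready.load(std::memory_order_acquire)) {      // lock-free fast path
        std::lock_guard<std::mutex> lock(init_mutex);
        if (!ready.load(std::memory_order_relaxed)) {  // re-check under the lock
            singleton = 42;                            // one-time initialization
            ready.store(true, std::memory_order_release);
        }
    }
    return singleton;
}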
ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context));
if (!context) {
- GGML_ABORT("Couldn't allocate the buffer context ...");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
}
context->gpu = gpu;
#include <iostream>
#include <mutex>
+void ggml_virtgpu_cleanup(virtgpu * gpu);
+
static virtgpu * apir_initialize() {
- static virtgpu * apir_gpu_instance = NULL;
- static bool apir_initialized = false;
+ static virtgpu * gpu = NULL;
+ static std::atomic<bool> initialized = false;
+
+ if (initialized) {
+ // fast path: already initialized
+ return gpu;
+ }
{
static std::mutex mutex;
std::lock_guard<std::mutex> lock(mutex);
- if (apir_initialized) {
- return apir_gpu_instance;
+ if (initialized) {
+ // re-check under the lock
+ return gpu;
+ }
+
+ gpu = create_virtgpu();
+ if (!gpu) {
+ initialized = true;
+ return NULL;
}
- apir_gpu_instance = create_virtgpu();
- if (!apir_gpu_instance) {
- GGML_ABORT("failed to initialize the virtgpu");
+ // Pre-fetch and cache all device information; it does not change afterwards
+ gpu->cached_device_info.description = apir_device_get_description(gpu);
+ if (!gpu->cached_device_info.description) {
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device description", __func__);
+ }
+ gpu->cached_device_info.name = apir_device_get_name(gpu);
+ if (!gpu->cached_device_info.name) {
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu device name", __func__);
}
+ gpu->cached_device_info.device_count = apir_device_get_count(gpu);
+ gpu->cached_device_info.type = apir_device_get_type(gpu);
+
+ apir_device_get_memory(gpu,
+ &gpu->cached_device_info.memory_free,
+ &gpu->cached_device_info.memory_total);
+
+ apir_buffer_type_host_handle_t buft_host_handle = apir_device_get_buffer_type(gpu);
+ gpu->cached_buffer_type.host_handle = buft_host_handle;
+ gpu->cached_buffer_type.name = apir_buffer_type_get_name(gpu, buft_host_handle);
+ if (!gpu->cached_buffer_type.name) {
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to initialize the virtgpu buffer type name", __func__);
+ }
+ gpu->cached_buffer_type.alignment = apir_buffer_type_get_alignment(gpu, buft_host_handle);
+ gpu->cached_buffer_type.max_size = apir_buffer_type_get_max_size(gpu, buft_host_handle);
- apir_initialized = true;
+ initialized = true;
}
- return apir_gpu_instance;
+ return gpu;
}
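With the cache populated once under the init lock, every device query in the hot path becomes a plain struct read instead of a guest-to-host round trip. A hypothetical consumer (function name is mine) would look like:

#include <cstdio>

static void print_device_summary(const virtgpu * gpu) {
    std::printf("%s (%s): %zu/%zu MB free\n",
                gpu->cached_device_info.name,
                gpu->cached_device_info.description,
                gpu->cached_device_info.memory_free / (1024 * 1024),
                gpu->cached_device_info.memory_total / (1024 * 1024));
}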
static int ggml_backend_remoting_get_device_count() {
virtgpu * gpu = apir_initialize();
if (!gpu) {
- GGML_LOG_WARN("apir_initialize failed\n");
return 0;
}
- return apir_device_get_count(gpu);
+ return gpu->cached_device_info.device_count;
}
static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) {
static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) {
if (devices.size() > 0) {
- GGML_LOG_INFO("%s: already initialized\n", __func__);
+ GGML_LOG_INFO(GGML_VIRTGPU "%s: already initialized\n", __func__);
return;
}
virtgpu * gpu = apir_initialize();
if (!gpu) {
- GGML_LOG_ERROR("apir_initialize failed\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: apir_initialize failed\n", __func__);
return;
}
- static bool initialized = false;
+ static std::atomic<bool> initialized = false;
+
+ if (initialized) {
+ return; // fast path: already initialized
+ }
{
static std::mutex mutex;
if (!initialized) {
for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) {
ggml_backend_remoting_device_context * ctx = new ggml_backend_remoting_device_context;
- char desc[256] = "API Remoting device";
+ char desc[256] = "ggml-virtgpu API Remoting device";
ctx->device = i;
- ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i);
+ ctx->name = GGML_VIRTGPU_NAME + std::to_string(i);
ctx->description = desc;
ctx->gpu = gpu;
static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) {
UNUSED(reg);
- return GGML_REMOTING_FRONTEND_NAME;
+ return GGML_VIRTGPU_NAME;
}
static const ggml_backend_reg_i ggml_backend_remoting_reg_i = {
ggml_backend_reg_t ggml_backend_virtgpu_reg() {
virtgpu * gpu = apir_initialize();
if (!gpu) {
- GGML_LOG_ERROR("virtgpu_apir_initialize failed\n");
- return NULL;
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_apir_initialize failed\n", __func__);
}
static ggml_backend_reg reg = {
ggml_backend_remoting_reg_init_devices(®);
- GGML_LOG_INFO("%s: initialized\n", __func__);
-
return ®
}
+// Public function; not exposed through the GGML interface at the moment.
+void ggml_virtgpu_cleanup(virtgpu * gpu) {
+ if (gpu->cached_device_info.name) {
+ free(gpu->cached_device_info.name);
+ gpu->cached_device_info.name = NULL;
+ }
+ if (gpu->cached_device_info.description) {
+ free(gpu->cached_device_info.description);
+ gpu->cached_device_info.description = NULL;
+ }
+ if (gpu->cached_buffer_type.name) {
+ free(gpu->cached_buffer_type.name);
+ gpu->cached_buffer_type.name = NULL;
+ }
+
+ mtx_destroy(&gpu->data_shmem_mutex);
+}
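Nothing in this patch calls ggml_virtgpu_cleanup yet; the intent (my reading) is that whoever owns the virtgpu instance invokes it once, after the last remote call has completed:

// Hypothetical teardown path, assuming single ownership of the instance.
void embedder_shutdown(virtgpu * gpu) {
    if (gpu) {
        ggml_virtgpu_cleanup(gpu);  // frees the cached strings, destroys the shmem mutex
    }
}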
+
GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg)
#include <memory>
#include <string>
+#define GGML_VIRTGPU_NAME "ggml-virtgpu"
+#define GGML_VIRTGPU "ggml-virtgpu: "
+
// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoids micro-benchmark crashes
#define USE_ALWAYS_TRUE_SUPPORTS_OP 1
static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
if (!buffer->context) {
- GGML_ABORT("%s: no context available :/", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: no context available :/", __func__);
}
return BUFFER_TO_HOST_HANDLE(buffer);
}
frontend_return: "int"
get_name:
- frontend_return: "const char *"
+ frontend_return: "char *"
get_description:
- frontend_return: "const char *"
+ frontend_return: "char *"
get_type:
frontend_return: "uint32_t"
group_description: "buffer-type"
functions:
get_name:
- frontend_return: "const char *"
+ frontend_return: "char *"
frontend_extra_params:
- - "ggml_backend_buffer_type_t buft"
+ - "apir_buffer_type_host_handle_t host_handle"
get_alignment:
frontend_return: "size_t"
frontend_extra_params:
- - "ggml_backend_buffer_type_t buft"
+ - "apir_buffer_type_host_handle_t host_handle"
get_max_size:
frontend_return: "size_t"
frontend_extra_params:
- - "ggml_backend_buffer_type_t buft"
+ - "apir_buffer_type_host_handle_t host_handle"
is_host:
- frontend_return: "bool"
- frontend_extra_params:
- - "ggml_backend_buffer_type_t buft"
+ deprecated: true
alloc_buffer:
frontend_return: "apir_buffer_context_t"
frontend_extra_params:
- - "ggml_backend_buffer_type_t buffer_buft"
+ - "apir_buffer_type_host_handle_t host_handle"
- "size_t size"
get_alloc_size:
frontend_return: "size_t"
frontend_extra_params:
- - "ggml_backend_buffer_type_t buft"
+ - "apir_buffer_type_host_handle_t host_handle"
- "const ggml_tensor *op"
buffer:
'frontend_return': func_metadata.get('frontend_return', 'void'),
'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
'group_description': group_description,
- 'newly_added': func_metadata.get('newly_added', False)
+ 'deprecated': func_metadata.get('deprecated', False),
})
enum_value += 1
signature = "uint32_t"
params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"
+ if func['deprecated']:
+ decl_lines.append(f"/* {func['enum_name']} is deprecated. Keeping the handler for backward compatibility. */")
+
decl_lines.append(f"{signature} {func['backend_function']}({params});")
# Switch cases
switch_lines.append(f" /* {func['group_description']} */")
current_group = func['group_name']
- switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";")
+ deprecated = " (DEPRECATED)" if func['deprecated'] else ""
+
+ switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}{deprecated}\";")
# Dispatch table
table_lines = []
table_lines.append("")
current_group = func['group_name']
- table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},")
+ deprecated = " /* DEPRECATED */" if func['deprecated'] else ""
+ table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']}{deprecated},")
header_content = f'''\
#pragma once
decl_lines.append(f"/* {func['group_description']} */")
current_group = func['group_name']
+ if func['deprecated']:
+ decl_lines.append(f"/* {func['frontend_function']} is deprecated. */")
+ continue
+
# Build parameter list
params = [self.naming_patterns['frontend_base_param']]
params.extend(func['frontend_extra_params'])
generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path]
if not self.clang_format_available:
- logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted."
+ logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n"
" Install clang-format to enable automatic code formatting.")
else:
logging.info("\n🎨 Formatting files with clang-format...")
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
if (cgraph_size <= gpu->data_shmem.mmap_size) {
- // prefer the init-time allocated page, if large enough
+ // Lock mutex before using shared data_shmem buffer
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+ }
+ using_shared_shmem = true;
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
- GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
}
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
remote_call_finish(gpu, encoder, decoder);
- if (shmem != &gpu->data_shmem) {
+ // Release the shared page, or destroy the temporary one
+ if (using_shared_shmem) {
+ mtx_unlock(&gpu->data_shmem_mutex);
+ } else {
virtgpu_shmem_destroy(gpu, shmem);
}
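A possible follow-up, not part of this patch: wrapping the C11 mutex in a small RAII guard would keep the lock/unlock pairing correct even on early-return or abort paths. A sketch, assuming <threads.h> is already in scope for mtx_t:

class mtx_guard {
  public:
    explicit mtx_guard(mtx_t & m) : m_(m) {
        if (mtx_lock(&m_) != thrd_success) {
            GGML_ABORT(GGML_VIRTGPU "%s: failed to lock mutex", __func__);
        }
    }
    ~mtx_guard() { mtx_unlock(&m_); }
    mtx_guard(const mtx_guard &)             = delete;
    mtx_guard & operator=(const mtx_guard &) = delete;
  private:
    mtx_t & m_;
};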
#include "virtgpu-forward-impl.h"
-const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME);
- apir_encode_ggml_buffer_type(encoder, buft);
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
- GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
apir_decoder_set_fatal(decoder);
}
apir_decode_char_array(decoder, string, string_size);
return string;
}
-size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT);
- apir_encode_ggml_buffer_type(encoder, buft);
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
REMOTE_CALL(gpu, encoder, decoder, ret);
return alignment;
}
-size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
+size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE);
- apir_encode_ggml_buffer_type(encoder, buft);
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
REMOTE_CALL(gpu, encoder, decoder, ret);
return max_size;
}
-bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) {
- apir_encoder * encoder;
- apir_decoder * decoder;
- ApirForwardReturnCode ret;
-
- REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST);
-
- apir_encode_ggml_buffer_type(encoder, buft);
-
- REMOTE_CALL(gpu, encoder, decoder, ret);
-
- bool is_host;
- apir_decode_bool_t(decoder, &is_host);
-
- remote_call_finish(gpu, encoder, decoder);
-
- return is_host;
-}
-
-apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) {
+apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, size_t size) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER);
- apir_encode_ggml_buffer_type(encoder, buft);
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
apir_encode_size_t(encoder, &size);
return buffer_context;
}
-size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) {
+size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle, const ggml_tensor * op) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE);
- apir_encode_ggml_buffer_type(encoder, buft);
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
apir_encode_ggml_tensor_inline(encoder, op);
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) {
- // prefer the init-time allocated page, if large enough
+ // Lock mutex before using shared data_shmem buffer
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+ }
+ using_shared_shmem = true;
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
- GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
}
memcpy(shmem->mmap_ptr, data, size);
remote_call_finish(gpu, encoder, decoder);
- if (shmem != &gpu->data_shmem) {
+ // Release the shared page, or destroy the temporary one
+ if (using_shared_shmem) {
+ mtx_unlock(&gpu->data_shmem_mutex);
+ } else {
virtgpu_shmem_destroy(gpu, shmem);
}
virtgpu_shmem temp_shmem; // Local storage for large buffers
virtgpu_shmem * shmem = &temp_shmem;
+ bool using_shared_shmem = false;
if (size <= gpu->data_shmem.mmap_size) {
- // prefer the init-time allocated page, if large enough
+ // Lock mutex before using shared data_shmem buffer
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
+ }
+ using_shared_shmem = true;
shmem = &gpu->data_shmem;
} else if (virtgpu_shmem_create(gpu, size, shmem)) {
- GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
}
apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
remote_call_finish(gpu, encoder, decoder);
- if (shmem != &gpu->data_shmem) {
+ // Release the shared page, or destroy the temporary one
+ if (using_shared_shmem) {
+ mtx_unlock(&gpu->data_shmem_mutex);
+ } else {
virtgpu_shmem_destroy(gpu, shmem);
}
}
#include "virtgpu-shm.h"
int apir_device_get_count(virtgpu * gpu) {
- static int32_t dev_count = -1;
- if (dev_count != -1) {
- return dev_count;
- }
-
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
REMOTE_CALL(gpu, encoder, decoder, ret);
+ int32_t dev_count = -1;
apir_decode_int32_t(decoder, &dev_count);
remote_call_finish(gpu, encoder, decoder);
return dev_count;
}
-const char * apir_device_get_name(virtgpu * gpu) {
- static char * string = nullptr;
- if (string) {
- return string;
- }
+char * apir_device_get_name(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
REMOTE_CALL(gpu, encoder, decoder, ret);
const size_t string_size = apir_decode_array_size_unchecked(decoder);
- string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
+ char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
- GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
return NULL;
}
apir_decode_char_array(decoder, string, string_size);
return string;
}
-const char * apir_device_get_description(virtgpu * gpu) {
+char * apir_device_get_description(virtgpu * gpu) {
apir_encoder * encoder;
apir_decoder * decoder;
ApirForwardReturnCode ret;
const size_t string_size = apir_decode_array_size_unchecked(decoder);
char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
if (!string) {
- GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device description buffer\n", __func__);
return NULL;
}
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
- GGML_ABORT("Couldn't allocate the guest-host shared buffer");
+ GGML_ABORT(GGML_VIRTGPU "Couldn't allocate the guest-host shared buffer");
}
apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
int32_t forward_flag = (int32_t) apir_command_type__; \
encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \
if (!encoder_name) { \
- GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \
} \
} while (0)
do { \
ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
if (!decoder_name) { \
- GGML_ABORT("%s: failed to kick the remote call", __func__); \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \
} \
if (ret_name < APIR_FORWARD_BASE_INDEX) { \
- GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, \
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \
apir_forward_error(ret_name), ret_name); \
} \
ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
/* device */
void apir_device_get_device_count(struct virtgpu * gpu);
int apir_device_get_count(struct virtgpu * gpu);
-const char * apir_device_get_name(struct virtgpu * gpu);
-const char * apir_device_get_description(struct virtgpu * gpu);
+char * apir_device_get_name(struct virtgpu * gpu);
+char * apir_device_get_description(struct virtgpu * gpu);
uint32_t apir_device_get_type(struct virtgpu * gpu);
void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total);
bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op);
apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size);
/* buffer-type */
-const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft);
-apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
- ggml_backend_buffer_type_t buffer_buft,
- size_t size);
-size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op);
+char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
+apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
+ apir_buffer_type_host_handle_t host_handle,
+ size_t size);
+size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu,
+ apir_buffer_type_host_handle_t host_handle,
+ const ggml_tensor * op);
/* buffer */
void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
if (!ptr) {
virtgpu_ioctl_gem_close(gpu, gem_handle);
- GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n");
- exit(1);
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_ioctl_map failed\n", __func__);
return 1;
}
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0);
if (!encoder) {
- GGML_ABORT("%s: failed to prepare the remote call encoder", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__);
return 1;
}
log_call_duration(call_duration_ns, "API Remoting handshake");
if (!decoder) {
- GGML_ABORT(
+ GGML_ABORT(GGML_VIRTGPU
"%s: failed to initiate the communication with the virglrenderer library. "
"Most likely, the wrong virglrenderer library was loaded in the hypervisor.",
__func__);
uint32_t host_minor;
if (ret_magic != APIR_HANDSHAKE_MAGIC) {
- GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic,
apir_backend_initialize_error(ret_magic));
} else {
apir_decode_uint32_t(decoder, &host_major);
return 1;
}
- GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
- GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
+ GGML_LOG_INFO(GGML_VIRTGPU "%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor);
+ GGML_LOG_INFO(GGML_VIRTGPU "%s: Host is running with %u.%u\n", __func__, host_major, host_minor);
if (guest_major != host_major) {
- GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
+ GGML_LOG_ERROR(GGML_VIRTGPU "Host major (%d) and guest major (%d) version differ\n", host_major, guest_major);
} else if (guest_minor != host_minor) {
- GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
+ GGML_LOG_WARN(GGML_VIRTGPU "Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor);
}
return 0;
encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0);
if (!encoder) {
- GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to prepare the API Remoting command encoder", __func__);
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
}
log_call_duration(call_duration_ns, "API Remoting LoadLibrary");
if (!decoder) {
- GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__);
+ GGML_ABORT(GGML_VIRTGPU "%s: hypercall error: failed to trigger the API Remoting hypercall.\n", __func__);
return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
}
remote_call_finish(gpu, encoder, decoder);
if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
- GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__);
+ GGML_LOG_INFO(GGML_VIRTGPU "The API Remoting backend was successfully loaded and initialized\n");
return ret;
}
// something wrong happened, find out what.
-
if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
- GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__,
- apir_load_library_error(ret), ret);
+ if (ret == APIR_LOAD_LIBRARY_ENV_VAR_MISSING) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: virglrenderer could not open the API Remoting backend library, "
+ "some environment variables are missing. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(ret));
+ } else if (ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: virglrenderer could not open the API Remoting backend library. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(ret));
+ } else if (ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: could not load the backend library, some symbols are missing. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(ret));
+ } else {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: virglrenderer could not load the API Remoting backend library. (%s - code %d)", __func__,
+ apir_load_library_error(ret), ret);
+ }
return ret;
}
- GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__);
+ GGML_LOG_INFO(GGML_VIRTGPU
+ "%s: virglrenderer successfully loaded the API Remoting backend library.\n", __func__);
ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX);
- if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
- GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)",
+ if (apir_ret == APIR_LOAD_LIBRARY_CANNOT_OPEN) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: the API Remoting backend library couldn't load the GGML backend library. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(apir_ret));
+ } else if (apir_ret == APIR_LOAD_LIBRARY_SYMBOL_MISSING) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: the API Remoting backend library couldn't load the GGML backend library, some symbols are missing. "
+ "Make sure virglrenderer is correctly configured by the hypervisor. (%s)",
+ __func__, apir_load_library_error(apir_ret));
+ } else if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: the API Remoting backend library couldn't load the GGML backend library: apir code=%d | %s)",
__func__, apir_ret, apir_load_library_error(apir_ret));
} else {
uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX;
- GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__,
lib_ret);
}
return ret;
gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr;
util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024);
+ // Initialize mutex to protect shared data_shmem buffer
+ if (mtx_init(&gpu->data_shmem_mutex, mtx_plain) != thrd_success) {
+ delete gpu;
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to initialize data_shmem mutex", __func__);
+ return NULL;
+ }
+
if (virtgpu_open(gpu) != APIR_SUCCESS) {
- GGML_ABORT("%s: failed to open the virtgpu device", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU
+ "%s: failed to open the virtgpu device\n", __func__);
return NULL;
}
if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
- GGML_ABORT("%s: failed to initialize the GPU capset", __func__);
+ if (gpu->use_apir_capset) {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to initialize the virtgpu APIR capset. Make sure that the virglrenderer library supports it.", __func__);
+ } else {
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to initialize the virtgpu Venus capset", __func__);
+ }
return NULL;
}
if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
- GGML_ABORT("%s: failed to initialize the GPU context", __func__);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to initialize the GPU context", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
- GGML_ABORT("%s: failed to create the shared reply memory pages", __func__);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to create the shared reply memory pages", __func__);
return NULL;
}
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
- GGML_ABORT("%s: failed to create the shared data memory pages", __func__);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to create the shared data memory pages", __func__);
return NULL;
}
if (virtgpu_handshake(gpu)) {
- GGML_ABORT("%s: failed to handshake with the virglrenderer library", __func__);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to handshake with the virglrenderer library", __func__);
return NULL;
}
if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
- GGML_ABORT("%s: failed to load the backend library", __func__);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to load the backend library", __func__);
return NULL;
}
drmDevicePtr devs[8];
int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
if (count < 0) {
- GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", __func__);
+ GGML_LOG_ERROR(GGML_VIRTGPU
+ "%s: failed to enumerate DRM devices\n", __func__);
return APIR_ERROR_INITIALIZATION_FAILED;
}
int fd = open(node_path, O_RDWR | O_CLOEXEC);
if (fd < 0) {
- GGML_ABORT("failed to open %s", node_path);
+ GGML_ABORT(GGML_VIRTGPU
+ "%s: failed to open %s", __func__, node_path);
return APIR_ERROR_INITIALIZATION_FAILED;
}
drmVersionPtr version = drmGetVersion(fd);
if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) {
if (version) {
- GGML_ABORT("unknown DRM driver %s version %d", version->name, version->version_major);
+ GGML_LOG_ERROR(GGML_VIRTGPU
+ "%s: unknown DRM driver %s version %d\n", __func__, version->name, version->version_major);
} else {
- GGML_ABORT("failed to get DRM driver version");
+ GGML_LOG_ERROR(GGML_VIRTGPU
+ "%s: failed to get DRM driver version\n", __func__);
}
if (version) {
drmFreeVersion(version);
- GGML_LOG_INFO("using DRM device %s\n", node_path);
+ GGML_LOG_INFO(GGML_VIRTGPU "using DRM device %s\n", node_path);
return APIR_SUCCESS;
}
assert(!gpu->capset.version);
const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
if (ret) {
- GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno));
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: failed to initialize context: %s\n", __func__, strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED;
}
static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) {
if (gpu->use_apir_capset) {
- GGML_LOG_INFO("Using the APIR capset\n");
+ GGML_LOG_INFO(GGML_VIRTGPU "Using the APIR capset\n");
gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR;
} else {
- GGML_LOG_INFO("Using the Venus capset\n");
+ GGML_LOG_INFO(GGML_VIRTGPU "Using the Venus capset\n");
gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS;
}
gpu->capset.version = 0;
virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data));
if (ret) {
- GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno));
+ GGML_LOG_ERROR(GGML_VIRTGPU
+ "%s: failed to get APIR v%d capset: %s\n",
+ __func__, gpu->capset.version, strerror(errno));
return APIR_ERROR_INITIALIZATION_FAILED;
}
* Prepare the command encoder and its buffer
*/
- static char encoder_buffer[4096];
+ thread_local char encoder_buffer[4096];
- static apir_encoder enc;
+ thread_local apir_encoder enc;
enc = {
.cur = encoder_buffer,
.start = encoder_buffer,
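The switch from static to thread_local is what makes concurrent remote-call preparation safe: each thread now owns a private encoder and buffer. A standalone illustration of the storage-class semantics:

#include <cstdio>
#include <thread>

thread_local int per_thread = 0;  // one instance per thread, not one shared

int main() {
    auto worker = [] {
        per_thread++;                     // no data race: private storage
        std::printf("%d\n", per_thread);  // always prints 1
    };
    std::thread t1(worker), t2(worker);
    t1.join();
    t2.join();
    return 0;
}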
UNUSED(gpu);
if (!enc) {
- GGML_LOG_ERROR("Invalid (null) encoder\n");
+ GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) encoder", __func__);
}
if (!dec) {
- GGML_LOG_ERROR("Invalid (null) decoder\n");
+ GGML_ABORT(GGML_VIRTGPU "%s: Invalid (null) decoder", __func__);
}
if (apir_encoder_get_fatal(enc)) {
- GGML_LOG_ERROR("Failed to encode the output parameters.\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to encode the output parameters.", __func__);
}
if (apir_decoder_get_fatal(dec)) {
- GGML_LOG_ERROR("Failed to decode the input parameters.\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Failed to decode the input parameters.", __func__);
}
}
int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
if (ret != 0) {
- GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
+ GGML_ABORT(GGML_VIRTGPU "%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret);
}
/*
}
if (max_wait_ms && timedout) {
- GGML_LOG_ERROR("timed out waiting for the host answer...\n");
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: timed out waiting for the host answer...\n", __func__);
return APIR_FORWARD_TIMEOUT;
}
double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds
if (call_duration_s > 1) {
- GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name);
+ GGML_LOG_INFO(GGML_VIRTGPU
+ "waited %.2fs for the %s host reply...\n", call_duration_s, name);
} else if (call_duration_ms > 1) {
- GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name);
+ GGML_LOG_INFO(GGML_VIRTGPU
+ "waited %.2fms for the %s host reply...\n", call_duration_ms, name);
} else {
- GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name);
+ GGML_LOG_INFO(GGML_VIRTGPU
+ "waited %lldns for the %s host reply...\n", call_duration_ns, name);
}
}
#include <cstring>
+#include "ggml-remoting.h"
+
#define VIRGL_RENDERER_UNSTABLE_APIS 1
#include "apir_hw.h"
#include <drm/virtgpu_drm.h>
/* APIR communication pages */
virtgpu_shmem reply_shmem;
virtgpu_shmem data_shmem;
+
+ /* Mutex to protect shared data_shmem buffer from concurrent access */
+ mtx_t data_shmem_mutex;
+
+ /* Cached device information to prevent memory leaks and race conditions */
+ struct {
+ char * description;
+ char * name;
+ int32_t device_count;
+ uint32_t type;
+ size_t memory_free;
+ size_t memory_total;
+ } cached_device_info;
+
+ /* Cached buffer type information to prevent memory leaks and race conditions */
+ struct {
+ apir_buffer_type_host_handle_t host_handle;
+ char * name;
+ size_t alignment;
+ size_t max_size;
+ } cached_buffer_type;
};
static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) {