GGML_BACKEND_DEVICE_TYPE_CPU,
// GPU device using dedicated memory
GGML_BACKEND_DEVICE_TYPE_GPU,
+ // integrated GPU device using host memory
+ GGML_BACKEND_DEVICE_TYPE_IGPU,
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
GGML_BACKEND_DEVICE_TYPE_ACCEL
};
// all the device properties
// filled in by backend functions such as ggml_backend_cuda_device_get_props,
// which receive a pointer to this struct and assign its members one by one
struct ggml_backend_dev_props {
+ // device name
const char * name;
+ // device description
const char * description;
+ // device free memory in bytes
size_t memory_free;
+ // device total memory in bytes
size_t memory_total;
+ // device type
enum ggml_backend_dev_type type;
+ // device id
+ // for PCI devices, this should be the PCI bus id formatted as "domain:bus:device.function" (e.g. "0000:01:00.0")
+ // if the id is unknown, this should be NULL
// note: the CUDA backend stores a pointer into a string held by its device context here (not a copy)
+ const char * device_id;
+ // device capabilities
struct ggml_backend_dev_caps caps;
};
extern "C" {
#endif
- #define GGML_BACKEND_API_VERSION 1
+ #define GGML_BACKEND_API_VERSION 2
//
// Backend buffer type
ggml_backend_t ggml_backend_init_best(void) {
ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
- if (!dev) {
- dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
- }
+ dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
+ dev = dev ? dev : ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
if (!dev) {
return nullptr;
}
int device;
std::string name;
std::string description;
+ std::string pci_bus_id;
};
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
}
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
+ ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
+
props->name = ggml_backend_cuda_device_get_name(dev);
props->description = ggml_backend_cuda_device_get_description(dev);
props->type = ggml_backend_cuda_device_get_type(dev);
+ props->device_id = ctx->pci_bus_id.empty() ? nullptr : ctx->pci_bus_id.c_str();
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
dev_ctx->description = prop.name;
+ char pci_bus_id[16] = {};
+ snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
+ dev_ctx->pci_bus_id = pci_bus_id;
+
ggml_backend_dev_t dev = new ggml_backend_device {
/* .iface = */ ggml_backend_cuda_device_interface,
/* .reg = */ &reg,
// returns the device type reported for a vk backend device
// the dev argument is not consulted: every device is currently reported as
// GGML_BACKEND_DEVICE_TYPE_GPU
static enum ggml_backend_dev_type ggml_backend_vk_device_get_type(ggml_backend_dev_t dev) {
UNUSED(dev);
+ // TODO: return GGML_BACKEND_DEVICE_TYPE_IGPU for integrated GPUs
return GGML_BACKEND_DEVICE_TYPE_GPU;
}
props->name = ggml_backend_vk_device_get_name(dev);
props->description = ggml_backend_vk_device_get_description(dev);
props->type = ggml_backend_vk_device_get_type(dev);
+ // TODO: set props->device_id to PCI bus id
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
props->caps = {
/* .async = */ false,