uint32_t subgroup_size;
uint32_t shader_core_count;
bool uma;
+ bool prefer_host_memory; // true when the GGML_VK_PREFER_HOST_MEMORY environment variable is set; makes device buffers request host-visible/coherent memory first
bool float_controls_rte_fp16;
bool subgroup_size_control;
static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
vk_buffer buf;
try {
- if (device->uma) {
+ if (device->prefer_host_memory) {
+ buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eDeviceLocal);
+ } else if (device->uma) {
// Fall back to host memory type
buf = ggml_vk_create_buffer(device, size, vk::MemoryPropertyFlagBits::eDeviceLocal, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
} else {
device->physical_device = physical_devices[dev_num];
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
+ const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY"); // opt-in override read from the process environment
+ device->prefer_host_memory = GGML_VK_PREFER_HOST_MEMORY != nullptr; // presence alone enables it; the variable's value is not inspected (even "0" or empty counts as set)
+
bool fp16_storage = false;
bool fp16_compute = false;
bool maintenance4_support = false;