option(GGML_HIP "ggml: use HIP" OFF)
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
+option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
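The new GGML_HIP_NO_VMM option presumably feeds the GGML_USE_VMM compile definition that the code below tests. A sketch of that wiring, assuming the usual ggml CMake layout (the exact file and lines are not part of this diff):

# assumed wiring (not shown in this diff): define GGML_USE_VMM unless the
# user opted out; GGML_HIP_NO_VMM defaults to ON, so HIP builds skip VMM
if (NOT GGML_HIP_NO_VMM)
    add_compile_definitions(GGML_USE_VMM)
endif()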
for (int id = 0; id < info.device_count; ++id) {
int device_vmm = 0;
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
CUdevice device;
CU_CHECK(cuDeviceGet(&device, id));
CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
if (device_vmm) {
    CUmemAllocationProp alloc_prop = {};
    alloc_prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    alloc_prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    alloc_prop.location.id = id;
    CU_CHECK(cuMemGetAllocationGranularity(&info.devices[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
}
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
info.devices[id].vmm = !!device_vmm;
cudaDeviceProp prop;
// ... (remaining per-device property queries elided) ...
}
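The granularity queried above matters because every size later passed to the VMM reserve/map calls must be a multiple of it. A hypothetical helper (not part of the patch) shows the usual rounding:

// hypothetical helper, not in the patch: round a requested size up to the
// device's VMM allocation granularity before reserving or mapping memory
static size_t round_up_granularity(size_t size, size_t granularity) {
    return granularity * ((size + granularity - 1) / granularity);
}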
// pool with virtual memory
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
    static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
    CUdeviceptr pool_addr = 0;
    size_t pool_used = 0;
    // ... (reserve/map allocation logic elided) ...
    void free(void * ptr, size_t size) override {
        pool_used -= size;
        GGML_ASSERT(ptr == (void *) ((char *)(pool_addr) + pool_used)); // LIFO frees only
    }
};
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
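For context, this pool is built on the CUDA driver's virtual memory management pattern: reserve a large virtual range once, then back it with physical memory on demand. Below is a minimal sketch, not the pool's actual code; vmm_pool_grow and its parameters are illustrative, and the CU_CHECK fallback exists only to keep the sketch self-contained.

#include <cuda.h>
#include <cstdlib>

#ifndef CU_CHECK // stand-in for the file's error-checking macro
#define CU_CHECK(call) do { if ((call) != CUDA_SUCCESS) std::abort(); } while (0)
#endif

// grow a pool whose virtual range was reserved once up front, e.g. with
// cuMemAddressReserve(&pool_addr, CUDA_POOL_VMM_MAX_SIZE, 0, 0, 0)
static void vmm_pool_grow(int device, CUdeviceptr pool_addr, size_t pool_size,
                          size_t grow_by, size_t granularity) {
    // physical allocations are tied to one device
    CUmemAllocationProp prop = {};
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = device;

    // sizes passed to the VMM calls must be multiples of the granularity
    grow_by = granularity * ((grow_by + granularity - 1) / granularity);

    // create physical memory and map it at the end of the used virtual range
    CUmemGenericAllocationHandle handle;
    CU_CHECK(cuMemCreate(&handle, grow_by, &prop, 0));
    CU_CHECK(cuMemMap(pool_addr + pool_size, grow_by, 0, handle, 0));
    CU_CHECK(cuMemRelease(handle)); // the mapping keeps the memory alive

    // grant the device read/write access to the newly mapped range
    CUmemAccessDesc access = {};
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.location.id   = device;
    access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    CU_CHECK(cuMemSetAccess(pool_addr + pool_size, grow_by, &access, 1));
}

Because only address space is reserved up front, the pool can grow in place without copying, which is what makes the strictly LIFO pool_addr + pool_used bookkeeping in free() viable.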
std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_CUDA_NO_VMM)
+#if defined(GGML_USE_VMM)
if (ggml_cuda_info().devices[device].vmm) {
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
}
-#endif // !defined(GGML_CUDA_NO_VMM)
+#endif // defined(GGML_USE_VMM)
return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
}
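Callers are unaffected by the rename: devices without VMM support, and builds compiled without GGML_USE_VMM, fall back to the legacy ggml_cuda_pool_leg exactly as before.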
features.push_back({ "FORCE_CUBLAS", "1" });
#endif
- #ifdef GGML_CUDA_NO_VMM
+ #ifndef GGML_USE_VMM
features.push_back({ "NO_VMM", "1" });
#endif
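The flag remains visible at runtime through the backend registry. A sketch, assuming the ggml_backend_get_features proc address and nullptr-terminated feature array exposed by ggml-backend.h:

#include <cstdio>
#include "ggml-backend.h"

// list every registered backend's feature flags; a CUDA build without
// GGML_USE_VMM should report NO_VMM = 1 here
int main() {
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        auto get_features = (ggml_backend_get_features_t)
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
        if (!get_features) {
            continue; // backend does not report features
        }
        printf("%s:\n", ggml_backend_reg_name(reg));
        for (ggml_backend_feature * f = get_features(reg); f->name; f++) {
            printf("  %s = %s\n", f->name, f->value);
        }
    }
    return 0;
}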