Add an option to build without CUDA VMM (llama/7067)
author     William Tambellini <redacted>
           Mon, 6 May 2024 18:12:14 +0000 (11:12 -0700)
committer  Georgi Gerganov <redacted>
           Mon, 13 May 2024 08:02:26 +0000 (11:02 +0300)
Add an option to build ggml cuda without CUDA VMM
Resolves:
https://github.com/ggerganov/llama.cpp/issues/6889
https://forums.developer.nvidia.com/t/potential-nvshmem-allocated-memory-performance-issue/275416/4
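The change is a pure compile-time switch: every VMM code path below gains an additional !defined(GGML_CUDA_NO_VMM) guard, so defining that macro compiles the VMM-backed pool out entirely and leaves only the legacy pool. The build-system side is not part of this diff; as a sketch only, the macro could also be supplied directly on the compiler command line (the flag shown in the comment is an assumed invocation, not from the commit):

// Sketch, not from the commit: effect of building with the macro defined,
// e.g. via an assumed compiler flag such as -DGGML_CUDA_NO_VMM.
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    // VMM compiled in; device support is still probed at runtime (see below).
#else
    // VMM compiled out; the legacy pool is always used.
#endif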

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index fa56f9521e494134c2e3b680da741f2baa275eb5..8739baa2a791404eb6355c87d5372ba8f4e17fc4 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -113,7 +113,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
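What this guarded block does, shown standalone: the CUDA driver API exposes per-device VMM support as a device attribute. A minimal, self-contained probe (a sketch, not code from this repository; error handling reduced to early returns; link with -lcuda):

#include <cuda.h>
#include <cstdio>

int main() {
    if (cuInit(0) != CUDA_SUCCESS) {
        return 1;
    }
    int device_count = 0;
    if (cuDeviceGetCount(&device_count) != CUDA_SUCCESS) {
        return 1;
    }
    for (int id = 0; id < device_count; ++id) {
        CUdevice device;
        cuDeviceGet(&device, id);
        int device_vmm = 0; // set to 1 if the device supports VMM
        cuDeviceGetAttribute(&device_vmm,
            CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device);
        printf("device %d: VMM %ssupported\n", id, device_vmm ? "" : "not ");
    }
    return 0;
}

This is the same attribute query the hunk above now additionally gates on GGML_CUDA_NO_VMM.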
@@ -259,7 +259,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
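The 1ull << 35 constant is an address-space cap, not an up-front allocation: with VMM, the pool reserves a large virtual range once and backs it with physical memory only as it grows. A sketch of that driver-API pattern under assumed names (vmm_reserve_and_commit is illustrative, not the pool's real code; error checks omitted for brevity):

#include <cuda.h>

// Illustrative helper: reserve a 32 GiB virtual range, then back and map only
// the first commit_size bytes with physical memory. Assumes cuInit() has
// already been called.
static CUdeviceptr vmm_reserve_and_commit(CUdevice device, size_t commit_size) {
    CUmemAllocationProp prop = {};
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = (int) device;

    // Physical backing must be a multiple of the allocation granularity.
    size_t granularity = 0;
    cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
    commit_size = ((commit_size + granularity - 1) / granularity) * granularity;

    // Cheap: reserves addresses only, no physical memory is consumed yet.
    CUdeviceptr base = 0;
    cuMemAddressReserve(&base, 1ull << 35, 0, 0, 0); // 32 GiB of addresses

    // Create physical memory and map it at the start of the reservation.
    CUmemGenericAllocationHandle handle;
    cuMemCreate(&handle, commit_size, &prop, 0);
    cuMemMap(base, commit_size, 0, handle, 0);

    // Grant this device read/write access to the mapped range.
    CUmemAccessDesc access = {};
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.location.id   = (int) device;
    access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    cuMemSetAccess(base, commit_size, &access, 1);

    return base;
}

Growing the pool later means another cuMemCreate/cuMemMap at a higher offset; pointers into the pool stay valid because the virtual base never moves. The linked NVIDIA forum thread suggests memory allocated this way can perform worse in some environments, which is the motivation for the opt-out this commit adds.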
@@ -356,7 +356,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 #endif // !defined(GGML_USE_HIPBLAS)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }
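The legacy-pool fallback sits just past this hunk's three lines of trailing context; abridged, the dispatch amounts to the following (a sketch consistent with the context lines above, with ggml_cuda_pool_leg being the pre-existing pool named in the second hunk):

std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
    // VMM compiled in *and* supported by this device: use the VMM pool.
    if (ggml_cuda_info().devices[device].vmm) {
        return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
    }
#endif
    // Otherwise (HIP build, GGML_CUDA_NO_VMM, or no device support): legacy pool.
    return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_leg(device));
}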