#include "ggml/ggml-alloc.h"
#include "ggml/ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
}
    // initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
    if (n_gpu_layers > 0) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        model.backend = ggml_backend_cuda_init(0);
#include "ggml/ggml-alloc.h"
#include "ggml/ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
}
// initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
if (n_gpu_layers > 0) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
model.backend = ggml_backend_cuda_init(0);
#include "ggml/ggml-alloc.h"
#include "ggml/ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
    ggml_backend_t gpu_backend = NULL;

    // initialize the backends
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
    if (params.n_gpu_layers > 0) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        gpu_backend = ggml_backend_cuda_init(0);
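For context, the hunks above all touch the same backend-selection idiom in the ggml examples: create a CUDA backend only when the binary was built with GGML_USE_CUDA and the user asked for GPU layers, and fall back to the CPU backend otherwise. Below is a minimal sketch of that idiom, assuming the examples' usual structure; the init_backend() helper is hypothetical, while ggml_backend_cuda_init() and ggml_backend_cpu_init() are the actual ggml entry points.

```c
// Minimal sketch (not part of the diff) of the backend-selection idiom.
// init_backend() is a hypothetical helper; the ggml_backend_* calls are
// the real ggml API.
#include <stdio.h>

#include "ggml/ggml-backend.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif

static ggml_backend_t init_backend(int n_gpu_layers) {
    ggml_backend_t backend = NULL;

    (void) n_gpu_layers; // unused when built without GGML_USE_CUDA

#ifdef GGML_USE_CUDA
    if (n_gpu_layers > 0) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        backend = ggml_backend_cuda_init(0); // device 0
        if (backend == NULL) {
            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
        }
    }
#endif

    // fall back to the CPU backend when CUDA is disabled, not requested,
    // or failed to initialize
    if (backend == NULL) {
        backend = ggml_backend_cpu_init();
    }

    return backend;
}
```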
```bash
git clone https://github.com/ggerganov/llama.cpp
-# On a CUDA-enabled system add -DLLAMA_CUBLAS=1
+# On a CUDA-enabled system add -DLLAMA_CUDA=1
# On a Mac add -DLLAMA_METAL=1
cmake llama.cpp \
    -B llama_build \
find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
- message(STATUS "cuBLAS found")
+ message(STATUS "CUDA found")
enable_language(CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
endif()
else()
- message(WARNING "cuBLAS not found")
+ message(WARNING "CUDA not found")
endif()
endif()
+# TODO: do not build separate ggml-rocm target (see CUDA build above, or llama.cpp for reference)
if (GGML_HIPBLAS)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)