CMAKE_EXTRA=""
if [ ! -z ${GG_BUILD_CUDA} ]; then
- CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUBLAS=ON"
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
fi
if [ ! -z ${GG_BUILD_METAL} ]; then
set(TEST_TARGET gpt-2-batched)
add_executable(${TEST_TARGET} main-batched.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)
-
-
-#
-# For GPU offloading
-
-if (GGML_CUBLAS)
- add_compile_definitions(GGML_USE_CUBLAS)
-endif()
-
-if (GGML_METAL)
- add_compile_definitions(GGML_USE_METAL)
-endif()
#include "ggml-alloc.h"
#include "ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
ggml_backend_t backend = NULL;
//ggml_backend_buffer_t buffer;
- #ifdef GGML_USE_CUBLAS
+ #ifdef GGML_USE_CUDA
if (use_gpu) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
backend = ggml_backend_cuda_init(0);
#include "ggml-alloc.h"
#include "ggml-backend.h"
-// #define GGML_USE_CUBLAS
-
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
};
// initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
if (use_gpu) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
model.backend = ggml_backend_cuda_init(0);
#include "ggml-alloc.h"
#include "ggml-backend.h"
-// #define GGML_USE_CUBLAS
-
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
};
// initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
if (use_gpu) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
model.backend = ggml_backend_cuda_init(0);
#include "ggml-alloc.h"
#include "ggml-backend.h"
-//#define GGML_USE_CUBLAS // uncomment this to use cuda backend, make sure build ggml lib with GGML_CUBLAS=ON
-
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
};
// initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
if (use_gpu) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
model.backend = ggml_backend_cuda_init(0);
#include "ggml-alloc.h"
#include "ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
ggml_backend_t backend = NULL;
ggml_backend_buffer_t params_buffer = NULL;
- #ifdef GGML_USE_CUBLAS
+ #ifdef GGML_USE_CUDA
if (use_gpu) {
fprintf(stderr, "%s: using CUDA backend\n", __func__);
backend = ggml_backend_cuda_init(0);