From: Georgi Gerganov
Date: Wed, 27 Mar 2024 11:09:12 +0000 (+0200)
Subject: examples : fix CUBLAS leftovers (#0)
X-Git-Tag: upstream/0.0.1642~804
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=af3259766212f71bd5fa7a5d46f4d32592f3bb6c;p=pkg%2Fggml%2Fsources%2Fggml

examples : fix CUBLAS leftovers (#0)

ggml-ci
---

diff --git a/examples/gpt-2/main-backend.cpp b/examples/gpt-2/main-backend.cpp
index bd4cb80b..714c158f 100644
--- a/examples/gpt-2/main-backend.cpp
+++ b/examples/gpt-2/main-backend.cpp
@@ -2,7 +2,7 @@
 #include "ggml/ggml-alloc.h"
 #include "ggml/ggml-backend.h"
 
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -197,7 +197,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     }
 
     // initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
         model.backend = ggml_backend_cuda_init(0);
diff --git a/examples/gpt-2/main-batched.cpp b/examples/gpt-2/main-batched.cpp
index 9ba4496c..6dbf5e3b 100644
--- a/examples/gpt-2/main-batched.cpp
+++ b/examples/gpt-2/main-batched.cpp
@@ -2,7 +2,7 @@
 #include "ggml/ggml-alloc.h"
 #include "ggml/ggml-backend.h"
 
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -285,7 +285,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     }
 
     // initialize the backend
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
         model.backend = ggml_backend_cuda_init(0);
diff --git a/examples/gpt-2/main-sched.cpp b/examples/gpt-2/main-sched.cpp
index ad10aa96..bdf3bff8 100644
--- a/examples/gpt-2/main-sched.cpp
+++ b/examples/gpt-2/main-sched.cpp
@@ -2,7 +2,7 @@
 #include "ggml/ggml-alloc.h"
 #include "ggml/ggml-backend.h"
 
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -105,7 +105,7 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
     ggml_backend_t gpu_backend = NULL;
 
     // initialize the backends
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
     if (params.n_gpu_layers > 0) {
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
         gpu_backend = ggml_backend_cuda_init(0);
diff --git a/examples/python/README.md b/examples/python/README.md
index 480920f7..69287f88 100644
--- a/examples/python/README.md
+++ b/examples/python/README.md
@@ -52,7 +52,7 @@ As of this writing the best is to use [ggerganov/llama.cpp](https://github.com/g
 
 ```bash
 git clone https://github.com/ggerganov/llama.cpp
-# On a CUDA-enabled system add -DLLAMA_CUBLAS=1
+# On a CUDA-enabled system add -DLLAMA_CUDA=1
 # On a Mac add -DLLAMA_METAL=1
 cmake llama.cpp \
   -B llama_build \
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e13733f2..47b7a2d9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -215,7 +215,7 @@ if (GGML_CUDA)
     find_package(CUDAToolkit)
 
     if (CUDAToolkit_FOUND)
-        message(STATUS "cuBLAS found")
+        message(STATUS "CUDA found")
 
         enable_language(CUDA)
 
@@ -252,10 +252,11 @@ if (GGML_CUDA)
             set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
         endif()
     else()
-        message(WARNING "cuBLAS not found")
+        message(WARNING "CUDA not found")
     endif()
 endif()
 
+# TODO: do not build separate ggml-rocm target (see CUDA build above, or llama.cpp for reference)
 if (GGML_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
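
For context, below is a minimal sketch of the guarded backend-initialization pattern that the hunks above converge on, assuming the ggml headers used by the gpt-2 examples are available. The helper name `gpt2_init_backend` and the explicit CPU fallback are illustrative additions, not part of this commit.

```cpp
// Sketch: select the compute backend based on the renamed GGML_USE_CUDA macro.
#include "ggml/ggml-backend.h"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif

#include <cstdio>

// hypothetical helper; mirrors the pattern shown in the gpt-2 example hunks
static ggml_backend_t gpt2_init_backend(int n_gpu_layers) {
    ggml_backend_t backend = NULL;

#ifdef GGML_USE_CUDA
    if (n_gpu_layers > 0) {
        fprintf(stderr, "%s: using CUDA backend\n", __func__);
        backend = ggml_backend_cuda_init(0); // device 0
    }
#endif

    if (backend == NULL) {
        // no GPU backend requested or available: fall back to the CPU backend
        fprintf(stderr, "%s: using CPU backend\n", __func__);
        backend = ggml_backend_cpu_init();
    }

    return backend;
}
```

A build that still defines the old `GGML_USE_CUBLAS` macro would silently skip the CUDA branch here, which is exactly the kind of leftover this commit cleans up.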