cmake : add CMake support for cuBLAS (#101)

author Georgi Gerganov <redacted>

Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)

committer GitHub <redacted>

Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)
author Georgi Gerganov <redacted>
Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)
committer GitHub <redacted>
Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 54d18b0031b2a8993c8706cce111bf2848a44b82..2e46d55eaa640a7517b5d747cc5f1075f0d31e64 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,8 @@ option(GGML_BUILD_EXAMPLES          "ggml: build examples" ${GGML_STANDALONE})
  
  option(GGML_PERF                    "ggml: enable perf timings"          OFF)
  option(GGML_NO_ACCELERATE           "ggml: disable Accelerate framework" OFF)
+option(GGML_OPENBLAS                "ggml: use OpenBLAS"                 OFF)
+option(GGML_CUBLAS                  "ggml: use cuBLAS"                   OFF)
  
  # sanitizers
  
diff --git a/README.md b/README.md

index c69d790d6715ed684dc5934ec07f4a528679606c..5f110cc2a0b1b58dbcb0c85531390f463571daa6 100644 (file)
--- a/README.md
+++ b/README.md
@@ -85,3 +85,10 @@ The inference speeds that I get for the different models on my 32GB MacBook M1 P
  | GPT-J |    6B | 125 ms |
  
  For more information, checkout the corresponding programs in the [examples](examples) folder.
+
+## Using cuBLAS
+
+```bash
+# fix the path to point to your CUDA compiler
+cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc ..
+```
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index 18e2c85796086f5e8300674dbf3978af4ca305dc..25628bebf91ac9a9baec1af9323205f1c9c5ad56 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -156,13 +156,39 @@ if (GGML_OPENBLAS)
      endif()
  endif()
  
+if (GGML_CUBLAS)
+    cmake_minimum_required(VERSION 3.17)
+
+    find_package(CUDAToolkit)
+    if (CUDAToolkit_FOUND)
+        message(STATUS "cuBLAS found")
+
+        enable_language(CUDA)
+
+        set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
+
+        add_compile_definitions(GGML_USE_CUBLAS)
+
+        if (GGML_STATIC)
+            set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
+        else()
+            set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
+        endif()
+
+    else()
+        message(WARNING "cuBLAS not found")
+    endif()
+endif()
+
+
  if (GGML_PERF)
      set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_PERF)
  endif()
  
  add_library(${TARGET}
      ggml.c
-    )
+    ../include/ggml/ggml.h
+    ${GGML_CUDA_SOURCES})
  
  target_include_directories(${TARGET} PUBLIC
      .
@@ -197,6 +223,13 @@ if (MINGW)
          )
  endif()
  
+if (GGML_CUDA_SOURCES)
+    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
+    set_property(TARGET ggml  PROPERTY CUDA_ARCHITECTURES OFF)
+    set_property(TARGET ggml  PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
+    target_link_libraries(ggml PUBLIC stdc++)
+endif()
+
  install(TARGETS ${TARGET}
      LIBRARY DESTINATION lib
      ARCHIVE DESTINATION lib/static
author	Georgi Gerganov <redacted>
	Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)
committer	GitHub <redacted>
	Sat, 22 Apr 2023 10:23:20 +0000 (13:23 +0300)
CMakeLists.txt		patch | blob | history
README.md		patch | blob | history
src/CMakeLists.txt		patch | blob | history