]> git.djapps.eu Git - pkg/ggml/sources/llama.cpp/commitdiff
ROCm: use native CMake HIP support (#5966)
authorGavin Zhao <redacted>
Fri, 17 May 2024 15:03:03 +0000 (11:03 -0400)
committerGitHub <redacted>
Fri, 17 May 2024 15:03:03 +0000 (17:03 +0200)
Supercedes #4024 and #4813.

CMake's native HIP support has become the
recommended way to add HIP code into a project (see
[here](https://rocm.docs.amd.com/en/docs-6.0.0/conceptual/cmake-packages.html#using-hip-in-cmake)).
This PR makes the following changes:

1. The environment variable `HIPCXX` or CMake option
`CMAKE_HIP_COMPILER` should be used to specify the HIP
compiler. Notably this shouldn't be `hipcc`, but ROCm's clang,
which usually resides in `$ROCM_PATH/llvm/bin/clang`. Previously
this was control by `CMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`.
Note that since native CMake HIP support is not yet available on
Windows, on Windows we fall back to the old behavior.

2. CMake option `CMAKE_HIP_ARCHITECTURES` is used to control the
GPU architectures to build for. Previously this was controled by
`GPU_TARGETS`.

3. Updated the Nix recipe to account for these new changes.

4. The GPU targets to build against in the Nix recipe is now
consistent with the supported GPU targets in nixpkgs.

5. Added CI checks for HIP on both Linux and Windows. On Linux, we test
both the new and old behavior.

The most important part about this PR is the separation of the
HIP compiler and the C/C++ compiler. This allows users to choose
a different C/C++ compiler if desired, compared to the current
situation where when building for ROCm support, everything must be
compiled with ROCm's clang.

~~Makefile is unchanged. Please let me know if we want to be
consistent on variables' naming because Makefile still uses
`GPU_TARGETS` to control architectures to build for, but I feel
like setting `CMAKE_HIP_ARCHITECTURES` is a bit awkward when you're
calling `make`.~~ Makefile used `GPU_TARGETS` but the README says
to use `AMDGPU_TARGETS`. For consistency with CMake, all usage of
`GPU_TARGETS` in Makefile has been updated to `AMDGPU_TARGETS`.

Thanks to the suggestion of @jin-eld, to maintain backwards
compatibility (and not break too many downstream users' builds), if
`CMAKE_CXX_COMPILER` ends with `hipcc`, then we still compile using
the original behavior and emit a warning that recommends switching
to the new HIP support. Similarly, if `AMDGPU_TARGETS` is set but
`CMAKE_HIP_ARCHITECTURES` is not, then we forward `AMDGPU_TARGETS`
to `CMAKE_HIP_ARCHITECTURES` to ease the transition to the new
HIP support.

Signed-off-by: Gavin Zhao <redacted>
.devops/nix/package.nix
.github/workflows/build.yml
CMakeLists.txt
Makefile
README.md

index 2c0ae4e2a071b2c6bd0d9c95de4a8f1fe027eac8..1c9633cdf5557df185d962d0d61c05dc1e39cf46 100644 (file)
@@ -227,20 +227,20 @@ effectiveStdenv.mkDerivation (
         )
       ]
       ++ optionals useRocm [
-        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
-        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
-
-        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
-        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
-        # and select the line that matches the current nixpkgs version of rocBLAS.
-        # Should likely use `rocmPackages.clr.gpuTargets`.
-        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
       ]
       ++ optionals useMetalKit [
         (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
         (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
       ];
 
+    # Environment variables needed for ROCm
+    env = optionals useRocm {
+      ROCM_PATH = "${rocmPackages.clr}";
+      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+    };
+
     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
index 0742443c60e2d4e9008ee038842e6984bca897fe..0109cc004a44be09ea29a627faf98d19bab05db0 100644 (file)
@@ -392,6 +392,33 @@ jobs:
           cmake -DLLAMA_VULKAN=ON ..
           cmake --build . --config Release -j $(nproc)
 
+  ubuntu-22-cmake-hip:
+    runs-on: ubuntu-22.04
+    container: rocm/dev-ubuntu-22.04:6.0.2
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
+
+      - name: Build with native CMake HIP support
+        id: cmake_build
+        run: |
+          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Build with legacy HIP support
+        id: cmake_build_legacy_hip
+        run: |
+          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
+          cmake --build build2 --config Release -j $(nproc)
+
   ubuntu-22-cmake-sycl:
     runs-on: ubuntu-22.04
 
@@ -989,6 +1016,37 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
           name: llama-bin-win-sycl-x64.zip
 
+  windows-latest-cmake-hip:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
+          cmake --build build --config Release
+
   ios-xcode-build:
     runs-on: macos-latest
 
index 8ab6a45a6a07409ca778da17cbbfcb0c36a84f45..990e34b86aae338a0eff5214c039238cac0df1c7 100644 (file)
@@ -555,16 +555,37 @@ if (LLAMA_VULKAN)
 endif()
 
 if (LLAMA_HIPBLAS)
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-
-    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+    if ($ENV{ROCM_PATH})
+        set(ROCM_PATH $ENV{ROCM_PATH})
+    else()
+        set(ROCM_PATH /opt/rocm)
     endif()
+    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
 
-    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+    # CMake on Windows doesn't support the HIP language yet
+    if(WIN32)
+        set(CXX_IS_HIPCC TRUE)
+    else()
+        string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
     endif()
 
+    if(CXX_IS_HIPCC)
+        if(LINUX)
+            if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+                message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+            endif()
+
+            message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
+                    " Prefer setting the HIP compiler directly. See README for details.")
+        endif()
+    else()
+        # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
+        if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
+            set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_ARGETS})
+        endif()
+        cmake_minimum_required(VERSION 3.21)
+        enable_language(HIP)
+    endif()
     find_package(hip     REQUIRED)
     find_package(hipblas REQUIRED)
     find_package(rocblas REQUIRED)
@@ -598,13 +619,18 @@ if (LLAMA_HIPBLAS)
     add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
     add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
 
-    set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+    if (CXX_IS_HIPCC)
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
+    else()
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
+    endif()
 
     if (LLAMA_STATIC)
         message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
     endif()
 
-    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
 endif()
 
 if (LLAMA_SYCL)
index 3fa56d13a4671ceee07a9fde13a11c234953a6ec..22d5218565d2345ccf5688c73c383954934c8a30 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -560,10 +560,10 @@ endif # LLAMA_VULKAN
 ifdef LLAMA_HIPBLAS
        ifeq ($(wildcard /opt/rocm),)
                ROCM_PATH       ?= /usr
-               GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
+               AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
        else
                ROCM_PATH       ?= /opt/rocm
-               GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
+               AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
        endif
        HIPCC                   ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
        LLAMA_CUDA_DMMV_X       ?= 32
@@ -575,7 +575,7 @@ ifdef LLAMA_HIP_UMA
 endif # LLAMA_HIP_UMA
        MK_LDFLAGS  += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
        MK_LDFLAGS      += -lhipblas -lamdhip64 -lrocblas
-       HIPFLAGS    += $(addprefix --offload-arch=,$(GPU_TARGETS))
+       HIPFLAGS    += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
        HIPFLAGS    += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
        HIPFLAGS    += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
        HIPFLAGS    += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
index 5d6217d139d21a12ad9c29a91a6f64bf46d9b854..7dd6fc0eba82450172807903d24d7d25eee1621e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -528,13 +528,28 @@ Building the program with BLAS support may lead to some performance improvements
     ```
   - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
     ```bash
-    CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
-        cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+        cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
         && cmake --build build --config Release -- -j 16
     ```
     On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
     However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
 
+    Note that if you get the following error:
+    ```
+    clang: error: cannot find ROCm device library; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+    ```
+    Try searching for a directory under `HIP_PATH` that contains the file
+    `oclc_abi_version_400.bc`. Then, add the following to the start of the
+    command: `HIP_DEVICE_LIB_PATH=<directory-you-just-found>`, so something
+    like:
+    ```bash
+    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -p)" \
+    HIP_DEVICE_LIB_PATH=<directory-you-just-found> \
+        cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+        && cmake --build build -- -j 16
+    ```
+
   - Using `make` (example for target gfx1030, build with 16 CPU threads):
     ```bash
     make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
@@ -543,10 +558,8 @@ Building the program with BLAS support may lead to some performance improvements
   - Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
     ```bash
     set PATH=%HIP_PATH%\bin;%PATH%
-    mkdir build
-    cd build
-    cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release ..
-    cmake --build .
+    cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
+    cmake --build build
     ```
     Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
     Find your gpu version string by matching the most significant version information from `rocminfo | grep gfx | head -1 | awk '{print $2}'` with the list of processors, e.g. `gfx1035` maps to `gfx1030`.