ROCm: use native CMake HIP support (#5966)

author Gavin Zhao <redacted>

Fri, 17 May 2024 15:03:03 +0000 (11:03 -0400)

committer GitHub <redacted>

Fri, 17 May 2024 15:03:03 +0000 (17:03 +0200)
author Gavin Zhao <redacted>
Fri, 17 May 2024 15:03:03 +0000 (11:03 -0400)
committer GitHub <redacted>
Fri, 17 May 2024 15:03:03 +0000 (17:03 +0200)
diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix

index 2c0ae4e2a071b2c6bd0d9c95de4a8f1fe027eac8..1c9633cdf5557df185d962d0d61c05dc1e39cf46 100644 (file)
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -227,20 +227,20 @@ effectiveStdenv.mkDerivation (
          )
        ]
        ++ optionals useRocm [
-        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
-        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
-
-        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
-        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
-        # and select the line that matches the current nixpkgs version of rocBLAS.
-        # Should likely use `rocmPackages.clr.gpuTargets`.
-        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
        ]
        ++ optionals useMetalKit [
          (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
          (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
        ];
  
+    # Environment variables needed for ROCm; optionalAttrs yields {} (not a list) when useRocm is false
+    env = lib.optionalAttrs useRocm {
+      ROCM_PATH = "${rocmPackages.clr}";
+      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
+    };
+
      # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
      # if they haven't been added yet.
      postInstall = ''
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml

index 0742443c60e2d4e9008ee038842e6984bca897fe..0109cc004a44be09ea29a627faf98d19bab05db0 100644 (file)
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -392,6 +392,33 @@ jobs:
            cmake -DLLAMA_VULKAN=ON ..
            cmake --build . --config Release -j $(nproc)
  
+  ubuntu-22-cmake-hip:
+    runs-on: ubuntu-22.04
+    container: rocm/dev-ubuntu-22.04:6.0.2
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
+
+      - name: Build with native CMake HIP support
+        id: cmake_build
+        run: |
+          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Build with legacy HIP support
+        id: cmake_build_legacy_hip
+        run: |
+          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
+          cmake --build build2 --config Release -j $(nproc)
+
    ubuntu-22-cmake-sycl:
      runs-on: ubuntu-22.04
  
@@ -989,6 +1016,37 @@ jobs:
            path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
            name: llama-bin-win-sycl-x64.zip
  
+  windows-latest-cmake-hip:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
+          cmake --build build --config Release
+
    ios-xcode-build:
      runs-on: macos-latest
  
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 8ab6a45a6a07409ca778da17cbbfcb0c36a84f45..990e34b86aae338a0eff5214c039238cac0df1c7 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,16 +555,37 @@ if (LLAMA_VULKAN)
  endif()
  
  if (LLAMA_HIPBLAS)
-    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
-
-    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+    if (NOT "$ENV{ROCM_PATH}" STREQUAL "")  # a bare if($ENV{...}) would test a variable named after the path
+        set(ROCM_PATH "$ENV{ROCM_PATH}")  # honour the caller's ROCm install location
+    else()
+        set(ROCM_PATH /opt/rocm)  # default install prefix
      endif()
+    list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH})
  
-    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
-        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+    # CMake on Windows doesn't support the HIP language yet
+    if(WIN32)
+        set(CXX_IS_HIPCC TRUE)
+    else()
+        string(REGEX MATCH "hipcc(\\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")  # "\\." gives the regex a literal dot
      endif()
  
+    if(CXX_IS_HIPCC)
+        if(CMAKE_SYSTEM_NAME STREQUAL "Linux")  # the LINUX variable only exists in CMake >= 3.25
+            if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+                message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+            endif()
+
+            message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
+                    " Prefer setting the HIP compiler directly. See README for details.")
+        endif()
+    else()
+        # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
+        if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
+            set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
+        endif()
+        cmake_minimum_required(VERSION 3.21)
+        enable_language(HIP)
+    endif()
      find_package(hip     REQUIRED)
      find_package(hipblas REQUIRED)
      find_package(rocblas REQUIRED)
@@ -598,13 +619,18 @@ if (LLAMA_HIPBLAS)
      add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
      add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
  
-    set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+    if (CXX_IS_HIPCC)
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device)
+    else()
+        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
+    endif()
  
      if (LLAMA_STATIC)
          message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
      endif()
  
-    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
  endif()
  
  if (LLAMA_SYCL)
diff --git a/Makefile b/Makefile

index 3fa56d13a4671ceee07a9fde13a11c234953a6ec..22d5218565d2345ccf5688c73c383954934c8a30 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -560,10 +560,10 @@ endif # LLAMA_VULKAN
  ifdef LLAMA_HIPBLAS
         ifeq ($(wildcard /opt/rocm),)
                 ROCM_PATH       ?= /usr
-               GPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
+               AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
         else
                 ROCM_PATH       ?= /opt/rocm
-               GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
+               AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
         endif
         HIPCC                   ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
         LLAMA_CUDA_DMMV_X       ?= 32
@@ -575,7 +575,7 @@ ifdef LLAMA_HIP_UMA
  endif # LLAMA_HIP_UMA
         MK_LDFLAGS  += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
         MK_LDFLAGS      += -lhipblas -lamdhip64 -lrocblas
-       HIPFLAGS    += $(addprefix --offload-arch=,$(GPU_TARGETS))
+       HIPFLAGS    += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
         HIPFLAGS    += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
         HIPFLAGS    += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
         HIPFLAGS    += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
diff --git a/README.md b/README.md

index 5d6217d139d21a12ad9c29a91a6f64bf46d9b854..7dd6fc0eba82450172807903d24d7d25eee1621e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -528,13 +528,28 @@ Building the program with BLAS support may lead to some performance improvements
      ```
    - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
      ```bash
-    CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
-        cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+        cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
          && cmake --build build --config Release -- -j 16
      ```
      On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
      However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).
  
+    Note that if you get the following error:
+    ```
+    clang: error: cannot find ROCm device library; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+    ```
+    Try searching for a directory under `HIP_PATH` that contains the file
+    `oclc_abi_version_400.bc`. Then, add the following to the start of the
+    command: `HIP_DEVICE_LIB_PATH=<directory-you-just-found>`, so something
+    like:
+    ```bash
+    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+    HIP_DEVICE_LIB_PATH=<directory-you-just-found> \
+        cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+        && cmake --build build -- -j 16
+    ```
+
    - Using `make` (example for target gfx1030, build with 16 CPU threads):
      ```bash
      make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
@@ -543,10 +558,8 @@ Building the program with BLAS support may lead to some performance improvements
    - Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
      ```bash
      set PATH=%HIP_PATH%\bin;%PATH%
-    mkdir build
-    cd build
-    cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release ..
-    cmake --build .
+    cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
+    cmake --build build
      ```
      Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
      Find your gpu version string by matching the most significant version information from `rocminfo | grep gfx | head -1 | awk '{print $2}'` with the list of processors, e.g. `gfx1035` maps to `gfx1030`.
author	Gavin Zhao <redacted>
	Fri, 17 May 2024 15:03:03 +0000 (11:03 -0400)
committer	GitHub <redacted>
	Fri, 17 May 2024 15:03:03 +0000 (17:03 +0200)
.devops/nix/package.nix		patch \| blob \| history
.github/workflows/build.yml		patch \| blob \| history
CMakeLists.txt		patch \| blob \| history
Makefile		patch \| blob \| history
README.md		patch \| blob \| history