vulkan: linux builds + small subgroup size fixes (#11767)

author Eve <redacted>

Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)

committer GitHub <redacted>

Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)
author Eve <redacted>
Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)
committer GitHub <redacted>
Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml

index 02f6a13634b3f7e949a3c82ede8aa80b923b35bb..62f4ed8742778d943a05ae04aa61b5c935f1ac22 100644 (file)
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -403,6 +403,34 @@ jobs:
            # This is using llvmpipe and runs slower than other backends
            ctest -L main --verbose --timeout 1800
  
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
+          name: llama-bin-ubuntu-vulkan-x64.zip
+
    ubuntu-22-cmake-hip:
      runs-on: ubuntu-22.04
      container: rocm/dev-ubuntu-22.04:6.0.2
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp

index bffe95086af7d8a403340b36010a8e10d4ca698c..99d50afda2d44d04fdf132b6146b5ab040ccfac6 100644 (file)
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -1430,6 +1430,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
      VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
  
      // some shaders have a minimum subgroup size
+    const uint32_t subgroup_size_8 = std::max(device->subgroup_size, 8u);
      const uint32_t subgroup_size_16 = std::max(device->subgroup_size, 16u);
      const uint32_t subgroup_size_32 = std::max(device->subgroup_size, 32u);
  
@@ -1492,13 +1493,13 @@ static void ggml_vk_load_shaders(vk_device& device) {
          const uint32_t tk_m = device->coopmat_support ? device->coopmat_k : 1;
          const uint32_t tk_s = device->coopmat_support ? device->coopmat_k : 1;
  
-        l_warptile = { 128, 128, 128, 16, device->subgroup_size * 2, 64, 2, tm_l, tn_l, tk_l, device->subgroup_size };
-        m_warptile = { 128,  64,  64, 16, device->subgroup_size, 32, 2, tm_m, tn_m, tk_m, device->subgroup_size };
-        s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, device->subgroup_size };
+        l_warptile = { 128, 128, 128, 16, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 };
+        m_warptile = { 128,  64,  64, 16, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
+        s_warptile = { subgroup_size_16, 32, 32, 16, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 };
  
-        l_warptile_mmq = { 128, 128, 128, 32, device->subgroup_size * 2, 64, 2, tm_l, tn_l, tk_l, device->subgroup_size };
-        m_warptile_mmq = { 128,  64,  64, 32, device->subgroup_size, 32, 2, tm_m, tn_m, tk_m, device->subgroup_size };
-        s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, device->subgroup_size };
+        l_warptile_mmq = { 128, 128, 128, 32, subgroup_size_8 * 2, 64, 2, tm_l, tn_l, tk_l, subgroup_size_8 };
+        m_warptile_mmq = { 128,  64,  64, 32, subgroup_size_8, 32, 2, tm_m, tn_m, tk_m, subgroup_size_8 };
+        s_warptile_mmq = { subgroup_size_32, 32, 32, 32, 32, 32, 2, tm_s, tn_s, tk_s, subgroup_size_8 };
  
          l_mmq_wg_denoms = l_wg_denoms = {128, 128, 1 };
          m_mmq_wg_denoms = m_wg_denoms = { 64,  64, 1 };
author	Eve <redacted>
	Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)
committer	GitHub <redacted>
	Fri, 14 Feb 2025 02:59:40 +0000 (02:59 +0000)
.github/workflows/build.yml		patch | blob | history
ggml/src/ggml-vulkan/ggml-vulkan.cpp		patch | blob | history