ggml : fix build on Windows with Snapdragon X (#8531)

author Andreas (Andi) Kunar <redacted>

Thu, 25 Jul 2024 16:01:00 +0000 (18:01 +0200)

committer GitHub <redacted>

Thu, 25 Jul 2024 16:01:00 +0000 (19:01 +0300)
author Andreas (Andi) Kunar <redacted>
Thu, 25 Jul 2024 16:01:00 +0000 (18:01 +0200)
committer GitHub <redacted>
Thu, 25 Jul 2024 16:01:00 +0000 (19:01 +0300)
diff --git a/docs/build.md b/docs/build.md

index 916fcf22d7924d571edb7776ec98c936e767d246..d9d12c46707bd7151de36340645db259992cb1dd 100644 (file)
--- a/docs/build.md
+++ b/docs/build.md
@@ -16,7 +16,7 @@ In order to build llama.cpp you have four different options.
        make
        ```
  
-  - On Windows:
+  - On Windows (x86/x64 only; arm64 requires CMake):
  
      1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
      2. Extract `w64devkit` on your pc.
@@ -60,6 +60,17 @@ In order to build llama.cpp you have four different options.
        cmake -B build -G "Xcode"
        cmake --build build --config Debug
        ```
+    - Building for Windows (x86, x64 and arm64) with MSVC or clang as compilers:
+      - Install Visual Studio 2022, e.g. via the [Community Edition](https://visualstudio.microsoft.com/vs/community/). In the installer, select at least the following options (this also automatically installs the required additional tools such as CMake):
+        - Tab Workloads: Desktop development with C++
+        - Tab Individual components (select quickly via search): C++ _CMake_ tools for Windows, _Git_ for Windows, C++ _Clang_ Compiler for Windows, MSBuild support for LLVM (clang-cl) toolset
+      - Always use a Developer Command Prompt / Developer PowerShell for VS 2022 when running git, build, and test commands
+      - For Windows on ARM (arm64, WoA) build with:
+        ```bash
+        cmake --preset arm64-windows-llvm-release -D GGML_OPENMP=OFF
+        cmake --build build-arm64-windows-llvm-release
+        ```
+        Note: Building for arm64 can also be done with MSVC alone (with the build-arm64-windows-MSVC preset, or the standard CMake build instructions). However, MSVC does not support inline ARM assembly code, which is used e.g. for the accelerated Q4_0_4_8 CPU kernels, so those optimized code paths are not compiled in when building with MSVC without clang.
  
  -   Using `gmake` (FreeBSD):
  
diff --git a/ggml/src/ggml-aarch64.c b/ggml/src/ggml-aarch64.c

index 26535b1c432ba6c82bd43a127a0b04b0d53c3046..af53dea172459af968c07fe7271b710e7a689992 100644 (file)
--- a/ggml/src/ggml-aarch64.c
+++ b/ggml/src/ggml-aarch64.c
@@ -392,7 +392,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
  #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
      GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                  "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      const void * b_ptr = vx;
      const void * a_ptr = vy;
      float * res_ptr = s;
@@ -501,7 +501,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
                      "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
      }
  #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      const void * b_ptr = vx;
      const void * a_ptr = vy;
      float * res_ptr = s;
@@ -613,7 +613,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
      UNUSED(ncols_interleaved);
      UNUSED(blocklen);
  
-#if defined(__ARM_FEATURE_SVE)
+#if defined(__ARM_FEATURE_SVE) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      if (svcntw() == 8) {
          const void * b_ptr = vx;
          const void * a_ptr = vy;
@@ -753,7 +753,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void *
  #if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
      GGML_ASSERT(!(ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) &&
                  "__ARM_NEON and __ARM_FEATURE_MATMUL_INT8 defined, use the Q4_0_4_8 quantization format for optimal performance");
-#elif defined(__ARM_NEON) && defined(__aarch64__)
+#elif defined(__ARM_NEON) && defined(__aarch64__) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      const void * b_ptr = vx;
      const void * a_ptr = vy;
      float * res_ptr = s;
@@ -1271,7 +1271,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * restrict s, size_t bs, const void *
                      "__ARM_FEATURE_SVE defined, use the Q4_0_8_8 quantization format for optimal performance");
      }
  #endif
-#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      const void * b_ptr = vx;
      const void * a_ptr = vy;
      float * res_ptr = s;
@@ -1727,7 +1727,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
      UNUSED(ncols_interleaved);
      UNUSED(blocklen);
  
-#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) && ! ((defined(_MSC_VER)) && ! defined(__clang__))
      if (svcntw() == 8) {
          const void * b_ptr = vx;
          const void * a_ptr = vy;
author	Andreas (Andi) Kunar <redacted>
	Thu, 25 Jul 2024 16:01:00 +0000 (18:01 +0200)
committer	GitHub <redacted>
	Thu, 25 Jul 2024 16:01:00 +0000 (19:01 +0300)
docs/build.md		patch \| blob \| history
ggml/src/ggml-aarch64.c		patch \| blob \| history