ggml : support AVX512VNNI (llama/6280)

author Justine Tunney <redacted>
Mon, 25 Mar 2024 05:39:56 +0000 (01:39 -0400)
committer Georgi Gerganov <redacted>
Wed, 27 Mar 2024 11:20:00 +0000 (13:20 +0200)
diff --git a/src/ggml-quants.c b/src/ggml-quants.c

index 2eaca0593fd8b111f79cfc6dd46470702a7e88ea..f26798accdb9a5f4564a007d3873edb03b2bbed6 100644 (file)
--- a/src/ggml-quants.c
+++ b/src/ggml-quants.c
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
  }
  
  static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
      const __m256i zero = _mm256_setzero_si256();
      const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
      return _mm256_cvtepi32_ps(summed_pairs);
author	Justine Tunney <redacted>
	Mon, 25 Mar 2024 05:39:56 +0000 (01:39 -0400)
committer	Georgi Gerganov <redacted>
	Wed, 27 Mar 2024 11:20:00 +0000 (13:20 +0200)