From: Justine Tunney <redacted>
Date: Mon, 25 Mar 2024 05:39:56 +0000 (-0400)
Subject: ggml : support AVX512VNNI (llama/6280)
X-Git-Tag: upstream/0.0.1642~814
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=2b9042a364c97703d3a4cb892e7524e3a20b7499;p=pkg%2Fggml%2Fsources%2Fggml

ggml : support AVX512VNNI (llama/6280)

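Enable the VNNI dot-product path (_mm256_dpbusd_epi32) in
mul_sum_us8_pairs_float() when the compiler defines __AVX512VNNI__,
not only __AVXVNNI__. This makes some quants (e.g. Q4_0, Q8_0) go
faster on some architectures (e.g. AMD Zen 4).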
---

diff --git a/src/ggml-quants.c b/src/ggml-quants.c
index 2eaca059..f26798ac 100644
--- a/src/ggml-quants.c
+++ b/src/ggml-quants.c
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
 }
 
 static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
     const __m256i zero = _mm256_setzero_si256();
     const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
     return _mm256_cvtepi32_ps(summed_pairs);