Add provisions for Windows support for BF16 code including CMake provision for enabling ...

author Srihari-mcw <redacted>

Mon, 20 May 2024 02:18:39 +0000 (19:18 -0700)

committer Georgi Gerganov <redacted>

Sun, 16 Jun 2024 15:19:48 +0000 (18:19 +0300)
author Srihari-mcw <redacted>
Mon, 20 May 2024 02:18:39 +0000 (19:18 -0700)
committer Georgi Gerganov <redacted>
Sun, 16 Jun 2024 15:19:48 +0000 (18:19 +0300)
diff --git a/ggml-impl.h b/ggml-impl.h

index 59684fa81f07e8d81010147bbf71b3842111fddd..5ff014fe3f339d5d3716718d935ce5ba802c7dd0 100644 (file)
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -17,6 +17,18 @@
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
  
+#if defined(_WIN32) /* Windows builds: the m512bh/m512i wrappers expand to their argument with no cast. */
+
+#define m512bh(p) p /* NOTE(review): argument is substituted without parentheses; fine for the call-expression arguments used in this patch, confirm before using with compound expressions */
+#define m512i(p) p
+
+#else /* All other targets: the wrappers apply an explicit cast to the named AVX-512 vector type. */
+
+#define m512bh(p) (__m512bh)(p) /* cast to the BF16 vector type; NOTE(review): the expansion has no outer parentheses */
+#define m512i(p) (__m512i)(p) /* cast to the 512-bit integer vector type; same parenthesization caveat */
+
+#endif /* defined(_WIN32) */
+
  /**
   * Converts brain16 to float32.
   *
diff --git a/ggml.c b/ggml.c

index 3a104c486339e5cead080f9405cc898a2076d3f1..53da231ee061c46d503564a6f7dd66395474934f 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -406,10 +406,10 @@ void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
  #if defined(__AVX512BF16__)
    for (; i + 32 <= n; i += 32) {
-        _mm512_storeu_ps(
-            (__m512 *)(y + i),
-            (__m512)_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
-                                        _mm512_loadu_ps(x + i)));
+        _mm512_storeu_si512(
+            (__m512i *)(y + i),
+            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
+                                _mm512_loadu_ps(x + i))));
    }
  #endif
      for (; i < n; i++) {
@@ -1666,10 +1666,10 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
      __m512 c1 = _mm512_setzero_ps();
      __m512 c2 = _mm512_setzero_ps();
      for (; i + 64 <= n; i += 64) {
-        c1 = _mm512_dpbf16_ps(c1, (__m512bh)_mm512_loadu_ps((const float *)(x + i)),
-                              (__m512bh)_mm512_loadu_ps((const float *)(y + i)));
-        c2 = _mm512_dpbf16_ps(c2, (__m512bh)_mm512_loadu_ps((const float *)(x + i + 32)),
-                              (__m512bh)_mm512_loadu_ps((const float *)(y + i + 32)));
+        c1 = _mm512_dpbf16_ps(c1, m512bh(_mm512_loadu_si512((x + i))),
+                             m512bh(_mm512_loadu_si512((y + i))));
+        c2 = _mm512_dpbf16_ps(c2, m512bh(_mm512_loadu_si512((x + i + 32))),
+                             m512bh(_mm512_loadu_si512((y + i + 32))));
      }
      sumf += (ggml_float)_mm512_reduce_add_ps(c1);
      sumf += (ggml_float)_mm512_reduce_add_ps(c2);
@@ -23137,6 +23137,14 @@ int ggml_cpu_has_avx512_vnni(void) {
  #endif
  }
  
+int ggml_cpu_has_avx512_bf16(void) { /* Returns 1 if this translation unit was compiled with __AVX512BF16__ defined, otherwise 0. */
+#if defined(__AVX512BF16__) /* decided by the preprocessor at build time; the function body performs no run-time query */
+    return 1;
+#else
+    return 0;
+#endif
+}
+
  int ggml_cpu_has_fma(void) {
  #if defined(__FMA__)
      return 1;
diff --git a/ggml.h b/ggml.h

index 8c13f4ba89c6e71a492a182eadbb008e8e810d77..77475710129d7986b1684e56fc45fbd28cb82d5d 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -2390,6 +2390,7 @@ extern "C" {
      GGML_API int ggml_cpu_has_avx512     (void);
      GGML_API int ggml_cpu_has_avx512_vbmi(void);
      GGML_API int ggml_cpu_has_avx512_vnni(void);
+    GGML_API int ggml_cpu_has_avx512_bf16(void);
      GGML_API int ggml_cpu_has_fma        (void);
      GGML_API int ggml_cpu_has_neon       (void);
      GGML_API int ggml_cpu_has_arm_fma    (void);
author	Srihari-mcw <redacted>
	Mon, 20 May 2024 02:18:39 +0000 (19:18 -0700)
committer	Georgi Gerganov <redacted>
	Sun, 16 Jun 2024 15:19:48 +0000 (18:19 +0300)
ggml-impl.h		patch | blob | history
ggml.c		patch | blob | history
ggml.h		patch | blob | history