case GGML_FTYPE_MOSTLY_IQ4_NL:
case GGML_FTYPE_MOSTLY_IQ4_XS:
case GGML_FTYPE_MOSTLY_IQ1_M:
+ case GGML_FTYPE_MOSTLY_BF16:
    {
        fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
        return false;
    }

case GGML_TYPE_IQ4_NL:
case GGML_TYPE_IQ4_XS:
case GGML_TYPE_IQ1_M:
+ case GGML_TYPE_BF16:
case GGML_TYPE_COUNT:
    {
        fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
        return false;
    }
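
Both hunks route the new BF16 enum values to the existing error paths: bf16 is a 16-bit float format (fp32's sign bit and 8 exponent bits, with the mantissa cut down to 7 bits), not a block-quantized format, so the quantizer has nothing to do with it. A minimal conversion sketch, assuming plain truncation (the helper name is hypothetical, not part of the diff):

#include <stdint.h>
#include <string.h>

// fp32 -> bf16 by keeping the high 16 bits; production converters
// typically add round-to-nearest-even instead of truncating.
static uint16_t fp32_to_bf16(float f) {
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    return (uint16_t)(bits >> 16);
}
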
dst[tpig] = x / (1.0f + exp(-x));
}

+kernel void kernel_silu_4(
+        device const float4 * src0,
+        device       float4 * dst,
+        uint tpig[[thread_position_in_grid]]) {
+    device const float4 & x = src0[tpig];
+    dst[tpig] = x / (1.0f + exp(-x));
+}
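
For reference, both kernels evaluate the same scalar function, silu(x) = x / (1 + e^(-x)); the new _4 variant simply processes four lanes per thread using Metal's component-wise float4 arithmetic. A CPU sketch of the same formula (the helper below is illustrative):

#include <math.h>

// Reference SiLU: what each Metal thread computes per element.
static inline float silu_ref(float x) {
    return x / (1.0f + expf(-x));
}
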
// ALiBi
if (max_bias > 0.0f) {
-    const short h = iq2;
+    const uint32_t h = iq2;

    const float base = h < n_head_log2 ? m0 : m1;
    const int   exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;

// ALiBi
if (max_bias > 0.0f) {
-    const short h = iq2;
+    const uint32_t h = iq2;

    const float base = h < n_head_log2 ? m0 : m1;
    const int   exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
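
Widening h from short to uint32_t keeps the head index well-defined for large head counts, presumably so it cannot truncate and so the comparison against n_head_log2 is done in one unsigned type. The value these hunks feed into the softmax is the per-head ALiBi slope: early heads use base m0, the rest use m1 with an odd exponent. A standalone C sketch of that computation (base/exph mirror the kernel; the wrapper function is illustrative):

#include <math.h>
#include <stdint.h>

// Per-head ALiBi slope, mirroring the kernel's base/exponent selection.
static float alibi_slope(uint32_t h, uint32_t n_head_log2, float m0, float m1) {
    const float base = h < n_head_log2 ? m0 : m1;
    const int   exph = h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1;
    return powf(base, exph);
}
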
#include "ggml-impl.h"
#include "ggml-quants.h"
#include "ggml.h"
-#include "sgemm.h"

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
+#undef GGML_USE_LLAMAFILE
#endif

+#ifdef GGML_USE_LLAMAFILE
+#include "sgemm.h"
+#endif
+
#if defined(_MSC_VER)
// disable "possible loss of data" to avoid hundreds of casts
// we should just be careful :)
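
Net effect: sgemm.h is pulled in only when GGML_USE_LLAMAFILE survives the MSVC/MinGW undef, so the llamafile SGEMM path never builds on those toolchains and gated call sites compile down to the generic fallback. A minimal sketch of the same compile-time fallback pattern (the call site is illustrative, not the actual ggml code):

#include <stdbool.h>

#if defined(_MSC_VER) || defined(__MINGW32__)
#undef GGML_USE_LLAMAFILE   // optional fast path unsupported here
#endif

#ifdef GGML_USE_LLAMAFILE
#include "sgemm.h"
#endif

// Hypothetical call site: report whether the fast path ran so the
// caller can fall back to the generic matmul when it did not.
static bool try_fast_sgemm(void) {
#ifdef GGML_USE_LLAMAFILE
    // llamafile_sgemm(...) would be invoked here.
    return true;
#else
    return false;
#endif
}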