Windows fixes (#31)

author Sebastián A <redacted>

Sun, 12 Mar 2023 20:15:00 +0000 (17:15 -0300)

committer GitHub <redacted>

Sun, 12 Mar 2023 20:15:00 +0000 (22:15 +0200)
author Sebastián A <redacted>
Sun, 12 Mar 2023 20:15:00 +0000 (17:15 -0300)
committer GitHub <redacted>
Sun, 12 Mar 2023 20:15:00 +0000 (22:15 +0200)
diff --git a/ggml.c b/ggml.c

index 71c30280b10662f8247106a246d0d1e4c1fdddfb..fbd7b9339dea656c5ccf3fc7b57b80c6b2e87185 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
      const int nb = k / QK;
      const size_t bs = sizeof(float) + QK/2;
  
-    uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
-    uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
+    uint8_t * restrict pd = ((uint8_t *)y + 0*bs);
+    uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
  
      uint8_t pp[QK/2];
  
@@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
      const int nb = k / QK;
      const size_t bs = sizeof(float) + QK/2;
  
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
  
      // scalar
      for (int i = 0; i < nb; i++) {
@@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
  
      const size_t bs = sizeof(float) + QK/2;
  
-    const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
+    const uint8_t * restrict pd0 = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pd1 = ((const uint8_t *)y + 0*bs);
  
-    const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
-    const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
+    const uint8_t * restrict pb0 = ((const uint8_t *)x + 0*bs + sizeof(float));
+    const uint8_t * restrict pb1 = ((const uint8_t *)y + 0*bs + sizeof(float));
  
      float sumf = 0.0;
  
@@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
      const int nb = n / QK;
      const size_t bs = sizeof(float) + QK/2;
  
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
+    const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
  
  #if __ARM_NEON
  #if QK == 32
diff --git a/main.cpp b/main.cpp

index f02b5ddbde94dae213e28d6211c1305d7501449d..a11d755aabf64ca0e90137b44f29a04c14d25c2b 100644 (file)
--- a/main.cpp
+++ b/main.cpp
@@ -209,8 +209,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
      // create the ggml context
      {
          struct ggml_init_params params = {
-            .mem_size   = ctx_size,
-            .mem_buffer = NULL,
+            /*.mem_size   =*/ ctx_size,
+            /*.mem_buffer =*/ NULL,
          };
  
          model.ctx = ggml_init(params);
@@ -546,12 +546,13 @@ bool llama_eval(
      }
  
      struct ggml_init_params params = {
-        .mem_size   = buf_size,
-        .mem_buffer = buf,
+        /*.mem_size   =*/ buf_size,
+        /*.mem_buffer =*/ buf,
      };
  
      struct ggml_context * ctx0 = ggml_init(params);
-    struct ggml_cgraph gf = { .n_threads = n_threads };
+    ggml_cgraph gf = {};
+    gf.n_threads = n_threads;
  
      struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
      memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -733,6 +734,7 @@ bool llama_eval(
  }
  
  int main(int argc, char ** argv) {
+    ggml_time_init();
      const int64_t t_main_start_us = ggml_time_us();
  
      gpt_params params;
diff --git a/quantize.cpp b/quantize.cpp

index 0ae537339ecf3a48ee408057d98c5470ca9a7db3..14c7b277a4024a6fd3592984f79b492f4fef87c6 100644 (file)
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -289,6 +289,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
  //  ./llama-quantize models/llama/ggml-model.bin models/llama/ggml-model-quant.bin type
  //
  int main(int argc, char ** argv) {
+    ggml_time_init();
      if (argc != 4) {
          fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
          fprintf(stderr, "  type = 2 - q4_0\n");
diff --git a/utils.cpp b/utils.cpp

index 49023bd7b8626ef9aa91ff17dde1417566fa44d8..58e7070598ab06ac24c96bc3964de9a7eb82fe6a 100644 (file)
--- a/utils.cpp
+++ b/utils.cpp
@@ -5,6 +5,12 @@
  #include <fstream>
  #include <regex>
  
+ #if defined(_MSC_VER) || defined(__MINGW32__)
+ #include <malloc.h> // using malloc.h with MSC/MINGW
+ #elif !defined(__FreeBSD__)
+ #include <alloca.h>
+ #endif
+
  bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
      for (int i = 1; i < argc; i++) {
          std::string arg = argv[i];
@@ -472,7 +478,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
  
      assert(k % qk == 0);
  
-    uint8_t pp[qk/2];
+    const size_t pp_size = qk / 2;
+    uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
  
      char * pdst = (char *) dst;
  
@@ -511,7 +518,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
                      pp[l/2] = vi0 | (vi1 << 4);
                  }
  
-                memcpy(pb, pp, sizeof(pp));
+                memcpy(pb, pp, pp_size);
                  pb += bs;
              }
          }
@@ -526,7 +533,8 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
  
      assert(k % qk == 0);
  
-    uint8_t pp[qk/2];
+    const size_t pp_size = qk / 2;
+    uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
  
      char * pdst = (char *) dst;
  
@@ -570,7 +578,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
                      pp[l/2] = vi0 | (vi1 << 4);
                  }
  
-                memcpy(pb + i*qk/2, pp, sizeof(pp));
+                memcpy(pb + i*qk/2, pp, pp_size);
              }
          }
      }
author	Sebastián A <redacted>
	Sun, 12 Mar 2023 20:15:00 +0000 (17:15 -0300)
committer	GitHub <redacted>
	Sun, 12 Mar 2023 20:15:00 +0000 (22:15 +0200)
ggml.c		patch | blob | history
main.cpp		patch | blob | history
quantize.cpp		patch | blob | history
utils.cpp		patch | blob | history