ggml : sync with ggml repo (warning fixes + asserts)

author    Georgi Gerganov <redacted>  Sat, 29 Apr 2023 16:30:22 +0000 (19:30 +0300)
committer Georgi Gerganov <redacted>  Sat, 29 Apr 2023 16:33:28 +0000 (19:33 +0300)

diff --git a/ggml.c b/ggml.c

index 2ec0c0bf9d9f4408bb736e6b32c8a205bbe489c5..ebbaf11c620cc87fb661d400979d89b3f5d2c058 100644 (file)
--- a/ggml.c
+++ b/ggml.c
@@ -8245,8 +8245,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
          ggml_fp16_t * d_X = ggml_cuda_pool_malloc(sizeof(float) * x_ne, &x_size);
          ggml_fp16_t * d_Y = ggml_cuda_pool_malloc(sizeof(float) * y_ne, &y_size);
          float       * d_D = ggml_cuda_pool_malloc(sizeof(float) * d_ne, &d_size);
-#else
-        float * const wdata = params->wdata;
  #endif
          for (int64_t i03 = 0; i03 < ne03; i03++) {
              for (int64_t i02 = 0; i02 < ne02; i02++) {
@@ -8263,8 +8261,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
                              wdata[id++] = GGML_FP32_TO_FP16(*(float *) ((char *) src1->data + i03*nb13 + i02*nb12 + i01*nb11 + i00*nb10));
                          }
                      }
+
+                    assert(id*sizeof(ggml_fp16_t) <= params->wsize);
                  }
  #else
+                float * const wdata = params->wdata;
                  {
                      size_t id = 0;
                      for (int64_t i01 = 0; i01 < ne01; ++i01) {
@@ -8272,6 +8273,8 @@ static void ggml_compute_forward_mul_mat_f16_f32(
                              wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
                          }
                      }
+
+                    assert(id*sizeof(float) <= params->wsize);
                  }
  #endif
  
@@ -8537,7 +8540,10 @@ static void ggml_compute_forward_mul_mat_q_f32(
                          dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00);
                          id += ne00;
                      }
+
+                    assert(id*sizeof(float) <= params->wsize);
                  }
+
                  const float * x = wdata;
  #endif
  
@@ -9118,7 +9124,7 @@ static void ggml_compute_forward_alibi_f32(
      //const int nb3 = src0->nb[3];
  
      assert(nb0 == sizeof(float));
-    assert(ne1+n_past == ne0);
+    assert(ne1 + n_past == ne0); (void) n_past;
  
      // add alibi to src0 (KQ_scaled)
      const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
@@ -9179,7 +9185,7 @@ static void ggml_compute_forward_alibi_f16(
      //const int nb3 = src0->nb[3];
  
      assert(nb0 == sizeof(ggml_fp16_t));
-    assert(ne1+n_past == ne0);
+    assert(ne1 + n_past == ne0); (void) n_past;
  
      // add alibi to src0 (KQ_scaled)
      const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
@@ -11571,12 +11577,12 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                              if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
                                  node->n_tasks = 1; // TODO: this actually is doing nothing
                                                     //       the threads are still spinning
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+#if defined(GGML_USE_CUBLAS)
+                                // with cuBLAS, we need memory for the full 3D / 4D data of src1
+                                cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1);
+#else
                                  // here we need memory just for single 2D matrix from src0
                                  cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]);
-#else
-                                // with GPU, we need memory for the full 3D / 4D data
-                                cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*MAX(ggml_nelements(node->src1), ggml_nelements(node->src0));
  #endif
                              } else {
                                  cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1);
@@ -11586,7 +11592,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
  #endif
                          } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) {
                              cur = 0;
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
                              if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
                                  node->n_tasks = 1;
                              }
diff --git a/ggml.h b/ggml.h

index 38ae9a6eeeb71b29cb40a8e318ef3319334c3b0c..c1c5495c63f44cbe8669bf4a48a4fd419cd5c760 100644 (file)
--- a/ggml.h
+++ b/ggml.h
@@ -701,8 +701,8 @@ extern "C" {
              struct ggml_tensor  * c1);
  
      // Mapping operations
-    GGML_API typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
-    GGML_API typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+    typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
  
      GGML_API struct ggml_tensor * ggml_map_unary_f32(
              struct ggml_context        * ctx,
author	Georgi Gerganov <redacted>
	Sat, 29 Apr 2023 16:30:22 +0000 (19:30 +0300)
committer	Georgi Gerganov <redacted>
	Sat, 29 Apr 2023 16:33:28 +0000 (19:33 +0300)