ggml : improve ggml_is_contiguous logic (llama/7856)
author     Georgi Gerganov <redacted>
           Wed, 12 Jun 2024 12:24:20 +0000 (15:24 +0300)
committer  Georgi Gerganov <redacted>
           Sat, 15 Jun 2024 19:05:47 +0000 (22:05 +0300)
* ggml : improve ggml_is_contiguous logic

ggml-ci

* ggml : support more contiguous cases

ggml-ci
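
For context: the new helper ggml_is_contiguous_n(tensor, n) generalizes the old per-function checks. Dimensions 0..n may carry arbitrary strides (e.g. padded rows), while every dimension above n must be packed tightly on top of whatever the lower dimensions occupy; ggml_is_contiguous_0 is the old fully-contiguous check, ggml_is_contiguous_1 additionally tolerates a strided dimension 1, and so on. A minimal, self-contained sketch of these semantics, using a hypothetical toy_tensor restricted to floats (block size 1) rather than the real ggml API:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define TOY_MAX_DIMS 4

    // hypothetical stand-in for ggml_tensor, float-only (block size 1)
    struct toy_tensor {
        long   ne[TOY_MAX_DIMS]; // elements per dimension
        size_t nb[TOY_MAX_DIMS]; // stride in bytes per dimension
    };

    // mirrors ggml_is_contiguous_n for a float tensor: dims 0..n are free,
    // dims above n must be packed tightly on top of them
    static bool toy_is_contiguous_n(const struct toy_tensor * t, int n) {
        size_t next_nb = sizeof(float);
        if (t->ne[0] != 1 && t->nb[0] != next_nb) {
            return false;
        }
        next_nb *= t->ne[0];
        for (int i = 1; i < TOY_MAX_DIMS; i++) {
            if (t->ne[i] != 1) {
                if (i > n) {
                    if (t->nb[i] != next_nb) {
                        return false;
                    }
                    next_nb *= t->ne[i];
                } else {
                    // this dimension does not need to be contiguous
                    next_nb = t->ne[i]*t->nb[i];
                }
            }
        }
        return true;
    }

    int main(void) {
        // 4x3 float matrix whose rows are padded to 5 floats each:
        // not contiguous_0 (rows are not packed against each other),
        // but contiguous_1 (only dims above 1 must be packed)
        struct toy_tensor t = {
            .ne = {4, 3, 1, 1},
            .nb = {sizeof(float), 5*sizeof(float), 15*sizeof(float), 15*sizeof(float)},
        };
        printf("contiguous_0: %d\n", toy_is_contiguous_n(&t, 0)); // prints 0
        printf("contiguous_1: %d\n", toy_is_contiguous_n(&t, 1)); // prints 1
        return 0;
    }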

src/ggml.c

index 1fc77743bc7b945baa6441845735a73870949ada..5fb9e9a327d22d20671961ddae2dd557d08d7f9e 100644
@@ -3212,35 +3212,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
 
-GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
+    size_t next_nb = ggml_type_size(tensor->type);
+    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
+        return false;
+    }
+    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        if (tensor->ne[i] != 1) {
+            if (i > n) {
+                if (tensor->nb[i] != next_nb) {
+                    return false;
+                }
+                next_nb *= tensor->ne[i];
+            } else {
+                // this dimension does not need to be contiguous
+                next_nb = tensor->ne[i]*tensor->nb[i];
+            }
+        }
+    }
+    return true;
+}
 
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
+    return ggml_is_contiguous_0(tensor);
 }
 
 GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
-    return ggml_is_contiguous(tensor);
+    return ggml_is_contiguous_n(tensor, 0);
 }
 
 GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 1);
 }
 
 GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 2);
 }
 
 GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
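
In practice this lets row-wise kernels accept views whose rows are internally packed but padded relative to each other. A hedged usage sketch with the public ggml API (assuming the standard ggml.h entry points; the expected results in the comments follow from the predicate above):

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // a 4x4 view of the left columns of an 8x4 F32 matrix:
        // each row of the view is packed, but the row stride (nb[1])
        // is inherited from the 8-wide parent
        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
        struct ggml_tensor * v = ggml_view_2d(ctx, a, 4, 4, a->nb[1], 0);

        printf("contiguous:   %d\n", ggml_is_contiguous  (v)); // 0: nb[1] is 32 bytes, a packed row is 16
        printf("contiguous_1: %d\n", ggml_is_contiguous_1(v)); // 1: dim 1 is allowed to be strided

        ggml_free(ctx);
        return 0;
    }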
@@ -3272,20 +3279,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->ne[0] == t1->ne[0] ) &&
-        (t0->ne[1] == t1->ne[1] ) &&
-        (t0->ne[2] == t1->ne[2] ) &&
-        (t0->ne[3] == t1->ne[3] );
+        (t0->ne[0] == t1->ne[0]) &&
+        (t0->ne[1] == t1->ne[1]) &&
+        (t0->ne[2] == t1->ne[2]) &&
+        (t0->ne[3] == t1->ne[3]);
 }
 
 bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->nb[0] == t1->nb[0] ) &&
-        (t0->nb[1] == t1->nb[1] ) &&
-        (t0->nb[2] == t1->nb[2] ) &&
-        (t0->nb[3] == t1->nb[3] );
+        (t0->nb[0] == t1->nb[0]) &&
+        (t0->nb[1] == t1->nb[1]) &&
+        (t0->nb[2] == t1->nb[2]) &&
+        (t0->nb[3] == t1->nb[3]);
 }
 
 // check if t1 can be represented as a repeatition of t0
@@ -4078,32 +4085,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
     switch (tensor->type) {
         case GGML_TYPE_I8:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 return ((int8_t *)(tensor->data))[i];
             }
         case GGML_TYPE_I16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 return ((int16_t *)(tensor->data))[i];
             }
         case GGML_TYPE_I32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 return ((int32_t *)(tensor->data))[i];
             }
         case GGML_TYPE_F16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
             }
         case GGML_TYPE_BF16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                 return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
             }
         case GGML_TYPE_F32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 return ((float *)(tensor->data))[i];
             }
         default:
@@ -4125,32 +4126,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
     switch (tensor->type) {
         case GGML_TYPE_I8:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 ((int8_t *)(tensor->data))[i] = value;
             } break;
         case GGML_TYPE_I16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 ((int16_t *)(tensor->data))[i] = value;
             } break;
         case GGML_TYPE_I32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 ((int32_t *)(tensor->data))[i] = value;
             } break;
         case GGML_TYPE_F16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
             } break;
         case GGML_TYPE_BF16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                 ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
             } break;
         case GGML_TYPE_F32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 ((float *)(tensor->data))[i] = value;
             } break;
         default:
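
The dropped GGML_ASSERT(tensor->nb[0] == sizeof(...)) checks appear redundant rather than load-bearing: assuming the surrounding code, as in upstream ggml.c of this period, first branches on ggml_is_contiguous and falls back to the multi-index _nd accessors for non-contiguous tensors, the switch is only reached for contiguous tensors, and for a non-blocked type the contiguity predicate itself already enforces nb[0] == ggml_type_size(type) whenever ne[0] > 1. A tiny self-contained restatement of that implication (toy code, not the ggml API):

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    // first stride check of ggml_is_contiguous_n, specialized to block size 1:
    // dimension 0 passes iff it is trivial or tightly packed
    static bool toy_contig_dim0(long ne0, size_t nb0, size_t elt_size) {
        return ne0 == 1 || nb0 == elt_size;
    }

    int main(void) {
        // once contiguity holds, nb[0] == sizeof(float) is already guaranteed
        // (for ne0 > 1), so a separate per-type assert on nb[0] adds nothing
        assert( toy_contig_dim0(8,   sizeof(float), sizeof(float)));
        assert(!toy_contig_dim0(8, 2*sizeof(float), sizeof(float)));
        return 0;
    }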
@@ -7343,7 +7338,7 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
     return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
 }
 
-// gmml_unary
+// ggml_unary
 
 static struct ggml_tensor * ggml_unary_impl(
         struct ggml_context * ctx,