GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
GGML_ASSERT(src0->type == dst->type);
- const size_t nb00 = src0->nb[0];
- const size_t nb0 = dst->nb[0];
+ const size_t nb0 = ggml_type_size(src0->type);
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
if (ie0 < ie1) {
memcpy(
((char *) dst->data + ie0*nb0),
- ((char *) src0->data + ie0*nb00),
- (ie1 - ie0) * ggml_type_size(src0->type));
+ ((char *) src0->data + ie0*nb0),
+ (ie1 - ie0) * nb0);
}
}
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
- if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, dst);
- return;
- }
-
// parallelize by rows
const int nr = ne01;
// number of rows per thread
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
GGML_ASSERT(src0->type == dst->type);
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
ggml_compute_forward_dup_same_cont(params, dst);
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
-
const size_t type_size = ggml_type_size(src0->type);
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
}
test_cases.emplace_back(new test_cont());
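+ // additional ggml_cont cases over a few shapes for f32, f16 and bf16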
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5, 7}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5, 7}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1, 1}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3, 5}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5, 7}));
auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
for (auto op : {ggml_add, ggml_mul, ggml_div}) {
--- /dev/null
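+// Unit test for ggml_cont: copy a transposed (non-contiguous) view into a
+// contiguous tensor for F32, F16 and BF16, then verify the resulting values.
+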
+#include "ggml-backend.h"
+#include "ggml.h"
+
+#ifdef GGML_USE_CUDA
+#include "ggml-cuda.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct model {
+ struct ggml_context* ctx;
+ struct ggml_context* ctx0;
+ ggml_backend_t backend;
+ ggml_backend_buffer_t buffer;
+ struct ggml_cgraph* gf;
+ ggml_gallocr_t allocr;
+ uint8_t* buf;
+};
+
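+// No-alloc context sized to hold the metadata of the three input tensors.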
+struct ggml_context* make_ctx(void) {
+ struct ggml_init_params params = {
+ .mem_size = ggml_tensor_overhead() * 3,
+ .mem_buffer = NULL,
+ .no_alloc = true,
+ };
+ return ggml_init(params);
+}
+
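+// Use the CUDA backend when GGML_USE_CUDA is defined, otherwise fall back to the CPU backend.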
+ggml_backend_t make_backend(void) {
+ ggml_backend_t backend = NULL;
+
+#ifdef GGML_USE_CUDA
+ backend = ggml_backend_cuda_init(0);
+ GGML_ASSERT(backend != NULL);
+#endif
+
+ if (!backend) {
+ backend = ggml_backend_cpu_init();
+ }
+
+ return backend;
+}
+
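+// Set up the tensor context, the backend, and a no-alloc scratch context holding an empty graph.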
+void model_init(struct model* m) {
+ m->ctx = make_ctx();
+ m->backend = make_backend();
+
+ size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
+ m->buf = calloc(buf_size, sizeof(uint8_t));
+ struct ggml_init_params params0 = {
+ .mem_size = buf_size,
+ .mem_buffer = m->buf,
+ .no_alloc = true,
+ };
+ m->ctx0 = ggml_init(params0);
+ m->gf = ggml_new_graph(m->ctx0);
+}
+
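+// Allocate backend memory for the input tensors and create a graph allocator for the backend.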
+void model_alloc(struct model* m) {
+ m->buffer = ggml_backend_alloc_ctx_tensors(m->ctx, m->backend);
+ m->allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(m->backend));
+}
+
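+// Allocate the graph's tensors with the graph allocator and run the forward computation.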
+void model_compute(struct model* m) {
+ ggml_gallocr_alloc_graph(m->allocr, m->gf);
+ ggml_backend_graph_compute(m->backend, m->gf);
+}
+
+void model_free(struct model* m) {
+ ggml_free(m->ctx0);
+ free(m->buf);
+ ggml_gallocr_free(m->allocr);
+ ggml_free(m->ctx);
+ ggml_backend_buffer_free(m->buffer);
+ ggml_backend_free(m->backend);
+}
+
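+// Check the shape and contents of a tensor against expected float values,
+// converting F16/BF16 elements to F32 before comparing.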
+void check_tensor(struct ggml_tensor* t,
+ const float* expected_t_d,
+ const int ne0,
+ const int ne1,
+ const int ne2) {
+ GGML_ASSERT(t->ne[0] == ne0);
+ GGML_ASSERT(t->ne[1] == ne1);
+ GGML_ASSERT(t->ne[2] == ne2);
+ const size_t bsize = ggml_nbytes(t);
+ if (t->type == GGML_TYPE_F32) {
+ float* buffer = malloc(bsize);
+ ggml_backend_tensor_get(t, buffer, 0, bsize);
+ for (size_t i = 0; i < bsize / sizeof(float); ++i) {
+ float expected = expected_t_d[i];
+ float actual = buffer[i];
+ if (expected != actual) {
+ printf("expected %.1f, got %.1f\n", expected, actual);
+ }
+ GGML_ASSERT(expected == actual);
+ }
+ free(buffer);
+ } else if (t->type == GGML_TYPE_F16) {
+ ggml_fp16_t* buffer = malloc(bsize);
+ ggml_backend_tensor_get(t, buffer, 0, bsize);
+ for (size_t i = 0; i < bsize / sizeof(ggml_fp16_t); ++i) {
+ float expected = expected_t_d[i];
+ float actual = ggml_fp16_to_fp32(buffer[i]);
+ if (expected != actual) {
+ printf("expected %.1f, got %.1f\n", expected, actual);
+ }
+ GGML_ASSERT(expected == actual);
+ }
+ free(buffer);
+ } else if (t->type == GGML_TYPE_BF16) {
+ ggml_bf16_t* buffer = malloc(bsize);
+ ggml_backend_tensor_get(t, buffer, 0, bsize);
+ for (size_t i = 0; i < bsize / sizeof(ggml_bf16_t); ++i) {
+ float expected = expected_t_d[i];
+ float actual = ggml_bf16_to_fp32(buffer[i]);
+ if (expected != actual) {
+ printf("expected %.1f, got %.1f\n", expected, actual);
+ }
+ GGML_ASSERT(expected == actual);
+ }
+ free(buffer);
+ } else {
+ GGML_ABORT("unknown type");
+ }
+}
+
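+// Transpose each 1-D input and make it contiguous again; the copied values must match the input.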
+void test_cont(void) {
+ float buf_f32[] = {1.0, 2.0};
+ ggml_fp16_t buf_f16[] = {ggml_fp32_to_fp16(buf_f32[0]), ggml_fp32_to_fp16(buf_f32[1])};
+ ggml_bf16_t buf_bf16[] = {ggml_fp32_to_bf16(buf_f32[0]), ggml_fp32_to_bf16(buf_f32[1])};
+
+ float expected_out[] = {1.0, 2.0};
+
+ struct model m;
+ model_init(&m);
+
+ struct ggml_tensor* in_1 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_F32, 2);
+ struct ggml_tensor* in_2 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_F16, 2);
+ struct ggml_tensor* in_3 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_BF16, 2);
+
+ model_alloc(&m);
+
+ ggml_backend_tensor_set(in_1, buf_f32, 0, ggml_nbytes(in_1));
+ ggml_backend_tensor_set(in_2, buf_f16, 0, ggml_nbytes(in_2));
+ ggml_backend_tensor_set(in_3, buf_bf16, 0, ggml_nbytes(in_3));
+
+ struct ggml_tensor* out_1 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_1));
+ struct ggml_tensor* out_2 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_2));
+ struct ggml_tensor* out_3 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_3));
+
+ ggml_build_forward_expand(m.gf, out_1);
+ ggml_build_forward_expand(m.gf, out_2);
+ ggml_build_forward_expand(m.gf, out_3);
+
+ model_compute(&m);
+
+ check_tensor(out_1, expected_out, 1, 2, 1);
+ check_tensor(out_2, expected_out, 1, 2, 1);
+ check_tensor(out_3, expected_out, 1, 2, 1);
+
+ model_free(&m);
+}
+
+int main(void) {
+ test_cont();
+ return 0;
+}