From: Salvatore Mesoraca
Date: Wed, 28 Aug 2024 08:23:02 +0000 (+0200)
Subject: ggml : fix cont with transposed tensors when one dimension is 1 (#934)
X-Git-Tag: upstream/0.0.1642~415
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=c78a3d13cd9ac19e52068d8fc95b0a152be14f78;p=pkg%2Fggml%2Fsources%2Fggml

ggml : fix cont with transposed tensors when one dimension is 1 (#934)

* ggml_cont: fix issue with transposed tensors when one dimension is 1

When using multiple threads, it is not enough for the tensors to be
contiguous for ggml_compute_forward_dup_same_cont to work correctly:
the tensors' strides also need to match.

Signed-off-by: Salvatore Mesoraca

* Add ggml_cont tests

Signed-off-by: Salvatore Mesoraca

* Remove dead code

It is not possible to reach this code, because all these functions are
invoked by ggml_compute_forward_dup if and only if src0->type != dst->type.

Signed-off-by: Salvatore Mesoraca

* Make ggml_compute_forward_dup_same_cont work with contiguous tensors

Co-authored-by: Georgi Gerganov
Signed-off-by: Salvatore Mesoraca

---------

Signed-off-by: Salvatore Mesoraca
Co-authored-by: Georgi Gerganov
---

diff --git a/src/ggml.c b/src/ggml.c
index 9c105fd3..3b059ab6 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -8120,8 +8120,7 @@ static void ggml_compute_forward_dup_same_cont(
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
     GGML_ASSERT(src0->type == dst->type);

-    const size_t nb00 = src0->nb[0];
-    const size_t nb0 = dst->nb[0];
+    const size_t nb0 = ggml_type_size(src0->type);

     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads
@@ -8135,8 +8134,8 @@ static void ggml_compute_forward_dup_same_cont(
     if (ie0 < ie1) {
         memcpy(
             ((char *) dst->data + ie0*nb0),
-            ((char *) src0->data + ie0*nb00),
-            (ie1 - ie0) * ggml_type_size(src0->type));
+            ((char *) src0->data + ie0*nb0),
+            (ie1 - ie0) * nb0);
     }
 }

@@ -8153,11 +8152,6 @@ static void ggml_compute_forward_dup_f16(
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
     // parallelize by rows
     const int nr = ne01;
     // number of rows per thread
@@ -8422,11 +8416,6 @@ static void ggml_compute_forward_dup_bf16(
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
     // parallelize by rows
     const int nr = ne01;
     // number of rows per thread
@@ -8778,11 +8767,6 @@ static void ggml_compute_forward_dup_f32(
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
-        ggml_compute_forward_dup_same_cont(params, dst);
-        return;
-    }
-
     // parallelize by rows
     const int nr = ne01;
     // number of rows per thread
@@ -9092,13 +9076,13 @@ static void ggml_compute_forward_dup_bytes(
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(src0->type == dst->type);

+    GGML_TENSOR_UNARY_OP_LOCALS;
+
     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
         ggml_compute_forward_dup_same_cont(params, dst);
         return;
     }

-    GGML_TENSOR_UNARY_OP_LOCALS;
-
     const size_t type_size = ggml_type_size(src0->type);
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads
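Note (illustrative, not part of the patch): the first two hunks matter because a transposed tensor with a dimension of extent 1 can still report ggml_is_contiguous() == true at this revision, while its nb[0] no longer equals the element size. The per-thread offsets in ggml_compute_forward_dup_same_cont therefore have to be derived from ggml_type_size() rather than from nb[0]/nb00. A minimal, self-contained sketch against the public ggml API, with the tensor shape chosen to match the new test:

/* sketch: inspect the metadata of a transposed tensor with an extent-1 dim */
#include "ggml.h"

#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /* room for a couple of tensor headers; no tensor data is allocated */
        .mem_size   = ggml_tensor_overhead() * 4,
        .mem_buffer = NULL,
        .no_alloc   = true,
    };
    struct ggml_context * ctx = ggml_init(params);

    // 1-D tensor of 2 floats: ne = [2,1,1,1], nb = [4,8,8,8]
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    // transposed view: ne = [1,2,1,1], nb = [8,4,8,8]
    struct ggml_tensor * t = ggml_transpose(ctx, a);

    // The two elements are still laid out back to back (the extent-1 dim does
    // not affect the layout), so the tensor is reported as contiguous ...
    printf("contiguous: %d\n", ggml_is_contiguous(t));
    // ... yet nb[0] (8) differs from the element size (4), so a per-thread
    // byte offset computed as ie0*nb[0], as the old code did for src0, lands
    // on the wrong element for every thread with ie0 > 0.
    printf("nb[0] = %zu, type size = %zu\n", t->nb[0], ggml_type_size(t->type));

    ggml_free(ctx);
    return 0;
}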
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2cba6219..dfa64920 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -437,3 +437,13 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
 set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
+
+#
+# test-cont
+
+set(TEST_TARGET test-cont)
+add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index c832bc95..31884896 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2320,6 +2320,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     }

     test_cases.emplace_back(new test_cont());
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}));
+    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));

     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
         for (auto op : {ggml_add, ggml_mul, ggml_div}) {
diff --git a/tests/test-cont.c b/tests/test-cont.c
new file mode 100644
index 00000000..b91f45ba
--- /dev/null
+++ b/tests/test-cont.c
@@ -0,0 +1,168 @@
+#include "ggml-backend.h"
+#include "ggml.h"
+
+#ifdef GGML_USE_CUDA
+#include "ggml-cuda.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+struct model {
+    struct ggml_context* ctx;
+    struct ggml_context* ctx0;
+    ggml_backend_t backend;
+    ggml_backend_buffer_t buffer;
+    struct ggml_cgraph* gf;
+    ggml_gallocr_t allocr;
+    uint8_t* buf;
+};
+
+struct ggml_context* make_ctx(void) {
+    struct ggml_init_params params = {
+        .mem_size = ggml_tensor_overhead() * 3,
+        .mem_buffer = NULL,
+        .no_alloc = true,
+    };
+    return ggml_init(params);
+}
+
+ggml_backend_t make_backend(void) {
+    ggml_backend_t backend = NULL;
+
+#ifdef GGML_USE_CUDA
+    backend = ggml_backend_cuda_init(0);
+    GGML_ASSERT(backend != NULL);
+#endif
+
+    if (!backend)
+        backend = ggml_backend_cpu_init();
+
+    return backend;
+}
+
+void model_init(struct model* m) {
+    m->ctx = make_ctx();
+    m->backend = make_backend();
+
+    size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
+    m->buf = calloc(buf_size, sizeof(uint8_t));
+    struct ggml_init_params params0 = {
+        .mem_size = buf_size,
+        .mem_buffer = m->buf,
+        .no_alloc = true,
+    };
+    m->ctx0 = ggml_init(params0);
+    m->gf = ggml_new_graph(m->ctx0);
+}
+
+void model_alloc(struct model* m) {
+    m->buffer = ggml_backend_alloc_ctx_tensors(m->ctx, m->backend);
+    m->allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(m->backend));
+}
+
+void model_compute(struct model* m) {
+    ggml_gallocr_alloc_graph(m->allocr, m->gf);
+    ggml_backend_graph_compute(m->backend, m->gf);
+}
+
+void model_free(struct model* m) {
+    ggml_free(m->ctx0);
+    free(m->buf);
+    ggml_gallocr_free(m->allocr);
+    ggml_free(m->ctx);
+    ggml_backend_buffer_free(m->buffer);
+    ggml_backend_free(m->backend);
+}
+
+void check_tensor(struct ggml_tensor* t,
+                  const float* expected_t_d,
+                  const int ne0,
+                  const int ne1,
+                  const int ne2) {
+    GGML_ASSERT(t->ne[0] == ne0);
+    GGML_ASSERT(t->ne[1] == ne1);
+    GGML_ASSERT(t->ne[2] == ne2);
+    const size_t bsize = ggml_nbytes(t);
+    if (t->type == GGML_TYPE_F32) {
+        float* buffer = malloc(bsize);
+        ggml_backend_tensor_get(t, buffer, 0, bsize);
+        for (int i = 0; i < bsize / sizeof(float); ++i) {
+            float expected = expected_t_d[i];
+            float actual = buffer[i];
+            if (expected != actual) {
+                printf("expected %.1f, got %.1f\n", expected, actual);
+            }
+            GGML_ASSERT(expected == actual);
+        }
+        free(buffer);
+    } else if (t->type == GGML_TYPE_F16) {
+        ggml_fp16_t* buffer = malloc(bsize);
+        ggml_backend_tensor_get(t, buffer, 0, bsize);
+        for (int i = 0; i < bsize / sizeof(ggml_fp16_t); ++i) {
+            float expected = expected_t_d[i];
+            float actual = ggml_fp16_to_fp32(buffer[i]);
+            if (expected != actual) {
+                printf("expected %.1f, got %.1f\n", expected, actual);
+            }
+            GGML_ASSERT(expected == actual);
+        }
+        free(buffer);
+    } else if (t->type == GGML_TYPE_BF16) {
+        ggml_bf16_t* buffer = malloc(bsize);
+        ggml_backend_tensor_get(t, buffer, 0, bsize);
+        for (int i = 0; i < bsize / sizeof(ggml_bf16_t); ++i) {
+            float expected = expected_t_d[i];
+            float actual = ggml_bf16_to_fp32(buffer[i]);
+            if (expected != actual) {
+                printf("expected %.1f, got %.1f\n", expected, actual);
+            }
+            GGML_ASSERT(expected == actual);
+        }
+        free(buffer);
+    } else {
+        GGML_ABORT("unknown type");
+    }
+}
+
+void test_cont(void) {
+    float buf_f32[] = {1.0, 2.0};
+    ggml_fp16_t buf_f16[] = {ggml_fp32_to_fp16(buf_f32[0]), ggml_fp32_to_fp16(buf_f32[1])};
+    ggml_bf16_t buf_bf16[] = {ggml_fp32_to_bf16(buf_f32[0]), ggml_fp32_to_bf16(buf_f32[1])};
+
+    float expected_out[] = {1.0, 2.0};
+
+    struct model m;
+    model_init(&m);
+
+    struct ggml_tensor* in_1 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_F32, 2);
+    struct ggml_tensor* in_2 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_F16, 2);
+    struct ggml_tensor* in_3 = ggml_new_tensor_1d(m.ctx, GGML_TYPE_BF16, 2);
+
+    model_alloc(&m);
+
+    ggml_backend_tensor_set(in_1, buf_f32, 0, ggml_nbytes(in_1));
+    ggml_backend_tensor_set(in_2, buf_f16, 0, ggml_nbytes(in_2));
+    ggml_backend_tensor_set(in_3, buf_bf16, 0, ggml_nbytes(in_3));
+
+    struct ggml_tensor* out_1 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_1));
+    struct ggml_tensor* out_2 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_2));
+    struct ggml_tensor* out_3 = ggml_cont(m.ctx0, ggml_transpose(m.ctx0, in_3));
+
+    ggml_build_forward_expand(m.gf, out_1);
+    ggml_build_forward_expand(m.gf, out_2);
+    ggml_build_forward_expand(m.gf, out_3);
+
+    model_compute(&m);
+
+    check_tensor(out_1, expected_out, 1, 2, 1);
+    check_tensor(out_2, expected_out, 1, 2, 1);
+    check_tensor(out_3, expected_out, 1, 2, 1);
+
+    model_free(&m);
+}
+
+int main(int argc, const char* argv[]) {
+    test_cont();
+    return 0;
+}
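A closing note on why the problem only surfaced with more than one thread (again illustrative, not part of the patch): ggml_compute_forward_dup_same_cont splits the copy by elements across threads, and only a thread whose starting element ie0 is non-zero multiplies it by a stride. The sketch below mirrors that element split as I understand it (the exact code is not shown in the hunks above, so treat the dr/ie0/ie1 names as an assumption) for the 2-element tensors the new test creates:

/* sketch: per-thread element ranges for a 2-element copy */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
    const int ne  = 2; // elements in the transposed test tensor
    const int nth = 4; // pretend thread count

    for (int ith = 0; ith < nth; ++ith) {
        const int dr  = (ne + nth - 1) / nth;  // elements per thread
        const int ie0 = dr * ith;              // first element of this thread
        const int ie1 = MIN(ie0 + dr, ne);     // one past the last element

        // With a single thread ie0 is always 0, so the old ie0*nb00 offset was
        // harmless; with ie0 = 1 it pointed 8 bytes in instead of 4.
        if (ie0 < ie1) {
            printf("thread %d copies elements [%d, %d)\n", ith, ie0, ie1);
        }
    }
    return 0;
}

This is consistent with the new test-backend-ops cases covering both the minimal {2, 1, 1 ,1} shape and larger ones, which keep more threads busy.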