From: Georgi Gerganov Date: Sun, 14 May 2023 11:55:28 +0000 (+0300) Subject: tests : add tests from llama.cpp X-Git-Tag: upstream/0.0.1642~1474 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=95dda5ba8b6f3740c01c11057715c9013ef6f77b;p=pkg%2Fggml%2Fsources%2Fggml tests : add tests from llama.cpp --- diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4e7efccd..2dfad2ce 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -162,6 +162,14 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.c) target_link_libraries(${TEST_TARGET} PRIVATE ggml) add_test(NAME ${TEST_TARGET} COMMAND $) +# +# test-opt + +set(TEST_TARGET test-opt) +add_executable(${TEST_TARGET} ${TEST_TARGET}.c) +target_link_libraries(${TEST_TARGET} PRIVATE ggml) +add_test(NAME ${TEST_TARGET} COMMAND $) + # # test-mul-mat0 diff --git a/tests/test-grad0.c b/tests/test-grad0.c index e614d227..ec505922 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -1,4 +1,4 @@ -#include "ggml/ggml.h" +#include "ggml.h" #include #include @@ -7,12 +7,44 @@ #define MAX_NARGS 2 -float frand() { +#undef MIN +#undef MAX +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define GGML_SILU_FP16 + +// +// logging +// + +#if (GGML_DEBUG >= 1) +#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG(...) +#endif + +#if (GGML_DEBUG >= 5) +#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_5(...) +#endif + +#if (GGML_DEBUG >= 10) +#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_10(...) +#endif + +#define GGML_PRINT(...) printf(__VA_ARGS__) + +float frand(void) { return (float)rand()/(float)RAND_MAX; } int irand(int n) { - return rand()%n; + if (n == 0) return 0; + else return rand()%n; } void get_random_dims(int64_t * dims, int ndims) { @@ -71,14 +103,89 @@ struct ggml_tensor * get_random_tensor( return result; } +struct ggml_tensor * get_random_tensor_int( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + int32_t imin, + int32_t imax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i0] = irand(imax - imin) + imin; + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + float get_element(const struct ggml_tensor * t, int idx) { - return ((float *)t->data)[idx]; + if (t->type == GGML_TYPE_F32) { + return ((float *)t->data)[idx]; + } else if (t->type == GGML_TYPE_I32) { + return ((int32_t *)t->data)[idx]; + } else { + assert(false); + return INFINITY; + } } void set_element(struct ggml_tensor * t, int idx, float value) { ((float *)t->data)[idx] = value; } +void print_elements(const char* label, const struct ggml_tensor * t) { + if (!t) 
{ + printf("%s: %s = null\n", __func__, label); + return; + } + const int nelements = ggml_nelements(t); + printf("%s: %s = [", __func__, label); + for (int k = 0; k < nelements; ++k) { + if (k > 0) { printf(", "); } + printf("%.5f", get_element(t, k)); + } + printf("] shape: ["); + for (int k = 0; k < t->n_dims; ++k) { + if (k > 0) { printf(", "); } + printf("%d", (int)t->ne[k]); + } + printf("]\n"); + +} + bool check_gradient( const char * op_name, struct ggml_context * ctx0, @@ -98,21 +205,22 @@ bool check_gradient( ggml_set_f32 (f->grad, 1.0f); ggml_graph_compute(ctx0, &gb); - ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot"); - ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot"); + // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot"); + // ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot"); for (int i = 0; i < nargs; ++i) { const int nelements = ggml_nelements(x[i]); for (int k = 0; k < nelements; ++k) { // compute gradient using finite differences const float x0 = get_element(x[i], k); - - set_element(x[i], k, x0 + eps); + const float xm = x0 - eps; + const float xp = x0 + eps; + set_element(x[i], k, xp); ggml_graph_compute(ctx0, &gf); const float f0 = ggml_get_f32_1d(f, 0); - set_element(x[i], k, x0 - eps); + set_element(x[i], k, xm); ggml_graph_compute(ctx0, &gf); const float f1 = ggml_get_f32_1d(f, 0); @@ -132,9 +240,10 @@ bool check_gradient( const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabs(g0) : 0; if (error_abs > max_error_abs || error_rel > max_error_rel) { - printf("%s: ndims=%d, i=%d, k=%d, g0=%f, g1=%f, error_abs=%f, error_rel=%f\n", - op_name, ndims, i, k, g0, g1, error_abs, error_rel); - assert(false); + printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n", + op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel); + //assert(false); + return false; } } } @@ -155,32 +264,32 @@ bool check_mat_mul( const int nr = x1->ne[1]; const int nk = x0->ne[0]; - printf("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk); + GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk); - printf("x0:\n"); + GGML_PRINT_DEBUG("x0:\n"); for (int j = 0; j < x0->ne[1]; ++j) { for (int i = 0; i < x0->ne[0]; ++i) { - printf("%6.3f ", src0[j*nk + i]); + GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]); } - printf("\n"); + GGML_PRINT_DEBUG("\n"); } - printf("\n"); + GGML_PRINT_DEBUG("\n"); - printf("x1:\n"); + GGML_PRINT_DEBUG("x1:\n"); for (int j = 0; j < x1->ne[1]; ++j) { for (int i = 0; i < x1->ne[0]; ++i) { - printf("%6.3f ", src1[j*nk + i]); + GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]); } - printf("\n"); + GGML_PRINT_DEBUG("\n"); } - printf("\n"); + GGML_PRINT_DEBUG("\n"); - printf("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]); + GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]); for (int j = 0; j < y->ne[1]; ++j) { for (int i = 0; i < y->ne[0]; ++i) { - printf("%6.3f ", dst[j*nr + i]); + GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]); } - printf("\n"); + GGML_PRINT_DEBUG("\n"); } for (int i = 0; i < nr; ++i) { @@ -192,7 +301,7 @@ bool check_mat_mul( } if (fabsf(dst[i*nc + j] - sum) > 1e-5f) { - printf("check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum); + fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum); assert(false); return false; } @@ -202,6 +311,8 @@ bool check_mat_mul( return true; } +#define NUM_PERMUTATIONS (4*3*2*1) + int main(int argc, const char ** argv) { 
struct ggml_init_params params = { .mem_size = 128*1024*1024, @@ -211,8 +322,34 @@ int main(int argc, const char ** argv) { int64_t ne[4]; + int all_permutations[4 * NUM_PERMUTATIONS]; + { + int count = 0; + for (int ax0=0; ax0<4; ++ax0) { + for (int ax1=0; ax1<4; ++ax1) { + if (ax1 == ax0) continue; + for (int ax2=0; ax2<4; ++ax2) { + if (ax2 == ax0) continue; + if (ax2 == ax1) continue; + for (int ax3=0; ax3<4; ++ax3) { + if (ax3 == ax0) continue; + if (ax3 == ax1) continue; + if (ax3 == ax2) continue; + assert(count < NUM_PERMUTATIONS); + all_permutations[count*4+0] = ax0; + all_permutations[count*4+1] = ax1; + all_permutations[count*4+2] = ax2; + all_permutations[count*4+3] = ax3; + ++count; + } + } + } + } + } + + // original loop: 1000 - int niter = 1000; + int niter = 4; const char *env = getenv("GGML_NLOOP"); if (env != NULL) { niter = atoi(env); @@ -232,7 +369,7 @@ int main(int argc, const char ** argv) { { const int nargs = 2; - for (int ndims = 1; ndims <= 2; ++ndims) { + for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); @@ -240,7 +377,7 @@ int main(int argc, const char ** argv) { struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); } } @@ -248,7 +385,7 @@ int main(int argc, const char ** argv) { { const int nargs = 2; - for (int ndims = 1; ndims <= 2; ++ndims) { + for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); @@ -264,7 +401,7 @@ int main(int argc, const char ** argv) { { const int nargs = 2; - for (int ndims = 1; ndims <= 2; ++ndims) { + for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); @@ -280,7 +417,7 @@ int main(int argc, const char ** argv) { { const int nargs = 2; - for (int ndims = 1; ndims <= 2; ++ndims) { + for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f); ggml_set_param(ctx0, x[i]); @@ -288,7 +425,7 @@ int main(int argc, const char ** argv) { struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1])); - check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-2f); + check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f); } } @@ -324,6 +461,22 @@ int main(int argc, const char ** argv) { } } + // log + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0])); + + check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); + } + } + // sum { const int nargs = 1; @@ -340,6 +493,46 @@ int main(int argc, const char ** argv) { } } + + // sum_rows + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0]))); + + check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } + } + + // repeat + { + int64_t 
ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); + + check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } + + } + // abs (finite differences do not work) //{ // const int nargs = 1; @@ -358,7 +551,7 @@ int main(int argc, const char ** argv) { // mul_mat { - const int nargs = 1; + const int nargs = 2; for (int ndims = 2; ndims <= 2; ++ndims) { x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); @@ -370,17 +563,567 @@ int main(int argc, const char ** argv) { } ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]); struct ggml_tensor * f = ggml_sum(ctx0, m); - printf("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims); + GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims); check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); check_mat_mul(m, x[1], x[0]); } } + // silu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0])); + +#ifdef GGML_SILU_FP16 + // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds. 
+ check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY); +#else + check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); +#endif + } + } + + // rms_norm + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0])); + + check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY); + } + } + + // scale + { + const int nargs = 2; + + int64_t ne2[4]; + ne2[0] = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1])); + + check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // reshape (1d->nd) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + int64_t ne2[4]; + ne2[0] = 1; + ne2[1] = 1; + ne2[2] = 1; + ne2[3] = 1; + for (int i = 0; i < ndims; ++i) { + ne2[0] *= ne[i]; + } + x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); + check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // reshape (nd->1d) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + int64_t ne2[4]; + ne2[0] = 1; + ne2[1] = 1; + ne2[2] = 1; + ne2[3] = 1; + for (int i = 0; i < ndims; ++i) { + ne2[0] *= ne[i]; + } + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); + check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // acc 1d + { + int64_t ne2[4] = { 1, 1, 1, 1 }; + + const int nargs = 2; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 1); + while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 1); + } + + x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); + const int offset = irand(max_offset) * ggml_element_size(x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); + + check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // acc 2d + { + int64_t ne2[4] = { 1, 1, 1, 1 }; + int64_t max_offsets[4] = { 0, 0, 0, 0 }; + int64_t offsets[4] = { 0, 0, 0, 0 }; + + const int nargs = 2; + for (int ndims = 2; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, 
ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 2); + while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 2); + } + + x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); + max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); + offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; + offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; + const int offset = offsets[0] + offsets[1]; + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); + + check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // acc 3d + { + int64_t ne2[4] = { 1, 1, 1, 1 }; + int64_t max_offsets[4] = { 0, 0, 0, 0 }; + int64_t offsets[4] = { 0, 0, 0, 0 }; + + const int nargs = 2; + for (int ndims = 3; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 3); + while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 3); + } + + x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); + max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); + max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); + offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; + offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; + offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; + const int offset = offsets[0] + offsets[1] + offsets[2]; + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); + + check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // acc 4d + { + int64_t ne2[4] = { 1, 1, 1, 1 }; + int64_t max_offsets[4] = { 0, 0, 0, 0 }; + int64_t offsets[4] = { 0, 0, 0, 0 }; + + const int nargs = 2; + for (int ndims = 4; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 4); + while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 4); + } + + x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); + max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); + max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); + max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]); + offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; + offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; + offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; + offsets[3] = irand(max_offsets[3]) * x[0]->nb[3]; + const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3]; + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); + + check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // set_1d + { + int64_t ne2[4]; + + const int nargs = 2; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 1); + while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 1); + } + + x[1] = 
get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); + const int offset = irand(max_offset) * ggml_element_size(x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset)); + + check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // set_2d + { + int64_t ne2[4]; + int64_t max_offsets[4] = { 0, 0, 0, 0 }; + int64_t offsets[4] = { 0, 0, 0, 0 }; + + const int nargs = 1; + for (int ndims = 2; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 2); + while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 2); + } + + x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); + max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); + offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; + offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; + const int offset = offsets[0] + offsets[1]; + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset)); + + check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // view_1d + { + const int nargs = 1; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + + const int k0 = irand(ggml_nelements(x[0])); + const int k1 = irand(ggml_nelements(x[0])); + const int i0 = MIN(k0, k1); + const int i1 = MAX(k0, k1); + + const int offset = i0 * sizeof(float); + const int nelem = i1 - i0; + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset)); + + check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // view_2d + { + int64_t ne2[4]; + int64_t nb2[4]; + + const int nargs = 1; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + + get_random_dims(ne2, 2); + while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { + get_random_dims(ne2, 2); + } + const int count = ne2[0]*ne2[1]; + + nb2[0] = sizeof(float); + nb2[1] = nb2[0]*ne2[0]; + + ggml_set_param(ctx0, x[0]); + + const int max_offset = ggml_nelements(x[0]) - count; + const int offset = irand(max_offset+1) * sizeof(float); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset)); + + check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // view_3d + { + int64_t ne2[4] = {1,1,1,1}; + int64_t nb2[4] = {0,0,0,0}; + + const int nargs = 1; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + + get_random_dims(ne2, 3); + while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { + get_random_dims(ne2, 3); + } + const int count = ne2[0]*ne2[1]*ne2[2]; + + nb2[0] = sizeof(float); + nb2[1] = nb2[0]*ne2[0]; + nb2[2] = nb2[1]*ne2[1]; + + ggml_set_param(ctx0, x[0]); + + const int max_offset = ggml_nelements(x[0]) - count; + const int offset = irand(max_offset+1) * sizeof(float); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset)); + + check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // permute + { + int64_t ne2[4]; + + const int nargs = 1; + for (int ndims = 1; ndims <= 4; 
++ndims) + { + // ggml_permute will set axes of dimensions below n_dims to 1. + // to make ggml_permute work correctly on all axes, + // the input tensor needs maximal n_dim of 4. + for (int i=0; i +#include +#include +#include + +#define MAX_NARGS 2 + + +// +// logging +// +#define GGML_DEBUG 0 +#if (GGML_DEBUG >= 1) +#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG(...) +#endif + +#if (GGML_DEBUG >= 5) +#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_5(...) +#endif + +#if (GGML_DEBUG >= 10) +#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_10(...) +#endif + +#define GGML_PRINT(...) printf(__VA_ARGS__) + + +float frand() { + return (float)rand()/(float)RAND_MAX; +} + +int irand(int n) { + return rand()%n; +} + +void get_random_dims(int64_t * dims, int ndims) { + dims[0] = dims[1] = dims[2] = dims[3] = 1; + + for (int i = 0; i < ndims; i++) { + dims[i] = 1 + irand(4); + } +} + +void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) { + dims[0] = dims[1] = dims[2] = dims[3] = 1; + + for (int i = 0; i < ndims; i++) { + dims[i] = min + irand(max-min); + } +} + + +struct ggml_tensor * get_random_tensor( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + float fmin, + float fmax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin; + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +float get_element(const struct ggml_tensor * t, int idx) { + return ((float *)t->data)[idx]; +} + +void set_element(struct ggml_tensor * t, int idx, float value) { + ((float *)t->data)[idx] = value; +} + +int main(int argc, const char ** argv) { + struct ggml_init_params params = { + .mem_size = 1024*1024*1024, + .mem_buffer = NULL, + .no_alloc = false, + }; + struct ggml_context * ctx = ggml_init(params); + + int64_t ne1[4] = {4, 1024, 1, 1}; + int64_t ne2[4] = {4, 2048, 1, 1};; + int64_t ne3[4] = {1024, 2048, 1, 1}; + + struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); + struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); + ggml_set_param(ctx, a); + ggml_set_param(ctx, b); + + struct ggml_tensor * c = get_random_tensor(ctx, 2, ne3, -1, +1); + + struct ggml_tensor * ab = ggml_mul_mat(ctx, a, b); + struct ggml_tensor * d = ggml_sub(ctx, c, ab); + struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, d)); + + + struct ggml_cgraph ge = ggml_build_forward(e); + ggml_graph_reset (&ge); + ggml_graph_compute(ctx, &ge); + const float fe = ggml_get_f32_1d(e, 0); + printf("%s: e = %.4f\n", __func__, fe); + + struct ggml_opt_params opt_params = 
ggml_opt_default_params(GGML_OPT_ADAM); + + ggml_opt(ctx, opt_params, e); + + ggml_graph_reset (&ge); + ggml_graph_compute(ctx, &ge); + const float fe_opt = ggml_get_f32_1d(e, 0); + printf("%s: original e = %.4f\n", __func__, fe); + printf("%s: optimized e = %.4f\n", __func__, fe_opt); + + const bool success = (fe_opt <= fe); + assert(success); + + ggml_free(ctx); + return success ? 0 : -1; +} +// int64_t ne1[4] = {4, 128, 1, 1}; +// int64_t ne2[4] = {4, 256, 1, 1};; +// int64_t ne3[4] = {128, 256, 1, 1}; +// main: original e = 25890.9375 +// main: optimized e = 10094.7031 + +// int64_t ne1[4] = {8, 128, 1, 1}; +// int64_t ne2[4] = {8, 256, 1, 1};; +// int64_t ne3[4] = {128, 256, 1, 1}; +// main: original e = 39429.5078 +// main: optimized e = 9275.8936 + +// int64_t ne1[4] = {16, 128, 1, 1}; +// int64_t ne2[4] = {16, 256, 1, 1};; +// int64_t ne3[4] = {128, 256, 1, 1}; +// main: original e = 68371.1328 +// main: optimized e = 7854.4502 + + +// int64_t ne1[4] = {32, 128, 1, 1}; +// int64_t ne2[4] = {32, 256, 1, 1};; +// int64_t ne3[4] = {128, 256, 1, 1}; +// main: original e = 126061.1953 +// main: optimized e = 5451.0166 + +// int64_t ne1[4] = {4, 1024, 1, 1}; +// int64_t ne2[4] = {4, 2048, 1, 1};; +// int64_t ne3[4] = {1024, 2048, 1, 1}; +// main: original e = 1620817.8750 +// main: optimized e = 698387.6875 + +// another run on M1 +// int64_t ne1[4] = {4, 1024, 1, 1}; +// int64_t ne2[4] = {4, 2048, 1, 1};; +// int64_t ne3[4] = {1024, 2048, 1, 1}; +// main: original e = 1629595.6250 +// main: optimized e = 698169.1250 + +// int64_t ne1[4] = {32, 1024, 1, 1}; +// int64_t ne2[4] = {32, 2048, 1, 1};; +// int64_t ne3[4] = {1024, 2048, 1, 1}; +// main: original e = 8146770.5000 +// main: optimized e = 651119.1250
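
Note on the gradient check above: check_gradient() in test-grad0.c approximates each partial derivative with a central finite difference, g1 = (f(x0 + eps) - f(x0 - eps)) / (2*eps), and compares it against the gradient g0 produced by ggml's backward graph, using both an absolute and a relative error bound. A minimal standalone sketch of the same idea follows (plain C, no ggml dependency; the cubic f, the eps value and the error bounds are arbitrary example values, not taken from the patch):

#include <math.h>
#include <stdio.h>

// stand-ins for the forward graph and the backward graph of a single scalar parameter
static float f     (float x) { return x*x*x; }    // objective
static float f_grad(float x) { return 3.0f*x*x; } // analytic ("backprop") gradient

int main(void) {
    const float eps           = 1e-3f;
    const float max_error_abs = 1e-3f;
    const float max_error_rel = 1e-3f;

    for (float x = -1.0f; x <= 1.0f; x += 0.25f) {
        const float g0 = f_grad(x);                               // gradient under test
        const float g1 = (f(x + eps) - f(x - eps)) / (2.0f*eps);  // central finite difference

        const float error_abs = fabsf(g0 - g1);
        const float error_rel = g0 != 0.0f ? fabsf(g0 - g1)/fabsf(g0) : 0.0f;

        if (error_abs > max_error_abs || error_rel > max_error_rel) {
            printf("x=%f, g0=%f, g1=%f, error_abs=%f, error_rel=%f\n",
                   x, g0, g1, error_abs, error_rel);
            return 1;
        }
    }

    printf("gradient check passed\n");
    return 0;
}

The central difference has O(eps^2) truncation error, which is why an eps of 1e-3 still gives tight agreement for smooth ops; for ops evaluated in reduced precision (the GGML_SILU_FP16 path), the patch relaxes the error bounds instead of shrinking eps.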