From: Georgi Gerganov Date: Sat, 24 Jun 2023 16:03:09 +0000 (+0300) Subject: tests : use LBFGS optimizer instead of ADAM (close #276) X-Git-Tag: upstream/0.0.1642~1394 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=f9d21624f9b19be091da59dc76a6cb917847ae92;p=pkg%2Fggml%2Fsources%2Fggml tests : use LBFGS optimizer instead of ADAM (close #276) ADAM seems to behave differently since the recent training changes. Need to see how to make it work again for test2 - probably some parameters need to be adjusted --- diff --git a/tests/test0.c b/tests/test0.c index 2844da40..7fba63e7 100644 --- a/tests/test0.c +++ b/tests/test0.c @@ -2,7 +2,6 @@ #include #include -#include int main(int argc, const char ** argv) { struct ggml_init_params params = { @@ -17,23 +16,23 @@ int main(int argc, const char ** argv) { struct ggml_tensor * t2 = ggml_new_tensor_2d(ctx0, GGML_TYPE_I16, 10, 20); struct ggml_tensor * t3 = ggml_new_tensor_3d(ctx0, GGML_TYPE_I32, 10, 20, 30); - assert(t1->n_dims == 1); - assert(t1->ne[0] == 10); - assert(t1->nb[1] == 10*sizeof(float)); - - assert(t2->n_dims == 2); - assert(t2->ne[0] == 10); - assert(t2->ne[1] == 20); - assert(t2->nb[1] == 10*sizeof(int16_t)); - assert(t2->nb[2] == 10*20*sizeof(int16_t)); - - assert(t3->n_dims == 3); - assert(t3->ne[0] == 10); - assert(t3->ne[1] == 20); - assert(t3->ne[2] == 30); - assert(t3->nb[1] == 10*sizeof(int32_t)); - assert(t3->nb[2] == 10*20*sizeof(int32_t)); - assert(t3->nb[3] == 10*20*30*sizeof(int32_t)); + GGML_ASSERT(t1->n_dims == 1); + GGML_ASSERT(t1->ne[0] == 10); + GGML_ASSERT(t1->nb[1] == 10*sizeof(float)); + + GGML_ASSERT(t2->n_dims == 2); + GGML_ASSERT(t2->ne[0] == 10); + GGML_ASSERT(t2->ne[1] == 20); + GGML_ASSERT(t2->nb[1] == 10*sizeof(int16_t)); + GGML_ASSERT(t2->nb[2] == 10*20*sizeof(int16_t)); + + GGML_ASSERT(t3->n_dims == 3); + GGML_ASSERT(t3->ne[0] == 10); + GGML_ASSERT(t3->ne[1] == 20); + GGML_ASSERT(t3->ne[2] == 30); + GGML_ASSERT(t3->nb[1] == 10*sizeof(int32_t)); + GGML_ASSERT(t3->nb[2] == 10*20*sizeof(int32_t)); + GGML_ASSERT(t3->nb[3] == 10*20*30*sizeof(int32_t)); ggml_print_objects(ctx0); diff --git a/tests/test1.c b/tests/test1.c index a69e65a8..8c1a352e 100644 --- a/tests/test1.c +++ b/tests/test1.c @@ -2,7 +2,6 @@ #include #include -#include int main(int argc, const char ** argv) { struct ggml_init_params params = { @@ -41,8 +40,8 @@ int main(int argc, const char ** argv) { printf("f = %f\n", ggml_get_f32_1d(f, 0)); printf("df/dx = %f\n", ggml_get_f32_1d(x->grad, 0)); - assert(ggml_get_f32_1d(f, 0) == 12.0f); - assert(ggml_get_f32_1d(x->grad, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(f, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(x->grad, 0) == 12.0f); ggml_set_f32(x, 3.0f); @@ -54,8 +53,8 @@ int main(int argc, const char ** argv) { printf("f = %f\n", ggml_get_f32_1d(f, 0)); printf("df/dx = %f\n", ggml_get_f32_1d(x->grad, 0)); - assert(ggml_get_f32_1d(f, 0) == 27.0f); - assert(ggml_get_f32_1d(x->grad, 0) == 18.0f); + GGML_ASSERT(ggml_get_f32_1d(f, 0) == 27.0f); + GGML_ASSERT(ggml_get_f32_1d(x->grad, 0) == 18.0f); ggml_graph_dump_dot(&gf, NULL, "test1-1-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-1-backward.dot"); @@ -89,9 +88,9 @@ int main(int argc, const char ** argv) { printf("df/dx1 = %f\n", ggml_get_f32_1d(x1->grad, 0)); printf("df/dx2 = %f\n", ggml_get_f32_1d(x2->grad, 0)); - assert(ggml_get_f32_1d(y, 0) == 12.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 7.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 7.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 3.0f); struct ggml_tensor * g1 = x1->grad; struct ggml_tensor * g2 = x2->grad; @@ -106,8 +105,8 @@ int main(int argc, const char ** argv) { printf("H * [1, 1] = [ %f %f ]\n", ggml_get_f32_1d(x1->grad, 0), ggml_get_f32_1d(x2->grad, 0)); - assert(ggml_get_f32_1d(x1->grad, 0) == 3.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 1.0f); ggml_graph_dump_dot(&gf, NULL, "test1-2-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-2-backward.dot"); @@ -139,9 +138,9 @@ int main(int argc, const char ** argv) { printf("df/dx1 = %f\n", ggml_get_f32_1d(x1->grad, 0)); printf("df/dx2 = %f\n", ggml_get_f32_1d(x2->grad, 0)); - assert(ggml_get_f32_1d(y, 0) == 63.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 51.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 9.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 63.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 51.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 9.0f); ggml_graph_dump_dot(&gf, NULL, "test1-3-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-3-backward.dot"); @@ -177,10 +176,10 @@ int main(int argc, const char ** argv) { printf("df/dx2 = %f\n", ggml_get_f32_1d(x2->grad, 0)); printf("df/dx3 = %f\n", ggml_get_f32_1d(x3->grad, 0)); - assert(ggml_get_f32_1d(y, 0) == 12.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 24.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 12.0f); - assert(ggml_get_f32_1d(x3->grad, 0) == 4.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 24.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(x3->grad, 0) == 4.0f); struct ggml_tensor * g1 = x1->grad; struct ggml_tensor * g2 = x2->grad; @@ -200,9 +199,9 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 0), ggml_get_f32_1d(x3->grad, 0)); - assert(ggml_get_f32_1d(x1->grad, 0) == 56.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 34.0f); - assert(ggml_get_f32_1d(x3->grad, 0) == 12.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 56.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 34.0f); + GGML_ASSERT(ggml_get_f32_1d(x3->grad, 0) == 12.0f); ggml_graph_dump_dot(&gf, NULL, "test1-4-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-4-backward.dot"); @@ -240,13 +239,13 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 1), ggml_get_f32_1d(x2->grad, 2)); - assert(ggml_get_f32_1d(y, 0) == 45.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 5.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 3.0f); - assert(ggml_get_f32_1d(x1->grad, 1) == 5.0f); - assert(ggml_get_f32_1d(x2->grad, 1) == 3.0f); - assert(ggml_get_f32_1d(x1->grad, 2) == 5.0f); - assert(ggml_get_f32_1d(x2->grad, 2) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 45.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 5.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 1) == 5.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 1) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 2) == 5.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 2) == 3.0f); ggml_graph_dump_dot(&gf, NULL, "test1-5-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-5-backward.dot"); @@ -293,13 +292,13 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 1), ggml_get_f32_1d(x2->grad, 2)); - assert(ggml_get_f32_1d(y, 0) == -9.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == -7.0f); - assert(ggml_get_f32_1d(x1->grad, 1) == -7.0f); - assert(ggml_get_f32_1d(x1->grad, 2) == -7.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 3.0f); - assert(ggml_get_f32_1d(x2->grad, 1) == 3.0f); - assert(ggml_get_f32_1d(x2->grad, 2) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == -9.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == -7.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 1) == -7.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 2) == -7.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 1) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 2) == 3.0f); ggml_graph_dump_dot(&gf, NULL, "test1-6-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-6-backward.dot"); @@ -346,13 +345,13 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 1), ggml_get_f32_1d(x2->grad, 2)); - assert(ggml_get_f32_1d(y, 0) == 99.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 17.0f); - assert(ggml_get_f32_1d(x1->grad, 1) == 17.0f); - assert(ggml_get_f32_1d(x1->grad, 2) == 17.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 3.0f); - assert(ggml_get_f32_1d(x2->grad, 1) == 3.0f); - assert(ggml_get_f32_1d(x2->grad, 2) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 99.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 17.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 1) == 17.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 2) == 17.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 1) == 3.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 2) == 3.0f); ggml_graph_dump_dot(&gf, NULL, "test1-7-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-7-backward.dot"); @@ -393,13 +392,13 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 1), ggml_get_f32_1d(x2->grad, 2)); - assert(ggml_get_f32_1d(y, 0) == 2.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == -1.0f); - assert(ggml_get_f32_1d(x1->grad, 1) == -1.0f); - assert(ggml_get_f32_1d(x1->grad, 2) == -1.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == 1.0f); - assert(ggml_get_f32_1d(x2->grad, 1) == 1.0f); - assert(ggml_get_f32_1d(x2->grad, 2) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 2.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 1) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 2) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 1) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 2) == 1.0f); ggml_set_f32(x1, 7.0f); ggml_set_f32(x2, 5.0f); @@ -419,13 +418,13 @@ int main(int argc, const char ** argv) { ggml_get_f32_1d(x2->grad, 1), ggml_get_f32_1d(x2->grad, 2)); - assert(ggml_get_f32_1d(y, 0) == 2.0f); - assert(ggml_get_f32_1d(x1->grad, 0) == 1.0f); - assert(ggml_get_f32_1d(x1->grad, 1) == 1.0f); - assert(ggml_get_f32_1d(x1->grad, 2) == 1.0f); - assert(ggml_get_f32_1d(x2->grad, 0) == -1.0f); - assert(ggml_get_f32_1d(x2->grad, 1) == -1.0f); - assert(ggml_get_f32_1d(x2->grad, 2) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(y, 0) == 2.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 0) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 1) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x1->grad, 2) == 1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 0) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 1) == -1.0f); + GGML_ASSERT(ggml_get_f32_1d(x2->grad, 2) == -1.0f); ggml_graph_dump_dot(&gf, NULL, "test1-8-forward.dot"); ggml_graph_dump_dot(&gb, &gf, "test1-8-backward.dot"); diff --git a/tests/test2.c b/tests/test2.c index 4e03d98a..3b223412 100644 --- a/tests/test2.c +++ b/tests/test2.c @@ -3,7 +3,6 @@ #include #include #include -#include bool is_close(float a, float b, float epsilon) { return fabs(a - b) < epsilon; @@ -16,10 +15,10 @@ int main(int argc, const char ** argv) { .no_alloc = false, }; - //struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_LBFGS); + //struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); + //opt_params.adam.alpha = 0.01f; - struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); - opt_params.adam.alpha = 0.01f; + struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_LBFGS); // original threads: 8 int nthreads = 8; @@ -72,13 +71,13 @@ int main(int argc, const char ** argv) { enum ggml_opt_result res = ggml_opt(NULL, opt_params, f); - assert(res == GGML_OPT_OK); - printf("t0 = %f\n", ggml_get_f32_1d(t0, 0)); printf("t1 = %f\n", ggml_get_f32_1d(t1, 0)); - assert(is_close(ggml_get_f32_1d(t0, 0), 5.0f, 1e-3f)); - assert(is_close(ggml_get_f32_1d(t1, 0), 10.0f, 1e-3f)); + GGML_ASSERT(res == GGML_OPT_OK); + + GGML_ASSERT(is_close(ggml_get_f32_1d(t0, 0), 5.0f, 1e-3f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t1, 0), 10.0f, 1e-3f)); } { @@ -106,9 +105,9 @@ int main(int argc, const char ** argv) { enum ggml_opt_result res = ggml_opt(NULL, opt_params, f); - assert(res == GGML_OPT_OK); - assert(is_close(ggml_get_f32_1d(t0, 0), 5.0f, 1e-2f)); - assert(is_close(ggml_get_f32_1d(t1, 0), 10.0f, 1e-2f)); + GGML_ASSERT(res == GGML_OPT_OK); + GGML_ASSERT(is_close(ggml_get_f32_1d(t0, 0), 5.0f, 1e-2f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t1, 0), 10.0f, 1e-2f)); } { @@ -127,10 +126,10 @@ int main(int argc, const char ** argv) { enum ggml_opt_result res = ggml_opt(NULL, opt_params, f); - assert(res == GGML_OPT_OK); - assert(is_close(ggml_get_f32_1d(f, 0), 0.0f, 1e-3f)); - assert(is_close(ggml_get_f32_1d(t0, 0), 0.0f, 1e-3f)); - assert(is_close(ggml_get_f32_1d(t1, 0), 0.0f, 1e-3f)); + GGML_ASSERT(res == GGML_OPT_OK); + GGML_ASSERT(is_close(ggml_get_f32_1d(f, 0), 0.0f, 1e-3f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t0, 0), 0.0f, 1e-3f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t1, 0), 0.0f, 1e-3f)); } ///////////////////////////////////////// @@ -165,10 +164,10 @@ int main(int argc, const char ** argv) { enum ggml_opt_result res = ggml_opt(NULL, opt_params, f); - assert(res == GGML_OPT_OK); - assert(is_close(ggml_get_f32_1d(f, 0), 0.0f, 1e-3f)); - assert(is_close(ggml_get_f32_1d(t0, 0), 1.0f, 1e-3f)); - assert(is_close(ggml_get_f32_1d(t1, 0), 3.0f, 1e-3f)); + GGML_ASSERT(res == GGML_OPT_OK); + GGML_ASSERT(is_close(ggml_get_f32_1d(f, 0), 0.0f, 1e-3f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t0, 0), 1.0f, 1e-3f)); + GGML_ASSERT(is_close(ggml_get_f32_1d(t1, 0), 3.0f, 1e-3f)); } ggml_free(ctx0); diff --git a/tests/test3.c b/tests/test3.c index 9209e943..a5ccdb7f 100644 --- a/tests/test3.c +++ b/tests/test3.c @@ -3,7 +3,6 @@ #include #include #include -#include bool is_close(float a, float b, float epsilon) { return fabs(a - b) < epsilon; @@ -16,8 +15,8 @@ int main(int argc, const char ** argv) { .no_alloc = false, }; - struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_LBFGS); //struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); + struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_LBFGS); opt_params.n_threads = (argc > 1) ? atoi(argv[1]) : 8; @@ -69,7 +68,7 @@ int main(int argc, const char ** argv) { enum ggml_opt_result res = ggml_opt(NULL, opt_params, f); - assert(res == GGML_OPT_OK); + GGML_ASSERT(res == GGML_OPT_OK); // print results for (int i = 0; i < 16; i++) { @@ -83,9 +82,9 @@ int main(int argc, const char ** argv) { for (int i = 0; i < NF; ++i) { if (i < NF/2) { - assert(is_close(((float *)x->data)[i], 1.0f, 1e-2f)); + GGML_ASSERT(is_close(((float *)x->data)[i], 1.0f, 1e-2f)); } else { - assert(is_close(((float *)x->data)[i], -1.0f, 1e-2f)); + GGML_ASSERT(is_close(((float *)x->data)[i], -1.0f, 1e-2f)); } } }