From: goerch
Date: Sun, 23 Jul 2023 16:35:43 +0000 (+0200)
Subject: ggml : add coverage measurement for Clang, increase test coverage, F16 ggml_sum ...
X-Git-Tag: upstream/0.0.1642~1296
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=7b55e124e3cecd79c310a5b6505f8c4ad3223435;p=pkg%2Fggml%2Fsources%2Fggml

ggml : add coverage measurement for Clang, increase test coverage, F16 ggml_sum (#377)

* First shot at adding clang/llvm coverage analysis
* Fix for compiler dependency
* Reducing dimensions in test-opt
* cmake : try to fix test coverage build + CI
* cmake : fix CMAKE option + CI
* Adding some tests for half precision floating point
* Adding missing tests for unary operations
* Some more tests for unary operations
* Fix syntax error
* Fix bug in relu derivative computation
* Revert testing change
* ggml : style fixes

---------

Co-authored-by: Georgi Gerganov
---

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8332deff..e635257c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,6 +22,12 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
+      - name: Dependencies for Ubuntu
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          sudo apt-get update
+          sudo apt-get install llvm
+
       - name: Set GGML_N_THREADS for Ubuntu
         run: echo "GGML_N_THREADS=2" >> $GITHUB_ENV
         if: matrix.os == 'ubuntu-latest'
@@ -35,7 +41,7 @@ jobs:
 
       - name: Configure CMake
         working-directory: ./build
-        run: cmake ..
+        run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
 
       - name: Build
         working-directory: ./build
@@ -44,3 +50,19 @@ jobs:
       - name: Test
         working-directory: ./build
         run: ctest --verbose --timeout 900
+
+      - name: Test Coverage for Ubuntu
+        if: matrix.os == 'ubuntu-latest'
+        working-directory: ./build
+        run: |
+          llvm-profdata merge -sparse tests/*.profraw -o ggml.profdata
+          llvm-cov report ./bin/test-grad0 -instr-profile=ggml.profdata
+          llvm-cov report ./bin/test-opt -instr-profile=ggml.profdata
+
+      - name: Test Coverage for MacOS
+        if: matrix.os == 'macos-latest'
+        working-directory: ./build
+        run: |
+          xcrun llvm-profdata merge -sparse tests/*.profraw -o ggml.profdata
+          xcrun llvm-cov report ./bin/test-grad0 -instr-profile=ggml.profdata
+          xcrun llvm-cov report ./bin/test-opt -instr-profile=ggml.profdata
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0b774a43..af078e8d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,8 @@ option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
 option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE})
 option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
 
+option(GGML_TEST_COVERAGE "ggml: enable test coverage" OFF)
+
 option(GGML_PERF          "ggml: enable perf timings" OFF)
 option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
 option(GGML_OPENBLAS      "ggml: use OpenBLAS" OFF)
@@ -67,6 +69,17 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
 endif ()
 
+if (GGML_BUILD_TESTS)
+    if (GGML_TEST_COVERAGE)
+        if (CMAKE_C_COMPILER_ID MATCHES "Clang")
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fprofile-instr-generate -fcoverage-mapping")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
+        else()
+            message(WARNING "Test coverage is only supported for Clang")
+        endif()
+    endif()
+endif()
+
 add_subdirectory(src)
 
 if (GGML_BUILD_TESTS)
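How the pieces above fit together: -fprofile-instr-generate -fcoverage-mapping tell Clang to emit source-based coverage instrumentation plus a coverage mapping into every object file. Each instrumented test writes a raw profile on exit to the path given by the LLVM_PROFILE_FILE environment variable (set per test in tests/CMakeLists.txt further below), llvm-profdata merge combines the raw profiles into a single indexed ggml.profdata, and llvm-cov report prints per-function and per-line coverage for a given binary against that profile. On macOS the tools are invoked through xcrun so the Xcode toolchain's llvm-profdata/llvm-cov are used; on Ubuntu the llvm package provides them.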
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9862a0d3..e3706079 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -118,7 +118,6 @@ else()
     endif()
 endif()
 
-
 # ggml
 
 set(TARGET ggml)
@@ -183,6 +182,7 @@ if (GGML_CLBLAST)
         message(WARNING "clBLAST not found")
     endif()
 endif()
+
 if (GGML_CUBLAS)
     cmake_minimum_required(VERSION 3.17)
 
diff --git a/src/ggml.c b/src/ggml.c
index 333e8845..90f32a57 100644
--- a/src/ggml.c
+++ b/src/ggml.c
@@ -3605,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #endif
 }
 
-inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) {
+inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
     ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
         sum += (ggml_float)x[i];
@@ -3613,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
     *s = sum;
 }
 
+inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
+    float sum = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        sum += GGML_FP16_TO_FP32(x[i]);
+    }
+    *s = sum;
+}
+
 inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
 #ifndef GGML_USE_ACCELERATE
     float max = -INFINITY;
@@ -9298,7 +9306,7 @@ static void ggml_compute_forward_sum_f32(
     for (int64_t i03 = 0; i03 < ne03; i03++) {
         for (int64_t i02 = 0; i02 < ne02; i02++) {
             for (int64_t i01 = 0; i01 < ne01; i01++) {
-                ggml_vec_sum_ggf(ne00,
+                ggml_vec_sum_f32_ggf(ne00,
                         &row_sum,
                         (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
                 sum += row_sum;
@@ -9308,6 +9316,38 @@ static void ggml_compute_forward_sum_f32(
     ((float *) dst->data)[0] = sum;
 }
 
+static void ggml_compute_forward_sum_f16(
+    const struct ggml_compute_params * params,
+    const struct ggml_tensor * src0,
+          struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(ggml_is_scalar(dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    assert(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb);
+
+    float sum = 0;
+    float row_sum = 0;
+
+    for (int64_t i03 = 0; i03 < ne03; i03++) {
+        for (int64_t i02 = 0; i02 < ne02; i02++) {
+            for (int64_t i01 = 0; i01 < ne01; i01++) {
+                ggml_vec_sum_f16_ggf(ne00,
+                    &row_sum,
+                    (ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
+                sum += row_sum;
+            }
+        }
+    }
+
+    ((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
+}
+
 static void ggml_compute_forward_sum(
     const struct ggml_compute_params * params,
     const struct ggml_tensor * src0,
@@ -9317,6 +9357,10 @@ static void ggml_compute_forward_sum(
             {
                 ggml_compute_forward_sum_f32(params, src0, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sum_f16(params, src0, dst);
+            } break;
         default:
             {
                 GGML_ASSERT(false);
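Note that ggml_vec_sum_f16_ggf widens every ggml_fp16_t to float before accumulating, and only the final store narrows back to half. Accumulating directly in half precision would lose much of the sum: once the running total is large enough, each small addend falls below half the spacing between representable halves and rounds away entirely. A standalone sketch of the effect, independent of ggml, assuming a compiler and target with the _Float16 extension (e.g. recent Clang on x86-64 or AArch64):

    #include <stdio.h>

    int main(void) {
        _Float16 sum_h = (_Float16)0.0f; // half-precision accumulator
        float    sum_f = 0.0f;           // single-precision accumulator
        for (int i = 0; i < 4096; ++i) {
            sum_h = sum_h + (_Float16)0.01f;
            sum_f = sum_f + 0.01f;
        }
        // The half accumulator stalls near 32: above 32 the spacing between
        // representable halves (0.03125) is more than twice the addend, so
        // every further addition rounds back to the previous value. The
        // float accumulator reaches the expected ~40.96.
        printf("f16 accumulator: %f\n", (double)(float)sum_h);
        printf("f32 accumulator: %f\n", (double)sum_f);
        return 0;
    }

The f32 path goes further still and accumulates in ggml_float (typically double), which is why it keeps the separate *_f32_ggf helper.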
"ppc64") message(STATUS "PPC64 detected") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mpower9-vector") else() message(STATUS "x86 detected") - #set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx -mavx2 -mfma -mf16c") + #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c") if (UNAME_S MATCHES "Darwin") execute_process(COMMAND sysctl machdep.cpu.features OUTPUT_VARIABLE AVX1_M) if (AVX1_M MATCHES "AVX1.0") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") endif() execute_process(COMMAND sysctl machdep.cpu.leaf7_features OUTPUT_VARIABLE AVX2_M) if (AVX2_M MATCHES "AVX2") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") endif() if (AVX1_M MATCHES "FMA") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") endif() - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c") elseif (UNAME_S MATCHES "Linux") message(STATUS "Linux detected") execute_process(COMMAND grep "avx " /proc/cpuinfo OUTPUT_VARIABLE AVX1_M) if (AVX1_M MATCHES "avx") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") endif() execute_process(COMMAND grep "avx2 " /proc/cpuinfo OUTPUT_VARIABLE AVX2_M) if (AVX2_M MATCHES "avx2") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") endif() execute_process(COMMAND grep "fma " /proc/cpuinfo OUTPUT_VARIABLE FMA_M) if (FMA_M MATCHES "fma") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") endif() execute_process(COMMAND grep "f16c " /proc/cpuinfo OUTPUT_VARIABLE F16C_M) if (F16C_M MATCHES "f16c") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c") endif() execute_process(COMMAND grep "sse3 " /proc/cpuinfo OUTPUT_VARIABLE SSE3_M) if (SSE3_M MATCHES "sse3") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -msse3") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") endif() elseif (UNAME_S MATCHES "Haiku") message(STATUS "Haiku detected") execute_process(COMMAND sysinfo -cpu COMMAND grep "AVX " OUTPUT_VARIABLE AVX1_M) if (AVX1_M MATCHES "avx") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") endif() execute_process(COMMAND sysinfo -cpu COMMAND grep "AVX2 " OUTPUT_VARIABLE AVX2_M) if (AVX2_M MATCHES "avx2") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") endif() execute_process(COMMAND sysinfo -cpu COMMAND grep "FMA " OUTPUT_VARIABLE FMA_M) if (FMA_M MATCHES "fma") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") endif() execute_process(COMMAND sysinfo -cpu COMMAND grep "F16C " OUTPUT_VARIABLE F16C_M) if (F16C_M MATCHES "f16c") - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c") endif() else() - set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma -mf16c -mavx -mavx2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -mf16c -mavx -mavx2") endif() endif() @@ -142,8 +142,6 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86") set(TEST_TARGET test-vec1) add_executable(${TEST_TARGET} ${TEST_TARGET}.c) target_link_libraries(${TEST_TARGET} PRIVATE ggml) - #set_target_properties(${TEST_TARGET} PROPERTIES COMPILE_FLAGS "-mavx -mavx2 -mfma -mf16c") - set_target_properties(${TEST_TARGET} PROPERTIES COMPILE_FLAGS ${GGML_C_FLAGS}) endif() # @@ -161,6 +159,7 @@ set(TEST_TARGET test-grad0) add_executable(${TEST_TARGET} ${TEST_TARGET}.c) target_link_libraries(${TEST_TARGET} PRIVATE 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 25cec2fa..af99e6a8 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -26,68 +26,68 @@ endif()
 
 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
     message(STATUS "ARM detected")
-    #set(GGML_C_FLAGS "${GGML_C_FLAGS} -mcpu=apple-m1")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=apple-m1")
 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
     message(STATUS "PPC64 detected")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mpower9-vector")
 else()
     message(STATUS "x86 detected")
-    #set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
     if (UNAME_S MATCHES "Darwin")
         execute_process(COMMAND sysctl machdep.cpu.features OUTPUT_VARIABLE AVX1_M)
         if (AVX1_M MATCHES "AVX1.0")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
         endif()
         execute_process(COMMAND sysctl machdep.cpu.leaf7_features OUTPUT_VARIABLE AVX2_M)
         if (AVX2_M MATCHES "AVX2")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
        endif()
         if (AVX1_M MATCHES "FMA")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
         endif()
-        set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
     elseif (UNAME_S MATCHES "Linux")
         message(STATUS "Linux detected")
         execute_process(COMMAND grep "avx " /proc/cpuinfo OUTPUT_VARIABLE AVX1_M)
         if (AVX1_M MATCHES "avx")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
         endif()
         execute_process(COMMAND grep "avx2 " /proc/cpuinfo OUTPUT_VARIABLE AVX2_M)
         if (AVX2_M MATCHES "avx2")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
         endif()
         execute_process(COMMAND grep "fma " /proc/cpuinfo OUTPUT_VARIABLE FMA_M)
         if (FMA_M MATCHES "fma")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
         endif()
         execute_process(COMMAND grep "f16c " /proc/cpuinfo OUTPUT_VARIABLE F16C_M)
         if (F16C_M MATCHES "f16c")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
         endif()
         execute_process(COMMAND grep "sse3 " /proc/cpuinfo OUTPUT_VARIABLE SSE3_M)
         if (SSE3_M MATCHES "sse3")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -msse3")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
         endif()
     elseif (UNAME_S MATCHES "Haiku")
         message(STATUS "Haiku detected")
         execute_process(COMMAND sysinfo -cpu COMMAND grep "AVX " OUTPUT_VARIABLE AVX1_M)
         if (AVX1_M MATCHES "avx")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
         endif()
         execute_process(COMMAND sysinfo -cpu COMMAND grep "AVX2 " OUTPUT_VARIABLE AVX2_M)
         if (AVX2_M MATCHES "avx2")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mavx2")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
         endif()
         execute_process(COMMAND sysinfo -cpu COMMAND grep "FMA " OUTPUT_VARIABLE FMA_M)
         if (FMA_M MATCHES "fma")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
         endif()
         execute_process(COMMAND sysinfo -cpu COMMAND grep "F16C " OUTPUT_VARIABLE F16C_M)
         if (F16C_M MATCHES "f16c")
-            set(GGML_C_FLAGS "${GGML_C_FLAGS} -mf16c")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
         endif()
     else()
-        set(GGML_C_FLAGS "${GGML_C_FLAGS} -mfma -mf16c -mavx -mavx2")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -mf16c -mavx -mavx2")
     endif()
 endif()
@@ -142,8 +142,6 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86")
     set(TEST_TARGET test-vec1)
     add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
     target_link_libraries(${TEST_TARGET} PRIVATE ggml)
-    #set_target_properties(${TEST_TARGET} PROPERTIES COMPILE_FLAGS "-mavx -mavx2 -mfma -mf16c")
-    set_target_properties(${TEST_TARGET} PROPERTIES COMPILE_FLAGS ${GGML_C_FLAGS})
 endif()
 
 #
@@ -161,6 +159,7 @@ set(TEST_TARGET test-grad0)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-opt
@@ -169,6 +168,7 @@ set(TEST_TARGET test-opt)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-quantize-fns
@@ -177,6 +177,7 @@ set(TEST_TARGET test-quantize-fns)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-quantize-perf
@@ -185,6 +186,7 @@ set(TEST_TARGET test-quantize-perf)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-mul-mat0
@@ -194,6 +196,7 @@ add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml ${GGML_EXTRA_LIBS})
 target_compile_options(${TEST_TARGET} PRIVATE ${GGML_EXTRA_FLAGS})
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-mul-mat1 (arm)
@@ -214,6 +217,7 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" AND NOT GGML_NO_ACCELERATE)
     target_link_libraries(${TEST_TARGET} PRIVATE ggml ${GGML_EXTRA_LIBS})
     target_compile_options(${TEST_TARGET} PRIVATE ${GGML_EXTRA_FLAGS})
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> 128 128 128)
+    set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 endif()
 
 #
@@ -223,6 +227,7 @@ set(TEST_TARGET test-mul-mat2)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test0
@@ -231,6 +236,7 @@ set(TEST_TARGET test0)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test1
@@ -242,6 +248,7 @@ if (MSVC)
     target_link_options(${TEST_TARGET} PRIVATE "/STACK: 8388608") # 8MB
 endif()
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test2
@@ -250,6 +257,7 @@ set(TEST_TARGET test2)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test3
@@ -258,6 +266,7 @@ set(TEST_TARGET test3)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-pool
@@ -266,6 +275,7 @@ set(TEST_TARGET test-pool)
 add_executable(${TEST_TARGET} ${TEST_TARGET}.c)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml)
 add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 
 #
 # test-svd0 (arm/x86)
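Every registered test now carries a per-test ENVIRONMENT property. Without it, all instrumented binaries would write to the default profile name (default.profraw) in the working directory and overwrite one another; naming each output after its target is what lets the CI steps above glob tests/*.profraw and merge everything into one indexed profile.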
"LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") # # test-svd0 (arm/x86) diff --git a/tests/test-grad0.c b/tests/test-grad0.c index 01467bc1..7e03b542 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) { } } -struct ggml_tensor * get_random_tensor( +struct ggml_tensor * get_random_tensor_f32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor( return result; } -struct ggml_tensor * get_random_tensor_int( +struct ggml_tensor * get_random_tensor_f16( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + float fmin, + float fmax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +struct ggml_tensor * get_random_tensor_i32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -161,20 +209,39 @@ struct ggml_tensor * get_random_tensor_int( } float get_element(const struct ggml_tensor * t, int idx) { - if (t->type == GGML_TYPE_F32) { - return ((float *)t->data)[idx]; - } - - if (t->type == GGML_TYPE_I32) { - return ((int32_t *)t->data)[idx]; + switch (t->type) { + case GGML_TYPE_F32: + return ((float *)t->data)[idx]; + case GGML_TYPE_I32: + return ((int32_t *)t->data)[idx]; + case GGML_TYPE_F16: + return ggml_fp16_to_fp32(((ggml_fp16_t *)t->data)[idx]); + case GGML_TYPE_I16: + return ((int16_t *)t->data)[idx]; + default: + assert(false); } - - assert(false); return INFINITY; } void set_element(struct ggml_tensor * t, int idx, float value) { - ((float *)t->data)[idx] = value; + switch (t->type) { + case GGML_TYPE_F32: + ((float *)t->data)[idx] = value; + break; + case GGML_TYPE_I32: + ((int32_t *)t->data)[idx] = value; + break; + case GGML_TYPE_F16: + ((ggml_fp16_t*)t->data)[idx] = ggml_fp32_to_fp16(value); + break; + case GGML_TYPE_I16: + ((int16_t *)t->data)[idx] = value; + break; + default: + assert(false); + } + ; } void print_elements(const char* label, const struct ggml_tensor * t) { @@ -392,19 +459,35 @@ int main(int argc, const char ** argv) { struct ggml_tensor * x[MAX_NARGS]; - // add + // add f32 { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + check_gradient("add f32", ctx0, x, 
@@ -161,20 +209,39 @@ struct ggml_tensor * get_random_tensor_int(
 }
 
 float get_element(const struct ggml_tensor * t, int idx) {
-    if (t->type == GGML_TYPE_F32) {
-        return ((float *)t->data)[idx];
-    }
-
-    if (t->type == GGML_TYPE_I32) {
-        return ((int32_t *)t->data)[idx];
+    switch (t->type) {
+        case GGML_TYPE_F32:
+            return ((float *)t->data)[idx];
+        case GGML_TYPE_I32:
+            return ((int32_t *)t->data)[idx];
+        case GGML_TYPE_F16:
+            return ggml_fp16_to_fp32(((ggml_fp16_t *)t->data)[idx]);
+        case GGML_TYPE_I16:
+            return ((int16_t *)t->data)[idx];
+        default:
+            assert(false);
     }
-
-    assert(false);
     return INFINITY;
 }
 
 void set_element(struct ggml_tensor * t, int idx, float value) {
-    ((float *)t->data)[idx] = value;
+    switch (t->type) {
+        case GGML_TYPE_F32:
+            ((float *)t->data)[idx] = value;
+            break;
+        case GGML_TYPE_I32:
+            ((int32_t *)t->data)[idx] = value;
+            break;
+        case GGML_TYPE_F16:
+            ((ggml_fp16_t*)t->data)[idx] = ggml_fp32_to_fp16(value);
+            break;
+        case GGML_TYPE_I16:
+            ((int16_t *)t->data)[idx] = value;
+            break;
+        default:
+            assert(false);
+    }
+    ;
 }
 
 void print_elements(const char* label, const struct ggml_tensor * t) {
@@ -392,19 +459,35 @@ int main(int argc, const char ** argv) {
 
     struct ggml_tensor * x[MAX_NARGS];
 
-    // add
+    // add f32
     {
         const int nargs = 2;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
 
-            check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
+            check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
+        }
+    }
+
+    // add f16
+    {
+        const int nargs = 2;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
+
+            check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
         }
     }
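The f16 variant of the add test passes much looser thresholds to check_gradient (1e-1/2e-1 instead of 1e-3/2e-3). The reason is precision: a finite-difference gradient estimate cannot be more accurate than the rounding of the values it differences, and half precision has a machine epsilon of 2^-10, about 9.8e-4, versus 2^-23, about 1.2e-7, for float. Conceptually the check compares a central finite difference against the backward pass; a simplified sketch with an illustrative signature (the real check_gradient in test-grad0.c perturbs tensor elements through set_element):

    #include <math.h>
    #include <stdbool.h>

    typedef float (*loss_fn)(float * x, int n);

    // Compare an analytic gradient against a central finite difference.
    static bool grad_ok(loss_fn f, float * x, const float * g_analytic,
                        int n, float eps, float max_abs, float max_rel) {
        for (int i = 0; i < n; ++i) {
            const float x0 = x[i];
            x[i] = x0 + eps; const float fp = f(x, n); // f(x + eps)
            x[i] = x0 - eps; const float fm = f(x, n); // f(x - eps)
            x[i] = x0;                                 // restore the input
            const float g_num = (fp - fm) / (2.0f*eps);
            const float err   = fabsf(g_num - g_analytic[i]);
            if (err > max_abs && err > max_rel*fabsf(g_analytic[i])) {
                return false;
            }
        }
        return true;
    }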
@@ -414,7 +497,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -430,7 +513,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -446,7 +529,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -462,7 +545,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -478,7 +561,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -494,7 +577,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -510,7 +593,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -527,7 +610,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -537,6 +620,40 @@ int main(int argc, const char ** argv) {
         }
     }
 
+    // mean, not yet fully implemented
+    if(0)
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
+
+            check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
+    // argmax
+    if (0)
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
+
+            check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
     // repeat
     {
         int64_t ne2[4];
@@ -549,15 +666,36 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
-            x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
 
             check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
         }
+    }
 
+    // repeat back
+    {
+        int64_t ne2[4];
+        get_random_dims(ne2, 4);
+
+        ne2[0] = ne[0] * ne2[0];
+        ne2[1] = ne[1] * ne2[1];
+        ne2[2] = 1;
+        ne2[3] = 1;
+
+        const int nargs = 1;
+        for (int ndims = 1; ndims <= 2; ++ndims) {
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+            ggml_set_param(ctx0, x[0]);
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
+
+            check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
+        }
     }
 
     // abs (finite differences do not work)
     //{
@@ -566,7 +704,7 @@ int main(int argc, const char ** argv) {
 
     //    for (int ndims = 1; ndims <= 2; ++ndims) {
    //        for (int i = 0; i < nargs; ++i) {
-    //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+    //            x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
     //            ggml_set_param(ctx0, x[i]);
     //        }
 
@@ -576,17 +714,82 @@ int main(int argc, const char ** argv) {
     //    }
     //}
 
+    // sgn
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
+
+            check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
+    // neg
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
+
+            check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
+    // step
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
+
+            check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
+    // tanh, not yet fully implemented
+    if(0)
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
+
+            check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
     // mul_mat
     {
         const int nargs = 2;
 
         for (int ndims = 2; ndims <= 2; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             {
                 int64_t ne2[4];
                 get_random_dims(ne2, 4);
                 ne2[0] = ne[0];
-                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
             }
 
             ggml_set_param(ctx0, x[0]);
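Two patterns recur in the new unary tests. First, sgn and step are piecewise constant, so at random inputs both the analytic gradient and the finite difference are zero almost surely and the check passes; the tests still earn coverage for the forward kernels and the backward graph construction. Second, blocks guarded by if(0) (mean, argmax, and tanh here, elu and gelu below) are compiled but never run: going by their "not yet fully implemented" comments, they are placeholders for ops whose backward passes do not exist yet, ready to be switched on later.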
@@ -602,13 +805,63 @@ int main(int argc, const char ** argv) {
         }
     }
 
+    // elu, not yet fully implemented
+    if(0)
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
+
+            check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
+    // relu
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
+
+            check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+        }
+    }
+
+    // gelu, not yet fully implemented
+    if(0)
+    {
+        const int nargs = 1;
+
+        for (int ndims = 1; ndims <= 4; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+
+            struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
+
+            check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+        }
+    }
+
     // silu
     {
         const int nargs = 1;
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -629,7 +882,7 @@ int main(int argc, const char ** argv) {
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
 
@@ -647,8 +900,8 @@ int main(int argc, const char ** argv) {
         ne2[0] = 1;
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
-            x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
 
             ggml_set_param(ctx0, x[0]);
             ggml_set_param(ctx0, x[1]);
@@ -659,20 +912,37 @@ int main(int argc, const char ** argv) {
         }
     }
 
-    // cpy
+    // cpy f32
     {
         const int nargs = 2;
 
         for (int ndims = 1; ndims <= 2; ++ndims) {
             for (int i = 0; i < nargs; ++i) {
-                x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[i]);
             }
             // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
 
-            check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+        }
+    }
+
+    // cpy f16
+    {
+        const int nargs = 2;
+
+        for (int ndims = 1; ndims <= 2; ++ndims) {
+            for (int i = 0; i < nargs; ++i) {
+                x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[i]);
+            }
+            // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
+
+            struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
+
+            check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
         }
     }
@@ -689,8 +959,8 @@ int main(int argc, const char ** argv) {
         for (int i = 0; i < ndims; ++i) {
             ne2[0] *= ne[i];
         }
-        x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
-        x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+        x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
 
         ggml_set_param(ctx0, x[0]);
 
@@ -712,8 +982,8 @@ int main(int argc, const char ** argv) {
         for (int i = 0; i < ndims; ++i) {
             ne2[0] *= ne[i];
         }
-        x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
-        x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+        x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
 
         ggml_set_param(ctx0, x[0]);
 
@@ -729,7 +999,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 2;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 1);
@@ -737,7 +1007,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 1);
             }
 
-            x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
@@ -758,7 +1028,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 2;
 
         for (int ndims = 2; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 2);
@@ -766,7 +1036,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 2);
             }
 
-            x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -790,7 +1060,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 2;
 
         for (int ndims = 3; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 3);
@@ -798,7 +1068,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 3);
             }
 
-            x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -824,7 +1094,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 2;
 
         for (int ndims = 4; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 4);
@@ -832,7 +1102,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 4);
             }
 
-            x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -858,7 +1128,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 2;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 1);
@@ -866,7 +1136,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 1);
             }
 
-            x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
@@ -887,7 +1157,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
 
         for (int ndims = 2; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             get_random_dims(ne2, 2);
@@ -895,7 +1165,7 @@ int main(int argc, const char ** argv) {
                 get_random_dims(ne2, 2);
             }
 
-            x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[1]);
 
             max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -915,7 +1185,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
 
             ggml_set_param(ctx0, x[0]);
 
@@ -941,7 +1211,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
 
             get_random_dims(ne2, 2);
             while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
@@ -971,7 +1241,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
 
         for (int ndims = 1; ndims <= 4; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
 
             get_random_dims(ne2, 3);
             while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
@@ -1010,7 +1280,7 @@ int main(int argc, const char ** argv) {
         for (int i=ndims; i<4; ++i) {
             ne2[i] = 1;
         }
-        x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
 
         ggml_set_param(ctx0, x[0]);
 
@@ -1043,7 +1313,7 @@ int main(int argc, const char ** argv) {
         for (int i=ndims; i<4; ++i) {
             ne2[i] = 1;
         }
-        x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
 
         ggml_set_param(ctx0, x[0]);
 
@@ -1060,8 +1330,8 @@ int main(int argc, const char ** argv) {
         int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
         const int nargs = 1;
         const int ndims = 2;
-        x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
-        x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);
+        x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+        x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
 
         ggml_set_param(ctx0, x[0]);
 
@@ -1075,7 +1345,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
         const int ndims = 2;
 
-        x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
         ggml_set_param(ctx0, x[0]);
 
         int n_past = irand(ne[0]);
@@ -1090,7 +1360,7 @@ int main(int argc, const char ** argv) {
         const int nargs = 1;
         const int ndims = 2;
 
-        x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+        x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
         ggml_set_param(ctx0, x[0]);
 
         int n_past = irand(ne[0]);
@@ -1108,7 +1378,7 @@ int main(int argc, const char ** argv) {
         get_random_dims(ne2, 4);
 
         for (int ndims = 1; ndims <= 3; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));
@@ -1125,8 +1395,8 @@ int main(int argc, const char ** argv) {
         get_random_dims(ne2, 4);
 
         for (int ndims = 1; ndims <= 3; ++ndims) {
-            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
-            x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f);
+            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+            x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
             ggml_set_param(ctx0, x[0]);
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));
@@ -1136,7 +1406,41 @@ int main(int argc, const char ** argv) {
         }
     }
 
-    // rope
+    // rope f32
+    {
+        const int nargs = 1;
+
+        int64_t ne2[4];
+        get_random_dims(ne2, 4);
+        ne2[0] += ne2[0] % 2;
+        int n_rot = ne2[0];
+
+        for (int ndims = 3; ndims <= 4; ++ndims) {
+            for (int mode = 0; mode < 4; ++mode) {
+                for (int n_past = 1; n_past < ne2[2]; ++n_past) {
+                    x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+
+                    ggml_set_param(ctx0, x[0]);
+
+                    const bool skip_past = (mode & 1);
+                    if (skip_past) {
+                        // we have no past, so this would have to work on uninitialized memory.
+                        // we only test the gradients here;
+                        // skip_past should have no influence on gradient computation.
+                        // so when other modes work, we assume that this does as well.
+                        continue;
+                    }
+
+                    struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
+
+                    GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
+                    check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
+                }
+            }
+        }
+    }
+
+    // rope f16
     {
         const int nargs = 1;
 
@@ -1148,7 +1452,7 @@ int main(int argc, const char ** argv) {
         for (int ndims = 3; ndims <= 4; ++ndims) {
             for (int mode = 0; mode < 4; ++mode) {
                 for (int n_past = 1; n_past < ne2[2]; ++n_past) {
-                    x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+                    x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
 
                     ggml_set_param(ctx0, x[0]);
 
@@ -1163,14 +1467,55 @@ int main(int argc, const char ** argv) {
 
                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
 
-                    GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                    check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
+                    GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
+                    check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
                 }
             }
         }
     }
 
-    // flash_attn
+    // flash_attn f32
+    {
+        const int nargs = 3;
+
+        int64_t ne2[4];
+
+        get_random_dims(ne2, 4);
+        int64_t D = ne2[0];
+        int64_t N = ne2[1];
+        int64_t M = ne2[2] + N;
+        int64_t B = ne2[3];
+
+        for (int masked = 0; masked <= 1; ++masked) {
+            for (int ndims = 2; ndims <= 4; ++ndims) {
+                int64_t neq[4] = { D, N, B, ne[3] };
+                int64_t nek[4] = { D, M, B, ne[3] };
+                int64_t nev[4] = { M, D, B, ne[3] };
+                if (ndims == 2) {
+                    neq[2] = 1; neq[3] = 1;
+                    nek[2] = 1; nek[3] = 1;
+                    nev[2] = 1; nev[3] = 1;
+                } else if (ndims == 3) {
+                    neq[3] = 1;
+                    nek[3] = 1;
+                    nev[3] = 1;
+                }
+                x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
+                x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
+                x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
+                ggml_set_param(ctx0, x[0]);
+                ggml_set_param(ctx0, x[1]);
+                ggml_set_param(ctx0, x[2]);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
+
+                check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
+            }
+        }
+    }
+
+    // flash_attn f16, not yet fully implemented
+    if(0)
     {
         const int nargs = 3;
 
@@ -1196,16 +1541,16 @@ int main(int argc, const char ** argv) {
                     nek[3] = 1;
                     nev[3] = 1;
                 }
-                x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
-                x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
-                x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
+                x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f);
+                x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f);
+                x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f);
                 ggml_set_param(ctx0, x[0]);
                 ggml_set_param(ctx0, x[1]);
                 ggml_set_param(ctx0, x[2]);
 
                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
 
-                check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
+                check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
             }
         }
     }
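The rope tests are split into f32 and f16 twins with correspondingly looser f16 tolerances, and both skip odd modes (mode & 1): as the in-test comment explains, those modes would read uninitialized "past" memory, and the mode should not influence gradient computation anyway. flash_attn gains a full f32 gradient test, while the f16 version is written out but disabled with if(0), consistent with its "not yet fully implemented" label.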
diff --git a/tests/test-opt.c b/tests/test-opt.c
index 5531814c..4eef62bc 100644
--- a/tests/test-opt.c
+++ b/tests/test-opt.c
@@ -125,9 +125,9 @@ int main(void) {
     };
     struct ggml_context * ctx = ggml_init(params);
 
-    int64_t ne1[4] = {4, 1024, 1, 1};
-    int64_t ne2[4] = {4, 2048, 1, 1};;
-    int64_t ne3[4] = {1024, 2048, 1, 1};
+    int64_t ne1[4] = {4, 128, 1, 1};
+    int64_t ne2[4] = {4, 256, 1, 1};;
+    int64_t ne3[4] = {128, 256, 1, 1};
 
     struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
     struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1);
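Shrinking test-opt from {4,1024}, {4,2048}, {1024,2048} to {4,128}, {4,256}, {128,256} reduces the dominant mul_mat work by a factor of 64 (1024*2048 versus 128*256), presumably to keep the instrumented coverage run well inside the 900-second ctest timeout used in the CI workflow above.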