From: Georgi Gerganov
Date: Sun, 16 Jul 2023 17:55:06 +0000 (+0300)
Subject: ci : integrate with ggml-org/ci (#393)
X-Git-Tag: upstream/0.0.1642~1311
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=9e3c2936a0b745c7d915ac961361ed2d1128e132;p=pkg%2Fggml%2Fsources%2Fggml

ci : integrate with ggml-org/ci (#393)

* ci : add initial scripts
* ci : remove setup.sh
* run : add deps
* run : fix
* ci : remove deps
* ci : ignore test-opt
* ci : fix ctest
* ci : induce error in ci-1
* ci : try to capture exit codes
* ci : try fix pipefail
* ci : output time of commands
* ci : disable test-opt for release as well
* ci : generate README.md
* ci : update README.md
* ci : fix header
* ci : utilize gg_printf row 0 row 1 testing adafa asdfdsa
* ci : move out the README.md header generation row 0 row 1 dsfkdjs adslfkaj ska test
* ci : fix row 0 row 1 dsfkdjs adslfkaj ska test
* ci : induce error
* ci : fix error remove assert(false)
* ci : fix ctest summary
* ci : fix comment minor test test
* ci : fix gg_printf usage
* ci : switch ci-1 to Release
* ci : try to simplify
* ci : induce error
* Revert "ci : induce error"
  This reverts commit a9cef1eeb174764a0a1eb5b13753a7637b10f9dd.
* ci : induce error
* ci : fix pipefail + status
* ci : try to fix pipefail
* ci : fix output
* ci : fix return codes
* ci : test
* ci : fix test
* ci : add gpt-2 ci
* ci : fix gpt-2 test
* ci : gpt-2 seed
* ci : fix checks
* ci : time gpt-2
* ci : fix gpt-2 output
* ci : try to fix duplicated output
* ci : try fix duplicated output
* ci : cat gpt-2 output
* ci : finally fix double output
* ci : try to add "set -x"
* ci : fix model var
* ci : append logs
* ci : add mpt
* ci : fix model dir creation
* ci : fix mpt convert
* ci : add mpt config.json
* ci : add configuration_mpt.py
* ci : don't run test-opt on low perf systems
* ci : do not run test-opt in Debug
---
diff --git a/.gitignore b/.gitignore
index 698b04a9..daef9cc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,11 @@
 build/
 build-debug/
-build-*/
+build-release/
+build-sanitize-addr/
+build-sanitize-thread/
+build-cov/
 out/
+models/

 compile_commands.json
 CMakeSettings.json
@@ -20,4 +24,4 @@ tests/arm_neon.h
 zig-out/
 zig-cache/

-*.dot
\ No newline at end of file
+*.dot
diff --git a/ci/run.sh b/ci/run.sh
new file mode 100644
index 00000000..a3aaf688
--- /dev/null
+++ b/ci/run.sh
@@ -0,0 +1,201 @@
#!/bin/bash

sd=`dirname $0`
cd $sd/../

SRC=`pwd`
OUT=$1

## helpers

# download a file if it does not exist or if it is outdated
function gg_wget {
    local out=$1
    local url=$2

    local cwd=`pwd`

    mkdir -p $out
    cd $out

    # should not re-download if file is the same
    wget -N $url

    cd $cwd
}

function gg_printf {
    printf -- "$@" >> $OUT/README.md
}

function gg_run {
    ci=$1

    set -o pipefail
    set -x

    gg_run_$ci | tee $OUT/$ci.log
    cur=$?
    echo "$cur" > $OUT/$ci.exit

    set +x
    set +o pipefail

    gg_sum_$ci

    ret=$((ret | cur))
}

## ci

# ctest_debug

function gg_run_ctest_debug {
    cd ${SRC}

    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug

    set -e

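    # note: each build/test step below is timed, and its combined stdout/stderr
    # is appended to a per-step log under $OUT via `tee -a`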
    (time cmake -DCMAKE_BUILD_TYPE=Debug .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log

    set +e
}

function gg_sum_ctest_debug {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in debug mode\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
    gg_printf '```\n'
    gg_printf '\n'
}

# ctest_release

function gg_run_ctest_release {
    cd ${SRC}

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    if [ -z $GG_BUILD_LOW_PERF ]; then
        (time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    else
        (time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
    fi

    set +e
}

function gg_sum_ctest_release {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in release mode\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
    gg_printf '```\n'
}

# gpt_2

function gg_run_gpt_2 {
    cd ${SRC}

    gg_wget models/gpt-2 https://huggingface.co/ggerganov/ggml/resolve/main/ggml-model-gpt-2-117M.bin

    cd build-ci-release

    set -e

    model="../models/gpt-2/ggml-model-gpt-2-117M.bin"
    prompts="../examples/prompts/gpt-2.txt"

    (time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -t 4 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log
    (time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -t 4 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log

    set +e
}

function gg_sum_gpt_2 {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs short GPT-2 text generation\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
    gg_printf '```\n'
}

# mpt

function gg_run_mpt {
    cd ${SRC}

    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/config.json
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer.json
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer_config.json
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/pytorch_model.bin.index.json
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/configuration_mpt.py
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00001-of-00002.bin
    gg_wget models/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00002-of-00002.bin

    cd build-ci-release

    set -e

    path_models="../models/mpt/7B"
    model_f16="${path_models}/ggml-model-f16.bin"
    model_q4_0="${path_models}/ggml-model-q4_0.bin"

    python3 ../examples/mpt/convert-h5-to-ggml.py ${path_models} 1
    ./bin/mpt-quantize ${model_f16} ${model_q4_0} q4_0

    (time ./bin/mpt --model ${model_f16} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
    (time ./bin/mpt --model ${model_q4_0} -s 1234 -n 64 -t 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log

    set +e
}

function gg_sum_mpt {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs short MPT text generation\n'
    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
    gg_printf '```\n'
}

## main

if [ -z $GG_BUILD_LOW_PERF ]; then
    rm -rf ${SRC}/models

    mkdir -p $(realpath ${GG_GGML_MNT}/models)
    ln -sfn ${GG_GGML_MNT}/models ${SRC}/models

    python3 -m pip install -r ${SRC}/requirements.txt
fi

ret=0

test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release
test $ret -eq 0 && gg_run gpt_2

if [ -z $GG_BUILD_LOW_PERF ]; then
    test $ret -eq 0 && gg_run mpt
fi

exit $ret
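
A minimal usage sketch, assuming a local checkout with cmake, make, wget and python3 available; the output path and the GG_BUILD_LOW_PERF setting are illustrative, not part of the commit. An absolute output path is safest, since the per-step commands tee their logs after cd-ing into the build directories, and setting GG_BUILD_LOW_PERF avoids the MPT-7B download and the need for GG_GGML_MNT:

    mkdir -p /tmp/ggml-ci-out
    GG_BUILD_LOW_PERF=1 bash ./ci/run.sh /tmp/ggml-ci-out   # low-perf run: skips the MPT step and test-opt
    cat /tmp/ggml-ci-out/README.md                          # per-step summaries collected by gg_printf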