ci : add whisper test (#525)

author Georgi Gerganov <redacted>

Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)

committer GitHub <redacted>

Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)
author Georgi Gerganov <redacted>
Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)
committer GitHub <redacted>
Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)
diff --git a/ci/run.sh b/ci/run.sh

index abb43e76735872a511790d08ab8d927f7f84f449..65dc55fe5aee850cf0cf49b5a6e8a1cc0ab8b0e0 100644 (file)
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -161,76 +161,107 @@ function gg_sum_gpt_2 {
      gg_printf '```\n'
  }
  
-# mpt
+# mnist
  
-function gg_run_mpt {
+function gg_run_mnist {
      cd ${SRC}
  
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/config.json
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer.json
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer_config.json
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/pytorch_model.bin.index.json
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/configuration_mpt.py
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00001-of-00002.bin
-    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00002-of-00002.bin
-
      cd build-ci-release
  
      set -e
  
-    path_models="../models-mnt/mpt/7B"
-    model_f16="${path_models}/ggml-model-f16.bin"
-    model_q4_0="${path_models}/ggml-model-q4_0.bin"
+    mkdir -p models/mnist
+    python3 ../examples/mnist/convert-h5-to-ggml.py ../examples/mnist/models/mnist/mnist_model.state_dict
  
-    python3 ../examples/mpt/convert-h5-to-ggml.py ${path_models} 1
-    ./bin/mpt-quantize ${model_f16} ${model_q4_0} q4_0
+    model_f32="./models/mnist/ggml-model-f32.bin"
+    samples="../examples/mnist/models/mnist/t10k-images.idx3-ubyte"
  
-    (time ./bin/mpt --model ${model_f16}  -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
-    (time ./bin/mpt --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
+    # first command runs and exports "mnist.ggml", the second command runs the exported model
+
+    (time ./bin/mnist     ${model_f32} ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
+    (time ./bin/mnist-cpu ./mnist.ggml ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
  
      set +e
  }
  
-function gg_sum_mpt {
+function gg_sum_mnist {
      gg_printf '### %s\n\n' "${ci}"
  
-    gg_printf 'Runs short MPT text generation\n'
+    gg_printf 'MNIST\n'
      gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
      gg_printf '```\n'
-    gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
+    gg_printf '%s\n' "$(cat $OUT/${ci}-mnist.log)"
      gg_printf '```\n'
  }
  
-# mnist
+# whisper
  
-function gg_run_mnist {
+function gg_run_whisper {
      cd ${SRC}
  
+    gg_wget models-mnt/whisper/ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
+    gg_wget models-mnt/whisper/ https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav
+
      cd build-ci-release
  
      set -e
  
-    mkdir -p models/mnist
-    python3 ../examples/mnist/convert-h5-to-ggml.py ../examples/mnist/models/mnist/mnist_model.state_dict
+    path_models="../models-mnt/whisper/"
+    model_f16="${path_models}/ggml-base.en.bin"
+    audio_0="${path_models}/jfk.wav"
  
-    model_f32="./models/mnist/ggml-model-f32.bin"
-    samples="../examples/mnist/models/mnist/t10k-images.idx3-ubyte"
+    (time ./bin/whisper -m ${model_f16} -f ${audio_0} 2>&1 | tee -a $OUT/${ci}-main.log) || true
  
-    # first command runs and exports "mnist.ggml", the second command runs the exported model
+    set +e
+}
  
-    (time ./bin/mnist     ${model_f32} ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
-    (time ./bin/mnist-cpu ./mnist.ggml ${samples} ) 2>&1 | tee -a $OUT/${ci}-mnist.log
+function gg_sum_whisper {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs short Whisper transcription\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-main.log)"
+    gg_printf '```\n'
+}
+
+# mpt
+
+function gg_run_mpt {
+    cd ${SRC}
+
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/config.json
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer.json
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/tokenizer_config.json
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/pytorch_model.bin.index.json
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/raw/main/configuration_mpt.py
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00001-of-00002.bin
+    gg_wget models-mnt/mpt/7B/ https://huggingface.co/mosaicml/mpt-7b/resolve/main/pytorch_model-00002-of-00002.bin
+
+    cd build-ci-release
+
+    set -e
+
+    path_models="../models-mnt/mpt/7B"
+    model_f16="${path_models}/ggml-model-f16.bin"
+    model_q4_0="${path_models}/ggml-model-q4_0.bin"
+
+    python3 ../examples/mpt/convert-h5-to-ggml.py ${path_models} 1
+    ./bin/mpt-quantize ${model_f16} ${model_q4_0} q4_0
+
+    (time ./bin/mpt --model ${model_f16}  -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
+    (time ./bin/mpt --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
  
      set +e
  }
  
-function gg_sum_mnist {
+function gg_sum_mpt {
      gg_printf '### %s\n\n' "${ci}"
  
-    gg_printf 'MNIST\n'
+    gg_printf 'Runs short MPT text generation\n'
      gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
      gg_printf '```\n'
-    gg_printf '%s\n' "$(cat $OUT/${ci}-mnist.log)"
+    gg_printf '%s\n' "$(cat $OUT/${ci}-tg.log)"
      gg_printf '```\n'
  }
  
@@ -252,6 +283,7 @@ test $ret -eq 0 && gg_run ctest_debug
  test $ret -eq 0 && gg_run ctest_release
  test $ret -eq 0 && gg_run gpt_2
  test $ret -eq 0 && gg_run mnist
+test $ret -eq 0 && gg_run whisper
  
  if [ -z $GG_BUILD_LOW_PERF ]; then
      test $ret -eq 0 && gg_run mpt
author	Georgi Gerganov <redacted>
	Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)
committer	GitHub <redacted>
	Fri, 15 Sep 2023 17:58:43 +0000 (20:58 +0300)