From: Georgi Gerganov Date: Tue, 9 Apr 2024 17:12:17 +0000 (+0300) Subject: files : rename ./extra to ./scripts X-Git-Tag: upstream/1.7.4~846 X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=52ccd4a3a8efb2e346b5db0dc01cababd825b186;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp files : rename ./extra to ./scripts --- diff --git a/README.md b/README.md index 6b6ea673..1499e107 100644 --- a/README.md +++ b/README.md @@ -744,10 +744,10 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a ## Video comparison of different models -Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format: +Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format: ```bash -./extra/bench-wts.sh samples/jfk.wav +./scripts/bench-wts.sh samples/jfk.wav ffplay ./samples/jfk.wav.all.mp4 ``` @@ -768,7 +768,7 @@ Additionally a script to run whisper.cpp with different models and audio files i You can run it with the following command, by default it will run against any standard model in the models folder. ```bash -python3 extra/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 +python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2 ``` It is written in python with the intention of being easy to modify and extend for your benchmarking use case. diff --git a/extra/bench-all.sh b/extra/bench-all.sh deleted file mode 100755 index 6939dafa..00000000 --- a/extra/bench-all.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash - -# Helper script to run the bench tool on all models and print the results in share-able format - -printf "Usage: ./bench.sh [n_threads] [encoder-only]\n" - -if [ -z "$1" ]; then - n_threads=4 -else - n_threads=$1 -fi - -encoder_only=0 -if [ -z "$2" ]; then - encoder_only=0 -else - encoder_only=$2 -fi - -models=( \ - "tiny" "tiny-q4_0" "tiny-q4_1" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \ - "base" "base-q4_0" "base-q4_1" "base-q5_0" "base-q5_1" "base-q8_0" \ - "small" "small-q4_0" "small-q4_1" "small-q5_0" "small-q5_1" "small-q8_0" \ - "medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" "medium-dis" \ - "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \ -) - -if [ "$encoder_only" -eq 0 ]; then - printf "\n" - printf "Running memcpy benchmark\n" - printf "\n" - - ./bench -w 1 -t $n_threads 2>&1 - - printf "\n" - printf "Running ggml_mul_mat benchmark with $n_threads threads\n" - printf "\n" - - ./bench -w 2 -t $n_threads 2>&1 - - printf "\n" - printf "Running benchmark for all models\n" - printf "This can take a while!\n" - printf "\n" -fi - -printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit" -printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---" - -for model in "${models[@]}"; do - # actual run - # store stderr output in a variable in order to parse it later - output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1) - ret=$? - - # parse the output: - encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}') - decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}') - batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}') - prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}') - system_info=$(echo "$output" | grep "system_info") - n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}') - - # floor to milliseconds - #encode_time=${encode_time%.*} - #decode_time=${decode_time%.*} - #prompt_time=${prompt_time%.*} - - config="" - - if [[ $system_info == *"AVX2 = 1"* ]]; then - config="$config AVX2" - fi - - if [[ $system_info == *"NEON = 1"* ]]; then - config="$config NEON" - fi - - if [[ $system_info == *"BLAS = 1"* ]]; then - config="$config BLAS" - fi - - if [[ $system_info == *"COREML = 1"* ]]; then - config="$config COREML" - fi - - if [[ $system_info == *"CUDA = 1"* ]]; then - config="$config CUDA" - fi - - if [[ $system_info == *"METAL = 1"* ]]; then - config="$config METAL" - fi - - commit=$(git rev-parse --short HEAD) - - if [ $ret -eq 0 ]; then - printf "| | | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit" - fi -done diff --git a/extra/bench-wts.sh b/extra/bench-wts.sh deleted file mode 100755 index 223d71b8..00000000 --- a/extra/bench-wts.sh +++ /dev/null @@ -1,70 +0,0 @@ -# Benchmark word-level timestamps for different models -# -# This script takes two arguments -# - an audio file -# - [optional] path to a font file - -# I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu - -if [ -z "$1" ]; then - echo "Usage: $0