## Video comparison of different models
-Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:
+Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
```bash
-./extra/bench-wts.sh samples/jfk.wav
+./scripts/bench-wts.sh samples/jfk.wav
ffplay ./samples/jfk.wav.all.mp4
```
You can run it with the following command, by default it will run against any standard model in the models folder.
```bash
-python3 extra/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
+python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
```
It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
+++ /dev/null
-#!/bin/bash
-
-# Helper script to run the bench tool on all models and print the results in share-able format
-
-printf "Usage: ./bench.sh [n_threads] [encoder-only]\n"
-
-if [ -z "$1" ]; then
- n_threads=4
-else
- n_threads=$1
-fi
-
-encoder_only=0
-if [ -z "$2" ]; then
- encoder_only=0
-else
- encoder_only=$2
-fi
-
-models=( \
- "tiny" "tiny-q4_0" "tiny-q4_1" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \
- "base" "base-q4_0" "base-q4_1" "base-q5_0" "base-q5_1" "base-q8_0" \
- "small" "small-q4_0" "small-q4_1" "small-q5_0" "small-q5_1" "small-q8_0" \
- "medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" "medium-dis" \
- "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \
-)
-
-if [ "$encoder_only" -eq 0 ]; then
- printf "\n"
- printf "Running memcpy benchmark\n"
- printf "\n"
-
- ./bench -w 1 -t $n_threads 2>&1
-
- printf "\n"
- printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
- printf "\n"
-
- ./bench -w 2 -t $n_threads 2>&1
-
- printf "\n"
- printf "Running benchmark for all models\n"
- printf "This can take a while!\n"
- printf "\n"
-fi
-
-printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit"
-printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---"
-
-for model in "${models[@]}"; do
- # actual run
- # store stderr output in a variable in order to parse it later
- output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
- ret=$?
-
- # parse the output:
- encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
- decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
- batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
- prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
- system_info=$(echo "$output" | grep "system_info")
- n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
-
- # floor to milliseconds
- #encode_time=${encode_time%.*}
- #decode_time=${decode_time%.*}
- #prompt_time=${prompt_time%.*}
-
- config=""
-
- if [[ $system_info == *"AVX2 = 1"* ]]; then
- config="$config AVX2"
- fi
-
- if [[ $system_info == *"NEON = 1"* ]]; then
- config="$config NEON"
- fi
-
- if [[ $system_info == *"BLAS = 1"* ]]; then
- config="$config BLAS"
- fi
-
- if [[ $system_info == *"COREML = 1"* ]]; then
- config="$config COREML"
- fi
-
- if [[ $system_info == *"CUDA = 1"* ]]; then
- config="$config CUDA"
- fi
-
- if [[ $system_info == *"METAL = 1"* ]]; then
- config="$config METAL"
- fi
-
- commit=$(git rev-parse --short HEAD)
-
- if [ $ret -eq 0 ]; then
- printf "| <todo> | <todo> | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit"
- fi
-done
+++ /dev/null
-# Benchmark word-level timestamps for different models
-#
-# This script takes two arguments
-# - an audio file
-# - [optional] path to a font file
-
-# I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu
-
-if [ -z "$1" ]; then
- echo "Usage: $0 <audio file> [font file]"
- exit 1
-fi
-
-#TODO: Make this a command line parameter
-#models="base small large"
-#models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large"
-models="tiny.en base.en small.en medium.en large"
-
-DURATION=$(ffprobe -i $1 -show_entries format=duration -v quiet -of csv="p=0")
-DURATION=$(printf "%.2f" $DURATION)
-echo "Input file duration: ${DURATION}s"
-
-for model in $models; do
- echo "Running $model"
- COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
-
- if [ ! -z "$2" ]; then
- COMMAND="$COMMAND -fp $2"
- fi
- #TODO: Surface errors better
- # TIMEFMT is for zsh, TIMEFORMAT is for bash
- EXECTIME=$({ TIMEFMT="%E";TIMEFORMAT=%E; time $COMMAND >/dev/null 2>&1; } 2>&1)
-
- # Slightly different formats between zsh and bash
- if [ "${EXECTIME: -1}" == "s" ]; then
- EXECTIME=${EXECTIME::-1}
- fi
-
- RATIO=$(echo "$DURATION / $EXECTIME" | bc -l)
- RATIO=$(printf "%.2f" $RATIO)
-
- echo "Execution time: ${EXECTIME}s (${RATIO}x realtime)"
-
- # If the file already exists, delete it
- if [ -f $1.mp4 ]; then
- rm $1.mp4
- fi
-
- bash $1.$model.wts >/dev/null 2>&1
- mv $1.mp4 $1.$model.mp4
-
- ffmpeg -y -f lavfi -i color=c=black:s=1200x50:d=$DURATION -vf "drawtext=fontfile=$2:fontsize=36:x=10:y=(h-text_h)/2:text='ggml-$model - ${EXECTIME}s (${RATIO}x realtime)':fontcolor=lightgrey" $1.$model.info.mp4 >/dev/null 2>&1
-done
-
-COMMAND="ffmpeg -y"
-for model in $models; do
- COMMAND="$COMMAND -i $1.$model.info.mp4 -i $1.$model.mp4"
-done
-COMMAND="$COMMAND -filter_complex \""
-COUNT=0
-for model in $models; do
- COMMAND="$COMMAND[${COUNT}:v][$(($COUNT+1)):v]"
- COUNT=$((COUNT+2))
-done
-COMMAND="$COMMAND vstack=inputs=${COUNT}[v]\" -map \"[v]\" -map 1:a $1.all.mp4 >/dev/null 2>&1"
-
-echo $COMMAND
-
-# Run the command
-eval $COMMAND
+++ /dev/null
-import os
-import subprocess
-import re
-import csv
-import wave
-import contextlib
-import argparse
-
-
-# Custom action to handle comma-separated list
-class ListAction(argparse.Action):
- def __call__(self, parser, namespace, values, option_string=None):
- setattr(namespace, self.dest, [int(val) for val in values.split(",")])
-
-
-parser = argparse.ArgumentParser(description="Benchmark the speech recognition model")
-
-# Define the argument to accept a list
-parser.add_argument(
- "-t",
- "--threads",
- dest="threads",
- action=ListAction,
- default=[4],
- help="List of thread counts to benchmark (comma-separated, default: 4)",
-)
-
-parser.add_argument(
- "-p",
- "--processors",
- dest="processors",
- action=ListAction,
- default=[1],
- help="List of processor counts to benchmark (comma-separated, default: 1)",
-)
-
-
-parser.add_argument(
- "-f",
- "--filename",
- type=str,
- default="./samples/jfk.wav",
- help="Relative path of the file to transcribe (default: ./samples/jfk.wav)",
-)
-
-# Parse the command line arguments
-args = parser.parse_args()
-
-sample_file = args.filename
-
-threads = args.threads
-processors = args.processors
-
-# Define the models, threads, and processor counts to benchmark
-models = [
- "ggml-tiny.en.bin",
- "ggml-tiny.bin",
- "ggml-base.en.bin",
- "ggml-base.bin",
- "ggml-small.en.bin",
- "ggml-small.bin",
- "ggml-medium.en.bin",
- "ggml-medium.bin",
- "ggml-large-v1.bin",
- "ggml-large-v2.bin",
- "ggml-large-v3.bin",
-]
-
-
-metal_device = ""
-
-# Initialize a dictionary to hold the results
-results = {}
-
-gitHashHeader = "Commit"
-modelHeader = "Model"
-hardwareHeader = "Hardware"
-recordingLengthHeader = "Recording Length (seconds)"
-threadHeader = "Thread"
-processorCountHeader = "Processor Count"
-loadTimeHeader = "Load Time (ms)"
-sampleTimeHeader = "Sample Time (ms)"
-encodeTimeHeader = "Encode Time (ms)"
-decodeTimeHeader = "Decode Time (ms)"
-sampleTimePerRunHeader = "Sample Time per Run (ms)"
-encodeTimePerRunHeader = "Encode Time per Run (ms)"
-decodeTimePerRunHeader = "Decode Time per Run (ms)"
-totalTimeHeader = "Total Time (ms)"
-
-
-def check_file_exists(file: str) -> bool:
- return os.path.isfile(file)
-
-
-def get_git_short_hash() -> str:
- try:
- return (
- subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
- .decode()
- .strip()
- )
- except subprocess.CalledProcessError as e:
- return ""
-
-
-def wav_file_length(file: str = sample_file) -> float:
- with contextlib.closing(wave.open(file, "r")) as f:
- frames = f.getnframes()
- rate = f.getframerate()
- duration = frames / float(rate)
- return duration
-
-
-def extract_metrics(output: str, label: str) -> tuple[float, float]:
- match = re.search(rf"{label} \s*=\s*(\d+\.\d+)\s*ms\s*/\s*(\d+)\s*runs", output)
- time = float(match.group(1)) if match else None
- runs = float(match.group(2)) if match else None
- return time, runs
-
-
-def extract_device(output: str) -> str:
- match = re.search(r"picking default device: (.*)", output)
- device = match.group(1) if match else "Not found"
- return device
-
-
-# Check if the sample file exists
-if not check_file_exists(sample_file):
- raise FileNotFoundError(f"Sample file {sample_file} not found")
-
-recording_length = wav_file_length()
-
-
-# Check that all models exist
-# Filter out models from list that are not downloaded
-filtered_models = []
-for model in models:
- if check_file_exists(f"models/{model}"):
- filtered_models.append(model)
- else:
- print(f"Model {model} not found, removing from list")
-
-models = filtered_models
-
-# Loop over each combination of parameters
-for model in filtered_models:
- for thread in threads:
- for processor_count in processors:
- # Construct the command to run
- cmd = f"./main -m models/{model} -t {thread} -p {processor_count} -f {sample_file}"
- # Run the command and get the output
- process = subprocess.Popen(
- cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
- )
-
- output = ""
- while process.poll() is None:
- output += process.stdout.read().decode()
-
- # Parse the output
- load_time_match = re.search(r"load time\s*=\s*(\d+\.\d+)\s*ms", output)
- load_time = float(load_time_match.group(1)) if load_time_match else None
-
- metal_device = extract_device(output)
- sample_time, sample_runs = extract_metrics(output, "sample time")
- encode_time, encode_runs = extract_metrics(output, "encode time")
- decode_time, decode_runs = extract_metrics(output, "decode time")
-
- total_time_match = re.search(r"total time\s*=\s*(\d+\.\d+)\s*ms", output)
- total_time = float(total_time_match.group(1)) if total_time_match else None
-
- model_name = model.replace("ggml-", "").replace(".bin", "")
-
- print(
- f"Ran model={model_name} threads={thread} processor_count={processor_count}, took {total_time}ms"
- )
- # Store the times in the results dictionary
- results[(model_name, thread, processor_count)] = {
- loadTimeHeader: load_time,
- sampleTimeHeader: sample_time,
- encodeTimeHeader: encode_time,
- decodeTimeHeader: decode_time,
- sampleTimePerRunHeader: round(sample_time / sample_runs, 2),
- encodeTimePerRunHeader: round(encode_time / encode_runs, 2),
- decodeTimePerRunHeader: round(decode_time / decode_runs, 2),
- totalTimeHeader: total_time,
- }
-
-# Write the results to a CSV file
-with open("benchmark_results.csv", "w", newline="") as csvfile:
- fieldnames = [
- gitHashHeader,
- modelHeader,
- hardwareHeader,
- recordingLengthHeader,
- threadHeader,
- processorCountHeader,
- loadTimeHeader,
- sampleTimeHeader,
- encodeTimeHeader,
- decodeTimeHeader,
- sampleTimePerRunHeader,
- encodeTimePerRunHeader,
- decodeTimePerRunHeader,
- totalTimeHeader,
- ]
- writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-
- writer.writeheader()
-
- shortHash = get_git_short_hash()
- # Sort the results by total time in ascending order
- sorted_results = sorted(results.items(), key=lambda x: x[1].get(totalTimeHeader, 0))
- for params, times in sorted_results:
- row = {
- gitHashHeader: shortHash,
- modelHeader: params[0],
- hardwareHeader: metal_device,
- recordingLengthHeader: recording_length,
- threadHeader: params[1],
- processorCountHeader: params[2],
- }
- row.update(times)
- writer.writerow(row)
+++ /dev/null
-#!/bin/bash
-
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
-
-for model in "${models[@]}"; do
- python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
- mv -v models/ggml-model.bin models/ggml-$model.bin
-done
+++ /dev/null
-#!/bin/bash
-#
-# This is a helper script to deploy all WebAssembly examples to my node
-# Run from the build directory:
-#
-# cd build-em
-# ../extra/deploy-wasm.sh
-#
-
-# check if emcmake is available
-if ! command -v emcmake &> /dev/null
-then
- echo "Error: emscripten environment is not set up"
- exit
-fi
-
-emcmake cmake .. && make -j
-if [ $? -ne 0 ]; then
- echo "Error: build failed"
- exit
-fi
-
-# copy all wasm files to the node
-scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libmain.worker.js root@linode0:/var/www/html/whisper/
-scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
-scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
-scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
-scp bin/bench.wasm/* root@linode0:/var/www/html/whisper/bench/ && scp bin/libbench.worker.js root@linode0:/var/www/html/whisper/bench/
-
-echo "Done"
-exit
+++ /dev/null
-#!/bin/bash
-
-printf "Usage: $0 <upload>"
-
-if [ $# -ne 1 ]; then
- printf "\nError: Invalid number of arguments\n"
- exit 1
-fi
-
-qtype0="q5_0"
-qtype1="q5_1"
-upload="$1"
-declare -a filedex
-
-cd `dirname $0`
-cd ../
-
-for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do
- m="models/$i"
- if [ -f "$m" ]; then
- if [ "${m##*.}" == "bin" ]; then
- ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
- ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
- filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
- fi
- fi
-done
-
-
-
-if [ "$upload" == "1" ]; then
- for i in ${!filedex[@]}; do
- if [ "${filedex[$i]:9:8}" != "for-test" ]; then
- scp ${filedex[$i]} root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9}
- fi
- done
-fi
+++ /dev/null
-#!/bin/bash
-
-# Compute the SHA1 of all model files in ./models/ggml-*.bin
-
-for f in ./models/ggml-*.bin; do
- shasum "$f" -a 1
-done
+++ /dev/null
-#!/bin/bash
-#
-# Synchronize ggml changes to whisper.cpp
-#
-# Usage:
-#
-# $ cd /path/to/whisper.cpp
-# $ ./extra/sync-ggml-am.sh -skip hash0,hash1,hash2...
-#
-
-set -e
-
-sd=$(dirname $0)
-cd $sd/../
-
-SRC_WHISPER=$(pwd)
-SRC_GGML=$(cd ../ggml; pwd)
-
-if [ ! -d $SRC_GGML ]; then
- echo "ggml not found at $SRC_GGML"
- exit 1
-fi
-
-lc=$(cat $SRC_WHISPER/extra/sync-ggml.last)
-echo "Syncing ggml changes since commit $lc"
-
-to_skip=""
-if [ "$1" == "-skip" ]; then
- to_skip=$2
-fi
-
-cd $SRC_GGML
-
-git log --oneline $lc..HEAD
-git log --oneline $lc..HEAD --reverse | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_WHISPER/ggml-commits
-
-if [ ! -s $SRC_WHISPER/ggml-commits ]; then
- rm -v $SRC_WHISPER/ggml-commits
- echo "No new commits"
- exit 0
-fi
-
-if [ -f $SRC_WHISPER/ggml-src.patch ]; then
- rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-while read c; do
- if [ -n "$to_skip" ]; then
- if [[ $to_skip == *"$c"* ]]; then
- echo "Skipping $c"
- continue
- fi
- fi
-
- git format-patch -k $c~1..$c --stdout -- \
- include/ggml/ggml*.h \
- src/ggml*.h \
- src/ggml*.c \
- src/ggml*.cpp \
- src/ggml*.m \
- src/ggml*.metal \
- src/ggml*.cu \
- src/ggml-cuda/* \
- examples/common.h \
- examples/common.cpp \
- examples/common-ggml.h \
- examples/common-ggml.cpp \
- examples/whisper/grammar-parser.h \
- examples/whisper/grammar-parser.cpp \
- examples/whisper/whisper.h \
- examples/whisper/whisper.cpp \
- examples/whisper/main.cpp \
- examples/whisper/quantize.cpp \
- >> $SRC_WHISPER/ggml-src.patch
-done < $SRC_WHISPER/ggml-commits
-
-rm -v $SRC_WHISPER/ggml-commits
-
-# delete files if empty
-if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
- rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-cd $SRC_WHISPER
-
-if [ -f $SRC_WHISPER/ggml-src.patch ]; then
- # replace PR numbers
- #
- # Subject: some text (#1234)
- # Subject: some text (ggml/1234)
- cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
- mv ggml-src.patch.tmp ggml-src.patch
-
- cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
- mv ggml-src.patch.tmp ggml-src.patch
-
- # replace filenames:
- #
- # src/ggml.c -> ggml.c
- # src/ggml-alloc.c -> ggml-alloc.c
- # src/ggml-backend-impl.h -> ggml-backend-impl.h
- # src/ggml-backend.c -> ggml-backend.c
- # src/ggml-common.h -> ggml-common.h
- # src/ggml-cuda/* -> ggml-cuda/
- # src/ggml-cuda.cu -> ggml-cuda.cu
- # src/ggml-cuda.h -> ggml-cuda.h
- # src/ggml-impl.h -> ggml-impl.h
- # src/ggml-kompute.cpp -> ggml-kompute.cpp
- # src/ggml-kompute.h -> ggml-kompute.h
- # src/ggml-metal.h -> ggml-metal.h
- # src/ggml-metal.m -> ggml-metal.m
- # src/ggml-mpi.h -> ggml-mpi.h
- # src/ggml-mpi.c -> ggml-mpi.c
- # src/ggml-opencl.cpp -> ggml-opencl.cpp
- # src/ggml-opencl.h -> ggml-opencl.h
- # src/ggml-quants.c -> ggml-quants.c
- # src/ggml-quants.h -> ggml-quants.h
- # src/ggml-sycl.cpp -> ggml-sycl.cpp
- # src/ggml-sycl.h -> ggml-sycl.h
- # src/ggml-vulkan.cpp -> ggml-vulkan.cpp
- # src/ggml-vulkan.h -> ggml-vulkan.h
- # include/ggml/ggml.h -> ggml.h
- # include/ggml/ggml-alloc.h -> ggml-alloc.h
- # include/ggml/ggml-backend.h -> ggml-backend.h
- #
- # examples/common.h -> examples/common.h
- # examples/common.cpp -> examples/common.cpp
- # examples/common-ggml.h -> examples/common-ggml.h
- # examples/common-ggml.cpp -> examples/common-ggml.cpp
- # examples/whisper/grammar-parser.h -> examples/grammar-parser.h
- # examples/whisper/grammar-parser.cpp -> examples/grammar-parser.cpp
- #
- # examples/whisper/whisper.h -> whisper.h
- # examples/whisper/whisper.cpp -> whisper.cpp
- # examples/whisper/main.cpp -> examples/main/main.cpp
- # examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
-
- cat ggml-src.patch | sed \
- -e 's/src\/ggml\.c/ggml.c/g' \
- -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
- -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
- -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
- -e 's/src\/ggml-common\.h/ggml-common.h/g' \
- -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
- -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
- -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
- -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
- -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
- -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
- -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
- -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
- -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
- -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
- -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
- -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
- -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
- -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
- -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
- -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
- -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
- -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
- -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
- -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
- -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
- -e 's/examples\/common\.h/examples\/common.h/g' \
- -e 's/examples\/common\.cpp/examples\/common.cpp/g' \
- -e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
- -e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
- -e 's/examples\/whisper\/grammar-parser\.h/examples\/grammar-parser.h/g' \
- -e 's/examples\/whisper\/grammar-parser\.cpp/examples\/grammar-parser.cpp/g' \
- -e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
- -e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
- -e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
- -e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
- > ggml-src.patch.tmp
- mv ggml-src.patch.tmp ggml-src.patch
-
- git am ggml-src.patch
-
- rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-# update last commit
-cd $SRC_GGML
-git log -1 --format=%H > $SRC_WHISPER/extra/sync-ggml.last
-
-echo "Done"
-
-exit 0
+++ /dev/null
-bb8d8cff851b2de6fde4904be492d39458837e1a
+++ /dev/null
-#!/bin/bash
-
-cp -rpv ../ggml/src/ggml.c ./ggml.c
-cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
-cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
-cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
-cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
-cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
-cp -rpv ../ggml/src/ggml-cuda/* ./ggml-cuda/
-cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
-cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
-cp -rpv ../ggml/src/ggml-kompute.cpp ./ggml-kompute.cpp
-cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
-cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
-cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
-cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
-#cp -rpv ../ggml/src/ggml-mpi.h ./ggml-mpi.h
-#cp -rpv ../ggml/src/ggml-mpi.c ./ggml-mpi.c
-cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
-cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
-cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
-cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
-cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml-sycl.cpp
-cp -rpv ../ggml/src/ggml-sycl.h ./ggml-sycl.h
-cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml-vulkan.cpp
-cp -rpv ../ggml/src/ggml-vulkan.h ./ggml-vulkan.h
-
-cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
-cp -rpv ../ggml/include/ggml/ggml-alloc.h ./ggml-alloc.h
-cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
-
-cp -rpv ../ggml/examples/common.h ./examples/common.h
-cp -rpv ../ggml/examples/common.cpp ./examples/common.cpp
-cp -rpv ../ggml/examples/common-ggml.h ./examples/common-ggml.h
-cp -rpv ../ggml/examples/common-ggml.cpp ./examples/common-ggml.cpp
-cp -rpv ../ggml/examples/whisper/grammar-parser.h ./examples/grammar-parser.h
-cp -rpv ../ggml/examples/whisper/grammar-parser.cpp ./examples/grammar-parser.cpp
-
-cp -rpv ../ggml/examples/whisper/whisper.h ./whisper.h
-cp -rpv ../ggml/examples/whisper/whisper.cpp ./whisper.cpp
-cp -rpv ../ggml/examples/whisper/main.cpp ./examples/main/main.cpp
-cp -rpv ../ggml/examples/whisper/quantize.cpp ./examples/quantize/quantize.cpp
-
+++ /dev/null
-#!/bin/bash
-
-cp -rpv ../llama.cpp/llama.h ./examples/talk-llama/llama.h
-cp -rpv ../llama.cpp/llama.cpp ./examples/talk-llama/llama.cpp
-cp -rpv ../llama.cpp/unicode.h ./examples/talk-llama/unicode.h
-cp -rpv ../llama.cpp/unicode.cpp ./examples/talk-llama/unicode.cpp
-cp -rpv ../llama.cpp/unicode-data.h ./examples/talk-llama/unicode-data.h
-cp -rpv ../llama.cpp/unicode-data.cpp ./examples/talk-llama/unicode-data.cpp
--- /dev/null
+#!/bin/bash
+
+# Helper script to run the bench tool on all models and print the results in share-able format
+
+printf "Usage: ./bench.sh [n_threads] [encoder-only]\n"
+
+if [ -z "$1" ]; then
+ n_threads=4
+else
+ n_threads=$1
+fi
+
+encoder_only=0
+if [ -z "$2" ]; then
+ encoder_only=0
+else
+ encoder_only=$2
+fi
+
+models=( \
+ "tiny" "tiny-q4_0" "tiny-q4_1" "tiny-q5_0" "tiny-q5_1" "tiny-q8_0" \
+ "base" "base-q4_0" "base-q4_1" "base-q5_0" "base-q5_1" "base-q8_0" \
+ "small" "small-q4_0" "small-q4_1" "small-q5_0" "small-q5_1" "small-q8_0" \
+ "medium" "medium-q4_0" "medium-q4_1" "medium-q5_0" "medium-q5_1" "medium-q8_0" "medium-dis" \
+ "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \
+)
+
+if [ "$encoder_only" -eq 0 ]; then
+ printf "\n"
+ printf "Running memcpy benchmark\n"
+ printf "\n"
+
+ ./bench -w 1 -t $n_threads 2>&1
+
+ printf "\n"
+ printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
+ printf "\n"
+
+ ./bench -w 2 -t $n_threads 2>&1
+
+ printf "\n"
+ printf "Running benchmark for all models\n"
+ printf "This can take a while!\n"
+ printf "\n"
+fi
+
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit"
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---"
+
+for model in "${models[@]}"; do
+ # actual run
+ # store stderr output in a variable in order to parse it later
+ output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
+ ret=$?
+
+ # parse the output:
+ encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
+ decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
+ batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
+ prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
+ system_info=$(echo "$output" | grep "system_info")
+ n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
+
+ # floor to milliseconds
+ #encode_time=${encode_time%.*}
+ #decode_time=${decode_time%.*}
+ #prompt_time=${prompt_time%.*}
+
+ config=""
+
+ if [[ $system_info == *"AVX2 = 1"* ]]; then
+ config="$config AVX2"
+ fi
+
+ if [[ $system_info == *"NEON = 1"* ]]; then
+ config="$config NEON"
+ fi
+
+ if [[ $system_info == *"BLAS = 1"* ]]; then
+ config="$config BLAS"
+ fi
+
+ if [[ $system_info == *"COREML = 1"* ]]; then
+ config="$config COREML"
+ fi
+
+ if [[ $system_info == *"CUDA = 1"* ]]; then
+ config="$config CUDA"
+ fi
+
+ if [[ $system_info == *"METAL = 1"* ]]; then
+ config="$config METAL"
+ fi
+
+ commit=$(git rev-parse --short HEAD)
+
+ if [ $ret -eq 0 ]; then
+ printf "| <todo> | <todo> | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit"
+ fi
+done
--- /dev/null
+# Benchmark word-level timestamps for different models
+#
+# This script takes two arguments
+# - an audio file
+# - [optional] path to a font file
+
+# I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu
+
+if [ -z "$1" ]; then
+ echo "Usage: $0 <audio file> [font file]"
+ exit 1
+fi
+
+#TODO: Make this a command line parameter
+#models="base small large"
+#models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large"
+models="tiny.en base.en small.en medium.en large"
+
+DURATION=$(ffprobe -i $1 -show_entries format=duration -v quiet -of csv="p=0")
+DURATION=$(printf "%.2f" $DURATION)
+echo "Input file duration: ${DURATION}s"
+
+for model in $models; do
+ echo "Running $model"
+ COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
+
+ if [ ! -z "$2" ]; then
+ COMMAND="$COMMAND -fp $2"
+ fi
+ #TODO: Surface errors better
+ # TIMEFMT is for zsh, TIMEFORMAT is for bash
+ EXECTIME=$({ TIMEFMT="%E";TIMEFORMAT=%E; time $COMMAND >/dev/null 2>&1; } 2>&1)
+
+ # Slightly different formats between zsh and bash
+ if [ "${EXECTIME: -1}" == "s" ]; then
+ EXECTIME=${EXECTIME::-1}
+ fi
+
+ RATIO=$(echo "$DURATION / $EXECTIME" | bc -l)
+ RATIO=$(printf "%.2f" $RATIO)
+
+ echo "Execution time: ${EXECTIME}s (${RATIO}x realtime)"
+
+ # If the file already exists, delete it
+ if [ -f $1.mp4 ]; then
+ rm $1.mp4
+ fi
+
+ bash $1.$model.wts >/dev/null 2>&1
+ mv $1.mp4 $1.$model.mp4
+
+ ffmpeg -y -f lavfi -i color=c=black:s=1200x50:d=$DURATION -vf "drawtext=fontfile=$2:fontsize=36:x=10:y=(h-text_h)/2:text='ggml-$model - ${EXECTIME}s (${RATIO}x realtime)':fontcolor=lightgrey" $1.$model.info.mp4 >/dev/null 2>&1
+done
+
+COMMAND="ffmpeg -y"
+for model in $models; do
+ COMMAND="$COMMAND -i $1.$model.info.mp4 -i $1.$model.mp4"
+done
+COMMAND="$COMMAND -filter_complex \""
+COUNT=0
+for model in $models; do
+ COMMAND="$COMMAND[${COUNT}:v][$(($COUNT+1)):v]"
+ COUNT=$((COUNT+2))
+done
+COMMAND="$COMMAND vstack=inputs=${COUNT}[v]\" -map \"[v]\" -map 1:a $1.all.mp4 >/dev/null 2>&1"
+
+echo $COMMAND
+
+# Run the command
+eval $COMMAND
--- /dev/null
+import os
+import subprocess
+import re
+import csv
+import wave
+import contextlib
+import argparse
+
+
+# Custom action to handle comma-separated list
+class ListAction(argparse.Action):
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, [int(val) for val in values.split(",")])
+
+
+parser = argparse.ArgumentParser(description="Benchmark the speech recognition model")
+
+# Define the argument to accept a list
+parser.add_argument(
+ "-t",
+ "--threads",
+ dest="threads",
+ action=ListAction,
+ default=[4],
+ help="List of thread counts to benchmark (comma-separated, default: 4)",
+)
+
+parser.add_argument(
+ "-p",
+ "--processors",
+ dest="processors",
+ action=ListAction,
+ default=[1],
+ help="List of processor counts to benchmark (comma-separated, default: 1)",
+)
+
+
+parser.add_argument(
+ "-f",
+ "--filename",
+ type=str,
+ default="./samples/jfk.wav",
+ help="Relative path of the file to transcribe (default: ./samples/jfk.wav)",
+)
+
+# Parse the command line arguments
+args = parser.parse_args()
+
+sample_file = args.filename
+
+threads = args.threads
+processors = args.processors
+
+# Define the models, threads, and processor counts to benchmark
+models = [
+ "ggml-tiny.en.bin",
+ "ggml-tiny.bin",
+ "ggml-base.en.bin",
+ "ggml-base.bin",
+ "ggml-small.en.bin",
+ "ggml-small.bin",
+ "ggml-medium.en.bin",
+ "ggml-medium.bin",
+ "ggml-large-v1.bin",
+ "ggml-large-v2.bin",
+ "ggml-large-v3.bin",
+]
+
+
+metal_device = ""
+
+# Initialize a dictionary to hold the results
+results = {}
+
+gitHashHeader = "Commit"
+modelHeader = "Model"
+hardwareHeader = "Hardware"
+recordingLengthHeader = "Recording Length (seconds)"
+threadHeader = "Thread"
+processorCountHeader = "Processor Count"
+loadTimeHeader = "Load Time (ms)"
+sampleTimeHeader = "Sample Time (ms)"
+encodeTimeHeader = "Encode Time (ms)"
+decodeTimeHeader = "Decode Time (ms)"
+sampleTimePerRunHeader = "Sample Time per Run (ms)"
+encodeTimePerRunHeader = "Encode Time per Run (ms)"
+decodeTimePerRunHeader = "Decode Time per Run (ms)"
+totalTimeHeader = "Total Time (ms)"
+
+
+def check_file_exists(file: str) -> bool:
+ return os.path.isfile(file)
+
+
+def get_git_short_hash() -> str:
+ try:
+ return (
+ subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
+ .decode()
+ .strip()
+ )
+ except subprocess.CalledProcessError as e:
+ return ""
+
+
+def wav_file_length(file: str = sample_file) -> float:
+ with contextlib.closing(wave.open(file, "r")) as f:
+ frames = f.getnframes()
+ rate = f.getframerate()
+ duration = frames / float(rate)
+ return duration
+
+
+def extract_metrics(output: str, label: str) -> tuple[float, float]:
+ match = re.search(rf"{label} \s*=\s*(\d+\.\d+)\s*ms\s*/\s*(\d+)\s*runs", output)
+ time = float(match.group(1)) if match else None
+ runs = float(match.group(2)) if match else None
+ return time, runs
+
+
+def extract_device(output: str) -> str:
+ match = re.search(r"picking default device: (.*)", output)
+ device = match.group(1) if match else "Not found"
+ return device
+
+
+# Check if the sample file exists
+if not check_file_exists(sample_file):
+ raise FileNotFoundError(f"Sample file {sample_file} not found")
+
+recording_length = wav_file_length()
+
+
+# Check that all models exist
+# Filter out models from list that are not downloaded
+filtered_models = []
+for model in models:
+ if check_file_exists(f"models/{model}"):
+ filtered_models.append(model)
+ else:
+ print(f"Model {model} not found, removing from list")
+
+models = filtered_models
+
+# Loop over each combination of parameters
+for model in filtered_models:
+    for thread in threads:
+        for processor_count in processors:
+            # Construct the command to run
+            cmd = f"./main -m models/{model} -t {thread} -p {processor_count} -f {sample_file}"
+            # Run the command and get the output
+            process = subprocess.Popen(
+                cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            )
+
+            output = ""
+            # NOTE(review): stdout.read() blocks until EOF, so this loop
+            # normally makes a single pass; process.communicate() would be
+            # the usual idiom here.
+            while process.poll() is None:
+                output += process.stdout.read().decode()
+
+            # Parse the output
+            load_time_match = re.search(r"load time\s*=\s*(\d+\.\d+)\s*ms", output)
+            load_time = float(load_time_match.group(1)) if load_time_match else None
+
+            metal_device = extract_device(output)
+            sample_time, sample_runs = extract_metrics(output, "sample time")
+            encode_time, encode_runs = extract_metrics(output, "encode time")
+            decode_time, decode_runs = extract_metrics(output, "decode time")
+
+            total_time_match = re.search(r"total time\s*=\s*(\d+\.\d+)\s*ms", output)
+            total_time = float(total_time_match.group(1)) if total_time_match else None
+
+            # Strip file-name decorations so the CSV shows e.g. "base-q5_0".
+            model_name = model.replace("ggml-", "").replace(".bin", "")
+
+            print(
+                f"Ran model={model_name} threads={thread} processor_count={processor_count}, took {total_time}ms"
+            )
+            # Store the times in the results dictionary
+            # NOTE(review): the per-run divisions below raise TypeError if a
+            # timing line was missing from the output (extract_metrics then
+            # returns None) — TODO guard before dividing.
+            results[(model_name, thread, processor_count)] = {
+                loadTimeHeader: load_time,
+                sampleTimeHeader: sample_time,
+                encodeTimeHeader: encode_time,
+                decodeTimeHeader: decode_time,
+                sampleTimePerRunHeader: round(sample_time / sample_runs, 2),
+                encodeTimePerRunHeader: round(encode_time / encode_runs, 2),
+                decodeTimePerRunHeader: round(decode_time / decode_runs, 2),
+                totalTimeHeader: total_time,
+            }
+
+# Write the results to a CSV file
+with open("benchmark_results.csv", "w", newline="") as csvfile:
+    # Column order of the report; header constants are defined above.
+    fieldnames = [
+        gitHashHeader,
+        modelHeader,
+        hardwareHeader,
+        recordingLengthHeader,
+        threadHeader,
+        processorCountHeader,
+        loadTimeHeader,
+        sampleTimeHeader,
+        encodeTimeHeader,
+        decodeTimeHeader,
+        sampleTimePerRunHeader,
+        encodeTimePerRunHeader,
+        decodeTimePerRunHeader,
+        totalTimeHeader,
+    ]
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+
+    shortHash = get_git_short_hash()
+    # Sort the results by total time in ascending order
+    sorted_results = sorted(results.items(), key=lambda x: x[1].get(totalTimeHeader, 0))
+    for params, times in sorted_results:
+        # NOTE(review): metal_device still holds the value from the LAST
+        # benchmark run, so every row reports the same device — fine when
+        # benchmarking on a single machine, but worth confirming.
+        row = {
+            gitHashHeader: shortHash,
+            modelHeader: params[0],
+            hardwareHeader: metal_device,
+            recordingLengthHeader: recording_length,
+            threadHeader: params[1],
+            processorCountHeader: params[2],
+        }
+        row.update(times)
+        writer.writerow(row)
--- /dev/null
+#!/bin/bash
+
+# Convert all locally cached OpenAI Whisper PyTorch checkpoints
+# (~/.cache/whisper/<model>.pt) to ggml format under models/.
+
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
+
+for model in "${models[@]}"; do
+    # The converter always writes models/ggml-model.bin; rename it per model.
+    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/"$model".pt ../whisper models/
+    mv -v models/ggml-model.bin models/ggml-"$model".bin
+done
--- /dev/null
+#!/bin/bash
+#
+# This is a helper script to deploy all WebAssembly examples to my node
+# Run from the build directory:
+#
+#   cd build-em
+#   ../scripts/deploy-wasm.sh
+#
+
+# check if emcmake is available
+if ! command -v emcmake &> /dev/null
+then
+    # diagnostics go to stderr; exit non-zero so callers can detect failure
+    echo "Error: emscripten environment is not set up" >&2
+    exit 1
+fi
+
+# build everything first; abort with a failing status if the build breaks
+emcmake cmake .. && make -j
+if [ $? -ne 0 ]; then
+    echo "Error: build failed" >&2
+    exit 1
+fi
+
+# copy all wasm files to the node
+scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libmain.worker.js root@linode0:/var/www/html/whisper/
+scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/
+scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
+scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/
+scp bin/bench.wasm/* root@linode0:/var/www/html/whisper/bench/ && scp bin/libbench.worker.js root@linode0:/var/www/html/whisper/bench/
+
+echo "Done"
+exit 0
--- /dev/null
+#!/bin/bash
+
+# Quantize every non-quantized ggml model in ./models to q5_0 and q5_1,
+# optionally uploading the results when the only argument is "1".
+
+printf "Usage: %s <upload>\n" "$0"
+
+if [ $# -ne 1 ]; then
+    printf "\nError: Invalid number of arguments\n" >&2
+    exit 1
+fi
+
+qtype0="q5_0"
+qtype1="q5_1"
+upload="$1"
+declare -a filedex
+
+# run from the repository root, one level above this script
+cd "$(dirname "$0")" || exit 1
+cd ../ || exit 1
+
+# quantize each base model (files already carrying a "-q" suffix are skipped)
+for i in $(ls ./models | grep ^ggml-.*.bin | grep -v "\-q"); do
+    m="models/$i"
+    if [ -f "$m" ]; then
+        if [ "${m##*.}" == "bin" ]; then
+            # ${m::${#m}-4} strips the trailing ".bin" before appending the qtype
+            ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
+            ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
+            filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
+        fi
+    fi
+done
+
+# upload the quantized models, skipping any "for-test" artifacts
+# (the :9 offset strips the leading "models/gg" from the stored path)
+if [ "$upload" == "1" ]; then
+    for i in "${!filedex[@]}"; do
+        if [ "${filedex[$i]:9:8}" != "for-test" ]; then
+            scp "${filedex[$i]}" root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9}
+        fi
+    done
+fi
--- /dev/null
+#!/bin/bash
+
+# Compute the SHA1 of all model files in ./models/ggml-*.bin
+
+for model_file in ./models/ggml-*.bin; do
+    shasum -a 1 "$model_file"
+done
--- /dev/null
+#!/bin/bash
+#
+# Synchronize ggml changes to whisper.cpp
+#
+# Usage:
+#
+# $ cd /path/to/whisper.cpp
+# $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2...
+#
+
+set -e
+
+sd=$(dirname $0)
+cd $sd/../
+
+# this repo's root, and the sibling ggml checkout it syncs from
+SRC_WHISPER=$(pwd)
+SRC_GGML=$(cd ../ggml; pwd)
+
+if [ ! -d $SRC_GGML ]; then
+    echo "ggml not found at $SRC_GGML"
+    exit 1
+fi
+
+# hash of the last ggml commit that was synced, recorded by a previous run
+lc=$(cat $SRC_WHISPER/scripts/sync-ggml.last)
+echo "Syncing ggml changes since commit $lc"
+
+to_skip=""
+if [ "$1" == "-skip" ]; then
+    to_skip=$2
+fi
+
+cd $SRC_GGML
+
+# list the new commits; drop commits that themselves came FROM whisper.cpp
+# (their subjects are tagged "(whisper/NNNN)") to avoid syncing them back
+git log --oneline $lc..HEAD
+git log --oneline $lc..HEAD --reverse | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_WHISPER/ggml-commits
+
+if [ ! -s $SRC_WHISPER/ggml-commits ]; then
+    rm -v $SRC_WHISPER/ggml-commits
+    echo "No new commits"
+    exit 0
+fi
+
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+# concatenate every selected commit into one mailbox-style patch,
+# restricted to the ggml files that whisper.cpp vendors
+while read c; do
+    if [ -n "$to_skip" ]; then
+        if [[ $to_skip == *"$c"* ]]; then
+            echo "Skipping $c"
+            continue
+        fi
+    fi
+
+    git format-patch -k $c~1..$c --stdout -- \
+        include/ggml/ggml*.h \
+        src/ggml*.h \
+        src/ggml*.c \
+        src/ggml*.cpp \
+        src/ggml*.m \
+        src/ggml*.metal \
+        src/ggml*.cu \
+        src/ggml-cuda/* \
+        examples/common.h \
+        examples/common.cpp \
+        examples/common-ggml.h \
+        examples/common-ggml.cpp \
+        examples/whisper/grammar-parser.h \
+        examples/whisper/grammar-parser.cpp \
+        examples/whisper/whisper.h \
+        examples/whisper/whisper.cpp \
+        examples/whisper/main.cpp \
+        examples/whisper/quantize.cpp \
+        >> $SRC_WHISPER/ggml-src.patch
+done < $SRC_WHISPER/ggml-commits
+
+rm -v $SRC_WHISPER/ggml-commits
+
+# delete files if empty
+if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+cd $SRC_WHISPER
+
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    # replace PR numbers
+    #
+    # Subject: some text (#1234)
+    # Subject: some text (ggml/1234)
+    cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    # replace filenames:
+    #
+    # src/ggml.c -> ggml.c
+    # src/ggml-alloc.c -> ggml-alloc.c
+    # src/ggml-backend-impl.h -> ggml-backend-impl.h
+    # src/ggml-backend.c -> ggml-backend.c
+    # src/ggml-common.h -> ggml-common.h
+    # src/ggml-cuda/* -> ggml-cuda/
+    # src/ggml-cuda.cu -> ggml-cuda.cu
+    # src/ggml-cuda.h -> ggml-cuda.h
+    # src/ggml-impl.h -> ggml-impl.h
+    # src/ggml-kompute.cpp -> ggml-kompute.cpp
+    # src/ggml-kompute.h -> ggml-kompute.h
+    # src/ggml-metal.h -> ggml-metal.h
+    # src/ggml-metal.m -> ggml-metal.m
+    # src/ggml-mpi.h -> ggml-mpi.h
+    # src/ggml-mpi.c -> ggml-mpi.c
+    # src/ggml-opencl.cpp -> ggml-opencl.cpp
+    # src/ggml-opencl.h -> ggml-opencl.h
+    # src/ggml-quants.c -> ggml-quants.c
+    # src/ggml-quants.h -> ggml-quants.h
+    # src/ggml-sycl.cpp -> ggml-sycl.cpp
+    # src/ggml-sycl.h -> ggml-sycl.h
+    # src/ggml-vulkan.cpp -> ggml-vulkan.cpp
+    # src/ggml-vulkan.h -> ggml-vulkan.h
+    # include/ggml/ggml.h -> ggml.h
+    # include/ggml/ggml-alloc.h -> ggml-alloc.h
+    # include/ggml/ggml-backend.h -> ggml-backend.h
+    #
+    # examples/common.h -> examples/common.h
+    # examples/common.cpp -> examples/common.cpp
+    # examples/common-ggml.h -> examples/common-ggml.h
+    # examples/common-ggml.cpp -> examples/common-ggml.cpp
+    # examples/whisper/grammar-parser.h -> examples/grammar-parser.h
+    # examples/whisper/grammar-parser.cpp -> examples/grammar-parser.cpp
+    #
+    # examples/whisper/whisper.h -> whisper.h
+    # examples/whisper/whisper.cpp -> whisper.cpp
+    # examples/whisper/main.cpp -> examples/main/main.cpp
+    # examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
+
+    cat ggml-src.patch | sed \
+        -e 's/src\/ggml\.c/ggml.c/g' \
+        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
+        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
+        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
+        -e 's/src\/ggml-common\.h/ggml-common.h/g' \
+        -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
+        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
+        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
+        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
+        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
+        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
+        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
+        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
+        -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
+        -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
+        -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
+        -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
+        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
+        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
+        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
+        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
+        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
+        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
+        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
+        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
+        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
+        -e 's/examples\/common\.h/examples\/common.h/g' \
+        -e 's/examples\/common\.cpp/examples\/common.cpp/g' \
+        -e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
+        -e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
+        -e 's/examples\/whisper\/grammar-parser\.h/examples\/grammar-parser.h/g' \
+        -e 's/examples\/whisper\/grammar-parser\.cpp/examples\/grammar-parser.cpp/g' \
+        -e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
+        -e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
+        -e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
+        -e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
+        > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    # apply the accumulated, path-rewritten patch as individual commits
+    git am ggml-src.patch
+
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+# update last commit
+cd $SRC_GGML
+git log -1 --format=%H > $SRC_WHISPER/scripts/sync-ggml.last
+
+echo "Done"
+
+exit 0
--- /dev/null
+bb8d8cff851b2de6fde4904be492d39458837e1a
--- /dev/null
+#!/bin/bash
+
+# Copy the ggml sources from a sibling ../ggml checkout into this repo,
+# overwriting the vendored copies (bulk counterpart of sync-ggml-am.sh).
+
+cp -rpv ../ggml/src/ggml.c              ./ggml.c
+cp -rpv ../ggml/src/ggml-impl.h         ./ggml-impl.h
+cp -rpv ../ggml/src/ggml-alloc.c        ./ggml-alloc.c
+cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
+cp -rpv ../ggml/src/ggml-backend.c      ./ggml-backend.c
+cp -rpv ../ggml/src/ggml-common.h       ./ggml-common.h
+cp -rpv ../ggml/src/ggml-cuda/*         ./ggml-cuda/
+cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-cuda.h         ./ggml-cuda.h
+cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml-kompute.cpp
+cp -rpv ../ggml/src/ggml-kompute.h      ./ggml-kompute.h
+cp -rpv ../ggml/src/ggml-metal.h        ./ggml-metal.h
+cp -rpv ../ggml/src/ggml-metal.m        ./ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal    ./ggml-metal.metal
+# MPI backend is currently not vendored
+#cp -rpv ../ggml/src/ggml-mpi.h          ./ggml-mpi.h
+#cp -rpv ../ggml/src/ggml-mpi.c          ./ggml-mpi.c
+cp -rpv ../ggml/src/ggml-opencl.cpp     ./ggml-opencl.cpp
+cp -rpv ../ggml/src/ggml-opencl.h       ./ggml-opencl.h
+cp -rpv ../ggml/src/ggml-quants.c       ./ggml-quants.c
+cp -rpv ../ggml/src/ggml-quants.h       ./ggml-quants.h
+cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml-sycl.cpp
+cp -rpv ../ggml/src/ggml-sycl.h         ./ggml-sycl.h
+cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml-vulkan.cpp
+cp -rpv ../ggml/src/ggml-vulkan.h       ./ggml-vulkan.h
+
+# public headers
+cp -rpv ../ggml/include/ggml/ggml.h         ./ggml.h
+cp -rpv ../ggml/include/ggml/ggml-alloc.h   ./ggml-alloc.h
+cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
+
+# shared example helpers
+cp -rpv ../ggml/examples/common.h                    ./examples/common.h
+cp -rpv ../ggml/examples/common.cpp                  ./examples/common.cpp
+cp -rpv ../ggml/examples/common-ggml.h               ./examples/common-ggml.h
+cp -rpv ../ggml/examples/common-ggml.cpp             ./examples/common-ggml.cpp
+cp -rpv ../ggml/examples/whisper/grammar-parser.h    ./examples/grammar-parser.h
+cp -rpv ../ggml/examples/whisper/grammar-parser.cpp  ./examples/grammar-parser.cpp
+
+# the whisper sources themselves live under ggml's examples tree upstream
+cp -rpv ../ggml/examples/whisper/whisper.h    ./whisper.h
+cp -rpv ../ggml/examples/whisper/whisper.cpp  ./whisper.cpp
+cp -rpv ../ggml/examples/whisper/main.cpp     ./examples/main/main.cpp
+cp -rpv ../ggml/examples/whisper/quantize.cpp ./examples/quantize/quantize.cpp
+
--- /dev/null
+#!/bin/bash
+
+# Sync the llama.cpp sources used by the talk-llama example from a
+# sibling ../llama.cpp checkout.
+
+cp -rpv ../llama.cpp/llama.h            ./examples/talk-llama/llama.h
+cp -rpv ../llama.cpp/llama.cpp          ./examples/talk-llama/llama.cpp
+cp -rpv ../llama.cpp/unicode.h          ./examples/talk-llama/unicode.h
+cp -rpv ../llama.cpp/unicode.cpp        ./examples/talk-llama/unicode.cpp
+cp -rpv ../llama.cpp/unicode-data.h     ./examples/talk-llama/unicode-data.h
+cp -rpv ../llama.cpp/unicode-data.cpp   ./examples/talk-llama/unicode-data.cpp