From: Georgi Gerganov <redacted>
Date: Tue, 9 Apr 2024 17:12:17 +0000 (+0300)
Subject: files : rename ./extra to ./scripts
X-Git-Tag: upstream/1.7.4~846
X-Git-Url: https://git.djapps.eu/?a=commitdiff_plain;h=52ccd4a3a8efb2e346b5db0dc01cababd825b186;p=pkg%2Fggml%2Fsources%2Fwhisper.cpp

files : rename ./extra to ./scripts
---

diff --git a/README.md b/README.md
index 6b6ea673..1499e107 100644
--- a/README.md
+++ b/README.md
@@ -744,10 +744,10 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a
 
 ## Video comparison of different models
 
-Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:
+Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
 
 ```bash
-./extra/bench-wts.sh samples/jfk.wav
+./scripts/bench-wts.sh samples/jfk.wav
 ffplay ./samples/jfk.wav.all.mp4
 ```
 
@@ -768,7 +768,7 @@ Additionally a script to run whisper.cpp with different models and audio files i
 You can run it with the following command, by default it will run against any standard model in the models folder.
 
 ```bash
-python3 extra/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
+python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
 ```
 
 It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
diff --git a/extra/bench-all.sh b/extra/bench-all.sh
deleted file mode 100755
index 6939dafa..00000000
--- a/extra/bench-all.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/bin/bash
-
-# Helper script to run the bench tool on all models and print the results in share-able format
-
-printf "Usage: ./bench.sh [n_threads] [encoder-only]\n"
-
-if [ -z "$1" ]; then
-    n_threads=4
-else
-    n_threads=$1
-fi
-
-encoder_only=0
-if [ -z "$2" ]; then
-    encoder_only=0
-else
-    encoder_only=$2
-fi
-
-models=(                                                                                                    \
-      "tiny"     "tiny-q4_0"     "tiny-q4_1"     "tiny-q5_0"     "tiny-q5_1"     "tiny-q8_0"                \
-      "base"     "base-q4_0"     "base-q4_1"     "base-q5_0"     "base-q5_1"     "base-q8_0"                \
-     "small"    "small-q4_0"    "small-q4_1"    "small-q5_0"    "small-q5_1"    "small-q8_0"                \
-    "medium"   "medium-q4_0"   "medium-q4_1"   "medium-q5_0"   "medium-q5_1"   "medium-q8_0"   "medium-dis" \
-  "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \
-)
-
-if [ "$encoder_only" -eq 0 ]; then
-    printf "\n"
-    printf "Running memcpy benchmark\n"
-    printf "\n"
-
-    ./bench -w 1 -t $n_threads 2>&1
-
-    printf "\n"
-    printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
-    printf "\n"
-
-    ./bench -w 2 -t $n_threads 2>&1
-
-    printf "\n"
-    printf "Running benchmark for all models\n"
-    printf "This can take a while!\n"
-    printf "\n"
-fi
-
-printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit"
-printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---"
-
-for model in "${models[@]}"; do
-    # actual run
-    # store stderr output in a variable in order to parse it later
-    output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
-    ret=$?
-
-    # parse the output:
-    encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')
-    decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
-    batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
-    prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
-    system_info=$(echo "$output" | grep "system_info")
-    n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
-
-    # floor to milliseconds
-    #encode_time=${encode_time%.*}
-    #decode_time=${decode_time%.*}
-    #prompt_time=${prompt_time%.*}
-
-    config=""
-
-    if [[ $system_info == *"AVX2 = 1"* ]]; then
-        config="$config AVX2"
-    fi
-
-    if [[ $system_info == *"NEON = 1"* ]]; then
-        config="$config NEON"
-    fi
-
-    if [[ $system_info == *"BLAS = 1"* ]]; then
-        config="$config BLAS"
-    fi
-
-    if [[ $system_info == *"COREML = 1"* ]]; then
-        config="$config COREML"
-    fi
-
-    if [[ $system_info == *"CUDA = 1"* ]]; then
-        config="$config CUDA"
-    fi
-
-    if [[ $system_info == *"METAL = 1"* ]]; then
-        config="$config METAL"
-    fi
-
-    commit=$(git rev-parse --short HEAD)
-
-    if [ $ret -eq 0 ]; then
-        printf "| <todo> | <todo> | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit"
-    fi
-done
diff --git a/extra/bench-wts.sh b/extra/bench-wts.sh
deleted file mode 100755
index 223d71b8..00000000
--- a/extra/bench-wts.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-# Benchmark word-level timestamps for different models
-#
-# This script takes two arguments
-# - an audio file
-# - [optional] path to a font file
-
-# I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu
-
-if [ -z "$1" ]; then
-    echo "Usage: $0 <audio file> [font file]"
-    exit 1
-fi
-
-#TODO: Make this a command line parameter
-#models="base small large"
-#models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large"
-models="tiny.en base.en small.en medium.en large"
-
-DURATION=$(ffprobe -i $1 -show_entries format=duration -v quiet -of csv="p=0")
-DURATION=$(printf "%.2f" $DURATION)
-echo "Input file duration: ${DURATION}s"
-
-for model in $models; do
-    echo "Running $model"
-    COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
-
-    if [ ! -z "$2" ]; then
-        COMMAND="$COMMAND -fp $2"
-    fi
-    #TODO: Surface errors better
-    # TIMEFMT is for zsh, TIMEFORMAT is for bash
-    EXECTIME=$({ TIMEFMT="%E";TIMEFORMAT=%E; time $COMMAND >/dev/null 2>&1; } 2>&1)
-
-    # Slightly different formats between zsh and bash
-    if [ "${EXECTIME: -1}" == "s" ]; then
-        EXECTIME=${EXECTIME::-1}
-    fi
-
-    RATIO=$(echo "$DURATION / $EXECTIME" | bc -l)
-    RATIO=$(printf "%.2f" $RATIO)
-
-    echo "Execution time: ${EXECTIME}s (${RATIO}x realtime)"
-
-    # If the file already exists, delete it
-    if [ -f $1.mp4 ]; then
-        rm $1.mp4
-    fi
-
-    bash $1.$model.wts >/dev/null 2>&1
-    mv $1.mp4 $1.$model.mp4
-
-    ffmpeg -y -f lavfi -i color=c=black:s=1200x50:d=$DURATION -vf "drawtext=fontfile=$2:fontsize=36:x=10:y=(h-text_h)/2:text='ggml-$model - ${EXECTIME}s (${RATIO}x realtime)':fontcolor=lightgrey" $1.$model.info.mp4 >/dev/null 2>&1
-done
-
-COMMAND="ffmpeg -y"
-for model in $models; do
-    COMMAND="$COMMAND -i $1.$model.info.mp4 -i $1.$model.mp4"
-done
-COMMAND="$COMMAND -filter_complex \""
-COUNT=0
-for model in $models; do
-    COMMAND="$COMMAND[${COUNT}:v][$(($COUNT+1)):v]"
-    COUNT=$((COUNT+2))
-done
-COMMAND="$COMMAND vstack=inputs=${COUNT}[v]\" -map \"[v]\" -map 1:a $1.all.mp4 >/dev/null 2>&1"
-
-echo $COMMAND
-
-# Run the command
-eval $COMMAND
diff --git a/extra/bench.py b/extra/bench.py
deleted file mode 100644
index 25a09db8..00000000
--- a/extra/bench.py
+++ /dev/null
@@ -1,224 +0,0 @@
-import os
-import subprocess
-import re
-import csv
-import wave
-import contextlib
-import argparse
-
-
-# Custom action to handle comma-separated list
-class ListAction(argparse.Action):
-    def __call__(self, parser, namespace, values, option_string=None):
-        setattr(namespace, self.dest, [int(val) for val in values.split(",")])
-
-
-parser = argparse.ArgumentParser(description="Benchmark the speech recognition model")
-
-# Define the argument to accept a list
-parser.add_argument(
-    "-t",
-    "--threads",
-    dest="threads",
-    action=ListAction,
-    default=[4],
-    help="List of thread counts to benchmark (comma-separated, default: 4)",
-)
-
-parser.add_argument(
-    "-p",
-    "--processors",
-    dest="processors",
-    action=ListAction,
-    default=[1],
-    help="List of processor counts to benchmark (comma-separated, default: 1)",
-)
-
-
-parser.add_argument(
-    "-f",
-    "--filename",
-    type=str,
-    default="./samples/jfk.wav",
-    help="Relative path of the file to transcribe (default: ./samples/jfk.wav)",
-)
-
-# Parse the command line arguments
-args = parser.parse_args()
-
-sample_file = args.filename
-
-threads = args.threads
-processors = args.processors
-
-# Define the models, threads, and processor counts to benchmark
-models = [
-    "ggml-tiny.en.bin",
-    "ggml-tiny.bin",
-    "ggml-base.en.bin",
-    "ggml-base.bin",
-    "ggml-small.en.bin",
-    "ggml-small.bin",
-    "ggml-medium.en.bin",
-    "ggml-medium.bin",
-    "ggml-large-v1.bin",
-    "ggml-large-v2.bin",
-    "ggml-large-v3.bin",
-]
-
-
-metal_device = ""
-
-# Initialize a dictionary to hold the results
-results = {}
-
-gitHashHeader = "Commit"
-modelHeader = "Model"
-hardwareHeader = "Hardware"
-recordingLengthHeader = "Recording Length (seconds)"
-threadHeader = "Thread"
-processorCountHeader = "Processor Count"
-loadTimeHeader = "Load Time (ms)"
-sampleTimeHeader = "Sample Time (ms)"
-encodeTimeHeader = "Encode Time (ms)"
-decodeTimeHeader = "Decode Time (ms)"
-sampleTimePerRunHeader = "Sample Time per Run (ms)"
-encodeTimePerRunHeader = "Encode Time per Run (ms)"
-decodeTimePerRunHeader = "Decode Time per Run (ms)"
-totalTimeHeader = "Total Time (ms)"
-
-
-def check_file_exists(file: str) -> bool:
-    return os.path.isfile(file)
-
-
-def get_git_short_hash() -> str:
-    try:
-        return (
-            subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
-            .decode()
-            .strip()
-        )
-    except subprocess.CalledProcessError as e:
-        return ""
-
-
-def wav_file_length(file: str = sample_file) -> float:
-    with contextlib.closing(wave.open(file, "r")) as f:
-        frames = f.getnframes()
-        rate = f.getframerate()
-        duration = frames / float(rate)
-        return duration
-
-
-def extract_metrics(output: str, label: str) -> tuple[float, float]:
-    match = re.search(rf"{label} \s*=\s*(\d+\.\d+)\s*ms\s*/\s*(\d+)\s*runs", output)
-    time = float(match.group(1)) if match else None
-    runs = float(match.group(2)) if match else None
-    return time, runs
-
-
-def extract_device(output: str) -> str:
-    match = re.search(r"picking default device: (.*)", output)
-    device = match.group(1) if match else "Not found"
-    return device
-
-
-# Check if the sample file exists
-if not check_file_exists(sample_file):
-    raise FileNotFoundError(f"Sample file {sample_file} not found")
-
-recording_length = wav_file_length()
-
-
-# Check that all models exist
-# Filter out models from list that are not downloaded
-filtered_models = []
-for model in models:
-    if check_file_exists(f"models/{model}"):
-        filtered_models.append(model)
-    else:
-        print(f"Model {model} not found, removing from list")
-
-models = filtered_models
-
-# Loop over each combination of parameters
-for model in filtered_models:
-    for thread in threads:
-        for processor_count in processors:
-            # Construct the command to run
-            cmd = f"./main -m models/{model} -t {thread} -p {processor_count} -f {sample_file}"
-            # Run the command and get the output
-            process = subprocess.Popen(
-                cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
-            )
-
-            output = ""
-            while process.poll() is None:
-                output += process.stdout.read().decode()
-
-            # Parse the output
-            load_time_match = re.search(r"load time\s*=\s*(\d+\.\d+)\s*ms", output)
-            load_time = float(load_time_match.group(1)) if load_time_match else None
-
-            metal_device = extract_device(output)
-            sample_time, sample_runs = extract_metrics(output, "sample time")
-            encode_time, encode_runs = extract_metrics(output, "encode time")
-            decode_time, decode_runs = extract_metrics(output, "decode time")
-
-            total_time_match = re.search(r"total time\s*=\s*(\d+\.\d+)\s*ms", output)
-            total_time = float(total_time_match.group(1)) if total_time_match else None
-
-            model_name = model.replace("ggml-", "").replace(".bin", "")
-
-            print(
-                f"Ran model={model_name} threads={thread} processor_count={processor_count}, took {total_time}ms"
-            )
-            # Store the times in the results dictionary
-            results[(model_name, thread, processor_count)] = {
-                loadTimeHeader: load_time,
-                sampleTimeHeader: sample_time,
-                encodeTimeHeader: encode_time,
-                decodeTimeHeader: decode_time,
-                sampleTimePerRunHeader: round(sample_time / sample_runs, 2),
-                encodeTimePerRunHeader: round(encode_time / encode_runs, 2),
-                decodeTimePerRunHeader: round(decode_time / decode_runs, 2),
-                totalTimeHeader: total_time,
-            }
-
-# Write the results to a CSV file
-with open("benchmark_results.csv", "w", newline="") as csvfile:
-    fieldnames = [
-        gitHashHeader,
-        modelHeader,
-        hardwareHeader,
-        recordingLengthHeader,
-        threadHeader,
-        processorCountHeader,
-        loadTimeHeader,
-        sampleTimeHeader,
-        encodeTimeHeader,
-        decodeTimeHeader,
-        sampleTimePerRunHeader,
-        encodeTimePerRunHeader,
-        decodeTimePerRunHeader,
-        totalTimeHeader,
-    ]
-    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-
-    writer.writeheader()
-
-    shortHash = get_git_short_hash()
-    # Sort the results by total time in ascending order
-    sorted_results = sorted(results.items(), key=lambda x: x[1].get(totalTimeHeader, 0))
-    for params, times in sorted_results:
-        row = {
-            gitHashHeader: shortHash,
-            modelHeader: params[0],
-            hardwareHeader: metal_device,
-            recordingLengthHeader: recording_length,
-            threadHeader: params[1],
-            processorCountHeader: params[2],
-        }
-        row.update(times)
-        writer.writerow(row)
diff --git a/extra/convert-all.sh b/extra/convert-all.sh
deleted file mode 100755
index ff765c92..00000000
--- a/extra/convert-all.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
-
-for model in "${models[@]}"; do
-    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
-    mv -v models/ggml-model.bin models/ggml-$model.bin
-done
diff --git a/extra/deploy-wasm.sh b/extra/deploy-wasm.sh
deleted file mode 100755
index f6569c72..00000000
--- a/extra/deploy-wasm.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-#
-# This is a helper script to deploy all WebAssembly examples to my node
-# Run from the build directory:
-#
-# cd build-em
-# ../extra/deploy-wasm.sh
-#
-
-# check if emcmake is available
-if ! command -v emcmake &> /dev/null
-then
-    echo "Error: emscripten environment is not set up"
-    exit
-fi
-
-emcmake cmake .. && make -j
-if [ $? -ne 0 ]; then
-    echo "Error: build failed"
-    exit
-fi
-
-# copy all wasm files to the node
-scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/         && scp bin/libmain.worker.js    root@linode0:/var/www/html/whisper/
-scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
-scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
-scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
-scp bin/bench.wasm/*   root@linode0:/var/www/html/whisper/bench/   && scp bin/libbench.worker.js   root@linode0:/var/www/html/whisper/bench/
-
-echo "Done"
-exit
diff --git a/extra/quantize-all.sh b/extra/quantize-all.sh
deleted file mode 100755
index 767462b8..00000000
--- a/extra/quantize-all.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-printf "Usage: $0 <upload>"
-
-if [ $# -ne 1 ]; then
-    printf "\nError: Invalid number of arguments\n"
-    exit 1
-fi
-
-qtype0="q5_0"
-qtype1="q5_1"
-upload="$1"
-declare -a filedex
-
-cd `dirname $0`
-cd ../
-
-for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do
-    m="models/$i"
-    if [ -f "$m" ]; then
-        if [ "${m##*.}" == "bin" ]; then
-            ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
-            ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
-            filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
-        fi
-    fi
-done
-
-
-
-if [ "$upload" == "1" ]; then
-    for i in ${!filedex[@]}; do
-        if [ "${filedex[$i]:9:8}" != "for-test" ]; then
-            scp ${filedex[$i]} root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9}
-        fi
-    done
-fi
diff --git a/extra/sha-all.sh b/extra/sha-all.sh
deleted file mode 100755
index dba087bb..00000000
--- a/extra/sha-all.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-# Compute the SHA1 of all model files in ./models/ggml-*.bin
-
-for f in ./models/ggml-*.bin; do
-    shasum "$f" -a 1
-done
diff --git a/extra/sync-ggml-am.sh b/extra/sync-ggml-am.sh
deleted file mode 100755
index dc4be9bd..00000000
--- a/extra/sync-ggml-am.sh
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/bin/bash
-#
-# Synchronize ggml changes to whisper.cpp
-#
-# Usage:
-#
-#   $ cd /path/to/whisper.cpp
-#   $ ./extra/sync-ggml-am.sh -skip hash0,hash1,hash2...
-#
-
-set -e
-
-sd=$(dirname $0)
-cd $sd/../
-
-SRC_WHISPER=$(pwd)
-SRC_GGML=$(cd ../ggml; pwd)
-
-if [ ! -d $SRC_GGML ]; then
-    echo "ggml not found at $SRC_GGML"
-    exit 1
-fi
-
-lc=$(cat $SRC_WHISPER/extra/sync-ggml.last)
-echo "Syncing ggml changes since commit $lc"
-
-to_skip=""
-if [ "$1" == "-skip" ]; then
-    to_skip=$2
-fi
-
-cd $SRC_GGML
-
-git log --oneline $lc..HEAD
-git log --oneline $lc..HEAD --reverse | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_WHISPER/ggml-commits
-
-if [ ! -s $SRC_WHISPER/ggml-commits ]; then
-    rm -v $SRC_WHISPER/ggml-commits
-    echo "No new commits"
-    exit 0
-fi
-
-if [ -f $SRC_WHISPER/ggml-src.patch ]; then
-    rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-while read c; do
-    if [ -n "$to_skip" ]; then
-        if [[ $to_skip == *"$c"* ]]; then
-            echo "Skipping $c"
-            continue
-        fi
-    fi
-
-    git format-patch -k $c~1..$c --stdout -- \
-        include/ggml/ggml*.h \
-        src/ggml*.h \
-        src/ggml*.c \
-        src/ggml*.cpp \
-        src/ggml*.m \
-        src/ggml*.metal \
-        src/ggml*.cu \
-        src/ggml-cuda/* \
-        examples/common.h \
-        examples/common.cpp \
-        examples/common-ggml.h \
-        examples/common-ggml.cpp \
-        examples/whisper/grammar-parser.h \
-        examples/whisper/grammar-parser.cpp \
-        examples/whisper/whisper.h \
-        examples/whisper/whisper.cpp \
-        examples/whisper/main.cpp \
-        examples/whisper/quantize.cpp \
-        >> $SRC_WHISPER/ggml-src.patch
-done < $SRC_WHISPER/ggml-commits
-
-rm -v $SRC_WHISPER/ggml-commits
-
-# delete files if empty
-if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
-    rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-cd $SRC_WHISPER
-
-if [ -f $SRC_WHISPER/ggml-src.patch ]; then
-    # replace PR numbers
-    #
-    # Subject: some text (#1234)
-    # Subject: some text (ggml/1234)
-    cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    # replace filenames:
-    #
-    # src/ggml.c                  -> ggml.c
-    # src/ggml-alloc.c            -> ggml-alloc.c
-    # src/ggml-backend-impl.h     -> ggml-backend-impl.h
-    # src/ggml-backend.c          -> ggml-backend.c
-    # src/ggml-common.h           -> ggml-common.h
-    # src/ggml-cuda/*             -> ggml-cuda/
-    # src/ggml-cuda.cu            -> ggml-cuda.cu
-    # src/ggml-cuda.h             -> ggml-cuda.h
-    # src/ggml-impl.h             -> ggml-impl.h
-    # src/ggml-kompute.cpp        -> ggml-kompute.cpp
-    # src/ggml-kompute.h          -> ggml-kompute.h
-    # src/ggml-metal.h            -> ggml-metal.h
-    # src/ggml-metal.m            -> ggml-metal.m
-    # src/ggml-mpi.h              -> ggml-mpi.h
-    # src/ggml-mpi.c              -> ggml-mpi.c
-    # src/ggml-opencl.cpp         -> ggml-opencl.cpp
-    # src/ggml-opencl.h           -> ggml-opencl.h
-    # src/ggml-quants.c           -> ggml-quants.c
-    # src/ggml-quants.h           -> ggml-quants.h
-    # src/ggml-sycl.cpp           -> ggml-sycl.cpp
-    # src/ggml-sycl.h             -> ggml-sycl.h
-    # src/ggml-vulkan.cpp         -> ggml-vulkan.cpp
-    # src/ggml-vulkan.h           -> ggml-vulkan.h
-    # include/ggml/ggml.h         -> ggml.h
-    # include/ggml/ggml-alloc.h   -> ggml-alloc.h
-    # include/ggml/ggml-backend.h -> ggml-backend.h
-    #
-    # examples/common.h                   -> examples/common.h
-    # examples/common.cpp                 -> examples/common.cpp
-    # examples/common-ggml.h              -> examples/common-ggml.h
-    # examples/common-ggml.cpp            -> examples/common-ggml.cpp
-    # examples/whisper/grammar-parser.h   -> examples/grammar-parser.h
-    # examples/whisper/grammar-parser.cpp -> examples/grammar-parser.cpp
-    #
-    # examples/whisper/whisper.h    -> whisper.h
-    # examples/whisper/whisper.cpp  -> whisper.cpp
-    # examples/whisper/main.cpp     -> examples/main/main.cpp
-    # examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
-
-    cat ggml-src.patch | sed \
-        -e 's/src\/ggml\.c/ggml.c/g' \
-        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
-        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
-        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
-        -e 's/src\/ggml-common\.h/ggml-common.h/g' \
-        -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
-        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
-        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
-        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
-        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
-        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
-        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
-        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
-        -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
-        -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
-        -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
-        -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
-        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
-        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
-        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
-        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
-        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
-        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
-        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
-        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
-        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
-        -e 's/examples\/common\.h/examples\/common.h/g' \
-        -e 's/examples\/common\.cpp/examples\/common.cpp/g' \
-        -e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
-        -e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
-        -e 's/examples\/whisper\/grammar-parser\.h/examples\/grammar-parser.h/g' \
-        -e 's/examples\/whisper\/grammar-parser\.cpp/examples\/grammar-parser.cpp/g' \
-        -e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
-        -e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
-        -e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
-        -e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
-        > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    git am ggml-src.patch
-
-    rm -v $SRC_WHISPER/ggml-src.patch
-fi
-
-# update last commit
-cd $SRC_GGML
-git log -1 --format=%H > $SRC_WHISPER/extra/sync-ggml.last
-
-echo "Done"
-
-exit 0
diff --git a/extra/sync-ggml.last b/extra/sync-ggml.last
deleted file mode 100644
index 82195550..00000000
--- a/extra/sync-ggml.last
+++ /dev/null
@@ -1 +0,0 @@
-bb8d8cff851b2de6fde4904be492d39458837e1a
diff --git a/extra/sync-ggml.sh b/extra/sync-ggml.sh
deleted file mode 100755
index 1482cfbc..00000000
--- a/extra/sync-ggml.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-cp -rpv ../ggml/src/ggml.c              ./ggml.c
-cp -rpv ../ggml/src/ggml-impl.h         ./ggml-impl.h
-cp -rpv ../ggml/src/ggml-alloc.c        ./ggml-alloc.c
-cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
-cp -rpv ../ggml/src/ggml-backend.c      ./ggml-backend.c
-cp -rpv ../ggml/src/ggml-common.h       ./ggml-common.h
-cp -rpv ../ggml/src/ggml-cuda/*         ./ggml-cuda/
-cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml-cuda.cu
-cp -rpv ../ggml/src/ggml-cuda.h         ./ggml-cuda.h
-cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml-kompute.cpp
-cp -rpv ../ggml/src/ggml-kompute.h      ./ggml-kompute.h
-cp -rpv ../ggml/src/ggml-metal.h        ./ggml-metal.h
-cp -rpv ../ggml/src/ggml-metal.m        ./ggml-metal.m
-cp -rpv ../ggml/src/ggml-metal.metal    ./ggml-metal.metal
-#cp -rpv ../ggml/src/ggml-mpi.h          ./ggml-mpi.h
-#cp -rpv ../ggml/src/ggml-mpi.c          ./ggml-mpi.c
-cp -rpv ../ggml/src/ggml-opencl.cpp     ./ggml-opencl.cpp
-cp -rpv ../ggml/src/ggml-opencl.h       ./ggml-opencl.h
-cp -rpv ../ggml/src/ggml-quants.c       ./ggml-quants.c
-cp -rpv ../ggml/src/ggml-quants.h       ./ggml-quants.h
-cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml-sycl.cpp
-cp -rpv ../ggml/src/ggml-sycl.h         ./ggml-sycl.h
-cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml-vulkan.cpp
-cp -rpv ../ggml/src/ggml-vulkan.h       ./ggml-vulkan.h
-
-cp -rpv ../ggml/include/ggml/ggml.h         ./ggml.h
-cp -rpv ../ggml/include/ggml/ggml-alloc.h   ./ggml-alloc.h
-cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
-
-cp -rpv ../ggml/examples/common.h                   ./examples/common.h
-cp -rpv ../ggml/examples/common.cpp                 ./examples/common.cpp
-cp -rpv ../ggml/examples/common-ggml.h              ./examples/common-ggml.h
-cp -rpv ../ggml/examples/common-ggml.cpp            ./examples/common-ggml.cpp
-cp -rpv ../ggml/examples/whisper/grammar-parser.h   ./examples/grammar-parser.h
-cp -rpv ../ggml/examples/whisper/grammar-parser.cpp ./examples/grammar-parser.cpp
-
-cp -rpv ../ggml/examples/whisper/whisper.h    ./whisper.h
-cp -rpv ../ggml/examples/whisper/whisper.cpp  ./whisper.cpp
-cp -rpv ../ggml/examples/whisper/main.cpp     ./examples/main/main.cpp
-cp -rpv ../ggml/examples/whisper/quantize.cpp ./examples/quantize/quantize.cpp
-
diff --git a/extra/sync-llama.sh b/extra/sync-llama.sh
deleted file mode 100755
index de71ffcc..00000000
--- a/extra/sync-llama.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-cp -rpv ../llama.cpp/llama.h          ./examples/talk-llama/llama.h
-cp -rpv ../llama.cpp/llama.cpp        ./examples/talk-llama/llama.cpp
-cp -rpv ../llama.cpp/unicode.h        ./examples/talk-llama/unicode.h
-cp -rpv ../llama.cpp/unicode.cpp      ./examples/talk-llama/unicode.cpp
-cp -rpv ../llama.cpp/unicode-data.h   ./examples/talk-llama/unicode-data.h
-cp -rpv ../llama.cpp/unicode-data.cpp ./examples/talk-llama/unicode-data.cpp
diff --git a/scripts/bench-all.sh b/scripts/bench-all.sh
new file mode 100755
index 00000000..6939dafa
--- /dev/null
+++ b/scripts/bench-all.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Helper script to run the bench tool on all models and print the results in share-able format
+
+printf "Usage: ./bench.sh [n_threads] [encoder-only]\n"  # printed on every run, not only on bad arguments; NOTE(review): names ./bench.sh although this file is bench-all.sh
+
+if [ -z "$1" ]; then  # $1: thread count passed to ./bench via -t
+    n_threads=4  # default when no first argument is given
+else
+    n_threads=$1
+fi
+
+encoder_only=0
+if [ -z "$2" ]; then  # $2: compared numerically against 0 further down
+    encoder_only=0
+else
+    encoder_only=$2
+fi
+
+models=(                                                                                                    \
+      "tiny"     "tiny-q4_0"     "tiny-q4_1"     "tiny-q5_0"     "tiny-q5_1"     "tiny-q8_0"                \
+      "base"     "base-q4_0"     "base-q4_1"     "base-q5_0"     "base-q5_1"     "base-q8_0"                \
+     "small"    "small-q4_0"    "small-q4_1"    "small-q5_0"    "small-q5_1"    "small-q8_0"                \
+    "medium"   "medium-q4_0"   "medium-q4_1"   "medium-q5_0"   "medium-q5_1"   "medium-q8_0"   "medium-dis" \
+  "large-v2" "large-v2-q4_0" "large-v2-q4_1" "large-v2-q5_0" "large-v2-q5_1" "large-v2-q8_0" "large-v2-dis" \
+)  # each name is expanded to ./models/ggml-<name>.bin in the loop below
+
+if [ "$encoder_only" -eq 0 ]; then  # the two preliminary ./bench -w runs happen only when encoder_only is 0
+    printf "\n"
+    printf "Running memcpy benchmark\n"
+    printf "\n"
+
+    ./bench -w 1 -t $n_threads 2>&1  # presumably -w 1 selects the memcpy benchmark announced above; confirm against ./bench
+
+    printf "\n"
+    printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
+    printf "\n"
+
+    ./bench -w 2 -t $n_threads 2>&1  # presumably -w 2 selects the ggml_mul_mat benchmark announced above; confirm against ./bench
+
+    printf "\n"
+    printf "Running benchmark for all models\n"
+    printf "This can take a while!\n"
+    printf "\n"
+fi
+
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "CPU" "OS" "Config" "Model" "Th" "Enc." "Dec." "Bch5" "PP" "Commit"  # table header row
+printf "| %6s | %6s | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "---" "---" "---" "---" "---" "---" "---" "---" "---" "---"  # header separator row
+
+for model in "${models[@]}"; do
+    # actual run
+    # store stderr output in a variable in order to parse it later
+    output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)  # 2>&1: stderr is captured together with stdout
+    ret=$?  # exit status of ./bench; a table row is printed only when it is 0
+
+    # parse the output:
+    encode_time=$(echo "$output" | grep "encode time" | awk '{print $11}')  # 11th whitespace-separated field of the matching line; presumably a time value, confirm against ./bench output
+    decode_time=$(echo "$output" | grep "decode time" | awk '{print $11}')
+    batchd_time=$(echo "$output" | grep "batchd time" | awk '{print $11}')
+    prompt_time=$(echo "$output" | grep "prompt time" | awk '{print $11}')
+    system_info=$(echo "$output" | grep "system_info")  # whole line(s) containing "system_info"; searched for feature flags below
+    n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')  # NOTE(review): overwrites the value passed to -t on later iterations; becomes empty if no "system_info" line is present
+
+    # floor to milliseconds
+    #encode_time=${encode_time%.*}
+    #decode_time=${decode_time%.*}
+    #prompt_time=${prompt_time%.*}
+
+    config=""  # accumulates a space-prefixed name for every feature found as "<NAME> = 1" in system_info
+
+    if [[ $system_info == *"AVX2 = 1"* ]]; then
+        config="$config AVX2"
+    fi
+
+    if [[ $system_info == *"NEON = 1"* ]]; then
+        config="$config NEON"
+    fi
+
+    if [[ $system_info == *"BLAS = 1"* ]]; then
+        config="$config BLAS"
+    fi
+
+    if [[ $system_info == *"COREML = 1"* ]]; then
+        config="$config COREML"
+    fi
+
+    if [[ $system_info == *"CUDA = 1"* ]]; then
+        config="$config CUDA"
+    fi
+
+    if [[ $system_info == *"METAL = 1"* ]]; then
+        config="$config METAL"
+    fi
+
+    commit=$(git rev-parse --short HEAD)  # short hash of the current HEAD; re-queried on every iteration
+
+    if [ $ret -eq 0 ]; then  # runs with a non-zero exit status produce no row
+        printf "| <todo> | <todo> | %16s | %13s | %3s | %7s | %7s | %7s | %7s | %7s |\n" "$config" "$model" "$n_threads" "$encode_time" "$decode_time" "$batchd_time" "$prompt_time" "$commit"  # CPU and OS columns are emitted as literal "<todo>" placeholders
+    fi
+done
diff --git a/scripts/bench-wts.sh b/scripts/bench-wts.sh
new file mode 100755
index 00000000..223d71b8
--- /dev/null
+++ b/scripts/bench-wts.sh
@@ -0,0 +1,70 @@
+# Benchmark word-level timestamps for different models
+#
+# This script takes two arguments
+# - an audio file
+# - [optional] path to a font file
+#   (I'm using "/usr/share/fonts/truetype/freefont/FreeMono.ttf" on Ubuntu)
+# It invokes ./main, ffprobe, ffmpeg and bc, and reads models/ggml-<model>.bin
+
+if [ -z "$1" ]; then
+    echo "Usage: $0 <audio file> [font file]"
+    exit 1
+fi
+
+#TODO: Make this a command line parameter
+#models="base small large"
+#models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large"
+models="tiny.en base.en small.en medium.en large"
+
+DURATION=$(ffprobe -i $1 -show_entries format=duration -v quiet -of csv="p=0")
+DURATION=$(printf "%.2f" $DURATION) # round to two decimals
+echo "Input file duration: ${DURATION}s"
+
+for model in $models; do
+    echo "Running $model"
+    COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
+    # pass the font path (-fp) only when a second argument was given
+    if [ ! -z "$2" ]; then
+        COMMAND="$COMMAND -fp $2"
+    fi
+    #TODO: Surface errors better
+    # TIMEFMT is for zsh, TIMEFORMAT is for bash; the command's own output is discarded, only the `time` report is captured
+    EXECTIME=$({ TIMEFMT="%E";TIMEFORMAT=%E; time $COMMAND >/dev/null 2>&1; } 2>&1)
+
+    # Slightly different formats between zsh and bash: strip a trailing "s" if present
+    if [ "${EXECTIME: -1}" == "s" ]; then
+        EXECTIME=${EXECTIME::-1}
+    fi
+
+    RATIO=$(echo "$DURATION / $EXECTIME" | bc -l) # audio duration divided by execution time
+    RATIO=$(printf "%.2f" $RATIO)
+
+    echo "Execution time: ${EXECTIME}s (${RATIO}x realtime)"
+
+    # If the file already exists, delete it
+    if [ -f $1.mp4 ]; then
+        rm $1.mp4
+    fi
+    # run the .wts script written for this model; NOTE(review): presumably it renders $1.mp4 - confirm
+    bash $1.$model.wts >/dev/null 2>&1
+    mv $1.mp4 $1.$model.mp4
+    # render a 1200x50 black clip of the same duration with the model name and timing drawn as text
+    ffmpeg -y -f lavfi -i color=c=black:s=1200x50:d=$DURATION -vf "drawtext=fontfile=$2:fontsize=36:x=10:y=(h-text_h)/2:text='ggml-$model - ${EXECTIME}s (${RATIO}x realtime)':fontcolor=lightgrey" $1.$model.info.mp4 >/dev/null 2>&1
+done
+
+COMMAND="ffmpeg -y" # assembled as one string and executed with eval at the end
+for model in $models; do
+    COMMAND="$COMMAND -i $1.$model.info.mp4 -i $1.$model.mp4" # two inputs per model: info clip, then video
+done
+COMMAND="$COMMAND -filter_complex \""
+COUNT=0 # index of the next ffmpeg input; ends up as the total number of inputs
+for model in $models; do
+    COMMAND="$COMMAND[${COUNT}:v][$(($COUNT+1)):v]"
+    COUNT=$((COUNT+2))
+done
+COMMAND="$COMMAND vstack=inputs=${COUNT}[v]\" -map \"[v]\" -map 1:a $1.all.mp4 >/dev/null 2>&1"
+# print the assembled command line before running it
+echo $COMMAND
+
+# Run the command (eval re-parses the string so the embedded quotes and redirection take effect)
+eval $COMMAND
diff --git a/scripts/bench.py b/scripts/bench.py
new file mode 100644
index 00000000..25a09db8
--- /dev/null
+++ b/scripts/bench.py
@@ -0,0 +1,224 @@
+import os
+import subprocess
+import re
+import csv
+import wave
+import contextlib
+import argparse
+
+
+# Custom action to handle comma-separated list
+class ListAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, [int(val) for val in values.split(",")])
+
+
+parser = argparse.ArgumentParser(description="Benchmark the speech recognition model")
+
+# -t/--threads: comma-separated integers, converted to a list by ListAction
+parser.add_argument(
+    "-t",
+    "--threads",
+    dest="threads",
+    action=ListAction,
+    default=[4],
+    help="List of thread counts to benchmark (comma-separated, default: 4)",
+)
+# -p/--processors: comma-separated integers, converted to a list by ListAction
+parser.add_argument(
+    "-p",
+    "--processors",
+    dest="processors",
+    action=ListAction,
+    default=[1],
+    help="List of processor counts to benchmark (comma-separated, default: 1)",
+)
+
+# -f/--filename: path of the audio file used for every benchmark run
+parser.add_argument(
+    "-f",
+    "--filename",
+    type=str,
+    default="./samples/jfk.wav",
+    help="Relative path of the file to transcribe (default: ./samples/jfk.wav)",
+)
+
+# Parse the command line arguments
+args = parser.parse_args()
+
+sample_file = args.filename
+
+threads = args.threads
+processors = args.processors
+
+# Candidate model files, looked up under models/; files that are missing are dropped before the run
+models = [
+    "ggml-tiny.en.bin",
+    "ggml-tiny.bin",
+    "ggml-base.en.bin",
+    "ggml-base.bin",
+    "ggml-small.en.bin",
+    "ggml-small.bin",
+    "ggml-medium.en.bin",
+    "ggml-medium.bin",
+    "ggml-large-v1.bin",
+    "ggml-large-v2.bin",
+    "ggml-large-v3.bin",
+]
+
+# Device name parsed from the program output; overwritten by every run
+metal_device = ""
+
+# Maps (model name, thread count, processor count) to a dict of timing columns
+results = {}
+# Column headers of the CSV report
+gitHashHeader = "Commit"
+modelHeader = "Model"
+hardwareHeader = "Hardware"
+recordingLengthHeader = "Recording Length (seconds)"
+threadHeader = "Thread"
+processorCountHeader = "Processor Count"
+loadTimeHeader = "Load Time (ms)"
+sampleTimeHeader = "Sample Time (ms)"
+encodeTimeHeader = "Encode Time (ms)"
+decodeTimeHeader = "Decode Time (ms)"
+sampleTimePerRunHeader = "Sample Time per Run (ms)"
+encodeTimePerRunHeader = "Encode Time per Run (ms)"
+decodeTimePerRunHeader = "Decode Time per Run (ms)"
+totalTimeHeader = "Total Time (ms)"
+
+
+def check_file_exists(file: str) -> bool:
+    """Return True when `file` is the path of an existing regular file."""
+    return os.path.isfile(file)
+
+def get_git_short_hash() -> str:
+    try:
+        return (
+            subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
+            .decode()
+            .strip()
+        )
+    except subprocess.CalledProcessError as e:
+        return ""
+
+
+def wav_file_length(file: str = sample_file) -> float:
+    with contextlib.closing(wave.open(file, "r")) as f:
+        frames = f.getnframes()
+        rate = f.getframerate()
+        duration = frames / float(rate)
+        return duration
+
+
+def extract_metrics(output: str, label: str) -> tuple[float, float]:
+    match = re.search(rf"{label} \s*=\s*(\d+\.\d+)\s*ms\s*/\s*(\d+)\s*runs", output)
+    time = float(match.group(1)) if match else None
+    runs = float(match.group(2)) if match else None
+    return time, runs
+
+
+def extract_device(output: str) -> str:
+    match = re.search(r"picking default device: (.*)", output)
+    device = match.group(1) if match else "Not found"
+    return device
+
+
+# Check if the sample file exists
+if not check_file_exists(sample_file):
+    raise FileNotFoundError(f"Sample file {sample_file} not found")
+
+recording_length = wav_file_length()
+
+
+# Check that all models exist
+# Filter out models from list that are not downloaded
+filtered_models = []
+for model in models:
+    if check_file_exists(f"models/{model}"):
+        filtered_models.append(model)
+    else:
+        print(f"Model {model} not found, removing from list")
+
+models = filtered_models
+
+# Loop over each combination of parameters
+for model in filtered_models:
+    for thread in threads:
+        for processor_count in processors:
+            # Construct the command to run
+            cmd = f"./main -m models/{model} -t {thread} -p {processor_count} -f {sample_file}"
+            # Run the command and get the output
+            process = subprocess.Popen(
+                cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            )
+
+            output = ""
+            while process.poll() is None:
+                output += process.stdout.read().decode()
+
+            # Parse the output
+            load_time_match = re.search(r"load time\s*=\s*(\d+\.\d+)\s*ms", output)
+            load_time = float(load_time_match.group(1)) if load_time_match else None
+
+            metal_device = extract_device(output)
+            sample_time, sample_runs = extract_metrics(output, "sample time")
+            encode_time, encode_runs = extract_metrics(output, "encode time")
+            decode_time, decode_runs = extract_metrics(output, "decode time")
+
+            total_time_match = re.search(r"total time\s*=\s*(\d+\.\d+)\s*ms", output)
+            total_time = float(total_time_match.group(1)) if total_time_match else None
+
+            model_name = model.replace("ggml-", "").replace(".bin", "")
+
+            print(
+                f"Ran model={model_name} threads={thread} processor_count={processor_count}, took {total_time}ms"
+            )
+            # Store the times in the results dictionary
+            results[(model_name, thread, processor_count)] = {
+                loadTimeHeader: load_time,
+                sampleTimeHeader: sample_time,
+                encodeTimeHeader: encode_time,
+                decodeTimeHeader: decode_time,
+                sampleTimePerRunHeader: round(sample_time / sample_runs, 2),
+                encodeTimePerRunHeader: round(encode_time / encode_runs, 2),
+                decodeTimePerRunHeader: round(decode_time / decode_runs, 2),
+                totalTimeHeader: total_time,
+            }
+
+# Write the results to a CSV file
+with open("benchmark_results.csv", "w", newline="") as csvfile:
+    fieldnames = [
+        gitHashHeader,
+        modelHeader,
+        hardwareHeader,
+        recordingLengthHeader,
+        threadHeader,
+        processorCountHeader,
+        loadTimeHeader,
+        sampleTimeHeader,
+        encodeTimeHeader,
+        decodeTimeHeader,
+        sampleTimePerRunHeader,
+        encodeTimePerRunHeader,
+        decodeTimePerRunHeader,
+        totalTimeHeader,
+    ]
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+
+    shortHash = get_git_short_hash()
+    # Sort the results by total time in ascending order
+    sorted_results = sorted(results.items(), key=lambda x: x[1].get(totalTimeHeader, 0))
+    for params, times in sorted_results:
+        row = {
+            gitHashHeader: shortHash,
+            modelHeader: params[0],
+            hardwareHeader: metal_device,
+            recordingLengthHeader: recording_length,
+            threadHeader: params[1],
+            processorCountHeader: params[2],
+        }
+        row.update(times)
+        writer.writerow(row)
diff --git a/scripts/convert-all.sh b/scripts/convert-all.sh
new file mode 100755
index 00000000..ff765c92
--- /dev/null
+++ b/scripts/convert-all.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Run models/convert-pt-to-ggml.py on each ~/.cache/whisper/<model>.pt and name the result models/ggml-<model>.bin
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
+# a failed conversion skips the rename, so a stale models/ggml-model.bin is never given the wrong name
+for model in "${models[@]}"; do
+    python3 models/convert-pt-to-ggml.py ~/.cache/whisper/"$model".pt ../whisper models/ || continue
+    mv -v models/ggml-model.bin "models/ggml-$model.bin"
+done
diff --git a/scripts/deploy-wasm.sh b/scripts/deploy-wasm.sh
new file mode 100755
index 00000000..1271c398
--- /dev/null
+++ b/scripts/deploy-wasm.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# This is a helper script to deploy all WebAssembly examples to my node
+# Run from the build directory:
+#
+# cd build-em
+# ../scripts/deploy-wasm.sh
+# Exits with status 1 when emcmake is missing or the build fails; scp failures are not checked
+
+# check if emcmake is available
+if ! command -v emcmake &> /dev/null
+then
+    echo "Error: emscripten environment is not set up" >&2
+    exit 1
+fi
+
+emcmake cmake .. && make -j
+if [ $? -ne 0 ]; then
+    echo "Error: build failed" >&2
+    exit 1
+fi
+
+# copy all wasm files to the node
+scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/         && scp bin/libmain.worker.js    root@linode0:/var/www/html/whisper/
+scp bin/stream.wasm/*  root@linode0:/var/www/html/whisper/stream/  && scp bin/libstream.worker.js  root@linode0:/var/www/html/whisper/stream/
+scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/
+scp bin/talk.wasm/*    root@linode0:/var/www/html/whisper/talk/    && scp bin/libtalk.worker.js    root@linode0:/var/www/html/whisper/talk/
+scp bin/bench.wasm/*   root@linode0:/var/www/html/whisper/bench/   && scp bin/libbench.worker.js   root@linode0:/var/www/html/whisper/bench/
+
+echo "Done"
+exit
diff --git a/scripts/quantize-all.sh b/scripts/quantize-all.sh
new file mode 100755
index 00000000..767462b8
--- /dev/null
+++ b/scripts/quantize-all.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Quantize every models/ggml-*.bin whose name has no "-q" to q5_1 and q5_0; with <upload> = 1 also scp the results
+printf "Usage: %s <upload>\n" "$0"
+
+if [ $# -ne 1 ]; then
+    printf "Error: Invalid number of arguments\n"
+    exit 1
+fi
+
+qtype0="q5_0"
+qtype1="q5_1"
+upload="$1"
+declare -a filedex
+# work from the parent of this script's directory
+cd "$(dirname "$0")" || exit 1
+cd ../ || exit 1
+# glob instead of parsing ls output; names containing "-q" are skipped
+for m in models/ggml-*.bin; do
+    case "${m##*/}" in *-q*) continue ;; esac
+    if [ -f "$m" ]; then
+        if [ "${m##*.}" == "bin" ]; then
+            ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1};
+            ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0};
+            filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" )
+        fi
+    fi
+done
+
+# NOTE(review): entries look like models/ggml-<name>-<qtype>.bin, so offset 9 starts at "ml-<name>...";
+# the "for-test" filter and the remote file name therefore look shifted - confirm the intended naming
+if [ "$upload" == "1" ]; then
+    for i in "${!filedex[@]}"; do
+        if [ "${filedex[$i]:9:8}" != "for-test" ]; then
+            scp "${filedex[$i]}" "root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9}"
+        fi
+    done
+fi
diff --git a/scripts/sha-all.sh b/scripts/sha-all.sh
new file mode 100755
index 00000000..dba087bb
--- /dev/null
+++ b/scripts/sha-all.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Run shasum with algorithm 1 on every file matching ./models/ggml-*.bin
+
+for model_file in ./models/ggml-*.bin; do
+    shasum "${model_file}" -a 1
+done
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
new file mode 100755
index 00000000..00149931
--- /dev/null
+++ b/scripts/sync-ggml-am.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+#
+# Synchronize ggml changes to whisper.cpp
+#
+# Usage:
+#
+#   $ cd /path/to/whisper.cpp
+#   $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2...
+#
+
+set -e
+
+sd=$(dirname $0)
+cd $sd/../
+
+SRC_WHISPER=$(pwd)
+SRC_GGML=$(cd ../ggml; pwd)
+
+if [ ! -d $SRC_GGML ]; then
+    echo "ggml not found at $SRC_GGML"
+    exit 1
+fi
+
+lc=$(cat $SRC_WHISPER/scripts/sync-ggml.last)
+echo "Syncing ggml changes since commit $lc"
+
+to_skip=""
+if [ "$1" == "-skip" ]; then
+    to_skip=$2
+fi
+
+cd $SRC_GGML
+# show the pending commits, then store their hashes oldest-first, leaving out subjects containing "(whisper/<n>)"
+git log --oneline $lc..HEAD
+git log --oneline $lc..HEAD --reverse | grep -v "(whisper/[0-9]*)" | cut -d' ' -f1 > $SRC_WHISPER/ggml-commits
+# an empty list means there is nothing to sync: remove it and stop
+if [ ! -s $SRC_WHISPER/ggml-commits ]; then
+    rm -v $SRC_WHISPER/ggml-commits
+    echo "No new commits"
+    exit 0
+fi
+# start without an existing ggml-src.patch
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+while read -r c; do
+    if [ -n "$to_skip" ]; then
+        if [[ $to_skip == *"$c"* ]]; then
+            echo "Skipping $c"
+            continue
+        fi
+    fi
+    # append this commit as a patch limited to the paths below (-k: do not add [PATCH] to the subject)
+    git format-patch -k $c~1..$c --stdout -- \
+        include/ggml/ggml*.h \
+        src/ggml*.h \
+        src/ggml*.c \
+        src/ggml*.cpp \
+        src/ggml*.m \
+        src/ggml*.metal \
+        src/ggml*.cu \
+        src/ggml-cuda/* \
+        examples/common.h \
+        examples/common.cpp \
+        examples/common-ggml.h \
+        examples/common-ggml.cpp \
+        examples/whisper/grammar-parser.h \
+        examples/whisper/grammar-parser.cpp \
+        examples/whisper/whisper.h \
+        examples/whisper/whisper.cpp \
+        examples/whisper/main.cpp \
+        examples/whisper/quantize.cpp \
+        >> $SRC_WHISPER/ggml-src.patch
+done < $SRC_WHISPER/ggml-commits
+
+rm -v $SRC_WHISPER/ggml-commits
+
+# delete the patch file if it is empty; it does not exist at all when every commit was skipped
+if [ -f $SRC_WHISPER/ggml-src.patch ] && [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+cd $SRC_WHISPER
+# post-process the collected patches (if any) and apply them with git am
+if [ -f $SRC_WHISPER/ggml-src.patch ]; then
+    # rewrite PR references so that they carry a ggml/ prefix:
+    #
+    #   before: Subject: some text (#1234)
+    #   after:  Subject: some text (ggml/1234)
+    cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+    # same rewrite for any other line that ends in " (#<number>)"
+    cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    # replace filenames:
+    #
+    # src/ggml.c                  -> ggml.c
+    # src/ggml-alloc.c            -> ggml-alloc.c
+    # src/ggml-backend-impl.h     -> ggml-backend-impl.h
+    # src/ggml-backend.c          -> ggml-backend.c
+    # src/ggml-common.h           -> ggml-common.h
+    # src/ggml-cuda/*             -> ggml-cuda/
+    # src/ggml-cuda.cu            -> ggml-cuda.cu
+    # src/ggml-cuda.h             -> ggml-cuda.h
+    # src/ggml-impl.h             -> ggml-impl.h
+    # src/ggml-kompute.cpp        -> ggml-kompute.cpp
+    # src/ggml-kompute.h          -> ggml-kompute.h
+    # src/ggml-metal.h            -> ggml-metal.h
+    # src/ggml-metal.m            -> ggml-metal.m
+    # src/ggml-mpi.h              -> ggml-mpi.h
+    # src/ggml-mpi.c              -> ggml-mpi.c
+    # src/ggml-opencl.cpp         -> ggml-opencl.cpp
+    # src/ggml-opencl.h           -> ggml-opencl.h
+    # src/ggml-quants.c           -> ggml-quants.c
+    # src/ggml-quants.h           -> ggml-quants.h
+    # src/ggml-sycl.cpp           -> ggml-sycl.cpp
+    # src/ggml-sycl.h             -> ggml-sycl.h
+    # src/ggml-vulkan.cpp         -> ggml-vulkan.cpp
+    # src/ggml-vulkan.h           -> ggml-vulkan.h
+    # include/ggml/ggml.h         -> ggml.h
+    # include/ggml/ggml-alloc.h   -> ggml-alloc.h
+    # include/ggml/ggml-backend.h -> ggml-backend.h
+    # (the examples/common* entries below keep their path; their sed rules change nothing)
+    # examples/common.h                   -> examples/common.h
+    # examples/common.cpp                 -> examples/common.cpp
+    # examples/common-ggml.h              -> examples/common-ggml.h
+    # examples/common-ggml.cpp            -> examples/common-ggml.cpp
+    # examples/whisper/grammar-parser.h   -> examples/grammar-parser.h
+    # examples/whisper/grammar-parser.cpp -> examples/grammar-parser.cpp
+    #
+    # examples/whisper/whisper.h    -> whisper.h
+    # examples/whisper/whisper.cpp  -> whisper.cpp
+    # examples/whisper/main.cpp     -> examples/main/main.cpp
+    # examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
+
+    cat ggml-src.patch | sed \
+        -e 's/src\/ggml\.c/ggml.c/g' \
+        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
+        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
+        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
+        -e 's/src\/ggml-common\.h/ggml-common.h/g' \
+        -e 's/src\/ggml-cuda\//ggml-cuda\//g' \
+        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
+        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
+        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
+        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
+        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
+        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
+        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
+        -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
+        -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
+        -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
+        -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
+        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
+        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
+        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
+        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
+        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
+        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
+        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
+        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
+        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
+        -e 's/examples\/common\.h/examples\/common.h/g' \
+        -e 's/examples\/common\.cpp/examples\/common.cpp/g' \
+        -e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
+        -e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
+        -e 's/examples\/whisper\/grammar-parser\.h/examples\/grammar-parser.h/g' \
+        -e 's/examples\/whisper\/grammar-parser\.cpp/examples\/grammar-parser.cpp/g' \
+        -e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
+        -e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
+        -e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
+        -e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
+        > ggml-src.patch.tmp
+    mv ggml-src.patch.tmp ggml-src.patch
+
+    git am ggml-src.patch # apply the rewritten patches as commits in this repository
+
+    rm -v $SRC_WHISPER/ggml-src.patch
+fi
+
+# store the full hash of the current ggml HEAD in scripts/sync-ggml.last
+cd $SRC_GGML
+git rev-parse HEAD > $SRC_WHISPER/scripts/sync-ggml.last
+
+echo "Done"
+
+exit 0
diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
new file mode 100644
index 00000000..82195550
--- /dev/null
+++ b/scripts/sync-ggml.last
@@ -0,0 +1 @@
+bb8d8cff851b2de6fde4904be492d39458837e1a
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
new file mode 100755
index 00000000..1482cfbc
--- /dev/null
+++ b/scripts/sync-ggml.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copy ggml sources from ../ggml into this tree; all paths are relative to the current directory
+cp -rpv ../ggml/src/ggml.c              ./ggml.c
+cp -rpv ../ggml/src/ggml-impl.h         ./ggml-impl.h
+cp -rpv ../ggml/src/ggml-alloc.c        ./ggml-alloc.c
+cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
+cp -rpv ../ggml/src/ggml-backend.c      ./ggml-backend.c
+cp -rpv ../ggml/src/ggml-common.h       ./ggml-common.h
+cp -rpv ../ggml/src/ggml-cuda/*         ./ggml-cuda/
+cp -rpv ../ggml/src/ggml-cuda.cu        ./ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-cuda.h         ./ggml-cuda.h
+cp -rpv ../ggml/src/ggml-kompute.cpp    ./ggml-kompute.cpp
+cp -rpv ../ggml/src/ggml-kompute.h      ./ggml-kompute.h
+cp -rpv ../ggml/src/ggml-metal.h        ./ggml-metal.h
+cp -rpv ../ggml/src/ggml-metal.m        ./ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal    ./ggml-metal.metal
+#cp -rpv ../ggml/src/ggml-mpi.h          ./ggml-mpi.h
+#cp -rpv ../ggml/src/ggml-mpi.c          ./ggml-mpi.c
+cp -rpv ../ggml/src/ggml-opencl.cpp     ./ggml-opencl.cpp
+cp -rpv ../ggml/src/ggml-opencl.h       ./ggml-opencl.h
+cp -rpv ../ggml/src/ggml-quants.c       ./ggml-quants.c
+cp -rpv ../ggml/src/ggml-quants.h       ./ggml-quants.h
+cp -rpv ../ggml/src/ggml-sycl.cpp       ./ggml-sycl.cpp
+cp -rpv ../ggml/src/ggml-sycl.h         ./ggml-sycl.h
+cp -rpv ../ggml/src/ggml-vulkan.cpp     ./ggml-vulkan.cpp
+cp -rpv ../ggml/src/ggml-vulkan.h       ./ggml-vulkan.h
+# headers kept under include/ggml in the ggml repository
+cp -rpv ../ggml/include/ggml/ggml.h         ./ggml.h
+cp -rpv ../ggml/include/ggml/ggml-alloc.h   ./ggml-alloc.h
+cp -rpv ../ggml/include/ggml/ggml-backend.h ./ggml-backend.h
+# files that go into ./examples
+cp -rpv ../ggml/examples/common.h                   ./examples/common.h
+cp -rpv ../ggml/examples/common.cpp                 ./examples/common.cpp
+cp -rpv ../ggml/examples/common-ggml.h              ./examples/common-ggml.h
+cp -rpv ../ggml/examples/common-ggml.cpp            ./examples/common-ggml.cpp
+cp -rpv ../ggml/examples/whisper/grammar-parser.h   ./examples/grammar-parser.h
+cp -rpv ../ggml/examples/whisper/grammar-parser.cpp ./examples/grammar-parser.cpp
+# whisper sources taken from ../ggml/examples/whisper
+cp -rpv ../ggml/examples/whisper/whisper.h    ./whisper.h
+cp -rpv ../ggml/examples/whisper/whisper.cpp  ./whisper.cpp
+cp -rpv ../ggml/examples/whisper/main.cpp     ./examples/main/main.cpp
+cp -rpv ../ggml/examples/whisper/quantize.cpp ./examples/quantize/quantize.cpp
+
diff --git a/scripts/sync-llama.sh b/scripts/sync-llama.sh
new file mode 100755
index 00000000..de71ffcc
--- /dev/null
+++ b/scripts/sync-llama.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Copy llama.cpp sources from ../llama.cpp into ./examples/talk-llama; paths are relative to the current directory
+cp -rpv ../llama.cpp/llama.h          ./examples/talk-llama/llama.h
+cp -rpv ../llama.cpp/llama.cpp        ./examples/talk-llama/llama.cpp
+cp -rpv ../llama.cpp/unicode.h        ./examples/talk-llama/unicode.h
+cp -rpv ../llama.cpp/unicode.cpp      ./examples/talk-llama/unicode.cpp
+cp -rpv ../llama.cpp/unicode-data.h   ./examples/talk-llama/unicode-data.h
+cp -rpv ../llama.cpp/unicode-data.cpp ./examples/talk-llama/unicode-data.cpp