scripts : add script to bench models (#16894)
author    Georgi Gerganov <redacted>
          Sat, 1 Nov 2025 22:15:31 +0000 (00:15 +0200)
committer GitHub <redacted>
          Sat, 1 Nov 2025 22:15:31 +0000 (00:15 +0200)
scripts/bench-models.sh [new file with mode: 0644]
tools/batched-bench/batched-bench.cpp

diff --git a/scripts/bench-models.sh b/scripts/bench-models.sh
new file mode 100644 (file)
index 0000000..744b0de
--- /dev/null
+++ b/scripts/bench-models.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+
+RESULTS="bench-models-results.txt"
+: > "$RESULTS"
+
+ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
+ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"
+
+QUICK=0
+while (( "$#" )); do
+  case "$1" in
+    --quick) QUICK=1; shift ;;
+    *) shift ;;
+  esac
+done
+
+if (( QUICK )); then
+  ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
+  ARGS_B="-d 0 -p 2048 -n 32"
+fi
+
+run_model() {
+  local HFR=$1
+  local HFF=$2
+
+  printf "## ${HFR}\n" | tee -a "$RESULTS"
+  printf "\n" | tee -a "$RESULTS"
+  printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
+  printf "\n" | tee -a "$RESULTS"
+
+  printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
+  printf "\n" | tee -a "$RESULTS"
+
+  ./bin/llama-batched-bench \
+    -hfr "${HFR}" -hff "${HFF}" \
+    -m "${HFF}" -fa 1 -ub 2048 --no-mmap \
+    ${ARGS_BB} | tee -a "$RESULTS"
+
+  printf "\n" | tee -a "$RESULTS"
+
+  printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
+  printf "\n" | tee -a "$RESULTS"
+
+  ./bin/llama-bench \
+    -m "${HFF}" -fa 1 -ub 2048 -mmp 0 \
+    ${ARGS_B} | tee -a "$RESULTS"
+
+  printf "\n" | tee -a "$RESULTS"
+
+  printf "\n"
+}
+
+run_model "ggml-org/gpt-oss-20b-GGUF"                       "gpt-oss-20b-mxfp4.gguf"
+run_model "ggml-org/gpt-oss-120b-GGUF"                      "gpt-oss-120b-mxfp4-00001-of-00003.gguf"
+run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
+run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"             "qwen2.5-coder-7b-q8_0.gguf"
+run_model "ggml-org/gemma-3-4b-it-qat-GGUF"                 "gemma-3-4b-it-qat-Q4_0.gguf"
+
+if [[ -f models-extra.txt ]]; then
+    while read -r HFR HFF; do
+        [[ -z "$HFR" ]] && continue
+        run_model "$HFR" "$HFF"
+    done < models-extra.txt
+fi
+
+printf "\n=====================================\n"
+printf "\n"
+
+cat "$RESULTS"
+
+printf "\n"
+printf "Done! Results are written to $RESULTS\n"
+printf "\n"
+
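A minimal usage sketch for the new script (assumption: it is invoked from the build directory, since it calls ./bin/llama-batched-bench and ./bin/llama-bench relative to the current working directory):

    # from the llama.cpp build directory (assumed layout with binaries in ./bin)
    ../scripts/bench-models.sh           # full sweep over the default model list
    ../scripts/bench-models.sh --quick   # reduced contexts/batches for a fast sanity check

Additional models can be benchmarked by placing a models-extra.txt file in the working directory, one "<hf-repo> <gguf-file>" pair per line; a hypothetical entry:

    ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF qwen2.5-coder-3b-q8_0.gguf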
diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp
index fcfcd80771c516d0ccc59a98b7545ed77fd5acc2..f1ab27cd54d0af2653116de3ea9695bdd9c078d8 100644 (file)
--- a/tools/batched-bench/batched-bench.cpp
+++ b/tools/batched-bench/batched-bench.cpp
@@ -221,7 +221,5 @@ int main(int argc, char ** argv) {
 
     llama_backend_free();
 
-    LOG("\n\n");
-
     return 0;
 }
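Since every run is appended to bench-models-results.txt via tee, the per-model sections can be located afterwards by the "## <hf-repo>" headers that run_model writes; a small sketch:

    grep -n '^## ' bench-models-results.txt   # list the benchmarked models and their line offsets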