docker: add perplexity and bench commands to full image (#11438)

author Nuno <redacted>

Tue, 28 Jan 2025 10:42:32 +0000 (11:42 +0100)

committer GitHub <redacted>

Tue, 28 Jan 2025 10:42:32 +0000 (10:42 +0000)
author Nuno <redacted>
Tue, 28 Jan 2025 10:42:32 +0000 (11:42 +0100)
committer GitHub <redacted>
Tue, 28 Jan 2025 10:42:32 +0000 (10:42 +0000)
diff --git a/.devops/tools.sh b/.devops/tools.sh

index 9a86e6ea0185df68d5174cdd517a872e3bce83c8..41a6b1e55c7d295c9075a24eeaf2b8b0bc8eaa01 100755 (executable)
--- a/.devops/tools.sh
+++ b/.devops/tools.sh
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
      exec ./llama-quantize "$@"
  elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
      exec ./llama-cli "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
  elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
      echo "Converting PTH to GGML..."
-    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+    for i in $(ls $1/$2/ggml-model-f16.bin*); do
          if [ -f "${i/f16/q4_0}" ]; then
              echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
          else
@@ -30,6 +34,10 @@ else
      echo "Available commands: "
      echo "  --run (-r): Run a model previously converted into ggml"
      echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "              ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "              ex: -m model.gguf -f file.txt"
      echo "  --convert (-c): Convert a llama model into ggml"
      echo "              ex: --outtype f16 \"/models/7B/\" "
      echo "  --quantize (-q): Optimize with quantization process ggml"
author	Nuno <redacted>
	Tue, 28 Jan 2025 10:42:32 +0000 (11:42 +0100)
committer	GitHub <redacted>
	Tue, 28 Jan 2025 10:42:32 +0000 (10:42 +0000)