models : make all scripts to be POSIX Compliant (#1725)

author Sơn Phan Trung <redacted>

Fri, 12 Jan 2024 12:11:04 +0000 (19:11 +0700)

committer GitHub <redacted>

Fri, 12 Jan 2024 12:11:04 +0000 (14:11 +0200)
author Sơn Phan Trung <redacted>
Fri, 12 Jan 2024 12:11:04 +0000 (19:11 +0700)
committer GitHub <redacted>
Fri, 12 Jan 2024 12:11:04 +0000 (14:11 +0200)
diff --git a/models/download-coreml-model.sh b/models/download-coreml-model.sh

index 9e67a15002f67bf40fd3614b094e7723107a4974..83f2b238e940f95ff18d9286558e71ea030184c1 100755 (executable)
--- a/models/download-coreml-model.sh
+++ b/models/download-coreml-model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
  
  # This script downloads Whisper model files that have already been converted to Core ML format.
  # This way you don't have to convert them yourself.
@@ -7,32 +7,32 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
  pfx="resolve/main/ggml"
  
  # get the path of this script
-function get_script_path() {
+get_script_path() {
      if [ -x "$(command -v realpath)" ]; then
-        echo "$(dirname $(realpath $0))"
+       dirname "$(realpath "$0")"
      else
-        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
+        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
+        echo "$_ret"
      fi
  }
  
  models_path="$(get_script_path)"
  
  # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
+models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"
  
  # list available models
-function list_models {
-    printf "\n"
-    printf "  Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
-    done
-    printf "\n\n"
+list_models() {
+        printf "\n"
+        printf "  Available models:"
+        for model in $models; do # $models is a space-separated list; split on purpose
+                printf " %s" "$model" # print the current entry, not the whole list
+        done
+        printf "\n\n"
 }
  
  if [ "$#" -ne 1 ]; then
-    printf "Usage: $0 <model>\n"
+    printf "Usage: %s <model>\n" "$0"
      list_models
  
      exit 1
@@ -40,8 +40,8 @@ fi
  
  model=$1
  
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
+if ! printf '%s\n' $models | grep -q -x -F -- "$model"; then # one name per line (unquoted on purpose), exact literal match
+    printf "Invalid model: %s\n" "$model"
      list_models
  
      exit 1
@@ -49,19 +49,19 @@ fi
  
  # download Core ML model
  
-printf "Downloading Core ML model $model from '$src' ...\n"
+printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"
  
-cd $models_path
+cd "$models_path" || exit
  
  if [ -f "ggml-$model.mlmodel" ]; then
-    printf "Model $model already exists. Skipping download.\n"
+    printf "Model %s already exists. Skipping download.\n" "$model"
      exit 0
  fi
  
  if [ -x "$(command -v wget)" ]; then
-    wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+    wget --quiet --show-progress -O ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
  elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel
+    curl -L --output ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
  else
      printf "Either wget or curl is required to download models.\n"
      exit 1
@@ -69,14 +69,14 @@ fi
  
  
  if [ $? -ne 0 ]; then
-    printf "Failed to download Core ML model $model \n"
+    printf "Failed to download Core ML model %s \n" "$model"
      printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
      exit 1
  fi
  
-printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n"
+printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
  printf "Run the following command to compile it:\n\n"
-printf "  $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n"
+printf "  $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
  printf "You can now use it like this:\n\n"
-printf "  $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
+printf "  $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
  printf "\n"
diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh

index c976d2fb4de7b52b1edd808e59e46c070fcfaccd..74dece99586ee85aa2edd48ac5c3ebf603853ffd 100755 (executable)
--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
  
  # This script downloads Whisper model files that have already been converted to ggml format.
  # This way you don't have to convert them yourself.
@@ -10,54 +10,52 @@ src="https://huggingface.co/ggerganov/whisper.cpp"
  pfx="resolve/main/ggml"
  
  # get the path of this script
-function get_script_path() {
+get_script_path() {
      if [ -x "$(command -v realpath)" ]; then
-        echo "$(dirname "$(realpath "$0")")"
+        dirname "$(realpath "$0")"
      else
-        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
-        echo "$ret"
+        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
+        echo "$_ret"
      fi
  }
  
  models_path="${2:-$(get_script_path)}"
  
  # Whisper models
-models=(
-    "tiny.en"
-    "tiny"
-    "tiny-q5_1"
-    "tiny.en-q5_1"
-    "base.en"
-    "base"
-    "base-q5_1"
-    "base.en-q5_1"
-    "small.en"
-    "small.en-tdrz"
-    "small"
-    "small-q5_1"
-    "small.en-q5_1"
-    "medium"
-    "medium.en"
-    "medium-q5_0"
-    "medium.en-q5_0"
-    "large-v1"
-    "large-v2"
-    "large-v3"
-    "large-v3-q5_0"
-)
+models="tiny.en
+tiny
+tiny-q5_1
+tiny.en-q5_1
+base.en
+base
+base-q5_1
+base.en-q5_1
+small.en
+small.en-tdrz
+small
+small-q5_1
+small.en-q5_1
+medium
+medium.en
+medium-q5_0
+medium.en-q5_0
+large-v1
+large-v2
+large-v3
+large-v3-q5_0"
  
  # list available models
-function list_models {
+list_models() {
      printf "\n"
      printf "  Available models:"
-    for model in "${models[@]}"; do
-        printf " $model"
+    for model in $models; do
+        printf " %s" "$model"
      done
      printf "\n\n"
  }
  
  if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
-    printf "Usage: $0 <model> [models_path]\n"
+    printf "Usage: %s <model> [models_path]\n" "$0"
      list_models
  
      exit 1
@@ -65,34 +63,36 @@ fi
  
  model=$1
  
-if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
-    printf "Invalid model: $model\n"
+if ! printf '%s\n' $models | grep -q -x -F -- "$model"; then # one name per line (unquoted on purpose), exact literal match
+    printf "Invalid model: %s\n" "$model"
      list_models
  
      exit 1
  fi
  
  # check if model contains `tdrz` and update the src and pfx accordingly
-if [[ $model == *"tdrz"* ]]; then
+if echo "$model" | grep -q "tdrz"; then
      src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
      pfx="resolve/main/ggml"
  fi
  
+echo "$model" | grep -q '^"tdrz"*$'
+
  # download ggml model
  
-printf "Downloading ggml model $model from '$src' ...\n"
+printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
  
-cd "$models_path"
+cd "$models_path" || exit
  
  if [ -f "ggml-$model.bin" ]; then
-    printf "Model $model already exists. Skipping download.\n"
+    printf "Model %s already exists. Skipping download.\n" "$model"
      exit 0
  fi
  
  if [ -x "$(command -v wget)" ]; then
-    wget --no-config --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin
+    wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin
  elif [ -x "$(command -v curl)" ]; then
-    curl -L --output ggml-$model.bin $src/$pfx-$model.bin
+    curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin
  else
      printf "Either wget or curl is required to download models.\n"
      exit 1
@@ -100,12 +100,13 @@ fi
  
  
  if [ $? -ne 0 ]; then
-    printf "Failed to download ggml model $model \n"
+    printf "Failed to download ggml model %s \n" "$model"
      printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
      exit 1
  fi
  
-printf "Done! Model '$model' saved in '$models_path/ggml-$model.bin'\n"
+
+printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
  printf "You can now use it like this:\n\n"
-printf "  $ ./main -m $models_path/ggml-$model.bin -f samples/jfk.wav\n"
+printf "  $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
  printf "\n"
diff --git a/models/generate-coreml-interface.sh b/models/generate-coreml-interface.sh

index 553d5f654f4e3752b92e6d3fb9827a4d80f96314..b205eb1cde8296a9acc603e29072525e2077cc88 100755 (executable)
--- a/models/generate-coreml-interface.sh
+++ b/models/generate-coreml-interface.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
  #
  # This generates:
  #   - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
@@ -6,7 +6,7 @@
  #
  
  wd=$(dirname "$0")
-cd "$wd/../"
+cd "$wd/../" || exit
  
  python3 models/convert-whisper-to-coreml.py --model tiny.en
  
diff --git a/models/generate-coreml-model.sh b/models/generate-coreml-model.sh

index cb8be6dcbc0c66c57725ef4bd9aa59f326d62e8f..8f96fdec84acc69a0d76c846bcd489a91c874ab7 100755 (executable)
--- a/models/generate-coreml-model.sh
+++ b/models/generate-coreml-model.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
  
  # Usage: ./generate-coreml-model.sh <model-name>
  if [ $# -eq 0 ]; then
@@ -6,7 +6,7 @@ if [ $# -eq 0 ]; then
    echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
    echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
    exit 1
-elif [[ "$1" == "-h5" && $# != 3 ]]; then
+elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
    echo "No model name and model path supplied for a HuggingFace model"
    echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
    exit 1
@@ -15,20 +15,20 @@ fi
  mname="$1"
  
  wd=$(dirname "$0")
-cd "$wd/../"
+cd "$wd/../" || exit
  
-if [[ $mname == "-h5" ]]; then
+if [ "$mname" = "-h5" ]; then
    mname="$2"
    mpath="$3"
-  echo $mpath
-  python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True
+  echo "$mpath"
+  python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
  else
-  python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True  --optimize-ane True
+  python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
  fi
  
-xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/
-rm -rf models/ggml-${mname}-encoder.mlmodelc
-mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc
+xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
+rm -rf models/ggml-"${mname}"-encoder.mlmodelc
+mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc
  
  # TODO: decoder (sometime in the future maybe)
  #xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
author	Sơn Phan Trung <redacted>
	Fri, 12 Jan 2024 12:11:04 +0000 (19:11 +0700)
committer	GitHub <redacted>
	Fri, 12 Jan 2024 12:11:04 +0000 (14:11 +0200)
models/download-coreml-model.sh		patch \| blob \| history
models/download-ggml-model.sh		patch \| blob \| history
models/generate-coreml-interface.sh		patch \| blob \| history
models/generate-coreml-model.sh		patch \| blob \| history