]> git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
`speak` scripts for Windows
authorNicholas Albion <redacted>
Thu, 1 Jun 2023 12:45:00 +0000 (22:45 +1000)
committerNicholas Albion <redacted>
Thu, 1 Jun 2023 12:45:00 +0000 (22:45 +1000)
12 files changed:
examples/talk-llama/README.md
examples/talk-llama/speak [new file with mode: 0644]
examples/talk-llama/speak.bat [new file with mode: 0644]
examples/talk-llama/speak.ps1 [new file with mode: 0644]
examples/talk-llama/speak.sh [deleted file]
examples/talk-llama/talk-llama.cpp
examples/talk/README.md
examples/talk/speak [new file with mode: 0644]
examples/talk/speak.bat [new file with mode: 0644]
examples/talk/speak.ps1 [new file with mode: 0644]
examples/talk/speak.sh [deleted file]
examples/talk/talk.cpp

index 295bc4dbb7d66aec1681c472061557d0513eb645..01f696d1a44883484d0dd402a61eb1d0b9ddfa57 100644 (file)
@@ -42,8 +42,8 @@ Example usage:
 ## TTS\r
 \r
 For best experience, this example needs a TTS tool to convert the generated text responses to voice.\r
-You can use any TTS engine that you would like - simply edit the [speak.sh](speak.sh) script to your needs.\r
-By default, it is configured to use MacOS's `say`, but you can use whatever you wish.\r
+You can use any TTS engine that you would like - simply edit the [speak](speak) script to your needs.\r
+By default, it is configured to use MacOS's `say` or Windows SpeechSynthesizer, but you can use whatever you wish.\r
 \r
 ## Discussion\r
 \r
diff --git a/examples/talk-llama/speak b/examples/talk-llama/speak
new file mode 100644 (file)
index 0000000..40fdad2
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Usage:
+#  speak <voice_id> <text-to-speak>
+
+# espeak
+# Mac OS: brew install espeak
+# Linux: apt-get install espeak
+#
+#espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 "$2"
+
+# for Mac
+say "$2"
+
+# Eleven Labs
+# To use it, install the elevenlabs module from pip (pip install elevenlabs)
+# It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
+#Keep the line commented to use the free version without api key
+#
+#export ELEVEN_API_KEY=your_api_key
+#wd=$(dirname $0)
+#script=$wd/eleven-labs.py
+#python3 $script $1 "$2" >/dev/null 2>&1
+#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
diff --git a/examples/talk-llama/speak.bat b/examples/talk-llama/speak.bat
new file mode 100644 (file)
index 0000000..d719d69
--- /dev/null
@@ -0,0 +1 @@
+@powershell -ExecutionPolicy Bypass -F "%~dp0speak.ps1" %1 %2\r
diff --git a/examples/talk-llama/speak.ps1 b/examples/talk-llama/speak.ps1
new file mode 100644 (file)
index 0000000..bdc4c5f
--- /dev/null
@@ -0,0 +1,12 @@
+# Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope CurrentUser\r
+param(\r
+  # voice options are David or Zira\r
+  [Parameter(Mandatory=$true)][string]$voice,\r
+  [Parameter(Mandatory=$true)][string]$text\r
+)\r
+\r
+Add-Type -AssemblyName System.Speech;\r
+$speak = New-Object System.Speech.Synthesis.SpeechSynthesizer;\r
+$speak.SelectVoice("Microsoft $voice Desktop");\r
+$speak.Rate="0";\r
+$speak.Speak($text);\r
diff --git a/examples/talk-llama/speak.sh b/examples/talk-llama/speak.sh
deleted file mode 100755 (executable)
index 40fdad2..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# Usage:
-#  speak.sh <voice_id> <text-to-speak>
-
-# espeak
-# Mac OS: brew install espeak
-# Linux: apt-get install espeak
-#
-#espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 "$2"
-
-# for Mac
-say "$2"
-
-# Eleven Labs
-# To use it, install the elevenlabs module from pip (pip install elevenlabs)
-# It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
-#Keep the line commented to use the free version whitout api key
-#
-#export ELEVEN_API_KEY=your_api_key
-#wd=$(dirname $0)
-#script=$wd/eleven-labs.py
-#python3 $script $1 "$2" >/dev/null 2>&1
-#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1
index cdeb2d9bf4e98aada9c8b8ab90814a2560723057..57a02eacf627068d0092641a4c51e8e652c63504 100644 (file)
@@ -47,7 +47,7 @@ struct whisper_params {
     std::string language    = "en";
     std::string model_wsp   = "models/ggml-base.en.bin";
     std::string model_llama = "models/ggml-llama-7B.bin";
-    std::string speak       = "./examples/talk-llama/speak.sh";
+    std::string speak       = "./examples/talk-llama/speak";
     std::string prompt      = "";
     std::string fname_out;
     std::string path_session = "";       // path to file for saving/loading model eval state
index 818a4283f9c54ac93273c4df0340be39d06cc9e1..fe85795eb136d438de368cd575417b024f75f96e 100644 (file)
@@ -37,5 +37,5 @@ wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://huggingface.c
 ## TTS\r
 \r
 For best experience, this example needs a TTS tool to convert the generated text responses to voice.\r
-You can use any TTS engine that you would like - simply edit the [speak.sh](speak.sh) script to your needs.\r
-By default, it is configured to use `espeak`, but you can use whatever you wish.\r
+You can use any TTS engine that you would like - simply edit the [speak](speak) script to your needs.\r
+By default, it is configured to use MacOS's `say` or `espeak` or Windows SpeechSynthesizer, but you can use whatever you wish.\r
diff --git a/examples/talk/speak b/examples/talk/speak
new file mode 100644 (file)
index 0000000..b822f61
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Usage:
+#  speak <voice_id> <text-to-speak>
+
+# espeak
+# Mac OS: brew install espeak
+# Linux: apt-get install espeak
+#
+#espeak -v en-us+m$1 -s 175 -p 50 -a 200 -g 5 -k 5 "$2"
+
+# Mac OS "say" command
+say "$2"
+
+# Eleven Labs
+# To use it, install the elevenlabs module from pip (pip install elevenlabs)
+# It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
+#Keep the line commented to use the free version without api key
+#
+#export ELEVEN_API_KEY=your_api_key
+#wd=$(dirname $0)
+#script=$wd/eleven-labs.py
+#python3 $script $1 "$2"
+#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3
diff --git a/examples/talk/speak.bat b/examples/talk/speak.bat
new file mode 100644 (file)
index 0000000..d719d69
--- /dev/null
@@ -0,0 +1 @@
+@powershell -ExecutionPolicy Bypass -F "%~dp0speak.ps1" %1 %2\r
diff --git a/examples/talk/speak.ps1 b/examples/talk/speak.ps1
new file mode 100644 (file)
index 0000000..bdc4c5f
--- /dev/null
@@ -0,0 +1,12 @@
+# Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope CurrentUser\r
+param(\r
+  # voice options are David or Zira\r
+  [Parameter(Mandatory=$true)][string]$voice,\r
+  [Parameter(Mandatory=$true)][string]$text\r
+)\r
+\r
+Add-Type -AssemblyName System.Speech;\r
+$speak = New-Object System.Speech.Synthesis.SpeechSynthesizer;\r
+$speak.SelectVoice("Microsoft $voice Desktop");\r
+$speak.Rate="0";\r
+$speak.Speak($text);\r
diff --git a/examples/talk/speak.sh b/examples/talk/speak.sh
deleted file mode 100755 (executable)
index f6954d1..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# Usage:
-#  speak.sh <voice_id> <text-to-speak>
-
-# espeak
-# Mac OS: brew install espeak
-# Linux: apt-get install espeak
-#
-#espeak -v en-us+m$1 -s 175 -p 50 -a 200 -g 5 -k 5 "$2"
-
-# Mac OS "say" command
-say "$2"
-
-# Eleven Labs
-# To use it, install the elevenlabs module from pip (pip install elevenlabs)
-# It's possible to use the API for free with limited number of characters. To increase this limit register to https://beta.elevenlabs.io to get an api key and paste it after 'ELEVEN_API_KEY='
-#Keep the line commented to use the free version whitout api key
-#
-#export ELEVEN_API_KEY=your_api_key
-#wd=$(dirname $0)
-#script=$wd/eleven-labs.py
-#python3 $script $1 "$2"
-#ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3
index 0def64449e42d2f845b8c3c14ceada7762a530bb..651ca2008f3e426d063bd1f625c7d5b2e0415605 100644 (file)
@@ -36,7 +36,7 @@ struct whisper_params {
     std::string language  = "en";
     std::string model_wsp = "models/ggml-base.en.bin";
     std::string model_gpt = "models/ggml-gpt-2-117M.bin";
-    std::string speak     = "./examples/talk/speak.sh";
+    std::string speak     = "./examples/talk/speak";
     std::string fname_out;
 };