Models can be downloaded by running the following command on Linux or MacOS:
```console
-$ ./models/download-vad-model.sh silero-v5.1.2
-Downloading ggml model silero-v5.1.2 from 'https://huggingface.co/ggml-org/whisper-vad' ...
-ggml-silero-v5.1.2.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s
-Done! Model 'silero-v5.1.2' saved in '/path/models/ggml-silero-v5.1.2.bin'
+$ ./models/download-vad-model.sh silero-v6.2.0
+Downloading ggml model silero-v6.2.0 from 'https://huggingface.co/ggml-org/whisper-vad' ...
+ggml-silero-v6.2.0.bin 100%[==============================================>] 864.35K --.-KB/s in 0.04s
+Done! Model 'silero-v6.2.0' saved in '/path/models/ggml-silero-v6.2.0.bin'
You can now use it like this:
- $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v5.1.2.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin
+ $ ./build/bin/whisper-cli -vm /path/models/ggml-silero-v6.2.0.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin
```
And the following command on Windows:
```console
-> .\models\download-vad-model.cmd silero-v5.1.2
-Downloading vad model silero-v5.1.2...
-Done! Model silero-v5.1.2 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v5.1.2.bin
+> .\models\download-vad-model.cmd silero-v6.2.0
+Downloading vad model silero-v6.2.0...
+Done! Model silero-v6.2.0 saved in C:\Users\danie\work\ai\whisper.cpp\ggml-silero-v6.2.0.bin
You can now use it like this:
-C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v5.1.2.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
+C:\path\build\bin\Release\whisper-cli.exe -vm C:\path\ggml-silero-v6.2.0.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
```
$ python3 -m venv venv && source venv/bin/activate
$ (venv) pip install silero-vad
$ (venv) $ python models/convert-silero-vad-to-ggml.py --output models/silero.bin
-Saving GGML Silero-VAD model to models/silero-v5.1.2-ggml.bin
+Saving GGML Silero-VAD model to models/silero-v6.2.0-ggml.bin
```
And it can then be used with whisper as follows:
```console
--file ./samples/jfk.wav \
--model ./models/ggml-base.en.bin \
--vad \
- --vad-model ./models/silero-v5.1.2-ggml.bin
+ --vad-model ./models/silero-v6.2.0-ggml.bin
```
### VAD Options
```ruby
Whisper::Params.new(
vad: true,
- vad_model_path: "silero-v5.1.2",
+ vad_model_path: "silero-v6.2.0",
# other arguments...
)
```
-When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically.
-Currently, "silero-v5.1.2" is registered as pre-converted model like ASR models. You also specify file path or URI of model.
+When you pass the model name (`"silero-v6.2.0"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin`), it will be downloaded automatically.
+Currently, "silero-v6.2.0" is registered as a pre-converted model, like the ASR models. You can also specify the file path or URI of a model.
If you need configure VAD behavior, pass params for that:
```ruby
Whisper::Params.new(
vad: true,
- vad_model_path: "silero-v5.1.2",
+ vad_model_path: "silero-v6.2.0",
vad_params: Whisper::VAD::Params.new(
threshold: 1.0, # defaults to 0.5
min_speech_duration_ms: 500, # defaults to 250
VAD feature itself is useful. You can use it separately from ASR:
```ruby
-vad = Whisper::VAD::Context.new("silero-v5.1.2")
+vad = Whisper::VAD::Context.new("silero-v6.2.0")
vad
.detect("path/to/audio.wav", Whisper::VAD::Params.new)
.each_with_index do |segment, index|
%w[
silero-v5.1.2
+ silero-v6.2.0
].each do |name|
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
end
def test_vad_model_path
assert_nil @params.vad_model_path
- @params.vad_model_path = "silero-v5.1.2"
- assert_equal Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path, @params.vad_model_path
+ @params.vad_model_path = "silero-v6.2.0"
+ assert_equal Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path, @params.vad_model_path
end
def test_vad_model_path_with_nil
- @params.vad_model_path = "silero-v5.1.2"
+ @params.vad_model_path = "silero-v6.2.0"
@params.vad_model_path = nil
assert_nil @params.vad_model_path
end
end
def test_vad_model_path_with_URI_string
- @params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin"
- assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
+ @params.vad_model_path = "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin"
+ assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
end
def test_vad_model_path_with_URI
- @params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")
- assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
+ @params.vad_model_path = URI("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin")
+ assert_equal @params.vad_model_path, Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
end
def test_vad_params
in [/_user_data\Z/, *]
Object.new
in [:vad_model_path, *]
- Whisper::Model.pre_converted_models["silero-v5.1.2"].to_path
+ Whisper::Model.pre_converted_models["silero-v6.2.0"].to_path
in [:vad_params, *]
Whisper::VAD::Params.new
end
vad_params = Whisper::VAD::Params.new
@params = Whisper::Params.new(
vad: true,
- vad_model_path: "silero-v5.1.2",
+ vad_model_path: "silero-v6.2.0",
vad_params:
)
end
class TestVADContext < TestBase
def test_initialize
- context = Whisper::VAD::Context.new("silero-v5.1.2")
+ context = Whisper::VAD::Context.new("silero-v6.2.0")
assert_instance_of Whisper::VAD::Context, context
end
def test_detect
- context = Whisper::VAD::Context.new("silero-v5.1.2")
+ context = Whisper::VAD::Context.new("silero-v6.2.0")
segments = context.detect(AUDIO, Whisper::VAD::Params.new)
assert_instance_of Whisper::VAD::Segments, segments
assert_equal segment.start_time, start_time
assert_equal segment.end_time, end_time
- assert_equal 5, segments.length
+ assert_equal 4, segments.length
end
def test_invalid_model_type
```shell
# From the whisper.cpp root directory
-./models/download-vad-model.sh silero-v5.1.2
+./models/download-vad-model.sh silero-v6.2.0
```
### VAD Parameters
model: path.join(__dirname, "../../models/ggml-base.en.bin"),
fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
vad: true,
- vad_model: path.join(__dirname, "../../models/ggml-silero-v5.1.2.bin"),
+ vad_model: path.join(__dirname, "../../models/ggml-silero-v6.2.0.bin"),
vad_threshold: 0.5,
progress_callback: (progress) => console.log(`Progress: ${progress}%`)
};
max_len: 0,
// VAD parameters
vad: true,
- vad_model: path.join(__dirname, "../../models/ggml-silero-v5.1.2.bin"), // You need to download this model
+ vad_model: path.join(__dirname, "../../models/ggml-silero-v6.2.0.bin"), // You need to download this model
vad_threshold: 0.5,
vad_min_speech_duration_ms: 250,
vad_min_silence_duration_ms: 100,
const fs = require('fs');
if (!fs.existsSync(vadParams.vad_model)) {
console.log("⚠️ VAD model not found. Please download the VAD model first:");
- console.log(" ./models/download-vad-model.sh silero-v5.1.2");
+ console.log(" ./models/download-vad-model.sh silero-v6.2.0");
console.log(" Or run: python models/convert-silero-vad-to-ggml.py");
console.log("\n Falling back to traditional transcription without VAD...\n");
that we use internally for testing:
```console
./build/bin/vad-speech-segments \
- -vad-model models/for-tests-silero-v5.1.2-ggml.bin \
+ -vad-model models/for-tests-silero-v6.2.0-ggml.bin \
--file samples/jfk.wav \
--no-prints
set argc=0
for %%x in (%*) do set /A argc+=1
-set models=silero-v5.1.2
+set models=silero-v5.1.2 silero-v6.2.0
rem If argc is not equal to 1 or 2, print usage information and exit
if %argc% NEQ 1 (
models_path="${2:-$default_download_path}"
# Whisper VAD models
-models="silero-v5.1.2"
+models="silero-v5.1.2 silero-v6.2.0"
# list available models
list_models() {
```
$ # Execute the commands below in the project root dir.
-$ ./models/download-vad-model.sh silero-v5.1.2
+$ ./models/download-vad-model.sh silero-v6.2.0
```
Create `eval.conf` with the following content:
```
-WHISPER_FLAGS = --no-prints --language en --output-txt --vad --vad-model ../../models/ggml-silero-v5.1.2.bin
+WHISPER_FLAGS = --no-prints --language en --output-txt --vad --vad-model ../../models/ggml-silero-v6.2.0.bin
```