```ruby
require "whisper"
-whisper = Whisper::Context.new(Whisper::Model["base"])
+whisper = Whisper::Context.new("base")
params = Whisper::Params.new
params.language = "en"
Some models are prepared up-front:
```ruby
-base_en = Whisper::Model["base.en"]
+base_en = Whisper::Model.pre_converted_models["base.en"]
whisper = Whisper::Context.new(base_en)
```
The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
```ruby
-Whisper::Model["base"].clear_cache
+Whisper::Model.pre_converted_models["base"].clear_cache
```
-You can see the list of prepared model names by `Whisper::Model.preconverted_model_names`:
+You can also use a shorthand for pre-converted models:
+
+```ruby
+whisper = Whisper::Context.new("base.en")
+```
+
+You can see the list of prepared model names with `Whisper::Model.pre_converted_models.keys`:
```ruby
puts Whisper::Model.pre_converted_models.keys
You can also add a hook to params that is called on each new segment:
```ruby
-def format_time(time_ms)
- sec, decimal_part = time_ms.divmod(1000)
- min, sec = sec.divmod(60)
- hour, min = min.divmod(60)
- "%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
-end
-
# Add hook before calling #transcribe
params.on_new_segment do |segment|
line = "[%{st} --> %{ed}] %{text}" % {
You can see model information:
```ruby
-whisper = Whisper::Context.new(Whisper::Model["base"])
+whisper = Whisper::Context.new("base")
model = whisper.model
model.n_vocab # => 51864
Whisper.log_set ->(level, buffer, user_data) {
# do nothing
}, nil
-Whisper::Context.new(MODEL)
+Whisper::Context.new("base")
```
### Low-level API to transcribe ###
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
-whisper = Whisper::Context.new(Whisper::Model["base"])
+whisper = Whisper::Context.new("base")
whisper.full(Whisper::Params.new, samples)
whisper.each_segment do |segment|
puts segment.text
directory "pkg"
CLOBBER.include "pkg"
-TEST_MODEL = "../../models/ggml-base.en.bin"
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
SO_FILE = File.join("ext", LIB_NAME)
LIB_FILE = File.join("lib", LIB_NAME)
sh "make"
end
end
-CLEAN.include LIB_FILE
+CLEAN.include SO_FILE
directory "lib"
file LIB_FILE => [SO_FILE, "lib"] do |t|
copy t.source, t.name
end
+CLEAN.include LIB_FILE
Rake::TestTask.new do |t|
t.test_files = FileList["tests/test_*.rb"]
end
-task test: [TEST_MODEL, LIB_FILE]
-
-file TEST_MODEL do
- Dir.chdir "../.." do
- sh "./models/download-ggml-model.sh base.en"
- end
-end
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
end
end
CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
-task test: TEST_MEMORY_VIEW
+
+task test: [LIB_FILE, TEST_MEMORY_VIEW]
$MK_CFLAGS << ' -march=native -mtune=native'
$HOST_CXXFLAGS << ' -march=native -mtune=native'
end
-
- if $UNAME_M.match? /aarch64.*/
- $MK_CFLAGS << ' -mcpu=native'
- $MK_CXXFLAGS << ' -mcpu=native'
- end
else
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
VALUE cParams;
VALUE eError;
+VALUE cSegment;
+VALUE cModel;
+
static ID id_to_s;
static ID id_call;
static ID id___method__;
static ID id_next;
static ID id_new;
static ID id_to_path;
+static ID id_pre_converted_models;
static bool is_log_callback_finalized = false;
ruby_whisper_params *rwp;
rwp = ALLOC(ruby_whisper_params);
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+ rwp->diarize = false;
rwp->new_segment_callback_container = rb_whisper_callback_container_allocate();
rwp->progress_callback_container = rb_whisper_callback_container_allocate();
rwp->abort_callback_container = rb_whisper_callback_container_allocate();
/*
* call-seq:
- * new(Whisper::Model["base.en"]) -> Whisper::Context
+ * new("base.en") -> Whisper::Context
* new("path/to/model.bin") -> Whisper::Context
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
*/
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
Data_Get_Struct(self, ruby_whisper, rw);
+ VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
+ VALUE pre_converted_model = rb_hash_aref(pre_converted_models, whisper_model_file_path);
+ if (!NIL_P(pre_converted_model)) {
+ whisper_model_file_path = pre_converted_model;
+ }
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
}
rwp->params.logprob_thold = RFLOAT_VALUE(value);
return value;
}
+/*
+ * Reads the no-speech threshold currently stored in the wrapped whisper params.
+ *
+ * call-seq:
+ *   no_speech_thold -> Float
+ */
+static VALUE ruby_whisper_params_get_no_speech_thold(VALUE self) {
+  ruby_whisper_params *wrapped;
+  double threshold;
+
+  Data_Get_Struct(self, ruby_whisper_params, wrapped);
+  threshold = wrapped->params.no_speech_thold;
+  return DBL2NUM(threshold);
+}
+/*
+ * Stores a new no-speech threshold in the wrapped whisper params.
+ *
+ * Any Numeric is accepted (Float, Integer, Rational, ...); a non-numeric
+ * argument raises TypeError.
+ *
+ * call-seq:
+ *   no_speech_thold = threshold -> threshold
+ */
+static VALUE ruby_whisper_params_set_no_speech_thold(VALUE self, VALUE value) {
+  ruby_whisper_params *rwp;
+  Data_Get_Struct(self, ruby_whisper_params, rwp);
+  /* NUM2DBL converts any Numeric and raises on other objects, whereas
+   * RFLOAT_VALUE blindly treats the VALUE as a Float and misreads e.g. an
+   * Integer argument. */
+  rwp->params.no_speech_thold = NUM2DBL(value);
+  return value;
+}
/*
* Sets new segment callback, called for every newly generated text segment.
*
VALUE context;
} ruby_whisper_model;
-VALUE cSegment;
-VALUE cModel;
-
static void rb_whisper_segment_mark(ruby_whisper_segment *rws) {
rb_gc_mark(rws->context);
}
id_next = rb_intern("next");
id_new = rb_intern("new");
id_to_path = rb_intern("to_path");
+ id_pre_converted_models = rb_intern("pre_converted_models");
mWhisper = rb_define_module("Whisper");
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
rb_define_method(cParams, "entropy_thold=", ruby_whisper_params_set_entropy_thold, 1);
rb_define_method(cParams, "logprob_thold", ruby_whisper_params_get_logprob_thold, 0);
rb_define_method(cParams, "logprob_thold=", ruby_whisper_params_set_logprob_thold, 1);
+ rb_define_method(cParams, "no_speech_thold", ruby_whisper_params_get_no_speech_thold, 0);
+ rb_define_method(cParams, "no_speech_thold=", ruby_whisper_params_set_no_speech_thold, 1);
rb_define_method(cParams, "new_segment_callback=", ruby_whisper_params_set_new_segment_callback, 1);
rb_define_method(cParams, "new_segment_callback_user_data=", ruby_whisper_params_set_new_segment_callback_user_data, 1);
require "whisper.so"
-require "whisper/model"
+require "whisper/model/uri"
+++ /dev/null
-require "whisper.so"
-require "uri"
-require "net/http"
-require "pathname"
-require "io/console/size"
-
-class Whisper::Model
- class URI
- def initialize(uri)
- @uri = URI(uri)
- end
-
- def to_path
- cache
- cache_path.to_path
- end
-
- def clear_cache
- path = cache_path
- path.delete if path.exist?
- end
-
- private
-
- def cache_path
- base_cache_dir/@uri.host/@uri.path[1..]
- end
-
- def base_cache_dir
- base = case RUBY_PLATFORM
- when /mswin|mingw/
- ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home)/"AppData/Local"
- when /darwin/
- Pathname(Dir.home)/"Library/Caches"
- else
- ENV.key?("XDG_CACHE_HOME") ? ENV["XDG_CACHE_HOME"] : Pathname(Dir.home)/".cache"
- end
- base/"whisper.cpp"
- end
-
- def cache
- path = cache_path
- headers = {}
- headers["if-modified-since"] = path.mtime.httpdate if path.exist?
- request @uri, headers
- path
- end
-
- def request(uri, headers)
- Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
- request = Net::HTTP::Get.new(uri, headers)
- http.request request do |response|
- case response
- when Net::HTTPNotModified
- # noop
- when Net::HTTPOK
- download response
- when Net::HTTPRedirection
- request URI(response["location"])
- else
- raise response
- end
- end
- end
- end
-
- def download(response)
- path = cache_path
- path.dirname.mkpath unless path.dirname.exist?
- downloading_path = Pathname("#{path}.downloading")
- size = response.content_length
- downloading_path.open "wb" do |file|
- downloaded = 0
- response.read_body do |chunk|
- file << chunk
- downloaded += chunk.bytesize
- show_progress downloaded, size
- end
- end
- downloading_path.rename path
- end
-
- def show_progress(current, size)
- return unless size
-
- unless @prev
- @prev = Time.now
- $stderr.puts "Downloading #{@uri}"
- end
-
- now = Time.now
- return if now - @prev < 1 && current < size
-
- progress_width = 20
- progress = current.to_f / size
- arrow_length = progress * progress_width
- arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
- line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
- padding = ' ' * ($stderr.winsize[1] - line.size)
- $stderr.print "\r#{line}#{padding}"
- $stderr.puts if current >= size
- @prev = now
- end
-
- def format_bytesize(bytesize)
- return "0.0 B" if bytesize.zero?
-
- units = %w[B KiB MiB GiB TiB]
- exp = (Math.log(bytesize) / Math.log(1024)).to_i
- format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
- end
- end
-
- @names = {}
- %w[
- tiny
- tiny.en
- tiny-q5_1
- tiny.en-q5_1
- tiny-q8_0
- base
- base.en
- base-q5_1
- base.en-q5_1
- base-q8_0
- small
- small.en
- small.en-tdrz
- small-q5_1
- small.en-q5_1
- small-q8_0
- medium
- medium.en
- medium-q5_0
- medium.en-q5_0
- medium-q8_0
- large-v1
- large-v2
- large-v2-q5_0
- large-v2-8_0
- large-v3
- large-v3-q5_0
- large-v3-turbo
- large-v3-turbo-q5_0
- large-v3-turbo-q8_0
- ].each do |name|
- @names[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
- end
-
- class << self
- def [](name)
- @names[name]
- end
-
- def preconverted_model_names
- @names.keys
- end
- end
-end
--- /dev/null
+require "whisper.so"
+require "uri"
+require "net/http"
+require "time"
+require "pathname"
+require "io/console/size"
+
+class Whisper::Model
+ class URI
+ def initialize(uri)
+ @uri = URI(uri)
+ end
+
+ def to_path
+ cache
+ cache_path.to_path
+ end
+
+ def clear_cache
+ path = cache_path
+ path.delete if path.exist?
+ end
+
+ private
+
+ def cache_path
+ base_cache_dir/@uri.host/@uri.path[1..]
+ end
+
+ def base_cache_dir
+ base = case RUBY_PLATFORM
+ when /mswin|mingw/
+ ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home)/"AppData/Local"
+ when /darwin/
+ Pathname(Dir.home)/"Library/Caches"
+ else
+ ENV.key?("XDG_CACHE_HOME") ? ENV["XDG_CACHE_HOME"] : Pathname(Dir.home)/".cache"
+ end
+ base/"whisper.cpp"
+ end
+
+ def cache
+ path = cache_path
+ headers = {}
+ headers["if-modified-since"] = path.mtime.httpdate if path.exist?
+ request @uri, headers
+ path
+ end
+
+ def request(uri, headers)
+ Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
+ request = Net::HTTP::Get.new(uri, headers)
+ http.request request do |response|
+ case response
+ when Net::HTTPNotModified
+ # noop
+ when Net::HTTPOK
+ download response
+ when Net::HTTPRedirection
+ request URI(response["location"]), headers
+ else
+ return if headers.key?("if-modified-since") # Use cache file
+
+ raise "#{response.code} #{response.message}\n#{response.body}"
+ end
+ end
+ end
+ end
+
+ def download(response)
+ path = cache_path
+ path.dirname.mkpath unless path.dirname.exist?
+ downloading_path = Pathname("#{path}.downloading")
+ size = response.content_length
+ downloading_path.open "wb" do |file|
+ downloaded = 0
+ response.read_body do |chunk|
+ file << chunk
+ downloaded += chunk.bytesize
+ show_progress downloaded, size
+ end
+ end
+ downloading_path.rename path
+ end
+
+ def show_progress(current, size)
+ return unless $stderr.tty?
+ return unless size
+
+ unless @prev
+ @prev = Time.now
+ $stderr.puts "Downloading #{@uri}"
+ end
+
+ now = Time.now
+ return if now - @prev < 1 && current < size
+
+ progress_width = 20
+ progress = current.to_f / size
+ arrow_length = progress * progress_width
+ arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
+ line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
+ padding = ' ' * ($stderr.winsize[1] - line.size)
+ $stderr.print "\r#{line}#{padding}"
+ $stderr.puts if current >= size
+ @prev = now
+ end
+
+ def format_bytesize(bytesize)
+ return "0.0 B" if bytesize.zero?
+
+ units = %w[B KiB MiB GiB TiB]
+ exp = (Math.log(bytesize) / Math.log(1024)).to_i
+ format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
+ end
+ end
+
+ @pre_converted_models = {}
+ %w[
+ tiny
+ tiny.en
+ tiny-q5_1
+ tiny.en-q5_1
+ tiny-q8_0
+ base
+ base.en
+ base-q5_1
+ base.en-q5_1
+ base-q8_0
+ small
+ small.en
+ small.en-tdrz
+ small-q5_1
+ small.en-q5_1
+ small-q8_0
+ medium
+ medium.en
+ medium-q5_0
+ medium.en-q5_0
+ medium-q8_0
+ large-v1
+ large-v2
+ large-v2-q5_0
+ large-v2-q8_0
+ large-v3
+ large-v3-q5_0
+ large-v3-turbo
+ large-v3-turbo-q5_0
+ large-v3-turbo-q8_0
+ ].each do |name|
+ @pre_converted_models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
+ end
+
+ class << self
+ attr_reader :pre_converted_models
+ end
+end
require_relative "jfk_reader/jfk_reader"
class TestBase < Test::Unit::TestCase
- MODEL = File.join(__dir__, "..", "..", "..", "models", "ggml-base.en.bin")
AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")
end
-require "test/unit"
-require "whisper"
-
-class TestCallback < Test::Unit::TestCase
- TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
+require_relative "helper"
+class TestCallback < TestBase
def setup
GC.start
@params = Whisper::Params.new
- @whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
- @audio = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
+ @whisper = Whisper::Context.new("base.en")
+ @audio = File.join(AUDIO)
end
def test_new_segment_callback
class TestModel < TestBase
def test_model
- whisper = Whisper::Context.new(MODEL)
+ whisper = Whisper::Context.new("base.en")
assert_instance_of Whisper::Model, whisper.model
end
def test_attributes
- whisper = Whisper::Context.new(MODEL)
+ whisper = Whisper::Context.new("base.en")
model = whisper.model
assert_equal 51864, model.n_vocab
end
def test_gc
- model = Whisper::Context.new(MODEL).model
+ model = Whisper::Context.new("base.en").model
GC.start
assert_equal 51864, model.n_vocab
end
def test_pathname
- path = Pathname(MODEL)
+ path = Pathname(Whisper::Model.pre_converted_models["base.en"].to_path)
whisper = Whisper::Context.new(path)
model = whisper.model
assert_equal 1, model.ftype
assert_equal "base", model.type
end
+
+ # Resolving the pre-converted "base.en" entry through #to_path must leave the
+ # model file on disk with exactly the byte size asserted below.
+ def test_auto_download
+ path = Whisper::Model.pre_converted_models["base.en"].to_path
+
+ assert_path_exist path
+ assert_equal 147964211, File.size(path)
+ end
end
@params.logprob_thold = -0.5
assert_in_delta -0.5, @params.logprob_thold
end
+
+ # no_speech_thold starts at about 0.6 and reads back the value assigned to it.
+ def test_no_speech_thold
+ assert_in_delta 0.6, @params.no_speech_thold
+ @params.no_speech_thold = 0.2
+ assert_in_delta 0.2, @params.no_speech_thold
+ end
end
attr_reader :whisper
def startup
- @whisper = Whisper::Context.new(TestBase::MODEL)
+ @whisper = Whisper::Context.new("base.en")
params = Whisper::Params.new
params.print_timestamps = false
@whisper.transcribe(TestBase::AUDIO, params)
end
def test_whisper
- @whisper = Whisper::Context.new(MODEL)
+ @whisper = Whisper::Context.new("base.en")
params = Whisper::Params.new
params.print_timestamps = false
attr_reader :whisper
def startup
- @whisper = Whisper::Context.new(TestBase::MODEL)
+ @whisper = Whisper::Context.new("base.en")
params = Whisper::Params.new
params.print_timestamps = false
@whisper.transcribe(TestBase::AUDIO, params)
logs << [level, buffer, udata]
}
Whisper.log_set log_callback, user_data
- Whisper::Context.new(MODEL)
+ Whisper::Context.new("base.en")
assert logs.length > 30
logs.each do |log|
}, nil
dev = StringIO.new("")
$stderr = dev
- Whisper::Context.new(MODEL)
+ Whisper::Context.new("base.en")
assert_empty dev.string
ensure
$stderr = stderr
sub_test_case "full" do
def setup
super
- @whisper = Whisper::Context.new(MODEL)
+ @whisper = Whisper::Context.new("base.en")
@samples = File.read(AUDIO, nil, 78).unpack("s<*").collect {|i| i.to_f / 2**15}
end
s.test_files = s.files.select {|file| file.start_with? "tests/"}
s.extensions << 'ext/extconf.rb'
-
+ s.required_ruby_version = '>= 3.1.0'
#### Documentation and testing.
s.homepage = 'https://github.com/ggerganov/whisper.cpp'