LICENSE
pkg/
lib/whisper.*
-ext/sources/*
-!ext/sources/CMakeGraphVizOptions.cmake
-ext/mkmf.log
whisper.so
whisper.bundle
whisper.dll
-scripts/get-flags.mk
*.o
-/*/**/*.c
-/*/**/*.cpp
-/*/**/*.h
-/*/**/*.m
-/*/**/*.metal
+*.a
+sources/*
+!sources/CMakeGraphVizOptions.cmake
+mkmf.log
Dir.chdir __dir__ do
output = `#{@cmake.shellescape} -S sources -B build -L`
end
- started = false
- @cmake_options = output.lines.filter_map {|line|
- if line.chomp == "-- Cache values"
- started = true
- next
- end
- next unless started
- option, value = line.chomp.split("=", 2)
- name, type = option.split(":", 2)
- [name, type, value]
- }
+ @cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
+ .filter_map {|line|
+ option, value = line.chomp.split("=", 2)
+ name, type = option.split(":", 2)
+ [
+ name,
+ [
+ type,
+ type == "BOOL" ? value == "ON" : value
+ ]
+ ]
+ }.to_h
end
private
def configure
- cmake_options.each do |name, type, default_value|
+ cmake_options.each_pair do |name, (type, default_value)|
option = option_name(name)
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
@options[name] = [type, value]
end
+
+ configure_coreml
+ end
+
+ # Appends " -DRUBY_WHISPER_USE_COREML" to $CPPFLAGS when Core ML is enabled.
+ #
+ # The value recorded in @options for "WHISPER_COREML" wins when it is not nil;
+ # otherwise the value from cmake_options is used. Hash#dig is used so that a
+ # missing "WHISPER_COREML" entry is treated as "not enabled" instead of
+ # raising NoMethodError on nil.
+ def configure_coreml
+   use_coreml = @options.dig("WHISPER_COREML", 1)
+   use_coreml = cmake_options.dig("WHISPER_COREML", 1) if use_coreml.nil?
+   $CPPFLAGS << " -DRUBY_WHISPER_USE_COREML" if use_coreml
end
def option_name(name)
ID id_to_path;
ID id_URI;
ID id_pre_converted_models;
+ID id_coreml_compiled_models;
+ID id_cache;
static bool is_log_callback_finalized = false;
return rb_str_new2(str_full);
}
+/*
+ * Returns the text produced by whisper_print_system_info() as a Ruby String.
+ *
+ * call-seq:
+ *   system_info_str -> String
+ */
+static VALUE ruby_whisper_s_system_info_str(VALUE self) {
+  const char *system_info = whisper_print_system_info();
+  return rb_str_new2(system_info);
+}
+
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
is_log_callback_finalized = true;
return Qnil;
id_to_path = rb_intern("to_path");
id_URI = rb_intern("URI");
id_pre_converted_models = rb_intern("pre_converted_models");
+ id_coreml_compiled_models = rb_intern("coreml_compiled_models");
+ id_cache = rb_intern("cache");
mWhisper = rb_define_module("Whisper");
mVAD = rb_define_module_under(mWhisper, "VAD");
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
+ rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
extern ID id_to_path;
extern ID id_URI;
extern ID id_pre_converted_models;
+extern ID id_coreml_compiled_models;
+extern ID id_cache;
extern VALUE cContext;
extern VALUE eError;
extern const rb_data_type_t ruby_whisper_params_type;
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
-extern VALUE rb_whisper_model_initialize(VALUE context);
-extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
+extern VALUE rb_whisper_model_s_new(VALUE context);
+extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
static void
if (!rw) {
return 0;
}
+ if (rw->context) {
+ size += sizeof(rw->context);
+ }
return size;
}
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
if (!NIL_P(pre_converted_model)) {
model_path = pre_converted_model;
+#ifdef RUBY_WHISPER_USE_COREML
+ VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
+ VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
+ if (!NIL_P(coreml_converted_model)) {
+ rb_funcall(coreml_converted_model, id_cache, 0);
+ }
+#endif
}
else if (TYPE(model_path) == T_STRING) {
const char * model_path_str = StringValueCStr(model_path);
// Should check when samples.respond_to?(:length)?
} else {
if (TYPE(samples) == T_ARRAY) {
- n_samples = RARRAY_LEN(samples);
+ if (RARRAY_LEN(samples) > INT_MAX) {
+ rb_raise(rb_eArgError, "samples are too long");
+ }
+ n_samples = (int)RARRAY_LEN(samples);
} else if (memory_view_available_p) {
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
- n_samples = view.byte_size / view.item_size;
+ ssize_t n_samples_size = view.byte_size / view.item_size;
+ if (n_samples_size > INT_MAX) {
+ rb_raise(rb_eArgError, "samples are too long");
+ }
+ n_samples = (int)n_samples_size;
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
- n_samples = view.byte_size / view.item_size;
+ ssize_t n_samples_size = view.byte_size / view.item_size;
+ if (n_samples_size > INT_MAX) {
+ rb_raise(rb_eArgError, "samples are too long");
+ }
+ n_samples = (int)n_samples_size;
} else {
if (TYPE(samples) == T_ARRAY) {
- n_samples = RARRAY_LEN(samples);
+ if (RARRAY_LEN(samples) > INT_MAX) {
+ rb_raise(rb_eArgError, "samples are too long");
+ }
+ n_samples = (int)RARRAY_LEN(samples);
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
- return INT2NUM(t0);
+ return LONG2NUM(t0);
}
/*
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
- return INT2NUM(t1);
+ return LONG2NUM(t1);
}
/*
static VALUE
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
{
- return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
+ return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
}
/*
const int n_segments = whisper_full_n_segments(rw->context);
for (int i = 0; i < n_segments; ++i) {
- rb_yield(rb_whisper_segment_initialize(self, i));
+ rb_yield(rb_whisper_segment_s_new(self, i));
}
return self;
static VALUE
ruby_whisper_get_model(VALUE self)
{
- return rb_whisper_model_initialize(self);
+ return rb_whisper_model_s_new(self);
}
void
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
}
-VALUE rb_whisper_model_initialize(VALUE context) {
+VALUE rb_whisper_model_s_new(VALUE context) {
ruby_whisper_model *rwm;
const VALUE model = ruby_whisper_model_allocate(cModel);
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
extern ID id_call;
extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
-extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
+extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
extern const rb_data_type_t ruby_whisper_vad_params_type;
static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
const int n_segments = whisper_full_n_segments_from_state(state);
for (int i = n_new; i > 0; i--) {
int i_segment = n_segments - i;
- VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment);
+ VALUE segment = rb_whisper_segment_s_new(*container->context, i_segment);
for (int j = 0; j < callbacks_len; j++) {
VALUE cb = rb_ary_entry(container->callbacks, j);
rb_funcall(cb, id_call, 1, segment);
}
VALUE
-rb_whisper_segment_initialize(VALUE context, int index)
+rb_whisper_segment_s_new(VALUE context, int index)
{
ruby_whisper_segment *rws;
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
- return INT2NUM(t0 * 10);
+ return LONG2NUM(t0 * 10);
}
/*
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
- return INT2NUM(t1 * 10);
+ return LONG2NUM(t1 * 10);
}
/*
end
end
+ class ZipURI < URI
+ def cache
+ zip_path = Pathname(super)
+ dest = unzipped_path
+ return if dest.exist? && dest.mtime >= zip_path.mtime
+ escaping dest do
+ system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
+ end
+ zip_path.to_path
+ end
+
+ def clear_cache
+ super
+ unzipped_path.rmtree if unzipped_path.exist?
+ end
+
+ private
+
+ def unzipped_path
+ cache_path.sub_ext("")
+ end
+
+ def escaping(path)
+ escaped = Pathname("#{path}.removing")
+ if path.exist?
+ escaped.rmtree if escaped.exist?
+ path.rename escaped
+ end
+ yield
+ ensure
+ if path.exist?
+ escaped.rmtree if escaped.exist?
+ else
+ escaped.rename path if escaped.exist?
+ end
+ end
+ end
+
@pre_converted_models = %w[
tiny
tiny.en
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
end
+ @coreml_compiled_models = %w[
+ tiny
+ tiny.en
+ base
+ base.en
+ small
+ small.en
+ medium
+ medium.en
+ large-v1
+ large-v2
+ large-v3
+ large-v3-turbo
+ ].each_with_object({}) do |name, models|
+ models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
+ end
+
class << self
- attr_reader :pre_converted_models
+ attr_reader :pre_converted_models, :coreml_compiled_models
end
end
end
def self.lang_str: (Integer id) -> String
def self.lang_str_full: (Integer id) -> String
def self.log_set: (log_callback, Object? user_data) -> log_callback
+ def self.system_info_str: () -> String
class Context
def self.new: (path | ::URI::HTTP) -> instance
class Model
def self.pre_converted_models: () -> Hash[String, Model::URI]
+ def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
def self.new: () -> instance
def n_vocab: () -> Integer
def n_audio_ctx: () -> Integer
def to_path: -> String
def clear_cache: -> void
end
+
+ class ZipURI < URI
+ def cache: () -> String
+ def clear_cache: () -> void
+ end
end
class Segment
assert_equal 1, model.ftype
assert_equal "base", model.type
end
+
+ def test_coreml_model_auto_download
+ uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
+ model_path = Pathname(uri.to_path).sub_ext("")
+ model_path.rmtree if model_path.exist?
+
+ uri.cache
+ assert_path_exist model_path
+ end
end
end
end
+ def test_install_with_coreml
+ omit_unless RUBY_PLATFORM.match?(/darwin/) do
+ gemspec = Gem::Specification.load("whispercpp.gemspec")
+ Dir.mktmpdir do |dir|
+ system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", "--", "--enable-whisper-coreml", exception: true
+ assert_installed dir, gemspec.version
+ assert_nothing_raised do
+ libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
+ system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
+ end
+ end
+ end
+ end
+
private
def assert_installed(dir, version)
end
end
+ def test_system_info_str
+ assert_match /\AWHISPER : COREML = \d | OPENVINO = \d |/, Whisper.system_info_str
+ end
+
def test_log_set
user_data = Object.new
logs = []
s.name = "whispercpp"
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
s.version = '1.3.3'
- s.date = '2025-05-29'
+ s.date = '2025-06-01'
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
s.email = 'todd.fisher@gmail.com'
s.extra_rdoc_files = ['LICENSE', 'README.md']