$ gem install whispercpp
+You can pass build options for whisper.cpp, for instance:
+
+ $ bundle config build.whispercpp --enable-ggml-cuda
+
+or,
+
+ $ gem install whispercpp -- --enable-ggml-cuda
+
+See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options listed in the README to Ruby-style options.
+For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`), and convert the name to kebab-case: `--enable-ggml-cuda`.
+For options which require arguments like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, convert the name to kebab-case, then append `=` followed by the argument: `--cmake-cuda-architectures="86"`.
+
Usage
-----
+++ /dev/null
-ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
- ggml/src/ggml-cpu/ggml-cpu.cpp \
- ggml/src/ggml-cpu/unary-ops.cpp \
- ggml/src/ggml-cpu/binary-ops.cpp \
- ggml/src/ggml-cpu/vec.cpp \
- ggml/src/ggml-cpu/ops.cpp \
- ggml/include/ggml-backend.h \
- ggml/include/ggml.h \
- ggml/include/ggml-alloc.h \
- ggml/src/ggml-backend-impl.h \
- ggml/include/ggml-cpu.h \
- ggml/src/ggml-impl.h
- $(CXX) $(CXXFLAGS) -c $< -o $@
--- /dev/null
+require "shellwords"
+require "tsort"
+
+class Dependencies
+ def initialize(cmake, options)
+ @cmake = cmake
+ @options = options
+
+ generate_dot
+ @libs = parse_dot
+ end
+
+ def to_s
+ @libs.join(" ")
+ end
+
+ private
+
+ def dot_path
+ File.join(__dir__, "build", "whisper.cpp.dot")
+ end
+
+ def generate_dot
+ system @cmake, "-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF", @options.to_s, exception: true
+ end
+
+ def parse_dot
+ static_lib_shape = nil
+ nodes = {}
+ depends = Hash.new {|h, k| h[k] = []}
+
+ class << depends
+ include TSort
+ alias tsort_each_node each_key
+ def tsort_each_child(node, &block)
+ fetch(node, []).each(&block)
+ end
+ end
+
+ File.open(dot_path).each_line do |line|
+ case line
+ when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
+ static_lib_shape = $~[:shape]
+ when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
+ node = $~[:node]
+ label = $~[:label]
+ shape = $~[:shape]
+ nodes[node] = [label, shape]
+ when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
+ depender = $~[:depender]
+ dependee = $~[:dependee]
+ depends[depender] ||= []
+ depends[depender] << dependee
+ end
+ end
+ depends.tsort.filter_map {|node|
+ label, shape = nodes[node]
+ shape == static_lib_shape ? label : nil
+ }.collect {|lib| "lib#{lib}.a"}
+ .reverse
+ end
+end
require "mkmf"
-require "tsort"
-
-# TODO: options such as CoreML
+require_relative "options"
+require_relative "dependencies"
cmake = find_executable("cmake") || abort
-
+options = Options.new
have_library("gomp") rescue nil
+libs = Dependencies.new(cmake, options)
-prefix = File.join("build", "whisper.cpp.dot")
-system cmake, "-S", "sources", "-B", "build", "--graphviz", prefix, "-D", "BUILD_SHARED_LIBS=OFF", exception: true
-
-static_lib_shape = nil
-nodes = {}
-depends = {}
-class << depends
- include TSort
- alias tsort_each_node each_key
- def tsort_each_child(node, &block)
- fetch(node, []).each(&block)
- end
-end
-File.open(File.join("build", "whisper.cpp.dot")).each_line do |line|
- case line
- when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/
- static_lib_shape = $~[:shape]
- when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/
- node = $~[:node]
- label = $~[:label]
- shape = $~[:shape]
- nodes[node] = [label, shape]
- when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/
- depender = $~[:depender]
- dependee = $~[:dependee]
- depends[depender] ||= []
- depends[depender] << dependee
- end
-end
-libs = depends.tsort.filter_map {|node|
- label, shape = nodes[node]
- shape == static_lib_shape ? label : nil
-}.collect {|lib| "lib#{lib}.a"}
- .reverse
- .join(" ")
-
-$CFLAGS << " -std=c11 -fPIC"
-$CXXFLAGS << " -std=c++17 -O3 -DNDEBUG"
$INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples"
$LOCAL_LIBS << " #{libs}"
$cleanfiles << " build #{libs}"
$(TARGET_SO): #{libs}
#{libs}: cmake-targets
cmake-targets:
- #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON
+ #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
#{"\t"}#{cmake} --build build --config Release --target common whisper
- #{"\t"}
EOF
end
+++ /dev/null
-ggml/src/ggml-metal/ggml-metal-embed.o: \
- ggml/src/ggml-metal/ggml-metal.metal \
- ggml/src/ggml-metal/ggml-metal-impl.h \
- ggml/src/ggml-common.h
- @echo "Embedding Metal library"
- @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
- @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
- $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
- $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
- @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
- @rmdir ${TEMP_ASSEMBLY}
+++ /dev/null
-ggml/src/ggml-metal/ggml-metal.o: \
- ggml/src/ggml-metal/ggml-metal.m \
- ggml/src/ggml-metal/ggml-metal-impl.h \
- ggml/include/ggml-metal.h \
- ggml/include/ggml.h
- $(CC) $(CFLAGS) -c $< -o $@
--- /dev/null
+class Options
+ def initialize
+ @options = {}
+ @pending_options = []
+ @ignored_options = []
+
+ configure
+ end
+
+ def help
+ @options
+ .collect_concat {|name, (type, value)|
+ option = option_name(name)
+ if type == :bool
+ ["--enable-#{option}", "--disable-#{option}"]
+ else
+ "--#{option}=#{type.upcase}"
+ end
+ }
+ .join($/)
+ end
+
+ def to_s
+ @options
+ .reject {|name, (type, value)| value.nil?}
+ .collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? "OFF" : value.shellescape}"}
+ .join(" ")
+ end
+
+ def cmake_options
+ return @cmake_options if @cmake_options
+
+ output = nil
+ Dir.chdir __dir__ do
+ output = `cmake -S sources -B build -L`
+ end
+ started = false
+ @cmake_options = output.lines.filter_map {|line|
+ if line.chomp == "-- Cache values"
+ started = true
+ next
+ end
+ next unless started
+ option, value = line.chomp.split("=", 2)
+ name, type = option.split(":", 2)
+ [name, type, value]
+ }
+ end
+
+ def missing_options
+ cmake_options.collect {|name, type, value| name} -
+ @options.keys - @pending_options - @ignored_options
+ end
+
+ def extra_options
+ @options.keys + @pending_options - @ignored_options -
+ cmake_options.collect {|name, type, value| name}
+ end
+
+ private
+
+ def configure
+ filepath "ACCELERATE_FRAMEWORK"
+ ignored "BUILD_SHARED_LIBS"
+ ignored "BUILD_TESTING"
+ ignored "CMAKE_BUILD_TYPE"
+ ignored "CMAKE_INSTALL_PREFIX"
+ string "CMAKE_OSX_ARCHITECTURES"
+ ignored "CMAKE_OSX_DEPLOYMENT_TARGET"
+ string "CMAKE_OSX_SYSROOT"
+ filepath "FOUNDATION_LIBRARY"
+ bool "GGML_ACCELERATE"
+ bool "GGML_ALL_WARNINGS_3RD_PARTY"
+ bool "GGML_AMX_BF16"
+ bool "GGML_AMX_INT8"
+ bool "GGML_AMX_TILE"
+ bool "GGML_AVX"
+ bool "GGML_AVX2"
+ bool "GGML_AVX512"
+ bool "GGML_AVX512_BF16"
+ bool "GGML_AVX512_VBMI"
+ bool "GGML_AVX512_VNNI"
+ bool "GGML_AVX_VNNI"
+ ignored "GGML_BACKEND_DL"
+ ignored "GGML_BIN_INSTALL_DIR"
+ bool "GGML_BLAS"
+ string "GGML_BLAS_VENDOR"
+ bool "GGML_BMI2"
+ ignored "GGML_BUILD_EXAMPLES"
+ ignored "GGML_BUILD_TESTS"
+ filepath "GGML_CCACHE_FOUND"
+ bool "GGML_CPU"
+ bool "GGML_CPU_AARCH64"
+ ignored "GGML_CPU_ALL_VARIANTS"
+ string "GGML_CPU_ARM_ARCH"
+ bool "GGML_CPU_HBM"
+ bool "GGML_CPU_KLEIDIAI"
+ string "GGML_CPU_POWERPC_CPUTYPE"
+ bool "GGML_CUDA"
+ string "GGML_CUDA_COMPRESSION_MODE"
+ bool "GGML_CUDA_F16"
+ bool "GGML_CUDA_FA"
+ bool "GGML_CUDA_FA_ALL_QUANTS"
+ bool "GGML_CUDA_FORCE_CUBLAS"
+ bool "GGML_CUDA_FORCE_MMQ"
+ ignored "GGML_CUDA_GRAPHS"
+ bool "GGML_CUDA_NO_PEER_COPY"
+ bool "GGML_CUDA_NO_VMM"
+ string "GGML_CUDA_PEER_MAX_BATCH_SIZE"
+ bool "GGML_F16C"
+ bool "GGML_FMA"
+ bool "GGML_GPROF"
+ bool "GGML_HIP"
+ bool "GGML_HIP_GRAPHS"
+ bool "GGML_HIP_NO_VMM"
+ bool "GGML_HIP_ROCWMMA_FATTN"
+ bool "GGML_HIP_UMA"
+ ignored "GGML_INCLUDE_INSTALL_DIR"
+ bool "GGML_KOMPUTE"
+ bool "GGML_LASX"
+ ignored "GGML_LIB_INSTALL_DIR"
+ ignored "GGML_LLAMAFILE"
+ bool "GGML_LSX"
+ bool "GGML_LTO"
+ bool "GGML_METAL"
+ bool "GGML_METAL_EMBED_LIBRARY"
+ string "GGML_METAL_MACOSX_VERSION_MIN"
+ bool "GGML_METAL_NDEBUG"
+ bool "GGML_METAL_SHADER_DEBUG"
+ string "GGML_METAL_STD"
+ bool "GGML_METAL_USE_BF16"
+ bool "GGML_MUSA"
+ bool "GGML_NATIVE"
+ bool "GGML_OPENCL"
+ bool "GGML_OPENCL_EMBED_KERNELS"
+ bool "GGML_OPENCL_PROFILING"
+ string "GGML_OPENCL_TARGET_VERSION"
+ bool "GGML_OPENCL_USE_ADRENO_KERNELS"
+ bool "GGML_OPENMP"
+ bool "GGML_RPC"
+ bool "GGML_RVV"
+ bool "GGML_RV_ZFH"
+ pending "GGML_SCCACHE_FOUND"
+ string "GGML_SCHED_MAX_COPIES"
+ ignored "GGML_STATIC"
+ bool "GGML_SYCL"
+ string "GGML_SYCL_DEVICE_ARCH"
+ bool "GGML_SYCL_F16"
+ bool "GGML_SYCL_GRAPH"
+ string "GGML_SYCL_TARGET"
+ bool "GGML_VULKAN"
+ bool "GGML_VULKAN_CHECK_RESULTS"
+ bool "GGML_VULKAN_DEBUG"
+ bool "GGML_VULKAN_MEMORY_DEBUG"
+ bool "GGML_VULKAN_PERF"
+ ignored "GGML_VULKAN_RUN_TESTS"
+ filepath "GGML_VULKAN_SHADERS_GEN_TOOLCHAIN"
+ bool "GGML_VULKAN_SHADER_DEBUG_INFO"
+ pending "GGML_VULKAN_VALIDATE"
+ bool "GGML_VXE"
+ filepath "GIT_EXE"
+ filepath "MATH_LIBRARY"
+ filepath "METALKIT_FRAMEWORK"
+ filepath "METAL_FRAMEWORK"
+ bool "WHISPER_ALL_WARNINGS"
+ bool "WHISPER_ALL_WARNINGS_3RD_PARTY"
+ ignored "WHISPER_BIN_INSTALL_DIR"
+ ignored "WHISPER_BUILD_EXAMPLES"
+ ignored "WHISPER_BUILD_SERVER"
+ ignored"WHISPER_BUILD_TESTS"
+ bool "WHISPER_CCACHE"
+ bool "WHISPER_COREML"
+ bool "WHISPER_COREML_ALLOW_FALLBACK"
+ ignored "WHISPER_CURL"
+ bool "WHISPER_FATAL_WARNINGS"
+ ignored "WHISPER_FFMPEG"
+ ignored "WHISPER_INCLUDE_INSTALL_DIR"
+ ignored "WHISPER_LIB_INSTALL_DIR"
+ bool "WHISPER_OPENVINO"
+ bool "WHISPER_SANITIZE_ADDRESS"
+ bool "WHISPER_SANITIZE_THREAD"
+ bool "WHISPER_SANITIZE_UNDEFINED"
+ ignored "WHISPER_SDL2"
+ pending "WHISPER_USE_SYSTEM_GGML"
+ end
+
+ def option_name(name)
+ name.downcase.gsub("_", "-")
+ end
+
+ def bool(name)
+ option = option_name(name)
+ value = enable_config(option)
+ @options[name] = [:bool, value]
+ end
+
+ def string(name, type=:string)
+ option = "--#{option_name(name)}"
+ value = arg_config(option)
+ raise "String expected for #{option}" if value == true || value&.empty?
+ @options[name] = [type, value]
+ end
+
+ def path(name)
+ string(name, :path)
+ end
+
+ def filepath(name)
+ string(name, :filepath)
+ end
+
+ def pending(name)
+ @pending_options << name
+ end
+
+ def ignored(name)
+ @ignored_options << name
+ end
+end
def self.log_set: (log_callback, Object? user_data) -> log_callback
class Context
- def self.new: (string | _ToPath | ::URI::HTTP) -> instance
+ def self.new: (path | ::URI::HTTP) -> instance
+
+ # Transcribes a single file.
+ # The result can also be yielded to a block:
+ #
+ # params = Whisper::Params.new
+ # params.duration = 60_000
+ # whisper.transcribe "path/to/audio.wav", params do |text|
+ # puts text
+ # end
+ #
def transcribe: (string, Params) -> self
| (string, Params) { (String) -> void } -> self
+
def model_n_vocab: () -> Integer
def model_n_audio_ctx: () -> Integer
def model_n_audio_state: () -> Integer
def model_n_mels: () -> Integer
def model_ftype: () -> Integer
def model_type: () -> String
+
+ # Yields each Whisper::Segment:
+ #
+ # whisper.transcribe("path/to/audio.wav", params)
+ # whisper.each_segment do |segment|
+ # puts segment.text
+ # end
+ #
+ # Returns an Enumerator if no block given:
+ #
+ # whisper.transcribe("path/to/audio.wav", params)
+ # enum = whisper.each_segment
+ # enum.to_a # => [#<Whisper::Segment>, ...]
+ #
def each_segment: { (Segment) -> void } -> void
| () -> Enumerator[Segment]
+
def model: () -> Model
def full_get_segment: (Integer nth) -> Segment
def full_n_segments: () -> Integer
+
+ # Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
+ #
def full_lang_id: () -> Integer
+
+ # Start time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
+ #
+ # full_get_segment_t0(3) # => 1668 (16680 ms)
+ #
def full_get_segment_t0: (Integer) -> Integer
+
+ # End time of a segment indexed by +segment_index+ in centiseconds (units of 10 milliseconds).
+ #
+ # full_get_segment_t1(3) # => 1668 (16680 ms)
+ #
def full_get_segment_t1: (Integer) -> Integer
+
+ # Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
+ #
+ # full_get_segment_speaker_turn_next(3) # => true
+ #
def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
+
+ # Text of a segment indexed by +segment_index+.
+ #
+ # full_get_segment_text(3) # => "ask not what your country can do for you, ..."
+ #
def full_get_segment_text: (Integer) -> String
+
def full_get_segment_no_speech_prob: (Integer) -> Float
+
+ # Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
+ # Not thread safe for same context
+ # Uses the specified decoding strategy to obtain the text.
+ #
+ # The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
+ #
def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
| (Params, _Samples, ?Integer n_samples) -> self
+
+ # Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
+ # Result is stored in the default state of the context
+ # Not thread safe if executed in parallel on the same context.
+ # It seems this approach can offer some speedup in some cases.
+ # However, the transcription accuracy can be worse at the beginning and end of each chunk.
+ #
def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
| (Params, _Samples, ?Integer n_samples) -> self
| (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
?abort_callback: abort_callback,
?abort_callback_user_data: Object
) -> instance
+
+ # params.language = "auto" | "en", etc...
+ #
def language=: (String) -> String # TODO: Enumerate lang names
+
def language: () -> String
def translate=: (boolish) -> boolish
def translate: () -> (true | false)
def no_context=: (boolish) -> boolish
+
+ # If true, does not use past transcription (if any) as initial prompt for the decoder.
+ #
def no_context: () -> (true | false)
+
def single_segment=: (boolish) -> boolish
+
+ # If true, forces single segment output (useful for streaming).
+ #
def single_segment: () -> (true | false)
+
def print_special=: (boolish) -> boolish
+
+ # If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.).
+ #
def print_special: () -> (true | false)
+
def print_progress=: (boolish) -> boolish
+
+ # If true, prints progress information.
+ #
def print_progress: () -> (true | false)
+
def print_realtime=: (boolish) -> boolish
+
+ # If true, prints results from within whisper.cpp. (avoid it, use callback instead)
+ #
def print_realtime: () -> (true | false)
+
+ # If true, prints timestamps for each text segment when printing realtime.
+ #
def print_timestamps=: (boolish) -> boolish
+
def print_timestamps: () -> (true | false)
+
def suppress_blank=: (boolish) -> boolish
+
+ # If true, suppresses blank outputs.
+ #
def suppress_blank: () -> (true | false)
+
def suppress_nst=: (boolish) -> boolish
+
+ # If true, suppresses non-speech-tokens.
+ #
def suppress_nst: () -> (true | false)
+
def token_timestamps=: (boolish) -> boolish
+
+ # If true, enables token-level timestamps.
+ #
def token_timestamps: () -> (true | false)
+
def split_on_word=: (boolish) -> boolish
+
+ # If true, split on word rather than on token (when used with max_len).
+ #
def split_on_word: () -> (true | false)
+
def initial_prompt=: (_ToS) -> _ToS
+
+ # Tokens to provide to the whisper decoder as initial prompt
+ # these are prepended to any existing text context from a previous call
+ # use whisper_tokenize() to convert text to tokens.
+ # Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
+ #
def initial_prompt: () -> (String | nil)
+
def diarize=: (boolish) -> boolish
+
+ # If true, enables diarization.
+ #
def diarize: () -> (true | false)
+
def offset=: (Integer) -> Integer
+
+ # Start offset in ms.
+ #
def offset: () -> Integer
+
def duration=: (Integer) -> Integer
+
+ # Audio duration to process in ms.
+ #
def duration: () -> Integer
+
def max_text_tokens=: (Integer) -> Integer
+
+ # Max tokens to use from past text as prompt for the decoder.
+ #
def max_text_tokens: () -> Integer
+
def temperature=: (Float) -> Float
def temperature: () -> Float
def max_initial_ts=: (Float) -> Float
+
+ # See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
+ #
def max_initial_ts: () -> Float
+
def length_penalty=: (Float) -> Float
def length_penalty: () -> Float
def temperature_inc=: (Float) -> Float
def temperature_inc: () -> Float
def entropy_thold=: (Float) -> Float
+
+ # Similar to OpenAI's "compression_ratio_threshold"
+ #
def entropy_thold: () -> Float
+
def logprob_thold=: (Float) -> Float
def logprob_thold: () -> Float
def no_speech_thold=: (Float) -> Float
def no_speech_thold: () -> Float
+
+ # Sets new segment callback, called for every newly generated text segment.
+ #
+ # params.new_segment_callback = ->(context, _, n_new, user_data) {
+ # # ...
+ # }
+ #
def new_segment_callback=: (new_segment_callback) -> new_segment_callback
def new_segment_callback: () -> (new_segment_callback | nil)
+
+ # Sets user data passed to the last argument of new segment callback.
+ #
def new_segment_callback_user_data=: (Object) -> Object
+
def new_segment_callback_user_data: () -> Object
+
+ # Sets progress callback, called on each progress update.
+ #
+ # params.progress_callback = ->(context, _, progress, user_data) {
+ # # ...
+ # }
+ #
+ # +progress+ is an Integer between 0 and 100.
+ #
def progress_callback=: (progress_callback) -> progress_callback
+
def progress_callback: () -> (progress_callback | nil)
+
+ # Sets user data passed to the last argument of progress callback.
+ #
def progress_callback_user_data=: (Object) -> Object
+
def progress_callback_user_data: () -> Object
+
+ # Sets abort callback, called to check if the process should be aborted.
+ #
+ # params.abort_callback = ->(user_data) {
+ # # ...
+ # }
+ #
+ #
def abort_callback=: (abort_callback) -> abort_callback
+
def abort_callback: () -> (abort_callback | nil)
+
+ # Sets user data passed to the last argument of abort callback.
+ #
def abort_callback_user_data=: (Object) -> Object
+
def abort_callback_user_data: () -> Object
+
+ # Hook called on new segment. Yields each Whisper::Segment.
+ #
+ # whisper.on_new_segment do |segment|
+ # # ...
+ # end
+ #
def on_new_segment: { (Segment) -> void } -> void
+
+ # Hook called on progress update. Yields each progress Integer between 0 and 100.
+ #
def on_progress: { (Integer progress) -> void } -> void
+
+ # Calls the block to determine whether to abort. Return +true+ when you want to abort.
+ #
+ # params.abort_on do
+ # if some_condition
+ # true # abort
+ # else
+ # false # continue
+ # end
+ # end
+ #
def abort_on: { (Object user_data) -> boolish } -> void
end
def type: () -> String
class URI
- def self.new: (string | ::URI::HTTP) -> self
+ def self.new: (string | ::URI::HTTP) -> instance
def to_path: -> String
def clear_cache: -> void
end
end
class Segment
+ # Start time in milliseconds.
+ #
def start_time: () -> Integer
+
+ # End time in milliseconds.
+ #
def end_time: () -> Integer
+
+ # Whether the next segment is predicted as a speaker turn.
def speaker_next_turn?: () -> (true | false)
+
def text: () -> String
def no_speech_prob: () -> Float
end
def whisper
self.class.whisper
end
+
+ # Namespace that hosts the extension's Options class for the tests.
+ # ext/options.rb is loaded with this module as the wrapper, and the module is
+ # then included into Options so that Options' calls to enable_config and
+ # arg_config resolve to the stubs defined below.
+ module BuildOptions
+ load "ext/options.rb", self
+ Options.include self
+
+ # Stub: empty body, so it always returns nil.
+ def enable_config(name)
+ end
+
+ # Stub: empty body, so it always returns nil.
+ def arg_config(name)
+ end
+ end
end
match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
filename = match_data[1]
version = match_data[2]
- basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
Dir.mktmpdir do |dir|
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true
- assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
- assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
- assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
+ assert_installed dir, version
end
end
+
+ private
+
+ def assert_installed(dir, version)
+ assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", "whisper.#{RbConfig::CONFIG["DLEXT"]}")
+ assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE")
+ assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build")
+ end
+ end
+
+ def test_build_options
+ options = BuildOptions::Options.new
+ assert_empty options.missing_options
+ unless ENV["CI"]
+ assert_empty options.extra_options
+ end
end
end
Gem::Specification.new do |s|
s.name = "whispercpp"
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
- s.version = '1.3.1'
- s.date = '2024-12-19'
+ s.version = '1.3.2'
+ s.date = '2025-04-17'
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
s.email = 'todd.fisher@gmail.com'
s.extra_rdoc_files = ['LICENSE', 'README.md']