free(rw);
}
+void rb_whisper_callbcack_container_mark(ruby_whisper_callback_container *rwc) {
+ rb_gc_mark(rwc->user_data);
+ rb_gc_mark(rwc->callback);
+ rb_gc_mark(rwc->callbacks);
+}
+
void rb_whisper_params_mark(ruby_whisper_params *rwp) {
- rb_gc_mark(rwp->new_segment_callback_container->user_data);
- rb_gc_mark(rwp->new_segment_callback_container->callback);
- rb_gc_mark(rwp->new_segment_callback_container->callbacks);
+ rb_whisper_callbcack_container_mark(rwp->new_segment_callback_container);
+ rb_whisper_callbcack_container_mark(rwp->progress_callback_container);
+ rb_whisper_callbcack_container_mark(rwp->abort_callback_container);
}
void rb_whisper_params_free(ruby_whisper_params *rwp) {
rwp = ALLOC(ruby_whisper_params);
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
rwp->new_segment_callback_container = rb_whisper_callback_container_allocate();
+ rwp->progress_callback_container = rb_whisper_callback_container_allocate();
+ rwp->abort_callback_container = rb_whisper_callback_container_allocate();
return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
}
rwp->params.new_segment_callback_user_data = rwp->new_segment_callback_container;
}
+ if (!NIL_P(rwp->progress_callback_container->callback) || 0 != RARRAY_LEN(rwp->progress_callback_container->callbacks)) {
+ rwp->params.progress_callback = [](struct whisper_context *ctx, struct whisper_state * /*state*/, int progress_cur, void *user_data) {
+ const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data;
+ const VALUE progress = INT2NUM(progress_cur);
+ // Currently, doesn't support state because
+ // those require to resolve GC-related problems.
+ if (!NIL_P(container->callback)) {
+ rb_funcall(container->callback, id_call, 4, *container->context, Qnil, progress, container->user_data);
+ }
+ const long callbacks_len = RARRAY_LEN(container->callbacks);
+ if (0 == callbacks_len) {
+ return;
+ }
+ for (int j = 0; j < callbacks_len; j++) {
+ VALUE cb = rb_ary_entry(container->callbacks, j);
+ rb_funcall(cb, id_call, 1, progress);
+ }
+ };
+ rwp->progress_callback_container->context = &self;
+ rwp->params.progress_callback_user_data = rwp->progress_callback_container;
+ }
+
+ if (!NIL_P(rwp->abort_callback_container->callback) || 0 != RARRAY_LEN(rwp->abort_callback_container->callbacks)) {
+ rwp->params.abort_callback = [](void * user_data) {
+ const ruby_whisper_callback_container *container = (ruby_whisper_callback_container *)user_data;
+ if (!NIL_P(container->callback)) {
+ VALUE result = rb_funcall(container->callback, id_call, 1, container->user_data);
+ if (!NIL_P(result) && Qfalse != result) {
+ return true;
+ }
+ }
+ const long callbacks_len = RARRAY_LEN(container->callbacks);
+ if (0 == callbacks_len) {
+ return false;
+ }
+ for (int j = 0; j < callbacks_len; j++) {
+ VALUE cb = rb_ary_entry(container->callbacks, j);
+ VALUE result = rb_funcall(cb, id_call, 1, container->user_data);
+ if (!NIL_P(result) && Qfalse != result) {
+ return true;
+ }
+ }
+ return false;
+ };
+ rwp->abort_callback_container->context = &self;
+ rwp->params.abort_callback_user_data = rwp->abort_callback_container;
+ }
+
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
fprintf(stderr, "failed to process audio\n");
return self;
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
BOOL_PARAMS_SETTER(self, split_on_word, value)
}
+/*
+ * Tokens to provide to the whisper decoder as initial prompt
+ * these are prepended to any existing text context from a previous call
+ * use whisper_tokenize() to convert text to tokens.
+ * Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224).
+ *
+ * call-seq:
+ * initial_prompt -> String
+ */
+static VALUE ruby_whisper_params_get_initial_prompt(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return rwp->params.initial_prompt == nullptr ? Qnil : rb_str_new2(rwp->params.initial_prompt);
+}
+/*
+ * call-seq:
+ * initial_prompt = prompt -> prompt
+ */
+static VALUE ruby_whisper_params_set_initial_prompt(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.initial_prompt = StringValueCStr(value);
+ return value;
+}
/*
* If true, enables diarization.
*
rwp->params.n_max_text_ctx = NUM2INT(value);
return value;
}
+/*
+ * call-seq:
+ * temperature -> Float
+ */
+static VALUE ruby_whisper_params_get_temperature(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.temperature);
+}
+/*
+ * call-seq:
+ * temperature = temp -> temp
+ */
+static VALUE ruby_whisper_params_set_temperature(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.temperature = RFLOAT_VALUE(value);
+ return value;
+}
+/*
+ * See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97
+ *
+ * call-seq:
+ * max_initial_ts -> Flaot
+ */
+static VALUE ruby_whisper_params_get_max_initial_ts(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.max_initial_ts);
+}
+/*
+ * call-seq:
+ * max_initial_ts = timestamp -> timestamp
+ */
+static VALUE ruby_whisper_params_set_max_initial_ts(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.max_initial_ts = RFLOAT_VALUE(value);
+ return value;
+}
+/*
+ * call-seq:
+ * length_penalty -> Float
+ */
+static VALUE ruby_whisper_params_get_length_penalty(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.length_penalty);
+}
+/*
+ * call-seq:
+ * length_penalty = penalty -> penalty
+ */
+static VALUE ruby_whisper_params_set_length_penalty(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.length_penalty = RFLOAT_VALUE(value);
+ return value;
+}
+/*
+ * call-seq:
+ * temperature_inc -> Float
+ */
+static VALUE ruby_whisper_params_get_temperature_inc(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.temperature_inc);
+}
+/*
+ * call-seq:
+ * temperature_inc = inc -> inc
+ */
+static VALUE ruby_whisper_params_set_temperature_inc(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.temperature_inc = RFLOAT_VALUE(value);
+ return value;
+}
+/*
+ * Similar to OpenAI's "compression_ratio_threshold"
+ *
+ * call-seq:
+ * entropy_thold -> Float
+ */
+static VALUE ruby_whisper_params_get_entropy_thold(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.entropy_thold);
+}
+/*
+ * call-seq:
+ * entropy_thold = threshold -> threshold
+ */
+static VALUE ruby_whisper_params_set_entropy_thold(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.entropy_thold = RFLOAT_VALUE(value);
+ return value;
+}
+/*
+ * call-seq:
+ * logprob_thold -> Float
+ */
+static VALUE ruby_whisper_params_get_logprob_thold(VALUE self) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ return DBL2NUM(rwp->params.logprob_thold);
+}
+/*
+ * call-seq:
+ * logprob_thold = threshold -> threshold
+ */
+static VALUE ruby_whisper_params_set_logprob_thold(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->params.logprob_thold = RFLOAT_VALUE(value);
+ return value;
+}
/*
* Sets new segment callback, called for every newly generated text segment.
*
rwp->new_segment_callback_container->user_data = value;
return value;
}
+/*
+ * Sets progress callback, called on each progress update.
+ *
+ * params.new_segment_callback = ->(context, _, n_new, user_data) {
+ * # ...
+ * }
+ *
+ * call-seq:
+ * progress_callback = callback -> callback
+ */
+static VALUE ruby_whisper_params_set_progress_callback(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->progress_callback_container->callback = value;
+ return value;
+}
+/*
+ * Sets user data passed to the last argument of progress callback.
+ *
+ * call-seq:
+ * progress_callback_user_data = user_data -> use_data
+ */
+static VALUE ruby_whisper_params_set_progress_callback_user_data(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->progress_callback_container->user_data = value;
+ return value;
+}
+/*
+ * Sets abort callback, called to check if the process should be aborted.
+ *
+ * params.abort_callback = ->(user_data) {
+ * # ...
+ * }
+ *
+ * call-seq:
+ * abort_callback = callback -> callback
+ */
+static VALUE ruby_whisper_params_set_abort_callback(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->abort_callback_container->callback = value;
+ return value;
+}
+/*
+ * Sets user data passed to the last argument of abort callback.
+ *
+ * call-seq:
+ * abort_callback_user_data = user_data -> use_data
+ */
+static VALUE ruby_whisper_params_set_abort_callback_user_data(VALUE self, VALUE value) {
+ ruby_whisper_params *rwp;
+ Data_Get_Struct(self, ruby_whisper_params, rwp);
+ rwp->abort_callback_container->user_data = value;
+ return value;
+}
// High level API
return Qnil;
}
+/*
+ * Hook called on progress update. Yields each progress Integer between 0 and 100.
+ *
+ * whisper.on_progress do |progress|
+ * # ...
+ * end
+ *
+ * call-seq:
+ * on_progress {|progress| ... }
+ */
+static VALUE ruby_whisper_params_on_progress(VALUE self) {
+ ruby_whisper_params *rws;
+ Data_Get_Struct(self, ruby_whisper_params, rws);
+ const VALUE blk = rb_block_proc();
+ rb_ary_push(rws->progress_callback_container->callbacks, blk);
+ return Qnil;
+}
+
+/*
+ * Call block to determine whether abort or not. Return +true+ when you want to abort.
+ *
+ * params.abort_on do
+ * if some_condition
+ * true # abort
+ * else
+ * false # continue
+ * end
+ * end
+ *
+ * call-seq:
+ * abort_on { ... }
+ */
+static VALUE ruby_whisper_params_abort_on(VALUE self) {
+ ruby_whisper_params *rws;
+ Data_Get_Struct(self, ruby_whisper_params, rws);
+ const VALUE blk = rb_block_proc();
+ rb_ary_push(rws->abort_callback_container->callbacks, blk);
+ return Qnil;
+}
+
/*
* Start time in milliseconds.
*
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
+ rb_define_method(cParams, "initial_prompt", ruby_whisper_params_get_initial_prompt, 0);
+ rb_define_method(cParams, "initial_prompt=", ruby_whisper_params_set_initial_prompt, 1);
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
+ rb_define_method(cParams, "temperature", ruby_whisper_params_get_temperature, 0);
+ rb_define_method(cParams, "temperature=", ruby_whisper_params_set_temperature, 1);
+ rb_define_method(cParams, "max_initial_ts", ruby_whisper_params_get_max_initial_ts, 0);
+ rb_define_method(cParams, "max_initial_ts=", ruby_whisper_params_set_max_initial_ts, 1);
+ rb_define_method(cParams, "length_penalty", ruby_whisper_params_get_length_penalty, 0);
+ rb_define_method(cParams, "length_penalty=", ruby_whisper_params_set_length_penalty, 1);
+ rb_define_method(cParams, "temperature_inc", ruby_whisper_params_get_temperature_inc, 0);
+ rb_define_method(cParams, "temperature_inc=", ruby_whisper_params_set_temperature_inc, 1);
+ rb_define_method(cParams, "entropy_thold", ruby_whisper_params_get_entropy_thold, 0);
+ rb_define_method(cParams, "entropy_thold=", ruby_whisper_params_set_entropy_thold, 1);
+ rb_define_method(cParams, "logprob_thold", ruby_whisper_params_get_logprob_thold, 0);
+ rb_define_method(cParams, "logprob_thold=", ruby_whisper_params_set_logprob_thold, 1);
rb_define_method(cParams, "new_segment_callback=", ruby_whisper_params_set_new_segment_callback, 1);
rb_define_method(cParams, "new_segment_callback_user_data=", ruby_whisper_params_set_new_segment_callback_user_data, 1);
+ rb_define_method(cParams, "progress_callback=", ruby_whisper_params_set_progress_callback, 1);
+ rb_define_method(cParams, "progress_callback_user_data=", ruby_whisper_params_set_progress_callback_user_data, 1);
+ rb_define_method(cParams, "abort_callback=", ruby_whisper_params_set_abort_callback, 1);
+ rb_define_method(cParams, "abort_callback_user_data=", ruby_whisper_params_set_abort_callback_user_data, 1);
// High leve
cSegment = rb_define_class_under(mWhisper, "Segment", rb_cObject);
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
+ rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);
+ rb_define_method(cParams, "abort_on", ruby_whisper_params_abort_on, 0);
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);