ruby : Add ruby binding for max_len (#3365)

* add ruby binding for max_len

* add test, update param numbers
This commit is contained in:
Adam Debono
2025-08-07 12:37:45 +10:00
committed by GitHub
parent 0becabc8d6
commit 4245c77b65
3 changed files with 70 additions and 25 deletions

View File

@ -26,7 +26,7 @@
rb_define_method(cParams, #param_name, ruby_whisper_params_get_ ## param_name, 0); \ rb_define_method(cParams, #param_name, ruby_whisper_params_get_ ## param_name, 0); \
rb_define_method(cParams, #param_name "=", ruby_whisper_params_set_ ## param_name, 1); rb_define_method(cParams, #param_name "=", ruby_whisper_params_set_ ## param_name, 1);
#define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 35 #define RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT 36
extern VALUE cParams; extern VALUE cParams;
extern VALUE cVADParams; extern VALUE cVADParams;
@ -49,6 +49,7 @@ static ID id_print_timestamps;
static ID id_suppress_blank; static ID id_suppress_blank;
static ID id_suppress_nst; static ID id_suppress_nst;
static ID id_token_timestamps; static ID id_token_timestamps;
static ID id_max_len;
static ID id_split_on_word; static ID id_split_on_word;
static ID id_initial_prompt; static ID id_initial_prompt;
static ID id_diarize; static ID id_diarize;
@ -514,6 +515,33 @@ ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value)
{ {
BOOL_PARAMS_SETTER(self, token_timestamps, value) BOOL_PARAMS_SETTER(self, token_timestamps, value)
} }
/*
 * Maximum segment length, measured in characters.
 *
 * call-seq:
 *   max_len -> Integer
 */
static VALUE
ruby_whisper_params_get_max_len(VALUE self)
{
  ruby_whisper_params *wrapped;
  int current_max_len;

  /* Unwrap the native params struct held by this Ruby object. */
  TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, wrapped);

  current_max_len = wrapped->params.max_len;
  return INT2NUM(current_max_len);
}
/*
 * Sets the maximum segment length in characters.
 *
 * call-seq:
 *   max_len = length -> length
 */
static VALUE
ruby_whisper_params_set_max_len(VALUE self, VALUE value)
{
  ruby_whisper_params *wrapped;
  int requested_max_len;

  /* Unwrap the native params struct held by this Ruby object. */
  TypedData_Get_Struct(self, ruby_whisper_params, &ruby_whisper_params_type, wrapped);

  /* Convert after unwrapping so the order of possible errors is unchanged. */
  requested_max_len = NUM2INT(value);
  wrapped->params.max_len = requested_max_len;

  /* Hand back the original Ruby object, not the converted int. */
  return value;
}
/* /*
* If true, split on word rather than on token (when used with max_len). * If true, split on word rather than on token (when used with max_len).
* *
@ -1137,6 +1165,7 @@ ruby_whisper_params_initialize(int argc, VALUE *argv, VALUE self)
SET_PARAM_IF_SAME(suppress_blank) SET_PARAM_IF_SAME(suppress_blank)
SET_PARAM_IF_SAME(suppress_nst) SET_PARAM_IF_SAME(suppress_nst)
SET_PARAM_IF_SAME(token_timestamps) SET_PARAM_IF_SAME(token_timestamps)
SET_PARAM_IF_SAME(max_len)
SET_PARAM_IF_SAME(split_on_word) SET_PARAM_IF_SAME(split_on_word)
SET_PARAM_IF_SAME(initial_prompt) SET_PARAM_IF_SAME(initial_prompt)
SET_PARAM_IF_SAME(offset) SET_PARAM_IF_SAME(offset)
@ -1271,30 +1300,31 @@ init_ruby_whisper_params(VALUE *mWhisper)
DEFINE_PARAM(suppress_blank, 8) DEFINE_PARAM(suppress_blank, 8)
DEFINE_PARAM(suppress_nst, 9) DEFINE_PARAM(suppress_nst, 9)
DEFINE_PARAM(token_timestamps, 10) DEFINE_PARAM(token_timestamps, 10)
DEFINE_PARAM(split_on_word, 11) DEFINE_PARAM(max_len, 11)
DEFINE_PARAM(initial_prompt, 12) DEFINE_PARAM(split_on_word, 12)
DEFINE_PARAM(diarize, 13) DEFINE_PARAM(initial_prompt, 13)
DEFINE_PARAM(offset, 14) DEFINE_PARAM(diarize, 14)
DEFINE_PARAM(duration, 15) DEFINE_PARAM(offset, 15)
DEFINE_PARAM(max_text_tokens, 16) DEFINE_PARAM(duration, 16)
DEFINE_PARAM(temperature, 17) DEFINE_PARAM(max_text_tokens, 17)
DEFINE_PARAM(max_initial_ts, 18) DEFINE_PARAM(temperature, 18)
DEFINE_PARAM(length_penalty, 19) DEFINE_PARAM(max_initial_ts, 19)
DEFINE_PARAM(temperature_inc, 20) DEFINE_PARAM(length_penalty, 20)
DEFINE_PARAM(entropy_thold, 21) DEFINE_PARAM(temperature_inc, 21)
DEFINE_PARAM(logprob_thold, 22) DEFINE_PARAM(entropy_thold, 22)
DEFINE_PARAM(no_speech_thold, 23) DEFINE_PARAM(logprob_thold, 23)
DEFINE_PARAM(new_segment_callback, 24) DEFINE_PARAM(no_speech_thold, 24)
DEFINE_PARAM(new_segment_callback_user_data, 25) DEFINE_PARAM(new_segment_callback, 25)
DEFINE_PARAM(progress_callback, 26) DEFINE_PARAM(new_segment_callback_user_data, 26)
DEFINE_PARAM(progress_callback_user_data, 27) DEFINE_PARAM(progress_callback, 27)
DEFINE_PARAM(encoder_begin_callback, 28) DEFINE_PARAM(progress_callback_user_data, 28)
DEFINE_PARAM(encoder_begin_callback_user_data, 29) DEFINE_PARAM(encoder_begin_callback, 29)
DEFINE_PARAM(abort_callback, 30) DEFINE_PARAM(encoder_begin_callback_user_data, 30)
DEFINE_PARAM(abort_callback_user_data, 31) DEFINE_PARAM(abort_callback, 31)
DEFINE_PARAM(vad, 32) DEFINE_PARAM(abort_callback_user_data, 32)
DEFINE_PARAM(vad_model_path, 33) DEFINE_PARAM(vad, 33)
DEFINE_PARAM(vad_params, 34) DEFINE_PARAM(vad_model_path, 34)
DEFINE_PARAM(vad_params, 35)
rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0); rb_define_method(cParams, "on_new_segment", ruby_whisper_params_on_new_segment, 0);
rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0); rb_define_method(cParams, "on_progress", ruby_whisper_params_on_progress, 0);

View File

@ -135,6 +135,7 @@ module Whisper
?suppress_blank: boolish, ?suppress_blank: boolish,
?suppress_nst: boolish, ?suppress_nst: boolish,
?token_timestamps: boolish, ?token_timestamps: boolish,
?max_len: Integer,
?split_on_word: boolish, ?split_on_word: boolish,
?initial_prompt: string | nil, ?initial_prompt: string | nil,
?diarize: boolish, ?diarize: boolish,
@ -222,6 +223,12 @@ module Whisper
# #
def token_timestamps: () -> (true | false) def token_timestamps: () -> (true | false)
# Sets the maximum segment length in characters.
def max_len=: (Integer) -> Integer
# Maximum segment length in characters.
#
def max_len: () -> Integer
def split_on_word=: (boolish) -> boolish def split_on_word=: (boolish) -> boolish
# If true, split on word rather than on token (when used with max_len). # If true, split on word rather than on token (when used with max_len).

View File

@ -13,6 +13,7 @@ class TestParams < TestBase
:suppress_blank, :suppress_blank,
:suppress_nst, :suppress_nst,
:token_timestamps, :token_timestamps,
:max_len,
:split_on_word, :split_on_word,
:initial_prompt, :initial_prompt,
:diarize, :diarize,
@ -139,6 +140,13 @@ class TestParams < TestBase
assert !@params.token_timestamps assert !@params.token_timestamps
end end
# max_len must round-trip through its setter and getter, both for a
# positive value and when set back to 0.
def test_max_len
  # assert_equal takes (expected, actual); keeping that order makes the
  # failure message report the right value as "expected".
  @params.max_len = 42
  assert_equal 42, @params.max_len

  @params.max_len = 0
  assert_equal 0, @params.max_len
end
def test_split_on_word def test_split_on_word
@params.split_on_word = true @params.split_on_word = true
assert @params.split_on_word assert @params.split_on_word