mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-05 17:26:41 +02:00
ruby : add Core ML support (#3214)
* Prevent overflow * Fix memsize of Whisper::Context * Rename xxx_initialize to more Ruby-esque name: xxx_s_new * Define Whisper::Model::ZipURI * Define Whisper::Model.coreml_compiled_models * Make Options' @cmake_options Hash * Use --{enable,disable}-whisper-coreml option for -I/opt/homebrew/opt/llvm/include * Prepare Core ML model if enabled * Add test for ZipURI * Add signatures for ZipURI * Add Whisper.system_info_str * Add test for Whisper.system_info_str * Add signagure for Model.coreml_compiled_models * Add signature for Whisper.system_info_str * Add test for Core ML * Update date * Maintain .gitignore
This commit is contained in:
parent
98dfe8dc26
commit
0251445005
3
bindings/ruby/.gitignore
vendored
3
bindings/ruby/.gitignore
vendored
@ -1,6 +1,3 @@
|
||||
LICENSE
|
||||
pkg/
|
||||
lib/whisper.*
|
||||
ext/sources/*
|
||||
!ext/sources/CMakeGraphVizOptions.cmake
|
||||
ext/mkmf.log
|
||||
|
10
bindings/ruby/ext/.gitignore
vendored
10
bindings/ruby/ext/.gitignore
vendored
@ -2,10 +2,8 @@ Makefile
|
||||
whisper.so
|
||||
whisper.bundle
|
||||
whisper.dll
|
||||
scripts/get-flags.mk
|
||||
*.o
|
||||
/*/**/*.c
|
||||
/*/**/*.cpp
|
||||
/*/**/*.h
|
||||
/*/**/*.m
|
||||
/*/**/*.metal
|
||||
*.a
|
||||
sources/*
|
||||
!sources/CMakeGraphVizOptions.cmake
|
||||
mkmf.log
|
||||
|
@ -20,27 +20,39 @@ class Options
|
||||
Dir.chdir __dir__ do
|
||||
output = `#{@cmake.shellescape} -S sources -B build -L`
|
||||
end
|
||||
started = false
|
||||
@cmake_options = output.lines.filter_map {|line|
|
||||
if line.chomp == "-- Cache values"
|
||||
started = true
|
||||
next
|
||||
end
|
||||
next unless started
|
||||
option, value = line.chomp.split("=", 2)
|
||||
name, type = option.split(":", 2)
|
||||
[name, type, value]
|
||||
}
|
||||
@cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
|
||||
.filter_map {|line|
|
||||
option, value = line.chomp.split("=", 2)
|
||||
name, type = option.split(":", 2)
|
||||
[
|
||||
name,
|
||||
[
|
||||
type,
|
||||
type == "BOOL" ? value == "ON" : value
|
||||
]
|
||||
]
|
||||
}.to_h
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def configure
|
||||
cmake_options.each do |name, type, default_value|
|
||||
cmake_options.each_pair do |name, (type, default_value)|
|
||||
option = option_name(name)
|
||||
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
|
||||
@options[name] = [type, value]
|
||||
end
|
||||
|
||||
configure_coreml
|
||||
end
|
||||
|
||||
def configure_coreml
|
||||
use_coreml = if @options["WHISPER_COREML"][1].nil?
|
||||
cmake_options["WHISPER_COREML"][1]
|
||||
else
|
||||
@options["WHISPER_COREML"][1]
|
||||
end
|
||||
$CPPFLAGS << " -DRUBY_WHISPER_USE_COREML" if use_coreml
|
||||
end
|
||||
|
||||
def option_name(name)
|
||||
|
@ -22,6 +22,8 @@ ID id_new;
|
||||
ID id_to_path;
|
||||
ID id_URI;
|
||||
ID id_pre_converted_models;
|
||||
ID id_coreml_compiled_models;
|
||||
ID id_cache;
|
||||
|
||||
static bool is_log_callback_finalized = false;
|
||||
|
||||
@ -83,6 +85,14 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
||||
return rb_str_new2(str_full);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* system_info_str -> String
|
||||
*/
|
||||
static VALUE ruby_whisper_s_system_info_str(VALUE self) {
|
||||
return rb_str_new2(whisper_print_system_info());
|
||||
}
|
||||
|
||||
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
||||
is_log_callback_finalized = true;
|
||||
return Qnil;
|
||||
@ -130,6 +140,8 @@ void Init_whisper() {
|
||||
id_to_path = rb_intern("to_path");
|
||||
id_URI = rb_intern("URI");
|
||||
id_pre_converted_models = rb_intern("pre_converted_models");
|
||||
id_coreml_compiled_models = rb_intern("coreml_compiled_models");
|
||||
id_cache = rb_intern("cache");
|
||||
|
||||
mWhisper = rb_define_module("Whisper");
|
||||
mVAD = rb_define_module_under(mWhisper, "VAD");
|
||||
@ -145,6 +157,7 @@ void Init_whisper() {
|
||||
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
||||
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
||||
rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
|
||||
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
||||
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
||||
|
||||
|
@ -11,6 +11,8 @@ extern ID id_new;
|
||||
extern ID id_to_path;
|
||||
extern ID id_URI;
|
||||
extern ID id_pre_converted_models;
|
||||
extern ID id_coreml_compiled_models;
|
||||
extern ID id_cache;
|
||||
|
||||
extern VALUE cContext;
|
||||
extern VALUE eError;
|
||||
@ -18,8 +20,8 @@ extern VALUE cModel;
|
||||
|
||||
extern const rb_data_type_t ruby_whisper_params_type;
|
||||
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
||||
extern VALUE rb_whisper_model_initialize(VALUE context);
|
||||
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
|
||||
extern VALUE rb_whisper_model_s_new(VALUE context);
|
||||
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
||||
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);
|
||||
|
||||
static void
|
||||
@ -53,6 +55,9 @@ ruby_whisper_memsize(const void *p)
|
||||
if (!rw) {
|
||||
return 0;
|
||||
}
|
||||
if (rw->context) {
|
||||
size += sizeof(rw->context);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
@ -79,6 +84,13 @@ ruby_whisper_normalize_model_path(VALUE model_path)
|
||||
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
|
||||
if (!NIL_P(pre_converted_model)) {
|
||||
model_path = pre_converted_model;
|
||||
#ifdef RUBY_WHISPER_USE_COREML
|
||||
VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
|
||||
VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
|
||||
if (!NIL_P(coreml_converted_model)) {
|
||||
rb_funcall(coreml_converted_model, id_cache, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (TYPE(model_path) == T_STRING) {
|
||||
const char * model_path_str = StringValueCStr(model_path);
|
||||
@ -293,13 +305,20 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
||||
// Should check when samples.respond_to?(:length)?
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
n_samples = RARRAY_LEN(samples);
|
||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)RARRAY_LEN(samples);
|
||||
} else if (memory_view_available_p) {
|
||||
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
n_samples = view.byte_size / view.item_size;
|
||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
||||
if (n_samples_size > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)n_samples_size;
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
@ -387,10 +406,17 @@ ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
|
||||
view.obj = Qnil;
|
||||
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||
}
|
||||
n_samples = view.byte_size / view.item_size;
|
||||
ssize_t n_samples_size = view.byte_size / view.item_size;
|
||||
if (n_samples_size > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)n_samples_size;
|
||||
} else {
|
||||
if (TYPE(samples) == T_ARRAY) {
|
||||
n_samples = RARRAY_LEN(samples);
|
||||
if (RARRAY_LEN(samples) > INT_MAX) {
|
||||
rb_raise(rb_eArgError, "samples are too long");
|
||||
}
|
||||
n_samples = (int)RARRAY_LEN(samples);
|
||||
} else if (rb_respond_to(samples, id_length)) {
|
||||
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||
} else {
|
||||
@ -476,7 +502,7 @@ ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
||||
return INT2NUM(t0);
|
||||
return LONG2NUM(t0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -494,7 +520,7 @@ ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
||||
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
||||
return INT2NUM(t1);
|
||||
return LONG2NUM(t1);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -552,7 +578,7 @@ ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
||||
static VALUE
|
||||
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
||||
{
|
||||
return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
|
||||
return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -586,7 +612,7 @@ ruby_whisper_each_segment(VALUE self)
|
||||
|
||||
const int n_segments = whisper_full_n_segments(rw->context);
|
||||
for (int i = 0; i < n_segments; ++i) {
|
||||
rb_yield(rb_whisper_segment_initialize(self, i));
|
||||
rb_yield(rb_whisper_segment_s_new(self, i));
|
||||
}
|
||||
|
||||
return self;
|
||||
@ -599,7 +625,7 @@ ruby_whisper_each_segment(VALUE self)
|
||||
static VALUE
|
||||
ruby_whisper_get_model(VALUE self)
|
||||
{
|
||||
return rb_whisper_model_initialize(self);
|
||||
return rb_whisper_model_s_new(self);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -35,7 +35,7 @@ static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
}
|
||||
|
||||
VALUE rb_whisper_model_initialize(VALUE context) {
|
||||
VALUE rb_whisper_model_s_new(VALUE context) {
|
||||
ruby_whisper_model *rwm;
|
||||
const VALUE model = ruby_whisper_model_allocate(cModel);
|
||||
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
|
||||
|
@ -34,7 +34,7 @@ extern VALUE cVADParams;
|
||||
extern ID id_call;
|
||||
|
||||
extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
|
||||
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
|
||||
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
|
||||
extern const rb_data_type_t ruby_whisper_vad_params_type;
|
||||
|
||||
static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
|
||||
@ -110,7 +110,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta
|
||||
const int n_segments = whisper_full_n_segments_from_state(state);
|
||||
for (int i = n_new; i > 0; i--) {
|
||||
int i_segment = n_segments - i;
|
||||
VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment);
|
||||
VALUE segment = rb_whisper_segment_s_new(*container->context, i_segment);
|
||||
for (int j = 0; j < callbacks_len; j++) {
|
||||
VALUE cb = rb_ary_entry(container->callbacks, j);
|
||||
rb_funcall(cb, id_call, 1, segment);
|
||||
|
@ -38,7 +38,7 @@ ruby_whisper_segment_allocate(VALUE klass)
|
||||
}
|
||||
|
||||
VALUE
|
||||
rb_whisper_segment_initialize(VALUE context, int index)
|
||||
rb_whisper_segment_s_new(VALUE context, int index)
|
||||
{
|
||||
ruby_whisper_segment *rws;
|
||||
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
||||
@ -63,7 +63,7 @@ ruby_whisper_segment_get_start_time(VALUE self)
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||
return INT2NUM(t0 * 10);
|
||||
return LONG2NUM(t0 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -81,7 +81,7 @@ ruby_whisper_segment_get_end_time(VALUE self)
|
||||
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
|
||||
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
||||
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||
return INT2NUM(t1 * 10);
|
||||
return LONG2NUM(t1 * 10);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -130,6 +130,44 @@ module Whisper
|
||||
end
|
||||
end
|
||||
|
||||
class ZipURI < URI
|
||||
def cache
|
||||
zip_path = Pathname(super)
|
||||
dest = unzipped_path
|
||||
return if dest.exist? && dest.mtime >= zip_path.mtime
|
||||
escaping dest do
|
||||
system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
|
||||
end
|
||||
zip_path.to_path
|
||||
end
|
||||
|
||||
def clear_cache
|
||||
super
|
||||
unzipped_path.rmtree if unzipped_path.exist?
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def unzipped_path
|
||||
cache_path.sub_ext("")
|
||||
end
|
||||
|
||||
def escaping(path)
|
||||
escaped = Pathname("#{path}.removing")
|
||||
if path.exist?
|
||||
escaped.rmtree if escaped.exist?
|
||||
path.rename escaped
|
||||
end
|
||||
yield
|
||||
ensure
|
||||
if path.exist?
|
||||
escaped.rmtree if escaped.exist?
|
||||
else
|
||||
escaped.rename path if escaped.exist?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@pre_converted_models = %w[
|
||||
tiny
|
||||
tiny.en
|
||||
@ -171,8 +209,25 @@ module Whisper
|
||||
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
|
||||
end
|
||||
|
||||
@coreml_compiled_models = %w[
|
||||
tiny
|
||||
tiny.en
|
||||
base
|
||||
base.en
|
||||
small
|
||||
small.en
|
||||
medium
|
||||
medium.en
|
||||
large-v1
|
||||
large-v2
|
||||
large-v3
|
||||
large-v3-turbo
|
||||
].each_with_object({}) do |name, models|
|
||||
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
|
||||
end
|
||||
|
||||
class << self
|
||||
attr_reader :pre_converted_models
|
||||
attr_reader :pre_converted_models, :coreml_compiled_models
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -22,6 +22,7 @@ module Whisper
|
||||
def self.lang_str: (Integer id) -> String
|
||||
def self.lang_str_full: (Integer id) -> String
|
||||
def self.log_set: (log_callback, Object? user_data) -> log_callback
|
||||
def self.system_info_str: () -> String
|
||||
|
||||
class Context
|
||||
def self.new: (path | ::URI::HTTP) -> instance
|
||||
@ -386,6 +387,7 @@ module Whisper
|
||||
|
||||
class Model
|
||||
def self.pre_converted_models: () -> Hash[String, Model::URI]
|
||||
def self.coreml_compiled_models: () -> Hash[Model::URI, Model::ZipURI]
|
||||
def self.new: () -> instance
|
||||
def n_vocab: () -> Integer
|
||||
def n_audio_ctx: () -> Integer
|
||||
@ -405,6 +407,11 @@ module Whisper
|
||||
def to_path: -> String
|
||||
def clear_cache: -> void
|
||||
end
|
||||
|
||||
class ZipURI < URI
|
||||
def cache: () -> String
|
||||
def clear_cache: () -> void
|
||||
end
|
||||
end
|
||||
|
||||
class Segment
|
||||
|
@ -106,4 +106,13 @@ class TestModel < TestBase
|
||||
assert_equal 1, model.ftype
|
||||
assert_equal "base", model.type
|
||||
end
|
||||
|
||||
def test_coreml_model_auto_download
|
||||
uri = Whisper::Model.coreml_compiled_models[Whisper::Model.pre_converted_models["tiny"]]
|
||||
model_path = Pathname(uri.to_path).sub_ext("")
|
||||
model_path.rmtree if model_path.exist?
|
||||
|
||||
uri.cache
|
||||
assert_path_exist model_path
|
||||
end
|
||||
end
|
||||
|
@ -25,6 +25,20 @@ class TestPackage < TestBase
|
||||
end
|
||||
end
|
||||
|
||||
def test_install_with_coreml
|
||||
omit_unless RUBY_PLATFORM.match?(/darwin/) do
|
||||
gemspec = Gem::Specification.load("whispercpp.gemspec")
|
||||
Dir.mktmpdir do |dir|
|
||||
system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{gemspec.file_name.shellescape}", "--", "--enable-whisper-coreml", exception: true
|
||||
assert_installed dir, gemspec.version
|
||||
assert_nothing_raised do
|
||||
libdir = File.join(dir, "gems", "#{gemspec.name}-#{gemspec.version}", "lib")
|
||||
system "ruby", "-I", libdir, "-r", "whisper", "-e", "Whisper::Context.new('tiny')", exception: true
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def assert_installed(dir, version)
|
||||
|
@ -94,6 +94,10 @@ class TestWhisper < TestBase
|
||||
end
|
||||
end
|
||||
|
||||
def test_system_info_str
|
||||
assert_match /\AWHISPER : COREML = \d | OPENVINO = \d |/, Whisper.system_info_str
|
||||
end
|
||||
|
||||
def test_log_set
|
||||
user_data = Object.new
|
||||
logs = []
|
||||
|
@ -4,7 +4,7 @@ Gem::Specification.new do |s|
|
||||
s.name = "whispercpp"
|
||||
s.authors = ["Georgi Gerganov", "Todd A. Fisher"]
|
||||
s.version = '1.3.3'
|
||||
s.date = '2025-05-29'
|
||||
s.date = '2025-06-01'
|
||||
s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby}
|
||||
s.email = 'todd.fisher@gmail.com'
|
||||
s.extra_rdoc_files = ['LICENSE', 'README.md']
|
||||
|
Loading…
x
Reference in New Issue
Block a user