diff --git a/bindings/ruby/README.md b/bindings/ruby/README.md index 119940ad..7b1a7f29 100644 --- a/bindings/ruby/README.md +++ b/bindings/ruby/README.md @@ -16,6 +16,18 @@ If bundler is not being used to manage dependencies, install the gem by executin $ gem install whispercpp +You can pass build options for whisper.cpp, for instance: + + $ bundle config build.whispercpp --enable-ggml-cuda + +or, + + $ gem install whispercpp -- --enable-ggml-cuda + +See whisper.cpp's [README](https://github.com/ggml-org/whisper.cpp/blob/master/README.md) for available options. You need to convert the options presented in the README to Ruby-style options. +For boolean options like `GGML_CUDA`, the README says `-DGGML_CUDA=1`. You need to strip `-D`, prepend `--enable-` for `1` or `ON` (`--disable-` for `0` or `OFF`) and make it kebab-case: `--enable-ggml-cuda`. +For options which require arguments like `CMAKE_CUDA_ARCHITECTURES`, the README says `-DCMAKE_CUDA_ARCHITECTURES="86"`. You need to strip `-D`, prepend `--`, make it kebab-case, append `=` and append the argument: `--cmake-cuda-architectures="86"`. 
+ Usage ----- diff --git a/bindings/ruby/ext/cpu.mk b/bindings/ruby/ext/cpu.mk deleted file mode 100644 index 135d270b..00000000 --- a/bindings/ruby/ext/cpu.mk +++ /dev/null @@ -1,13 +0,0 @@ -ggml/src/ggml-cpu/ggml-cpu-cpp.o: \ - ggml/src/ggml-cpu/ggml-cpu.cpp \ - ggml/src/ggml-cpu/unary-ops.cpp \ - ggml/src/ggml-cpu/binary-ops.cpp \ - ggml/src/ggml-cpu/vec.cpp \ - ggml/src/ggml-cpu/ops.cpp \ - ggml/include/ggml-backend.h \ - ggml/include/ggml.h \ - ggml/include/ggml-alloc.h \ - ggml/src/ggml-backend-impl.h \ - ggml/include/ggml-cpu.h \ - ggml/src/ggml-impl.h - $(CXX) $(CXXFLAGS) -c $< -o $@ diff --git a/bindings/ruby/ext/dependencies.rb b/bindings/ruby/ext/dependencies.rb new file mode 100644 index 00000000..9beb128c --- /dev/null +++ b/bindings/ruby/ext/dependencies.rb @@ -0,0 +1,61 @@ +require "tsort" + +class Dependencies + def initialize(cmake, options) + @cmake = cmake + @options = options + + generate_dot + @libs = parse_dot + end + + def to_s + @libs.join(" ") + end + + private + + def dot_path + File.join(__dir__, "build", "whisper.cpp.dot") + end + + def generate_dot + system @cmake, "-S", "sources", "-B", "build", "--graphviz", dot_path, "-D", "BUILD_SHARED_LIBS=OFF", @options.to_s, exception: true + end + + def parse_dot + static_lib_shape = nil + nodes = {} + depends = Hash.new {|h, k| h[k] = []} + + class << depends + include TSort + alias tsort_each_node each_key + def tsort_each_child(node, &block) + fetch(node, []).each(&block) + end + end + + File.open(dot_path).each_line do |line| + case line + when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/ + static_lib_shape = $~[:shape] + when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/ + node = $~[:node] + label = $~[:label] + shape = $~[:shape] + nodes[node] = [label, shape] + when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/ + depender = $~[:depender] + dependee = $~[:dependee] + depends[depender] ||= [] + 
depends[depender] << dependee + end + end + depends.tsort.filter_map {|node| + label, shape = nodes[node] + shape == static_lib_shape ? label : nil + }.collect {|lib| "lib#{lib}.a"} + .reverse + end +end diff --git a/bindings/ruby/ext/extconf.rb b/bindings/ruby/ext/extconf.rb index f8e44799..53e2e185 100644 --- a/bindings/ruby/ext/extconf.rb +++ b/bindings/ruby/ext/extconf.rb @@ -1,50 +1,12 @@ require "mkmf" -require "tsort" - -# TODO: options such as CoreML +require_relative "options" +require_relative "dependencies" cmake = find_executable("cmake") || abort - +options = Options.new have_library("gomp") rescue nil +libs = Dependencies.new(cmake, options) -prefix = File.join("build", "whisper.cpp.dot") -system cmake, "-S", "sources", "-B", "build", "--graphviz", prefix, "-D", "BUILD_SHARED_LIBS=OFF", exception: true - -static_lib_shape = nil -nodes = {} -depends = {} -class << depends - include TSort - alias tsort_each_node each_key - def tsort_each_child(node, &block) - fetch(node, []).each(&block) - end -end -File.open(File.join("build", "whisper.cpp.dot")).each_line do |line| - case line - when /\[\s*label\s*=\s*"Static Library"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]/ - static_lib_shape = $~[:shape] - when /\A\s*"(?<node>\w+)"\s*\[\s*label\s*=\s*"(?<label>\S+)"\s*,\s*shape\s*=\s*(?<shape>\w+)\s*\]\s*;\s*\z/ - node = $~[:node] - label = $~[:label] - shape = $~[:shape] - nodes[node] = [label, shape] - when /\A\s*"(?<depender>\w+)"\s*->\s*"(?<dependee>\w+)"/ - depender = $~[:depender] - dependee = $~[:dependee] - depends[depender] ||= [] - depends[depender] << dependee - end -end -libs = depends.tsort.filter_map {|node| - label, shape = nodes[node] - shape == static_lib_shape ? 
label : nil -}.collect {|lib| "lib#{lib}.a"} - .reverse - .join(" ") - -$CFLAGS << " -std=c11 -fPIC" -$CXXFLAGS << " -std=c++17 -O3 -DNDEBUG" $INCFLAGS << " -Isources/include -Isources/ggml/include -Isources/examples" $LOCAL_LIBS << " #{libs}" $cleanfiles << " build #{libs}" @@ -54,8 +16,7 @@ create_makefile "whisper" do |conf| $(TARGET_SO): #{libs} #{libs}: cmake-targets cmake-targets: - #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON + #{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options} #{"\t"}#{cmake} --build build --config Release --target common whisper - #{"\t"} EOF end diff --git a/bindings/ruby/ext/metal-embed.mk b/bindings/ruby/ext/metal-embed.mk deleted file mode 100644 index cad86f87..00000000 --- a/bindings/ruby/ext/metal-embed.mk +++ /dev/null @@ -1,17 +0,0 @@ -ggml/src/ggml-metal/ggml-metal-embed.o: \ - ggml/src/ggml-metal/ggml-metal.metal \ - ggml/src/ggml-metal/ggml-metal-impl.h \ - ggml/src/ggml-common.h - @echo "Embedding Metal library" - @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp - @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal - $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) - @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".globl _ggml_metallib_end" >> 
$(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ - @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s - @rmdir ${TEMP_ASSEMBLY} diff --git a/bindings/ruby/ext/metal.mk b/bindings/ruby/ext/metal.mk deleted file mode 100644 index 2b53cdef..00000000 --- a/bindings/ruby/ext/metal.mk +++ /dev/null @@ -1,6 +0,0 @@ -ggml/src/ggml-metal/ggml-metal.o: \ - ggml/src/ggml-metal/ggml-metal.m \ - ggml/src/ggml-metal/ggml-metal-impl.h \ - ggml/include/ggml-metal.h \ - ggml/include/ggml.h - $(CC) $(CFLAGS) -c $< -o $@ diff --git a/bindings/ruby/ext/options.rb b/bindings/ruby/ext/options.rb new file mode 100644 index 00000000..be63de10 --- /dev/null +++ b/bindings/ruby/ext/options.rb @@ -0,0 +1,219 @@ +class Options + def initialize + @options = {} + @pending_options = [] + @ignored_options = [] + + configure + end + + def help + @options + .collect_concat {|name, (type, value)| + option = option_name(name) + if type == :bool + ["--enable-#{option}", "--disable-#{option}"] + else + "--#{option}=#{type.upcase}" + end + } + .join($/) + end + + def to_s + @options + .reject {|name, (type, value)| value.nil?} + .collect {|name, (type, value)| "-D #{name}=#{value == true ? "ON" : value == false ? 
"OFF" : value.shellescape}"} + .join(" ") + end + + def cmake_options + return @cmake_options if @cmake_options + + output = nil + Dir.chdir __dir__ do + output = `cmake -S sources -B build -L` + end + started = false + @cmake_options = output.lines.filter_map {|line| + if line.chomp == "-- Cache values" + started = true + next + end + next unless started + option, value = line.chomp.split("=", 2) + name, type = option.split(":", 2) + [name, type, value] + } + end + + def missing_options + cmake_options.collect {|name, type, value| name} - + @options.keys - @pending_options - @ignored_options + end + + def extra_options + @options.keys + @pending_options - @ignored_options - + cmake_options.collect {|name, type, value| name} + end + + private + + def configure + filepath "ACCELERATE_FRAMEWORK" + ignored "BUILD_SHARED_LIBS" + ignored "BUILD_TESTING" + ignored "CMAKE_BUILD_TYPE" + ignored "CMAKE_INSTALL_PREFIX" + string "CMAKE_OSX_ARCHITECTURES" + ignored "CMAKE_OSX_DEPLOYMENT_TARGET" + string "CMAKE_OSX_SYSROOT" + filepath "FOUNDATION_LIBRARY" + bool "GGML_ACCELERATE" + bool "GGML_ALL_WARNINGS_3RD_PARTY" + bool "GGML_AMX_BF16" + bool "GGML_AMX_INT8" + bool "GGML_AMX_TILE" + bool "GGML_AVX" + bool "GGML_AVX2" + bool "GGML_AVX512" + bool "GGML_AVX512_BF16" + bool "GGML_AVX512_VBMI" + bool "GGML_AVX512_VNNI" + bool "GGML_AVX_VNNI" + ignored "GGML_BACKEND_DL" + ignored "GGML_BIN_INSTALL_DIR" + bool "GGML_BLAS" + string "GGML_BLAS_VENDOR" + bool "GGML_BMI2" + ignored "GGML_BUILD_EXAMPLES" + ignored "GGML_BUILD_TESTS" + filepath "GGML_CCACHE_FOUND" + bool "GGML_CPU" + bool "GGML_CPU_AARCH64" + ignored "GGML_CPU_ALL_VARIANTS" + string "GGML_CPU_ARM_ARCH" + bool "GGML_CPU_HBM" + bool "GGML_CPU_KLEIDIAI" + string "GGML_CPU_POWERPC_CPUTYPE" + bool "GGML_CUDA" + string "GGML_CUDA_COMPRESSION_MODE" + bool "GGML_CUDA_F16" + bool "GGML_CUDA_FA" + bool "GGML_CUDA_FA_ALL_QUANTS" + bool "GGML_CUDA_FORCE_CUBLAS" + bool "GGML_CUDA_FORCE_MMQ" + ignored "GGML_CUDA_GRAPHS" + bool 
"GGML_CUDA_NO_PEER_COPY" + bool "GGML_CUDA_NO_VMM" + string "GGML_CUDA_PEER_MAX_BATCH_SIZE" + bool "GGML_F16C" + bool "GGML_FMA" + bool "GGML_GPROF" + bool "GGML_HIP" + bool "GGML_HIP_GRAPHS" + bool "GGML_HIP_NO_VMM" + bool "GGML_HIP_ROCWMMA_FATTN" + bool "GGML_HIP_UMA" + ignored "GGML_INCLUDE_INSTALL_DIR" + bool "GGML_KOMPUTE" + bool "GGML_LASX" + ignored "GGML_LIB_INSTALL_DIR" + ignored "GGML_LLAMAFILE" + bool "GGML_LSX" + bool "GGML_LTO" + bool "GGML_METAL" + bool "GGML_METAL_EMBED_LIBRARY" + string "GGML_METAL_MACOSX_VERSION_MIN" + bool "GGML_METAL_NDEBUG" + bool "GGML_METAL_SHADER_DEBUG" + string "GGML_METAL_STD" + bool "GGML_METAL_USE_BF16" + bool "GGML_MUSA" + bool "GGML_NATIVE" + bool "GGML_OPENCL" + bool "GGML_OPENCL_EMBED_KERNELS" + bool "GGML_OPENCL_PROFILING" + string "GGML_OPENCL_TARGET_VERSION" + bool "GGML_OPENCL_USE_ADRENO_KERNELS" + bool "GGML_OPENMP" + bool "GGML_RPC" + bool "GGML_RVV" + bool "GGML_RV_ZFH" + pending "GGML_SCCACHE_FOUND" + string "GGML_SCHED_MAX_COPIES" + ignored "GGML_STATIC" + bool "GGML_SYCL" + string "GGML_SYCL_DEVICE_ARCH" + bool "GGML_SYCL_F16" + bool "GGML_SYCL_GRAPH" + string "GGML_SYCL_TARGET" + bool "GGML_VULKAN" + bool "GGML_VULKAN_CHECK_RESULTS" + bool "GGML_VULKAN_DEBUG" + bool "GGML_VULKAN_MEMORY_DEBUG" + bool "GGML_VULKAN_PERF" + ignored "GGML_VULKAN_RUN_TESTS" + filepath "GGML_VULKAN_SHADERS_GEN_TOOLCHAIN" + bool "GGML_VULKAN_SHADER_DEBUG_INFO" + pending "GGML_VULKAN_VALIDATE" + bool "GGML_VXE" + filepath "GIT_EXE" + filepath "MATH_LIBRARY" + filepath "METALKIT_FRAMEWORK" + filepath "METAL_FRAMEWORK" + bool "WHISPER_ALL_WARNINGS" + bool "WHISPER_ALL_WARNINGS_3RD_PARTY" + ignored "WHISPER_BIN_INSTALL_DIR" + ignored "WHISPER_BUILD_EXAMPLES" + ignored "WHISPER_BUILD_SERVER" + ignored"WHISPER_BUILD_TESTS" + bool "WHISPER_CCACHE" + bool "WHISPER_COREML" + bool "WHISPER_COREML_ALLOW_FALLBACK" + ignored "WHISPER_CURL" + bool "WHISPER_FATAL_WARNINGS" + ignored "WHISPER_FFMPEG" + ignored "WHISPER_INCLUDE_INSTALL_DIR" + 
ignored "WHISPER_LIB_INSTALL_DIR" + bool "WHISPER_OPENVINO" + bool "WHISPER_SANITIZE_ADDRESS" + bool "WHISPER_SANITIZE_THREAD" + bool "WHISPER_SANITIZE_UNDEFINED" + ignored "WHISPER_SDL2" + pending "WHISPER_USE_SYSTEM_GGML" + end + + def option_name(name) + name.downcase.gsub("_", "-") + end + + def bool(name) + option = option_name(name) + value = enable_config(option) + @options[name] = [:bool, value] + end + + def string(name, type=:string) + option = "--#{option_name(name)}" + value = arg_config(option) + raise "String expected for #{option}" if value == true || value&.empty? + @options[name] = [type, value] + end + + def path(name) + string(name, :path) + end + + def filepath(name) + string(name, :filepath) + end + + def pending(name) + @pending_options << name + end + + def ignored(name) + @ignored_options << name + end +end diff --git a/bindings/ruby/sig/whisper.rbs b/bindings/ruby/sig/whisper.rbs index 85d941cb..0f3d74e0 100644 --- a/bindings/ruby/sig/whisper.rbs +++ b/bindings/ruby/sig/whisper.rbs @@ -23,9 +23,20 @@ module Whisper def self.log_set: (log_callback, Object? 
user_data) -> log_callback class Context - def self.new: (string | _ToPath | ::URI::HTTP) -> instance + def self.new: (path | ::URI::HTTP) -> instance + + # transcribe a single file + # can emit to a block results + # + # params = Whisper::Params.new + # params.duration = 60_000 + # whisper.transcribe "path/to/audio.wav", params do |text| + # puts text + # end + # def transcribe: (string, Params) -> self | (string, Params) { (String) -> void } -> self + def model_n_vocab: () -> Integer def model_n_audio_ctx: () -> Integer def model_n_audio_state: () -> Integer @@ -34,19 +45,72 @@ module Whisper def model_n_mels: () -> Integer def model_ftype: () -> Integer def model_type: () -> String + + # Yields each Whisper::Segment: + # + # whisper.transcribe("path/to/audio.wav", params) + # whisper.each_segment do |segment| + # puts segment.text + # end + # + # Returns an Enumerator if no block given: + # + # whisper.transcribe("path/to/audio.wav", params) + # enum = whisper.each_segment + # enum.to_a # => [#<Whisper::Segment>, ...] + # def each_segment: { (Segment) -> void } -> void | () -> Enumerator[Segment] + def model: () -> Model def full_get_segment: (Integer nth) -> Segment def full_n_segments: () -> Integer + + # Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full. + # def full_lang_id: () -> Integer + + # Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). + # + # full_get_segment_t0(3) # => 1668 (16680 ms) + # def full_get_segment_t0: (Integer) -> Integer + + # End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds). + # + # full_get_segment_t1(3) # => 1668 (16680 ms) + # def full_get_segment_t1: (Integer) -> Integer + + # Whether the next segment indexed by +segment_index+ is predicated as a speaker turn. 
+ # + # full_get_segment_speaker_turn_next(3) # => true + # def full_get_segment_speaker_turn_next: (Integer) -> (true | false) + + # Text of a segment indexed by +segment_index+. + # + # full_get_segment_text(3) # => "ask not what your country can do for you, ..." + # def full_get_segment_text: (Integer) -> String + def full_get_segment_no_speech_prob: (Integer) -> Float + + # Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text + # Not thread safe for same context + # Uses the specified decoding strategy to obtain the text. + # + # The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data. + # def full: (Params, Array[Float] samples, ?Integer n_samples) -> self | (Params, _Samples, ?Integer n_samples) -> self + + # Split the input audio in chunks and process each chunk separately using whisper_full_with_state() + # Result is stored in the default state of the context + # Not thread safe if executed in parallel on the same context. + # It seems this approach can offer some speedup in some cases. + # However, the transcription accuracy can be worse at the beginning and end of each chunk. + # def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self | (Params, _Samples, ?Integer n_samples) -> self | (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self @@ -85,68 +149,202 @@ module Whisper ?abort_callback: abort_callback, ?abort_callback_user_data: Object ) -> instance + + # params.language = "auto" | "en", etc... + # def language=: (String) -> String # TODO: Enumerate lang names + def language: () -> String def translate=: (boolish) -> boolish def translate: () -> (true | false) def no_context=: (boolish) -> boolish + + # If true, does not use past transcription (if any) as initial prompt for the decoder. 
+ # def no_context: () -> (true | false) + def single_segment=: (boolish) -> boolish + + # If true, forces single segment output (useful for streaming). + # def single_segment: () -> (true | false) + def print_special=: (boolish) -> boolish + + # If true, prints special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.). + # def print_special: () -> (true | false) + def print_progress=: (boolish) -> boolish + + # If true, prints progress information. + # def print_progress: () -> (true | false) + def print_realtime=: (boolish) -> boolish + + # If true, prints results from within whisper.cpp. (avoid it, use callback instead) + # def print_realtime: () -> (true | false) + + # If true, prints timestamps for each text segment when printing realtime. + # def print_timestamps=: (boolish) -> boolish + def print_timestamps: () -> (true | false) + def suppress_blank=: (boolish) -> boolish + + # If true, suppresses blank outputs. + # def suppress_blank: () -> (true | false) + def suppress_nst=: (boolish) -> boolish + + # If true, suppresses non-speech-tokens. + # def suppress_nst: () -> (true | false) + def token_timestamps=: (boolish) -> boolish + + # If true, enables token-level timestamps. + # def token_timestamps: () -> (true | false) + def split_on_word=: (boolish) -> boolish + + # If true, split on word rather than on token (when used with max_len). + # def split_on_word: () -> (true | false) + def initial_prompt=: (_ToS) -> _ToS + + # Tokens to provide to the whisper decoder as initial prompt + # these are prepended to any existing text context from a previous call + # use whisper_tokenize() to convert text to tokens. + # Maximum of whisper_n_text_ctx()/2 tokens are used (typically 224). + # def initial_prompt: () -> (String | nil) + def diarize=: (boolish) -> boolish + + # If true, enables diarization. + # def diarize: () -> (true | false) + def offset=: (Integer) -> Integer + + # Start offset in ms. 
+ # def offset: () -> Integer + def duration=: (Integer) -> Integer + + # Audio duration to process in ms. + # def duration: () -> Integer + def max_text_tokens=: (Integer) -> Integer + + # Max tokens to use from past text as prompt for the decoder. + # def max_text_tokens: () -> Integer + def temperature=: (Float) -> Float def temperature: () -> Float def max_initial_ts=: (Float) -> Float + + # See https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97 + # def max_initial_ts: () -> Float + def length_penalty=: (Float) -> Float def length_penalty: () -> Float def temperature_inc=: (Float) -> Float def temperature_inc: () -> Float def entropy_thold=: (Float) -> Float + + # Similar to OpenAI's "compression_ratio_threshold" + # def entropy_thold: () -> Float + def logprob_thold=: (Float) -> Float def logprob_thold: () -> Float def no_speech_thold=: (Float) -> Float def no_speech_thold: () -> Float + + # Sets new segment callback, called for every newly generated text segment. + # + # params.new_segment_callback = ->(context, _, n_new, user_data) { + # # ... + # } + # def new_segment_callback=: (new_segment_callback) -> new_segment_callback def new_segment_callback: () -> (new_segment_callback | nil) + + # Sets user data passed to the last argument of new segment callback. + # def new_segment_callback_user_data=: (Object) -> Object + def new_segment_callback_user_data: () -> Object + + # Sets progress callback, called on each progress update. + # + # params.progress_callback = ->(context, _, progress, user_data) { + # # ... + # } + # + # +progress+ is an Integer between 0 and 100. + # def progress_callback=: (progress_callback) -> progress_callback + def progress_callback: () -> (progress_callback | nil) + + # Sets user data passed to the last argument of progress callback. 
+ # def progress_callback_user_data=: (Object) -> Object + def progress_callback_user_data: () -> Object + + # Sets abort callback, called to check if the process should be aborted. + # + # params.abort_callback = ->(user_data) { + # # ... + # } + # + # def abort_callback=: (abort_callback) -> abort_callback + def abort_callback: () -> (abort_callback | nil) + + # Sets user data passed to the last argument of abort callback. + # def abort_callback_user_data=: (Object) -> Object + def abort_callback_user_data: () -> Object + + # Hook called on new segment. Yields each Whisper::Segment. + # + # whisper.on_new_segment do |segment| + # # ... + # end + # def on_new_segment: { (Segment) -> void } -> void + + # Hook called on progress update. Yields each progress Integer between 0 and 100. + # def on_progress: { (Integer progress) -> void } -> void + + # Call block to determine whether abort or not. Return +true+ when you want to abort. + # + # params.abort_on do + # if some_condition + # true # abort + # else + # false # continue + # end + # end + # def abort_on: { (Object user_data) -> boolish } -> void end @@ -167,16 +365,24 @@ module Whisper def type: () -> String class URI - def self.new: (string | ::URI::HTTP) -> self + def self.new: (string | ::URI::HTTP) -> instance def to_path: -> String def clear_cache: -> void end end class Segment + # Start time in milliseconds. + # def start_time: () -> Integer + + # End time in milliseconds. + # def end_time: () -> Integer + + # Whether the next segment is predicted as a speaker turn. 
def speaker_next_turn?: () -> (true | false) + def text: () -> String def no_speech_prob: () -> Float end diff --git a/bindings/ruby/tests/helper.rb b/bindings/ruby/tests/helper.rb index a182319d..a69a2b7e 100644 --- a/bindings/ruby/tests/helper.rb +++ b/bindings/ruby/tests/helper.rb @@ -21,4 +21,15 @@ class TestBase < Test::Unit::TestCase def whisper self.class.whisper end + + module BuildOptions + load "ext/options.rb", self + Options.include self + + def enable_config(name) + end + + def arg_config(name) + end + end end diff --git a/bindings/ruby/tests/test_package.rb b/bindings/ruby/tests/test_package.rb index c4a74b04..8ab2d703 100644 --- a/bindings/ruby/tests/test_package.rb +++ b/bindings/ruby/tests/test_package.rb @@ -21,13 +21,26 @@ class TestPackage < TestBase match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/) filename = match_data[1] version = match_data[2] - basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}" Dir.mktmpdir do |dir| system "gem", "install", "--install-dir", dir.shellescape, "--no-document", "pkg/#{filename.shellescape}", exception: true - assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename) - assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE") - assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build") + assert_installed dir, version end end + + private + + def assert_installed(dir, version) + assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", "whisper.#{RbConfig::CONFIG["DLEXT"]}") + assert_path_exist File.join(dir, "gems/whispercpp-#{version}/LICENSE") + assert_path_not_exist File.join(dir, "gems/whispercpp-#{version}/ext/build") + end + end + + def test_build_options + options = BuildOptions::Options.new + assert_empty options.missing_options + unless ENV["CI"] + assert_empty options.extra_options + end end end diff --git a/bindings/ruby/whispercpp.gemspec b/bindings/ruby/whispercpp.gemspec index da00c1ca..329e670b 100644 --- 
a/bindings/ruby/whispercpp.gemspec +++ b/bindings/ruby/whispercpp.gemspec @@ -3,8 +3,8 @@ require_relative "extsources" Gem::Specification.new do |s| s.name = "whispercpp" s.authors = ["Georgi Gerganov", "Todd A. Fisher"] - s.version = '1.3.1' - s.date = '2024-12-19' + s.version = '1.3.2' + s.date = '2025-04-17' s.description = %q{High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model via Ruby} s.email = 'todd.fisher@gmail.com' s.extra_rdoc_files = ['LICENSE', 'README.md']