ruby : extend API (#2551)

* Handle objs in Ruby code

* Add task to make Makefile

* Share common constants in test suites

* Add model-related APIs

* Add Whisper::Model class

* Add tests for Whisper::Model

* Add missing LDFLAG -lstdc++

* Add tests for Whisper.log_set

* Add Whisper.set_log

* Define log level

* Add document on logging

* Add license section to README

* Add document on Whisper::Model

* Fix examples in README

* Add test for Model with GC

* Make dependency on Makefile more accurate

* Fix bug about Whisper::Model and GC
This commit is contained in:
KITAITI Makoto
2024-11-14 04:52:56 +09:00
committed by GitHub
parent 5f8a086e22
commit f19463ece2
10 changed files with 564 additions and 93 deletions

View File

@ -0,0 +1,7 @@
require "test/unit"
require "whisper"
# Shared parent class for the test suites; it only carries the fixture
# paths that several test files need.
class TestBase < Test::Unit::TestCase
  # Model file path, built relative to this file's directory
  # (three levels up, then models/ggml-base.en.bin).
  MODEL = File.join(__dir__, *%w[.. .. .. models ggml-base.en.bin])

  # Sample audio path, built relative to this file's directory
  # (three levels up, then samples/jfk.wav).
  AUDIO = File.join(__dir__, *%w[.. .. .. samples jfk.wav])
end

View File

@ -0,0 +1,44 @@
require_relative "helper"
# Tests for Whisper::Model as returned by Whisper::Context#model.
class TestModel < TestBase
  # Values each Whisper::Model reader is expected to return for the model
  # file at MODEL, keyed by reader name. Kept in one place so that
  # test_attributes and test_gc cannot drift apart.
  EXPECTED_ATTRIBUTES = {
    n_vocab: 51864,
    n_audio_ctx: 1500,
    n_audio_state: 512,
    n_audio_head: 8,
    n_audio_layer: 6,
    n_text_ctx: 448,
    n_text_state: 512,
    n_text_head: 8,
    n_text_layer: 6,
    n_mels: 80,
    ftype: 1,
    type: "base"
  }.freeze

  # Context#model must hand back a Whisper::Model instance.
  def test_model
    whisper = Whisper::Context.new(MODEL)
    assert_instance_of Whisper::Model, whisper.model
  end

  # Every reader returns the expected value while the Context is still
  # referenced by a local variable.
  def test_attributes
    whisper = Whisper::Context.new(MODEL)
    model = whisper.model
    assert_expected_attributes model
  end

  # Same checks, but the Context is never stored in a variable and a GC run
  # is forced before reading, so the model must remain usable afterwards.
  # NOTE(review): presumably guards against the model being invalidated
  # when its Context is collected; confirm against the extension code.
  def test_gc
    model = Whisper::Context.new(MODEL).model
    GC.start
    assert_expected_attributes model
  end

  private

  # Asserts every entry of EXPECTED_ATTRIBUTES against +model+, in table
  # order. The reader name is passed as the failure message so a failing
  # attribute is identifiable even though all checks share one line.
  def assert_expected_attributes(model)
    EXPECTED_ATTRIBUTES.each do |reader, expected|
      assert_equal expected, model.public_send(reader), "Whisper::Model##{reader}"
    end
  end
end

View File

@ -1,9 +1,9 @@
require 'test/unit'
require_relative "helper"
require 'tempfile'
require 'tmpdir'
require 'shellwords'
class TestPackage < Test::Unit::TestCase
class TestPackage < TestBase
def test_build
Tempfile.create do |file|
assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path.shellescape, exception: true)

View File

@ -1,7 +1,6 @@
require 'test/unit'
require 'whisper'
require_relative "helper"
class TestParams < Test::Unit::TestCase
class TestParams < TestBase
# test-unit fixture hook: stores a newly constructed Whisper::Params in
# @params for the tests in this class to use.
def setup
@params = Whisper::Params.new
end

View File

@ -1,18 +1,14 @@
require "test/unit"
require "whisper"
class TestSegment < Test::Unit::TestCase
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
require_relative "helper"
class TestSegment < TestBase
class << self
attr_reader :whisper
def startup
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
@whisper = Whisper::Context.new(TestBase::MODEL)
params = Whisper::Params.new
params.print_timestamps = false
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
@whisper.transcribe(jfk, params)
@whisper.transcribe(TestBase::AUDIO, params)
end
end
@ -60,7 +56,7 @@ class TestSegment < Test::Unit::TestCase
end
index += 1
end
whisper.transcribe(File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav'), params)
whisper.transcribe(AUDIO, params)
assert_equal 0, seg.start_time
assert_match /ask not what your country can do for you, ask what you can do for your country/, seg.text
end
@ -76,7 +72,7 @@ class TestSegment < Test::Unit::TestCase
assert_same seg, segment
return
end
whisper.transcribe(File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav'), params)
whisper.transcribe(AUDIO, params)
end
private

View File

@ -1,20 +1,20 @@
require 'whisper'
require 'test/unit'
require_relative "helper"
require "stringio"
class TestWhisper < Test::Unit::TestCase
TOPDIR = File.expand_path(File.join(File.dirname(__FILE__), '..'))
# Exists to detect memory-related bug
Whisper.log_set ->(level, buffer, user_data) {}, nil
class TestWhisper < TestBase
# test-unit fixture hook: stores a newly constructed Whisper::Params in
# @params for the tests in this class to use.
def setup
@params = Whisper::Params.new
end
def test_whisper
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
@whisper = Whisper::Context.new(MODEL)
params = Whisper::Params.new
params.print_timestamps = false
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
@whisper.transcribe(jfk, params) {|text|
@whisper.transcribe(AUDIO, params) {|text|
assert_match /ask not what your country can do for you, ask what you can do for your country/, text
}
end
@ -24,11 +24,10 @@ class TestWhisper < Test::Unit::TestCase
attr_reader :whisper
def startup
@whisper = Whisper::Context.new(File.join(TOPDIR, '..', '..', 'models', 'ggml-base.en.bin'))
@whisper = Whisper::Context.new(TestBase::MODEL)
params = Whisper::Params.new
params.print_timestamps = false
jfk = File.join(TOPDIR, '..', '..', 'samples', 'jfk.wav')
@whisper.transcribe(jfk, params)
@whisper.transcribe(TestBase::AUDIO, params)
end
end
@ -96,4 +95,33 @@ class TestWhisper < Test::Unit::TestCase
Whisper.lang_str_full(Whisper.lang_max_id + 1)
end
end
# Registers a recording callback via Whisper.log_set, loads a Context, and
# checks that more than 30 records arrived, each at LOG_LEVEL_INFO and each
# carrying the very same user-data object that was registered.
def test_log_set
  user_data = Object.new
  received = []
  recorder = lambda do |level, buffer, udata|
    received.push([level, buffer, udata])
  end
  Whisper.log_set(recorder, user_data)
  Whisper::Context.new(MODEL)

  assert received.size > 30
  received.each do |level, _buffer, udata|
    assert_equal Whisper::LOG_LEVEL_INFO, level
    assert_same user_data, udata
  end
end
# With a no-op log callback installed, loading a Context must leave nothing
# in the StringIO that temporarily stands in for $stderr.
# NOTE(review): this only observes writes that go through Ruby's $stderr;
# confirm whether output written by native code is meant to be covered.
def test_log_suppress
  saved_stderr = $stderr
  silent = ->(level, buffer, user_data) {
    # do nothing
  }
  Whisper.log_set(silent, nil)
  captured = StringIO.new("")
  $stderr = captured
  Whisper::Context.new(MODEL)
  assert_empty captured.string
ensure
  # Always put the real stream back, even when an assertion fails.
  $stderr = saved_stderr
end
end