node : add additional params (#2000)

* Add additional params to addon.node

* Add comma_in_time as parameter

* Fix tests
valVk, 2024-05-13 15:15:43 +03:00 (committed by GitHub)
parent 17fa62d3d3
commit 30f73109b8
3 changed files with 26 additions and 6 deletions
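
For context, this is how the new flags are passed in from Node; a minimal usage sketch patterned on the updated example further below (the require path for the compiled addon is an assumption that depends on your build layout):

```js
const path = require("path");
const { promisify } = require("util");

// Assumed location of the compiled addon; adjust to your build output.
const { whisper } = require(path.join(__dirname, "build/Release/addon.node"));
const whisperAsync = promisify(whisper);

whisperAsync({
  language: "en",
  model: path.join(__dirname, "models/ggml-base.en.bin"),
  fname_inp: path.join(__dirname, "samples/jfk.wav"),
  use_gpu: true,
  no_prints: true,      // new: silence whisper/ggml logging via a no-op log callback
  comma_in_time: false, // new: "00:00:07.740" instead of SRT-style "00:00:07,740"
  translate: true,
  no_timestamps: false,
}).then((result) => console.log(result));
```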

examples/addon.node/__test__/whisper.spec.js

@@ -12,6 +12,9 @@ const whisperParamsMock = {
   model: path.join(__dirname, "../../../models/ggml-base.en.bin"),
   fname_inp: path.join(__dirname, "../../../samples/jfk.wav"),
   use_gpu: true,
+  no_prints: true,
+  comma_in_time: false,
+  translate: true,
   no_timestamps: false,
 };
 

examples/addon.node/addon.cpp

@@ -36,7 +36,9 @@ struct whisper_params {
     bool print_colors = false;
     bool print_progress = false;
     bool no_timestamps = false;
+    bool no_prints = false;
     bool use_gpu = true;
+    bool comma_in_time = true;
 
     std::string language = "en";
     std::string prompt;
@@ -120,7 +122,14 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
     }
 }
 
+void cb_log_disable(enum ggml_log_level, const char *, void *) {}
+
 int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
+    if (params.no_prints) {
+        whisper_log_set(cb_log_disable, NULL);
+    }
+
     if (params.fname_inp.empty()) {
         fprintf(stderr, "error: no input files specified\n");
         return 2;
@@ -155,14 +164,14 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
     }
 
     // print system information
-    {
+    if (!params.no_prints) {
         fprintf(stderr, "\n");
         fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
                 params.n_threads*params.n_processors, std::thread::hardware_concurrency(), whisper_print_system_info());
     }
 
     // print some info about the processing
-    {
+    if (!params.no_prints) {
         fprintf(stderr, "\n");
         if (!whisper_is_multilingual(ctx)) {
             if (params.language != "en" || params.translate) {
@@ -248,8 +257,8 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
         const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
         const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
 
-        result[i].emplace_back(to_timestamp(t0, true));
-        result[i].emplace_back(to_timestamp(t1, true));
+        result[i].emplace_back(to_timestamp(t0, params.comma_in_time));
+        result[i].emplace_back(to_timestamp(t1, params.comma_in_time));
         result[i].emplace_back(text);
     }
 
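`to_timestamp` (not shown in this diff) takes whisper's 10 ms segment ticks and formats them as hours:minutes:seconds plus milliseconds, with the second argument selecting the SRT-style comma separator; a rough JavaScript rendering of that behavior, for illustration only:

```js
// Sketch of the addon's timestamp formatting: t is in 10 ms ticks,
// comma=true gives SRT-style "HH:MM:SS,mmm", comma=false gives "HH:MM:SS.mmm".
function toTimestamp(t, comma = false) {
  let msec = t * 10;
  const hr = Math.floor(msec / (1000 * 60 * 60));
  msec -= hr * 1000 * 60 * 60;
  const min = Math.floor(msec / (1000 * 60));
  msec -= min * 1000 * 60;
  const sec = Math.floor(msec / 1000);
  msec -= sec * 1000;
  const pad = (n, w) => String(n).padStart(w, "0");
  return `${pad(hr, 2)}:${pad(min, 2)}:${pad(sec, 2)}${comma ? "," : "."}${pad(msec, 3)}`;
}

console.log(toTimestamp(774, true));  // "00:00:07,740"
console.log(toTimestamp(774, false)); // "00:00:07.740"
```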
@@ -300,13 +309,17 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
     std::string model = whisper_params.Get("model").As<Napi::String>();
     std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
     bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();
+    bool no_prints = whisper_params.Get("no_prints").As<Napi::Boolean>();
     bool no_timestamps = whisper_params.Get("no_timestamps").As<Napi::Boolean>();
+    bool comma_in_time = whisper_params.Get("comma_in_time").As<Napi::Boolean>();
 
     params.language = language;
     params.model = model;
     params.fname_inp.emplace_back(input);
     params.use_gpu = use_gpu;
+    params.no_prints = no_prints;
     params.no_timestamps = no_timestamps;
+    params.comma_in_time = comma_in_time;
 
     Napi::Function callback = info[1].As<Napi::Function>();
     Worker* worker = new Worker(callback, params);
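
Note that the addon now reads `no_prints` and `comma_in_time` from the params object unconditionally, so callers that predate this change have to start passing them. Reusing `whisperAsync` from the first sketch, a defensive wrapper (hypothetical, not part of this commit) could supply defaults in one place:

```js
// Hypothetical defaults wrapper: older call sites that predate
// no_prints/comma_in_time keep working unchanged.
function withDefaults(params) {
  return {
    no_prints: false,
    comma_in_time: true, // mirrors the new C++ default in whisper_params
    ...params,
  };
}

// Example: a pre-existing call site that only knows the old keys.
whisperAsync(withDefaults({
  language: "en",
  model: "models/ggml-base.en.bin",
  fname_inp: "samples/jfk.wav",
  use_gpu: true,
  no_timestamps: false,
})).then(console.log);
```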

examples/addon.node/index.js

@@ -10,8 +10,11 @@ const whisperAsync = promisify(whisper);
 
 const whisperParams = {
   language: "en",
   model: path.join(__dirname, "../../models/ggml-base.en.bin"),
-  fname_inp: "../../samples/jfk.wav",
+  fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
   use_gpu: true,
+  no_prints: true,
+  comma_in_time: false,
+  translate: true,
   no_timestamps: false,
 };
@@ -34,5 +37,6 @@ for (const key in params) {
 console.log("whisperParams =", whisperParams);
 
 whisperAsync(whisperParams).then((result) => {
-  console.log(`Result from whisper: ${result}`);
+  console.log();
+  console.log(result);
 });
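
As the `run()` changes above show, each `result[i]` receives the segment start, end, and text in that order, so the promise resolves to an array of string triples. A small sketch that pretty-prints it (the helper name is hypothetical):

```js
// result is an array of [start, end, text] string triples, e.g.
// ["00:00:00.000", "00:00:07.740", " And so my fellow Americans..."].
function printSegments(result) {
  for (const [t0, t1, text] of result) {
    console.log(`[${t0} --> ${t1}]${text}`);
  }
}

whisperAsync(whisperParams).then(printSegments);
```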