From bae5d074c7b050855f904059b10f69e44e712574 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 16 May 2025 07:53:26 +0200 Subject: [PATCH] vad : store VAD context in whisper_state (#3156) * vad : store VAD context in whisper_state This commit stores the VAD context in the whisper_state structure, allowing for better management and reuse of the VAD context across multiple calls to the whisper_vad function. The motivation for this change is that when updating the stream example I noticed that the VAD context was being re-initialized every time the whisper_vad function was called. This involved loading the VAD model which is expensive and unnecessary if the context can be reused. Storing this in the whisper_state seems to follow a pattern similar to how whisper_coreml_context and whisper_openvino_context are stored. * vad : free vad_context in whisper_free_state --- src/whisper.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/whisper.cpp b/src/whisper.cpp index ad4e7a12..a7e6ef2f 100644 --- a/src/whisper.cpp +++ b/src/whisper.cpp @@ -954,6 +954,8 @@ struct whisper_state { // [EXPERIMENTAL] speed-up techniques int32_t exp_n_audio_ctx = 0; // 0 - use default + whisper_vad_context * vad_context = nullptr; + struct vad_segment_info { float orig_start; float orig_end; @@ -3853,6 +3855,11 @@ void whisper_free_state(struct whisper_state * state) { // [EXPERIMENTAL] Token-level timestamps with DTW aheads_masks_free(state->aheads_masks); + if (state->vad_context != nullptr) { + whisper_vad_free(state->vad_context); + state->vad_context = nullptr; + } + delete state; } } @@ -6613,12 +6620,16 @@ static bool whisper_vad( WHISPER_LOG_INFO("%s: VAD is enabled, processing speach segments only\n", __func__); filtered_n_samples = 0; - struct whisper_vad_context_params vad_ctx_params = whisper_vad_default_context_params(); - struct whisper_vad_context * vctx = whisper_vad_init_from_file_with_params(params.vad_model_path, 
vad_ctx_params); - if (vctx == nullptr) { - WHISPER_LOG_ERROR("%s: failed to initialize VAD context\n", __func__); - return false; + if (state->vad_context == nullptr) { + struct whisper_vad_context_params vad_ctx_params = whisper_vad_default_context_params(); + struct whisper_vad_context * vctx = whisper_vad_init_from_file_with_params(params.vad_model_path, vad_ctx_params); + if (vctx == nullptr) { + WHISPER_LOG_ERROR("%s: failed to initialize VAD context\n", __func__); + return false; + } + state->vad_context = vctx; } + auto vctx = state->vad_context; const whisper_vad_params & vad_params = params.vad_params;