Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-05-04 16:14:34 +02:00)
whisper : remove extra backend instance (huh?)
commit 4caa64b73e
parent 2877b026cf
whisper.cpp (19 lines changed: +4 −15)
@@ -819,8 +819,6 @@ struct whisper_state {
 
     whisper_decoder decoders[WHISPER_MAX_DECODERS];
 
-    ggml_backend_t backend = nullptr;
-
     // ggml-alloc:
     // - stores meta info about the intermediate tensors into the `meta` buffers
     // - stores the actual tensor data into the `data` buffers
@@ -2240,7 +2238,7 @@ static bool whisper_encode_internal(
    }
 
    if (!whisper_encode_external(wstate)) {
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
            return false;
        }
    } else {
@@ -2263,7 +2261,7 @@ static bool whisper_encode_internal(
            return false;
        }
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
            return false;
        }
    }
@@ -2279,7 +2277,7 @@ static bool whisper_encode_internal(
            return false;
        }
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
            return false;
        }
    }
@@ -2744,7 +2742,7 @@ static bool whisper_decode_internal(
 
        logits = gf->nodes[gf->n_nodes - 1];
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
            return false;
        }
    }
@@ -3191,13 +3189,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
    whisper_state * state = new whisper_state;
 
-    state->backend = whisper_backend_init(ctx->params);
-    if (!state->backend) {
-        WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);
-        whisper_free_state(state);
-        return nullptr;
-    }
-
    // at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
    // in theory, there can be a case where this is not enough, but in practice it should always be enough
    const int factor = 3;
@@ -3623,8 +3614,6 @@ void whisper_free_state(struct whisper_state * state) {
        ggml_gallocr_free(state->alloc_cross.alloc);
        ggml_gallocr_free(state->alloc_decode.alloc);
 
-        ggml_backend_free(state->backend);
-
        // [EXPERIMENTAL] Token-level timestamps with DTW
        aheads_masks_free(state->aheads_masks);
 
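Net effect of the diff: whisper_state no longer owns a ggml_backend_t of its own. Every call to ggml_graph_compute_helper() now goes through the single backend held by whisper_context (wstate.backend becomes wctx.backend), and whisper_init_state() / whisper_free_state() no longer create or free a second instance. Below is a minimal standalone sketch of that ownership pattern; Backend, Context, and State are simplified stand-ins, not the real whisper.cpp / ggml types, and only the single-owner idea is taken from the hunks above.

// Sketch of the post-commit ownership: the context owns the one backend,
// the state borrows it for graph computation.
#include <cstdio>
#include <memory>

struct Backend {                // stand-in for ggml_backend_t
    const char * name;
};

struct Context {                // stand-in for whisper_context: sole owner of the backend
    std::unique_ptr<Backend> backend;
};

struct State {                  // stand-in for whisper_state: no backend member anymore
    // before this commit the state also held its own backend, which duplicated
    // the context's and had to be freed separately in whisper_free_state()
};

// stand-in for ggml_graph_compute_helper(): always receives the context's backend
static bool compute(Backend * backend) {
    std::printf("computing on backend: %s\n", backend->name);
    return true;
}

static bool decode(Context & ctx, State & /*state*/) {
    // equivalent of the wstate.backend -> wctx.backend change in the hunks above
    return compute(ctx.backend.get());
}

int main() {
    Context ctx { std::make_unique<Backend>(Backend{"cpu"}) };
    State   state;              // created without initializing a second backend
    return decode(ctx, state) ? 0 : 1;
}

Keeping the backend in one place avoids double initialization and removes the separate ggml_backend_free() that the old whisper_free_state() hunk had to perform.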