mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-01 03:28:57 +01:00
whisper : allow whisper_full from mel spectrogram - no audio (#1214)
Co-authored-by: jbrough <jamie1612@gmail.com>
This commit is contained in:
parent
7e54df414e
commit
b5bb5c85d4
26
whisper.cpp
26
whisper.cpp
@ -3140,7 +3140,6 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
|
if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
|
||||||
log("%s: failed to eval\n", __func__);
|
log("%s: failed to eval\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
@ -3374,7 +3373,6 @@ float * whisper_get_logits(struct whisper_context * ctx) {
|
|||||||
return ctx->state->logits.data();
|
return ctx->state->logits.data();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float * whisper_get_logits_from_state(struct whisper_state * state) {
|
float * whisper_get_logits_from_state(struct whisper_state * state) {
|
||||||
return state->logits.data();
|
return state->logits.data();
|
||||||
}
|
}
|
||||||
@ -4087,15 +4085,17 @@ int whisper_full_with_state(
|
|||||||
|
|
||||||
result_all.clear();
|
result_all.clear();
|
||||||
|
|
||||||
// compute log mel spectrogram
|
if (n_samples > 0) {
|
||||||
if (params.speed_up) {
|
// compute log mel spectrogram
|
||||||
// TODO: Replace PV with more advanced algorithm
|
if (params.speed_up) {
|
||||||
log("%s: failed to compute log mel spectrogram\n", __func__);
|
// TODO: Replace PV with more advanced algorithm
|
||||||
return -1;
|
|
||||||
} else {
|
|
||||||
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
|
|
||||||
log("%s: failed to compute log mel spectrogram\n", __func__);
|
log("%s: failed to compute log mel spectrogram\n", __func__);
|
||||||
return -2;
|
return -1;
|
||||||
|
} else {
|
||||||
|
if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
|
||||||
|
log("%s: failed to compute log mel spectrogram\n", __func__);
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4121,7 +4121,9 @@ int whisper_full_with_state(
|
|||||||
state->t_beg = 0;
|
state->t_beg = 0;
|
||||||
state->t_last = 0;
|
state->t_last = 0;
|
||||||
state->tid_last = 0;
|
state->tid_last = 0;
|
||||||
state->energy = get_signal_energy(samples, n_samples, 32);
|
if (n_samples > 0) {
|
||||||
|
state->energy = get_signal_energy(samples, n_samples, 32);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int seek_start = params.offset_ms/10;
|
const int seek_start = params.offset_ms/10;
|
||||||
@ -4813,7 +4815,6 @@ int whisper_full_with_state(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int whisper_full(
|
int whisper_full(
|
||||||
struct whisper_context * ctx,
|
struct whisper_context * ctx,
|
||||||
struct whisper_full_params params,
|
struct whisper_full_params params,
|
||||||
@ -4890,7 +4891,6 @@ int whisper_full_parallel(
|
|||||||
result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
|
result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
|
||||||
result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
|
result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
|
||||||
|
|
||||||
|
|
||||||
// make sure that segments are not overlapping
|
// make sure that segments are not overlapping
|
||||||
if (!ctx->state->result_all.empty()) {
|
if (!ctx->state->result_all.empty()) {
|
||||||
result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);
|
result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);
|
||||||
|
Loading…
Reference in New Issue
Block a user