mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-14 18:08:38 +01:00
whisper : improve decoding strategy (#244)
- Clear the past prompt when there is very short audio left for processing. My observation is that in these cases the decoding tends to repeat and hallucinate stuff, and I think this is induced by the existing prompt.
- When we fail to sample a timestamp token, retry by clearing the past prompt. If it fails again, then we advance the window by 1 second.
This commit is contained in:
parent
a82d331034
commit
6a7c82501e
17
whisper.cpp
17
whisper.cpp
@ -2650,10 +2650,17 @@ int whisper_full(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if only 1 second left, then stop
|
||||||
if (seek + 100 >= seek_end) {
|
if (seek + 100 >= seek_end) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if there is a very short audio segment left to process, we remove any past prompt since it tends
|
||||||
|
// to confuse the decoder and often make it repeat or hallucinate stuff
|
||||||
|
if (seek > seek_start && seek + 500 >= seek_end) {
|
||||||
|
prompt_past.clear();
|
||||||
|
}
|
||||||
|
|
||||||
if (params.encoder_begin_callback) {
|
if (params.encoder_begin_callback) {
|
||||||
if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
|
if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
|
||||||
fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
|
fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
|
||||||
@ -2780,8 +2787,14 @@ int whisper_full(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (failed) {
|
if (failed) {
|
||||||
fprintf(stderr, "\n%s: failed to generate timestamp token - using fallback strategy\n\n", __func__);
|
// when we fail to sample timestamp token, retry by clearing the past prompt
|
||||||
seek += 100;
|
// if it fails again, then we advance the window by 1 second
|
||||||
|
if (prompt_past.size() > 0) {
|
||||||
|
prompt_past.clear();
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
|
||||||
|
seek += 100;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user