Try to improve the sampling strategy a bit

It still fails sometimes when it does not sample a timestamp token for
the entire segment. We now print a message in such cases.
This commit is contained in:
Georgi Gerganov 2022-10-17 23:52:24 +03:00
parent 20d8e7a309
commit d14823582d

View File

@ -2425,7 +2425,7 @@ int whisper_full(
whisper_token id = 0; whisper_token id = 0;
whisper_token tid = whisper_token_beg(ctx); whisper_token tid = whisper_token_beg(ctx);
id = whisper_sample_best(ctx, result_len == 0); id = whisper_sample_best(ctx, result_len == 0 || i > 32);
if (i > 0) { if (i > 0) {
tid = whisper_sample_timestamp(ctx); tid = whisper_sample_timestamp(ctx);
} }
@ -2445,7 +2445,9 @@ int whisper_full(
// end of text token // end of text token
if (id == whisper_token_eot(ctx)) { if (id == whisper_token_eot(ctx)) {
if (result_len == 0) { if (result_len == 0) {
result_len = i + 1; // TODO: figure out how to resolve this
fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
//result_len = i + 1;
} }
break; break;
} }