mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-29 01:58:47 +01:00
main : fix some edge cases for word-level timestamps
This commit is contained in:
parent
5dc74e3aff
commit
0729da9a3b
@ -424,7 +424,7 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
//tokens[j].vlen = tokens[j].pt;
|
//tokens[j].vlen = tokens[j].pt;
|
||||||
tokens[j].vlen = voice_length(tokens[j].text);
|
tokens[j].vlen = voice_length(tokens[j].text);
|
||||||
|
|
||||||
if (token.pt > params.word_thold && token.ptsum > 0.01 && token.tid > tid_last) {
|
if (token.pt > params.word_thold && token.ptsum > 0.01 && token.tid > tid_last && tt <= t1) {
|
||||||
if (j > 0) {
|
if (j > 0) {
|
||||||
tokens[j - 1].t1 = tt;
|
tokens[j - 1].t1 = tt;
|
||||||
}
|
}
|
||||||
@ -482,15 +482,26 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
tokens[j + 1].t0 = tokens[j].t1;
|
tokens[j + 1].t0 = tokens[j].t1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (j > 0) {
|
||||||
|
if (tokens[j - 1].t1 > tokens[j].t0) {
|
||||||
|
tokens[j].t0 = tokens[j - 1].t1;
|
||||||
|
tokens[j].t1 = std::max(tokens[j].t0, tokens[j].t1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tokens[j].tt0 = tokens[j].t0;
|
tokens[j].tt0 = tokens[j].t0;
|
||||||
tokens[j].tt1 = tokens[j].t1;
|
tokens[j].tt1 = tokens[j].t1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// VAD
|
// VAD
|
||||||
{
|
{
|
||||||
const int hw = WHISPER_SAMPLE_RATE; // take one second of audio around the token
|
const int hw = WHISPER_SAMPLE_RATE/8;
|
||||||
|
|
||||||
for (int j = 0; j < n; j++) {
|
for (int j = 0; j < n; j++) {
|
||||||
|
if (tokens[j].id >= whisper_token_eot(ctx)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const int64_t t0 = tokens[j].t0;
|
const int64_t t0 = tokens[j].t0;
|
||||||
const int64_t t1 = tokens[j].t1;
|
const int64_t t1 = tokens[j].t1;
|
||||||
|
|
||||||
@ -503,13 +514,12 @@ bool output_wts(struct whisper_context * ctx, const char * fname, const char * f
|
|||||||
const int n = ss1 - ss0;
|
const int n = ss1 - ss0;
|
||||||
|
|
||||||
float sum = 0.0f;
|
float sum = 0.0f;
|
||||||
|
|
||||||
for (int k = ss0; k < ss1; k++) {
|
for (int k = ss0; k < ss1; k++) {
|
||||||
sum += pcm_avg[k];
|
sum += pcm_avg[k];
|
||||||
}
|
}
|
||||||
|
|
||||||
const float avg = sum/n;
|
const float thold = 0.5*sum/n;
|
||||||
|
|
||||||
const float thold = 0.5*avg;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
int k = s0;
|
int k = s0;
|
||||||
|
Loading…
Reference in New Issue
Block a user