From 5f7e9fa2dc2a4c3bec5590316f5b78fed9af7398 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 23 Oct 2022 13:29:36 +0300 Subject: [PATCH] ref #68, #79 : fix segment time output --- main.cpp | 11 ++++++++--- whisper.cpp | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/main.cpp b/main.cpp index cbe9aa9e..fd1e89e2 100644 --- a/main.cpp +++ b/main.cpp @@ -246,7 +246,7 @@ bool output_vtt(struct whisper_context * ctx, const char * fname) { return true; } -bool output_srt(struct whisper_context * ctx, const char * fname) { +bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params) { std::ofstream fout(fname); if (!fout.is_open()) { fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname); @@ -258,7 +258,12 @@ bool output_srt(struct whisper_context * ctx, const char * fname) { const int n_segments = whisper_full_n_segments(ctx); for (int i = 0; i < n_segments; ++i) { const char * text = whisper_full_get_segment_text(ctx, i); - fout << text; + const int64_t t0 = whisper_full_get_segment_t0(ctx, i); + const int64_t t1 = whisper_full_get_segment_t1(ctx, i); + + fout << i + 1 + params.offset_n << "\n"; + fout << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n"; + fout << text << "\n\n"; } return true; @@ -394,7 +399,7 @@ int main(int argc, char ** argv) { // output to SRT file if (params.output_srt) { const auto fname_srt = fname_inp + ".srt"; - output_srt(ctx, fname_srt.c_str()); + output_srt(ctx, fname_srt.c_str(), params); } } } diff --git a/whisper.cpp b/whisper.cpp index 01f6b00b..0634bc25 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2526,7 +2526,7 @@ int whisper_full( // store the text from this iteration if (tokens_cur.size() > 0) { int i0 = 0; - auto t0 = 2*(tokens_cur.front().tid - whisper_token_beg(ctx)); + auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx)); std::string text = ""; @@ -2540,7 +2540,7 @@ int whisper_full( text += whisper_token_to_str(ctx, tokens_cur[i].id); } if (tokens_cur[i].id > whisper_token_beg(ctx)) { - const auto t1 = 2*(tokens_cur[i].tid - whisper_token_beg(ctx)); + const auto t1 = seek + 2*(tokens_cur[i].tid - whisper_token_beg(ctx)); if (!text.empty()) { if (params.print_realtime) { if (params.print_timestamps) {