From 6559b538e5e05cfa199c15d46ca5bd0edd353974 Mon Sep 17 00:00:00 2001 From: Oleg Sidorov Date: Tue, 28 Nov 2023 14:42:58 +0100 Subject: [PATCH] server : backport .srt output format (#1565) This commit adds support for the .srt format to the Whisper server. The code is effectively backported from examples/main. The output MIME type is set to application/x-subrip, as per https://en.wikipedia.org/wiki/SubRip. Example usage: curl 127.0.0.1:8080/inference \ -H "Content-Type: multipart/form-data" \ -F file="@" \ -F temperature="0.2" \ -F response-format="srt" --- examples/server/server.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e9d3f88b..2d15d4cc 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -657,6 +658,27 @@ int main(int argc, char ** argv) { std::string results = output_str(ctx, params, pcmf32s); res.set_content(results.c_str(), "text/html"); } + else if (params.response_format == srt_format) + { + std::stringstream ss; + const int n_segments = whisper_full_n_segments(ctx); + for (int i = 0; i < n_segments; ++i) { + const char * text = whisper_full_get_segment_text(ctx, i); + const int64_t t0 = whisper_full_get_segment_t0(ctx, i); + const int64_t t1 = whisper_full_get_segment_t1(ctx, i); + std::string speaker = ""; + + if (params.diarize && pcmf32s.size() == 2) + { + speaker = estimate_diarization_speaker(pcmf32s, t0, t1); + } + + ss << i + 1 + params.offset_n << "\n"; + ss << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n"; + ss << speaker << text << "\n\n"; + } + res.set_content(ss.str(), "application/x-subrip"); + } // TODO add more output formats else {