diff --git a/examples/command/command.cpp b/examples/command/command.cpp index 63684044..7a09d30a 100644 --- a/examples/command/command.cpp +++ b/examples/command/command.cpp @@ -251,7 +251,7 @@ static std::vector get_words(const std::string &txt) { // command-list mode // guide the transcription to match the most likely command from a provided list -static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms) { +static int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params ¶ms, std::ofstream &fout) { fprintf(stderr, "\n"); fprintf(stderr, "%s: guided mode\n", __func__); @@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio const float prob = probs_id[0].first; const int index = probs_id[0].second; + const char * best_command = allowed_commands[index].c_str(); fprintf(stdout, "\n"); fprintf(stdout, "%s: detected command: %s%s%s | p = %f | t = %d ms\n", __func__, - "\033[1m", allowed_commands[index].c_str(), "\033[0m", prob, + "\033[1m", best_command, "\033[0m", prob, (int) std::chrono::duration_cast(t_end - t_start).count()); fprintf(stdout, "\n"); + if (fout.is_open()) { + fout << best_command << std::endl; + } } } @@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio // always-prompt mode // transcribe the voice into text after valid prompt -static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) { +static int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) { bool is_running = true; bool ask_prompt = true; @@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async if ((sim > 0.7f) && (command.size() > 0)) { fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms); + if (fout.is_open()) { + fout << command << std::endl; + } } fprintf(stdout, "\n"); @@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async // general-purpose mode // freely transcribe the voice into text -static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) { +static int process_general_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout) { bool is_running = true; bool have_prompt = false; bool ask_prompt = true; @@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy } else { // cut the prompt from the decoded text const std::string command = ::trim(txt.substr(best_len)); - fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms); + if (fout.is_open()) { + fout << command << std::endl; + } } fprintf(stdout, "\n"); @@ -759,13 +768,22 @@ int main(int argc, char ** argv) { } } + std::ofstream fout; + if (params.fname_out.length() > 0) { + fout.open(params.fname_out); + if (!fout.is_open()) { + fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str()); + return 1; + } + } + if (ret_val == 0) { if (!params.commands.empty()) { - ret_val = process_command_list(ctx, audio, params); + ret_val = process_command_list(ctx, audio, params, fout); } else if (!params.prompt.empty() && params.grammar_parsed.rules.empty()) { - ret_val = always_prompt_transcription(ctx, audio, params); + ret_val = always_prompt_transcription(ctx, audio, params, fout); } else { - ret_val = process_general_transcription(ctx, audio, params); + ret_val = process_general_transcription(ctx, audio, params, fout); } }