command : clean-up / refactoring / formatting (#383)

This commit is contained in:
Georgi Gerganov 2023-01-07 21:43:24 +02:00
parent 9c4a1522f6
commit d1ea1220ff
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@ -11,7 +11,6 @@
#include <SDL.h>
#include <SDL_audio.h>
#include <iostream>
#include <sstream>
#include <cassert>
#include <cstdio>
@ -515,6 +514,35 @@ std::vector<std::string> read_allowed_commands(const std::string & fname) {
return allowed_commands;
}
std::vector<std::string> get_words(const std::string &txt) {
std::vector<std::string> words;
std::istringstream iss(txt);
std::string word;
while (iss >> word) {
words.push_back(word);
}
return words;
}
// returns true if no exit event was received
bool process_sdl_events() {
SDL_Event event;
while (SDL_PollEvent(&event)) {
switch (event.type) {
case SDL_QUIT:
{
return false;
} break;
default:
break;
}
}
return true;
}
// command-list mode
// guide the transcription to match the most likely command from a provided list
int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@ -606,23 +634,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
// main loop
while (is_running) {
// handle Ctrl + C
{
SDL_Event event;
while (SDL_PollEvent(&event)) {
switch (event.type) {
case SDL_QUIT:
{
is_running = false;
} break;
default:
break;
}
}
if (!is_running) {
return 0;
}
}
is_running = process_sdl_events();
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -718,6 +730,84 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
return 0;
}
// always-prompt mode
// transcribe the voice into text after valid prompt
int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
bool is_running = true;
bool ask_prompt = true;
float prob = 0.0f;
std::vector<float> pcmf32_cur;
const std::string k_prompt = params.prompt;
const int k_prompt_length = get_words(k_prompt).size();
fprintf(stderr, "\n");
fprintf(stderr, "%s: always-prompt mode\n", __func__);
// main loop
while (is_running) {
// handle Ctrl + C
is_running = process_sdl_events();
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (ask_prompt) {
fprintf(stdout, "\n");
fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
fprintf(stdout, "\n");
ask_prompt = false;
}
{
audio.get(2000, pcmf32_cur);
if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
int64_t t_ms = 0;
// detect the commands
audio.get(params.command_ms, pcmf32_cur);
const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
const auto words = get_words(txt);
std::string prompt;
std::string command;
for (int i = 0; i < words.size(); ++i) {
if (i < k_prompt_length) {
prompt += words[i] + " ";
} else {
command += words[i] + " ";
}
}
const float sim = similarity(prompt, k_prompt);
//debug
//fprintf(stdout, "command size: %i\n", command_length);
if ((sim > 0.7f) && (command.size() > 0)) {
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
}
fprintf(stdout, "\n");
audio.clear();
}
}
}
return 0;
}
// general-purpose mode
// freely transcribe the voice into text
int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@ -739,23 +829,7 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
// main loop
while (is_running) {
// handle Ctrl + C
{
SDL_Event event;
while (SDL_PollEvent(&event)) {
switch (event.type) {
case SDL_QUIT:
{
is_running = false;
} break;
default:
break;
}
}
if (!is_running) {
return 0;
}
}
is_running = process_sdl_events();
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -842,115 +916,6 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
return 0;
}
// always prompt mode
// transcribe the voice into text after valid prompt
int always_prompt_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
bool is_running = true;
bool ask_prompt = true;
float prob = 0.0f;
std::vector<float> pcmf32_cur;
const std::string k_prompt = params.prompt;
std::vector<std::string> words;
std::istringstream iss(k_prompt);
std::string word;
while (iss >> word) {
words.push_back(word);
}
int k_prompt_length = words.size();
// main loop
while (is_running) {
// handle Ctrl + C
{
SDL_Event event;
while (SDL_PollEvent(&event)) {
switch (event.type) {
case SDL_QUIT:
{
is_running = false;
} break;
default:
break;
}
}
if (!is_running) {
return 0;
}
}
// delay
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (ask_prompt) {
fprintf(stdout, "\n");
fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
fprintf(stdout, "\n");
ask_prompt = false;
}
{
audio.get(2000, pcmf32_cur);
if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
int64_t t_ms = 0;
// detect the commands
audio.get(params.command_ms, pcmf32_cur);
const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
std::istringstream iss(txt);
std::string word;
std::string prompt;
std::string command;
int i = 0;
int command_length = 0;
while (iss >> word) {
if (i == k_prompt_length - 1) {
prompt += word + ' ';
break;
}
prompt += word + ' ';
i++;
}
while (iss >> word) {
command += word + ' ';
command_length++;
}
const float sim = similarity(prompt, k_prompt);
//debug
//fprintf(stdout, "command size: %i\n", command_length);
if ((sim > 0.7f) && (command_length >0)){
fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
}
fprintf(stdout, "\n");
audio.clear();
}
}
}
return 0;
}
int main(int argc, char ** argv) {
whisper_params params;