command : clean-up / refactoring / formatting (#383)

2023-01-07 21:43:24 +02:00
parent 9c4a1522f6
commit d1ea1220ff
1 changed file with 363 additions and 398 deletions
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@@ -11,7 +11,6 @@
 #include <SDL.h>
 #include <SDL_audio.h>
 #include <iostream>
 #include <sstream>
 #include <cassert>
 #include <cstdio>
@@ -515,6 +514,35 @@ std::vector<std::string> read_allowed_commands(const std::string & fname) {
    return allowed_commands;
 }
 // split txt into its whitespace-separated words, in order of appearance
 // returns an empty vector when txt is empty or contains only whitespace
 std::vector<std::string> get_words(const std::string &txt) {
     std::istringstream stream(txt);
     std::vector<std::string> tokens;
     // operator>> skips leading whitespace and fails once the input is exhausted
     for (std::string token; stream >> token; ) {
         tokens.emplace_back(token);
     }
     return tokens;
 }
 // returns true if no exit event was received
 bool process_sdl_events() {
    SDL_Event event;
    while (SDL_PollEvent(&event)) {
        switch (event.type) {
            case SDL_QUIT:
                {
                    return false;
                } break;
            default:
                break;
        }
    }
    return true;
 }
 // command-list mode
 // guide the transcription to match the most likely command from a provided list
 int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@@ -606,23 +634,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
    // main loop
    while (is_running) {
        // handle Ctrl + C
-      {
+        is_running = process_sdl_events();
         SDL_Event event;
         while (SDL_PollEvent(&event)) {
            switch (event.type) {
               case SDL_QUIT:
               {
                  is_running = false;
               } break;
               default:
                  break;
            }
         }
         if (!is_running) {
            return 0;
         }
      }
        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -718,6 +730,84 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
    return 0;
 }
 // always-prompt mode
 // transcribe the voice into text after valid prompt
 //
 // The first N words of every transcription (N = number of words in params.prompt)
 // are compared against the prompt with similarity(); the remaining words are
 // reported as the command when the similarity is high enough.
 // Returns 0 once an exit event has been received.
 int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
     bool is_running = true;
     bool ask_prompt = true;
     float prob = 0.0f;
     std::vector<float> pcmf32_cur;
     const std::string k_prompt = params.prompt;
     // number of words in the prompt - unsigned, so it compares cleanly with vector indices
     const size_t k_prompt_length = get_words(k_prompt).size();
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: always-prompt mode\n", __func__);
     // main loop
     while (is_running) {
         // handle Ctrl + C
         is_running = process_sdl_events();
         if (!is_running) {
             // exit right away instead of sleeping and possibly transcribing one more time
             break;
         }
         // delay
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
         // the prompt is only announced once, on the first iteration
         if (ask_prompt) {
             fprintf(stdout, "\n");
             fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
             fprintf(stdout, "\n");
             ask_prompt = false;
         }
         {
             audio.get(2000, pcmf32_cur);
             if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
                 fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
                 int64_t t_ms = 0;
                 // detect the commands
                 audio.get(params.command_ms, pcmf32_cur);
                 const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
                 const auto words = get_words(txt);
                 // split the transcription: leading words -> prompt, everything after -> command
                 // (both strings keep a trailing space after their last word)
                 std::string prompt;
                 std::string command;
                 for (size_t i = 0; i < words.size(); ++i) {
                     if (i < k_prompt_length) {
                         prompt += words[i] + " ";
                     } else {
                         command += words[i] + " ";
                     }
                 }
                 const float sim = similarity(prompt, k_prompt);
                 //debug
                 //fprintf(stdout, "command size: %zu\n", command.size());
                 // only report a command when the spoken prompt matched and something followed it
                 if ((sim > 0.7f) && !command.empty()) {
                     fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
                 }
                 fprintf(stdout, "\n");
                 // drop the audio that was just processed
                 audio.clear();
             }
         }
     }
     return 0;
 }
 // general-purpose mode
 // freely transcribe the voice into text
 int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@@ -739,23 +829,7 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
    // main loop
    while (is_running) {
        // handle Ctrl + C
-      {
+        is_running = process_sdl_events();
         SDL_Event event;
         while (SDL_PollEvent(&event)) {
            switch (event.type) {
               case SDL_QUIT:
               {
                  is_running = false;
               } break;
               default:
                  break;
            }
         }
         if (!is_running) {
            return 0;
         }
      }
        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -842,115 +916,6 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
    return 0;
 }
 // always prompt mode
 // transcribe the voice into text after valid prompt
 int always_prompt_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
   bool is_running  = true;
   bool ask_prompt  = true;
   float prob  = 0.0f;
   std::vector<float> pcmf32_cur;
   const std::string k_prompt = params.prompt;
   std::vector<std::string> words;
   std::istringstream iss(k_prompt);
   std::string word;
   while (iss >> word) {
       words.push_back(word);
   }
   int k_prompt_length = words.size();
   // main loop
   while (is_running) {
      // handle Ctrl + C
      {
         SDL_Event event;
         while (SDL_PollEvent(&event)) {
            switch (event.type) {
               case SDL_QUIT:
               {
                  is_running = false;
               } break;
               default:
                  break;
            }
         }
         if (!is_running) {
            return 0;
         }
      }
      // delay
      std::this_thread::sleep_for(std::chrono::milliseconds(100));
      if (ask_prompt) {
         fprintf(stdout, "\n");
         fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
         fprintf(stdout, "\n");
         ask_prompt = false;
      }
      {
         audio.get(2000, pcmf32_cur);
         if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
            fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
            int64_t t_ms = 0;
            // detect the commands
            audio.get(params.command_ms, pcmf32_cur);
            const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
            std::istringstream iss(txt);
            std::string word;
            std::string prompt;
            std::string command;
            int i = 0;
            int command_length = 0;
            while (iss >> word) {
                if (i == k_prompt_length - 1) {
                    prompt += word + ' ';
                    break;
                }
                prompt += word + ' ';
                i++;
            }
            while (iss >> word) {
             command += word + ' ';
             command_length++;
            }
            const float sim = similarity(prompt, k_prompt);
            //debug
            //fprintf(stdout, "command size: %i\n", command_length); 
            if ((sim > 0.7f) && (command_length >0)){
                fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
            }
            fprintf(stdout, "\n");
            audio.clear();
         }
      }
   }
   return 0;
 }
 int main(int argc, char ** argv) {
    whisper_params params;