command : clean-up / refactoring / formatting (#383)

2023-01-07 21:43:24 +02:00 · 2023-01-07 21:43:24 +02:00 · d1ea1220ff
commit d1ea1220ff
parent 9c4a1522f6
1 changed files with 363 additions and 398 deletions
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@ -11,7 +11,6 @@
 #include <SDL.h>
 #include <SDL_audio.h>

-#include <iostream>
 #include <sstream>
 #include <cassert>
 #include <cstdio>
@ -515,6 +514,35 @@ std::vector<std::string> read_allowed_commands(const std::string & fname) {
    return allowed_commands;
 }

+std::vector<std::string> get_words(const std::string &txt) { // split txt into its whitespace-separated words
+    std::vector<std::string> words;
+
+    std::istringstream iss(txt);
+    std::string word;
+    while (iss >> word) { // operator>> skips leading whitespace and stops at the next whitespace
+        words.push_back(word);
+    }
+
+    return words; // words in input order; empty when txt has no non-whitespace characters
+}
+
+// polls pending SDL events; returns false as soon as an SDL_QUIT event is seen, true if polling finishes without one
+bool process_sdl_events() {
+    SDL_Event event;
+    while (SDL_PollEvent(&event)) {
+        switch (event.type) {
+            case SDL_QUIT:
+                {
+                    return false; // any events still queued behind SDL_QUIT are left unprocessed
+                } break; // unreachable after the return above
+            default:
+                break; // every other event type is ignored
+        }
+    }
+
+    return true;
+}
+
 // command-list mode
 // guide the transcription to match the most likely command from a provided list
 int process_command_list(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@ -606,23 +634,7 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
    // main loop
    while (is_running) {
        // handle Ctrl + C
-      {
-         SDL_Event event;
-         while (SDL_PollEvent(&event)) {
-            switch (event.type) {
-               case SDL_QUIT:
-               {
-                  is_running = false;
-               } break;
-               default:
-                  break;
-            }
-         }
-
-         if (!is_running) {
-            return 0;
-         }
-      }
+        is_running = process_sdl_events();

        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -718,6 +730,84 @@ int process_command_list(struct whisper_context * ctx, audio_async &audio, const
    return 0;
 }

+// always-prompt mode
+// transcribe the voice and, when the leading words resemble params.prompt, print the words that follow as the command; always returns 0
+int always_prompt_transcription(struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
+    bool is_running = true;
+    bool ask_prompt = true; // the prompt banner is printed only on the first loop iteration
+
+    float prob = 0.0f; // handed to transcribe(); not read anywhere in this function
+
+    std::vector<float> pcmf32_cur;
+
+    const std::string k_prompt = params.prompt;
+
+    const int k_prompt_length = get_words(k_prompt).size(); // number of whitespace-separated words in the prompt (size_t narrowed to int)
+
+    fprintf(stderr, "\n");
+    fprintf(stderr, "%s: always-prompt mode\n", __func__);
+
+    // main loop
+    while (is_running) {
+        // handle Ctrl + C
+        is_running = process_sdl_events(); // NOTE(review): on quit the rest of this iteration still runs; the loop condition is only rechecked at the top
+
+        // delay
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+        if (ask_prompt) {
+            fprintf(stdout, "\n");
+            fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m"); // \033[1m / \033[0m are written around the prompt text
+            fprintf(stdout, "\n");
+
+            ask_prompt = false;
+        }
+
+        {
+            audio.get(2000, pcmf32_cur); // presumably the most recent 2000 ms of captured audio - confirm against audio_async::get
+
+            if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) { // a true result is treated as "speech detected"
+                fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
+
+                int64_t t_ms = 0; // handed to transcribe() and printed below as the elapsed time
+
+                // detect the commands
+                audio.get(params.command_ms, pcmf32_cur); // fetch again using params.command_ms; this is the buffer that gets transcribed
+
+                const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
+
+                const auto words = get_words(txt);
+
+                std::string prompt;
+                std::string command;
+
+                for (int i = 0; i < words.size(); ++i) { // NOTE(review): signed int compared with the unsigned words.size()
+                    if (i < k_prompt_length) {
+                        prompt += words[i] + " "; // first k_prompt_length words form the spoken prompt (leaves a trailing space)
+                    } else {
+                        command += words[i] + " "; // everything after that is the command (leaves a trailing space)
+                    }
+                }
+
+                const float sim = similarity(prompt, k_prompt); // prompt carries a trailing space here, k_prompt is used as given
+
+                // debug (stale: command_length is not declared in this function)
+                //fprintf(stdout, "command size: %i\n", command_length);
+
+                if ((sim > 0.7f) && (command.size() > 0)) { // report only when the prompt matched well enough and at least one command word followed
+                    fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
+                }
+
+                fprintf(stdout, "\n");
+
+                audio.clear(); // presumably drops the buffered audio so the same speech is not processed twice - confirm against audio_async::clear
+            }
+        }
+    }
+
+    return 0;
+}
+
 // general-purpose mode
 // freely transcribe the voice into text
 int process_general_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
@ -739,23 +829,7 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
    // main loop
    while (is_running) {
        // handle Ctrl + C
-      {
-         SDL_Event event;
-         while (SDL_PollEvent(&event)) {
-            switch (event.type) {
-               case SDL_QUIT:
-               {
-                  is_running = false;
-               } break;
-               default:
-                  break;
-            }
-         }
-
-         if (!is_running) {
-            return 0;
-         }
-      }
+        is_running = process_sdl_events();

        // delay
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -842,115 +916,6 @@ int process_general_transcription(struct whisper_context * ctx, audio_async &aud
    return 0;
 }

-
-// always prompt mode
-// transcribe the voice into text after valid prompt
-int always_prompt_transcription(struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
-   bool is_running  = true;
-   bool ask_prompt  = true;
-
-   float prob  = 0.0f;
-
-   std::vector<float> pcmf32_cur;
-
-   const std::string k_prompt = params.prompt;
-
-   std::vector<std::string> words;
-
-   std::istringstream iss(k_prompt);
-   std::string word;
-
-   while (iss >> word) {
-       words.push_back(word);
-   }
-
-   int k_prompt_length = words.size();
-
-   // main loop
-   while (is_running) {
-      // handle Ctrl + C
-      {
-         SDL_Event event;
-         while (SDL_PollEvent(&event)) {
-            switch (event.type) {
-               case SDL_QUIT:
-               {
-                  is_running = false;
-               } break;
-               default:
-                  break;
-            }
-         }
-
-         if (!is_running) {
-            return 0;
-         }
-      }
-
-      // delay
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
-
-      if (ask_prompt) {
-         fprintf(stdout, "\n");
-         fprintf(stdout, "%s: The prompt is: '%s%s%s'\n", __func__, "\033[1m", k_prompt.c_str(), "\033[0m");
-         fprintf(stdout, "\n");
-
-         ask_prompt = false;
-      }
-
-      {
-         audio.get(2000, pcmf32_cur);
-
-         if (vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, params.vad_thold, params.freq_thold, params.print_energy)) {
-            fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
-
-            int64_t t_ms = 0;
-
-            // detect the commands
-            audio.get(params.command_ms, pcmf32_cur);
-
-            const auto txt = ::trim(::transcribe(ctx, params, pcmf32_cur, prob, t_ms));
-
-            std::istringstream iss(txt);
-            std::string word;
-            std::string prompt;
-            std::string command;
-            int i = 0;
-            int command_length = 0;
-            while (iss >> word) {
-                if (i == k_prompt_length - 1) {
-                    prompt += word + ' ';
-                    break;
-                }
-                prompt += word + ' ';
-                i++;
-            }
-            while (iss >> word) {
-             command += word + ' ';
-             command_length++;
-            }
-
-            const float sim = similarity(prompt, k_prompt);
-
-            //debug
-            //fprintf(stdout, "command size: %i\n", command_length); 
-
-
-            if ((sim > 0.7f) && (command_length >0)){
-                fprintf(stdout, "%s: Command '%s%s%s', (t = %d ms)\n", __func__, "\033[1m", command.c_str(), "\033[0m", (int) t_ms);
-            }
-
-            fprintf(stdout, "\n");
-
-
-            audio.clear();
-         }
-      }
-   }
-
-   return 0;
-}
-
 int main(int argc, char ** argv) {
    whisper_params params;