wchess: tidy up entry files

This commit is contained in:
Fraxy V 2023-11-25 11:34:06 +02:00
parent f07ff2aa6a
commit a44b21bce0
7 changed files with 150 additions and 155 deletions

View File

@ -3,14 +3,15 @@ add_library(libwchess
WChess.h WChess.h
Chessboard.cpp Chessboard.cpp
Chessboard.h Chessboard.h
) )
target_link_libraries(libwchess target_link_libraries(libwchess
PUBLIC PUBLIC
whisper whisper
common
) )
target_include_directories(libwchess target_include_directories(libwchess
PUBLIC PUBLIC
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>" "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
) )

View File

@ -1,49 +1,48 @@
#include "WChess.h" #include "WChess.h"
#include "Chessboard.h"
#include "grammar-parser.h" #include "grammar-parser.h"
#include "common.h" #include "common.h"
#include <thread> #include <thread>
Chess::Chess(whisper_context * ctx, WChess::WChess(whisper_context * ctx,
const whisper_full_params & wparams, const whisper_full_params & wparams,
StatusSetter status_setter, callbacks cb,
ISRunning running, settings s)
AudioGetter audio,
MovesSetter m_moveSetter)
: m_ctx(ctx) : m_ctx(ctx)
, m_wparams(wparams) , m_wparams(wparams)
, m_status_setter(status_setter) , m_cb(cb)
, m_running(running) , m_settings(s)
, m_audio(audio) , m_board(new Chessboard())
, m_moveSetter( m_moveSetter)
{} {}
void Chess::set_status(const char * msg) { WChess::~WChess() = default;
if (m_status_setter) (*m_status_setter)(msg);
void WChess::set_status(const std::string& msg) const {
if (m_cb.set_status) (*m_cb.set_status)(msg);
} }
void Chess::set_moves(const std::string& moves) { void WChess::set_moves(const std::string& moves) const {
if (m_moveSetter) (*m_moveSetter)(moves); if (m_cb.set_moves) (*m_cb.set_moves)(moves);
} }
bool Chess::check_running() { bool WChess::check_running() const {
if (m_running) return (*m_running)(); if (m_cb.check_running) return (*m_cb.check_running)();
return false; return false;
} }
void Chess::get_audio(int ms, std::vector<float>& pcmf32) { void WChess::get_audio(int ms, std::vector<float>& pcmf32) const {
if (m_audio) (*m_audio)(ms, pcmf32); if (m_cb.get_audio) (*m_cb.get_audio)(ms, pcmf32);
} }
std::string Chess::stringifyBoard() { std::string WChess::stringify_board() const {
return m_board.stringifyBoard(); return m_board->stringifyBoard();
} }
void Chess::run() { void WChess::run() {
set_status("loading data ..."); set_status("loading data ...");
bool have_prompt = false; bool have_prompt = false;
bool ask_prompt = true; bool ask_prompt = true;
bool print_energy = false;
float logprob_min0 = 0.0f; float logprob_min0 = 0.0f;
float logprob_min = 0.0f; float logprob_min = 0.0f;
@ -87,13 +86,6 @@ void Chess::run() {
m_wparams.grammar_penalty = 100.0; m_wparams.grammar_penalty = 100.0;
} }
const int32_t vad_ms = 2000;
const int32_t prompt_ms = 5000;
const int32_t command_ms = 4000;
const float vad_thold = 0.1f;
const float freq_thold = -1.0f;
while (check_running()) { while (check_running()) {
// delay // delay
std::this_thread::sleep_for(std::chrono::milliseconds(100)); std::this_thread::sleep_for(std::chrono::milliseconds(100));
@ -115,14 +107,14 @@ void Chess::run() {
int64_t t_ms = 0; int64_t t_ms = 0;
{ {
get_audio(vad_ms, pcmf32_cur); get_audio(m_settings.vad_ms, pcmf32_cur);
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) { if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, m_settings.vad_thold, m_settings.freq_thold, m_settings.print_energy)) {
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__); fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
set_status("Speech detected! Processing ..."); set_status("Speech detected! Processing ...");
if (!have_prompt) { if (!have_prompt) {
get_audio(prompt_ms, pcmf32_cur); get_audio(m_settings.prompt_ms, pcmf32_cur);
m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("prompt"); m_wparams.i_start_rule = grammar_parsed.symbol_ids.at("prompt");
const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms)); const auto txt = ::trim(transcribe(pcmf32_cur, logprob_min, logprob_sum, n_tokens, t_ms));
@ -151,7 +143,7 @@ void Chess::run() {
have_prompt = true; have_prompt = true;
} }
} else { } else {
get_audio(command_ms, pcmf32_cur); get_audio(m_settings.command_ms, pcmf32_cur);
// prepend 3 seconds of silence // prepend 3 seconds of silence
pcmf32_cur.insert(pcmf32_cur.begin(), 3*WHISPER_SAMPLE_RATE, 0.0f); pcmf32_cur.insert(pcmf32_cur.begin(), 3*WHISPER_SAMPLE_RATE, 0.0f);
@ -198,18 +190,15 @@ void Chess::run() {
set_status(txt); set_status(txt);
} }
if (!command.empty()) { if (!command.empty()) {
set_moves(m_board.processTranscription(command)); set_moves(m_board->processTranscription(command));
} }
} }
} }
} }
} }
} }
std::string Chess::transcribe( std::string WChess::transcribe(
const std::vector<float> & pcmf32, const std::vector<float> & pcmf32,
float & logprob_min, float & logprob_min,
float & logprob_sum, float & logprob_sum,
@ -223,7 +212,7 @@ std::string Chess::transcribe(
t_ms = 0; t_ms = 0;
if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) { if (whisper_full(m_ctx, m_wparams, pcmf32.data(), pcmf32.size()) != 0) {
return ""; return {};
} }
std::string result; std::string result;

View File

@ -1,39 +1,59 @@
#pragma once #pragma once
#include "Chessboard.h"
#include "whisper.h" #include "whisper.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory>
class Chess { class Chessboard;
class WChess {
public: public:
using StatusSetter = void (*)(const std::string & status); using SetStatusCb = void (*)(const std::string &);
using ISRunning = bool (*)(); using CheckRunningCb = bool (*)();
using AudioGetter = void (*)(int, std::vector<float>&); using GetAudioCb = void (*)(int, std::vector<float> &);
using MovesSetter = void (*)(const std::string & moves); using SetMovesCb = void (*)(const std::string &);
Chess( whisper_context * ctx,
struct callbacks {
SetStatusCb set_status = nullptr;
CheckRunningCb check_running = nullptr;
GetAudioCb get_audio = nullptr;
SetMovesCb set_moves = nullptr;
};
struct settings {
int32_t vad_ms = 2000;
int32_t prompt_ms = 5000;
int32_t command_ms = 4000;
float vad_thold = 0.1f;
float freq_thold = -1.0f;
bool print_energy = false;
};
WChess(
whisper_context * ctx,
const whisper_full_params & wparams, const whisper_full_params & wparams,
StatusSetter status_setter, callbacks cb,
ISRunning running, settings s
AudioGetter audio, );
MovesSetter moveSetter); ~WChess();
void run(); void run();
std::string stringifyBoard(); std::string stringify_board() const;
private: private:
void get_audio(int ms, std::vector<float>& pcmf32); void get_audio(int ms, std::vector<float>& pcmf32) const;
void set_status(const char* msg); void set_status(const std::string& msg) const;
void set_moves(const std::string& moves); void set_moves(const std::string& moves) const;
bool check_running(); bool check_running() const;
std::string transcribe( std::string transcribe(
const std::vector<float> & pcmf32, const std::vector<float> & pcmf32,
float & logprob_min, float & logprob_min,
float & logprob_sum, float & logprob_sum,
int & n_tokens, int & n_tokens,
int64_t & t_ms); int64_t & t_ms);
whisper_context * m_ctx; whisper_context * m_ctx;
whisper_full_params m_wparams; whisper_full_params m_wparams;
StatusSetter m_status_setter; const callbacks m_cb;
ISRunning m_running; const settings m_settings;
AudioGetter m_audio; std::unique_ptr<Chessboard> m_board;
MovesSetter m_moveSetter;
Chessboard m_board;
}; };

View File

@ -4,5 +4,5 @@ if (WHISPER_SDL2)
include(DefaultTargetOptions) include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE libwchess common common-sdl ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE libwchess common-sdl ${CMAKE_THREAD_LIBS_INIT})
endif () endif ()

View File

@ -1,30 +1,15 @@
// Voice assistant example // Command line voice assisted chess
// //
// Speak short text commands to the microphone. // Speak chess move commands to the microphone.
// This program will detect your voice command and convert them to text. // The moves will be translated to chessboard positions.
// //
// ref: https://github.com/ggerganov/whisper.cpp/issues/171
// //
#include "common-sdl.h"
#include "common.h"
#include "WChess.h" #include "WChess.h"
#include "common-sdl.h"
#include <sstream> #include <memory>
#include <cassert>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <regex>
#include <string>
#include <thread> #include <thread>
#include <vector>
#include <map>
bool file_exists(const std::string & fname) {
std::ifstream f(fname.c_str());
return f.good();
}
// command-line parameters // command-line parameters
struct whisper_params { struct whisper_params {
@ -81,7 +66,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str()); fprintf(stderr, " -cmd FNAME, --commands FNAME [%-7s] text file with allowed commands\n", params.commands.c_str());
fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str()); fprintf(stderr, " -p, --prompt [%-7s] the required activation prompt\n", params.prompt.c_str());
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str()); fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty); fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
@ -124,10 +108,9 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
return true; return true;
} }
std::unique_ptr<WChess> g_wchess;
std::unique_ptr<Chess> g_chess; void set_moves(const std::string & moves) {
void set_moves(const std::string & /* moves */) { if (!moves.empty()) fprintf(stdout, "%s", g_wchess->stringify_board().c_str());
fprintf(stdout, "%s", g_chess->stringifyBoard().c_str());
} }
audio_async g_audio(30*1000); audio_async g_audio(30*1000);
@ -135,10 +118,6 @@ void get_audio(int ms, std::vector<float> & pcmf32_cur) {
g_audio.get(ms, pcmf32_cur); g_audio.get(ms, pcmf32_cur);
} }
bool check_running() {
return sdl_poll_events();
}
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
whisper_params params; whisper_params params;
@ -189,15 +168,30 @@ int main(int argc, char ** argv) {
wparams.beam_search.beam_size = 5; wparams.beam_search.beam_size = 5;
wparams.initial_prompt = params.context.data();
g_audio.resume(); g_audio.resume();
// wait for 1 second to avoid any buffered noise // wait for 1 second to avoid any buffered noise
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); std::this_thread::sleep_for(std::chrono::milliseconds(1000));
g_audio.clear(); g_audio.clear();
g_chess.reset(new Chess(ctx, wparams, nullptr, sdl_poll_events, get_audio, set_moves)); WChess::callbacks cb;
set_moves({}); cb.check_running = sdl_poll_events;
g_chess->run(); cb.get_audio = get_audio;
cb.set_moves = set_moves;
WChess::settings s;
s.vad_ms = 2000;
s.prompt_ms = params.prompt_ms;
s.command_ms = params.command_ms;
s.vad_thold = params.vad_thold;
s.freq_thold = params.freq_thold;
s.print_energy = params.print_energy;
g_wchess.reset(new WChess(ctx, wparams, cb, s));
set_moves("start");
g_wchess->run();
g_audio.pause(); g_audio.pause();

View File

@ -1,7 +1,7 @@
<!doctype html> <!doctype html>
<html lang="en-us"> <html lang="en-us">
<head> <head>
<title>command : Voice assistant example using Whisper + WebAssembly</title> <title>wchess : Voice assistant example using Whisper + WebAssembly</title>
<style> <style>
#output { #output {
@ -28,7 +28,7 @@
</head> </head>
<body onload="loadWhisper()"> <body onload="loadWhisper()">
<div id="main-container"> <div id="main-container">
<b>command : Voice assistant example using Whisper + WebAssembly</b> <b>wchess : Voice assistant example using Whisper + WebAssembly</b>
<br><br> <br><br>
@ -56,6 +56,14 @@
--> -->
</div> </div>
<br>
<div id="myBoard" style="width: 400px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('myBoard', 'start')
</script>
<br> <br>
<div id="input"> <div id="input">
@ -72,14 +80,6 @@
<pre id="state-moves">[The moves will be displayed here]</pre> <pre id="state-moves">[The moves will be displayed here]</pre>
</div> </div>
<br><br>
<div id="myBoard" style="width: 400px"></div>
<script src="js/jquery-3.7.1.min.js"></script>
<script src="js/chessboard-1.0.0.min.js"></script>
<script>
var board = Chessboard('myBoard', 'start')
</script>
<hr> <hr>
Debug output: Debug output:

View File

@ -1,19 +1,8 @@
#include "ggml.h" #include <WChess.h>
#include "common.h"
#include <emscripten.h>
#include <emscripten/bind.h> #include <emscripten/bind.h>
#include <WChess.h>
#include <atomic> #include <atomic>
#include <cmath>
#include <mutex>
#include <string>
#include <thread> #include <thread>
#include <vector>
#include <regex>
constexpr int N_THREAD = 8; constexpr int N_THREAD = 8;
@ -59,18 +48,7 @@ bool check_running() {
return g_running; return g_running;
} }
EMSCRIPTEN_BINDINGS(command) { void wchess_main(size_t i) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
if (g_contexts[i] != nullptr) {
g_running = true;
if (g_worker.joinable()) {
g_worker.join();
}
g_worker = std::thread([i]() {
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY); struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency()); wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
@ -96,18 +74,31 @@ EMSCRIPTEN_BINDINGS(command) {
printf("command: using %d threads\n", wparams.n_threads); printf("command: using %d threads\n", wparams.n_threads);
Chess(g_contexts[i], WChess::callbacks cb;
wparams, cb.set_status = set_status;
set_status, cb.check_running = check_running;
check_running, cb.get_audio = get_audio;
get_audio, cb.set_moves = set_moves;
set_moves).run();
WChess(g_contexts[i], wparams, cb, {}).run();
if (i < g_contexts.size()) { if (i < g_contexts.size()) {
whisper_free(g_contexts[i]); whisper_free(g_contexts[i]);
g_contexts[i] = nullptr; g_contexts[i] = nullptr;
} }
}
EMSCRIPTEN_BINDINGS(command) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
for (size_t i = 0; i < g_contexts.size(); ++i) {
if (g_contexts[i] == nullptr) {
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
if (g_contexts[i] != nullptr) {
g_running = true;
if (g_worker.joinable()) {
g_worker.join();
}
g_worker = std::thread([i]() {
wchess_main(i);
}); });
return i + 1; return i + 1;