mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-25 11:48:34 +02:00
examples : fix + refactor Levenshtein distance
This commit is contained in:
parent
794b162a46
commit
4a7d49af95
@ -28,31 +28,6 @@ std::string g_transcribed = "";
|
|||||||
|
|
||||||
std::vector<float> g_pcmf32;
|
std::vector<float> g_pcmf32;
|
||||||
|
|
||||||
// compute similarity between two strings using Levenshtein distance
//
// returns 1.0f - dist/max(s0.size(), s1.size()), where dist is the number of
// single-character insertions, deletions and substitutions needed to turn s0 into s1:
//   1.0f -> the strings are identical (two empty strings included)
//   0.0f -> every character of the longer string has to be edited
static float similarity(const std::string & s0, const std::string & s1) {
    const size_t n0 = s0.size();
    const size_t n1 = s1.size();

    // two empty strings are identical - return early to avoid computing 0/0 (NaN) below
    const size_t n_max = std::max(n0, n1);
    if (n_max == 0) {
        return 1.0f;
    }

    // only two rows of the distance table are kept:
    //   prev_row[j] - distance between the first (i - 1) chars of s0 and the first j chars of s1
    //   curr_row[j] - distance between the first i chars of s0 and the first j chars of s1
    std::vector<size_t> prev_row(n1 + 1);
    std::vector<size_t> curr_row(n1 + 1);

    // row 0: building the first j chars of s1 from an empty prefix takes j insertions
    for (size_t j = 0; j <= n1; j++) {
        prev_row[j] = j;
    }

    // start at row 1 - row 0 is already stored in prev_row, so s0[i - 1] never indexes out of bounds
    for (size_t i = 1; i <= n0; i++) {
        curr_row[0] = i; // dropping the first i chars of s0 takes i deletions

        for (size_t j = 1; j <= n1; j++) {
            const size_t cost = s0[i - 1] == s1[j - 1] ? 0 : 1;

            curr_row[j] = std::min(std::min(curr_row[j - 1] + 1, prev_row[j] + 1), prev_row[j - 1] + cost);
        }

        curr_row.swap(prev_row);
    }

    // after the final swap the last computed row lives in prev_row
    const float dist = static_cast<float>(prev_row[n1]);

    return 1.0f - (dist / n_max);
}
|
|
||||||
|
|
||||||
void command_set_status(const std::string & status) {
|
void command_set_status(const std::string & status) {
|
||||||
std::lock_guard<std::mutex> lock(g_mutex);
|
std::lock_guard<std::mutex> lock(g_mutex);
|
||||||
g_status = status;
|
g_status = status;
|
||||||
|
@ -163,31 +163,6 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute similarity between two strings using Levenshtein distance
//
// returns 1.0f - dist/max(s0.size(), s1.size()), where dist is the number of
// single-character insertions, deletions and substitutions needed to turn s0 into s1:
//   1.0f -> the strings are identical (two empty strings included)
//   0.0f -> every character of the longer string has to be edited
float similarity(const std::string & s0, const std::string & s1) {
    const size_t n0 = s0.size();
    const size_t n1 = s1.size();

    // two empty strings are identical - return early to avoid computing 0/0 (NaN) below
    const size_t n_max = std::max(n0, n1);
    if (n_max == 0) {
        return 1.0f;
    }

    // only two rows of the distance table are kept:
    //   prev_row[j] - distance between the first (i - 1) chars of s0 and the first j chars of s1
    //   curr_row[j] - distance between the first i chars of s0 and the first j chars of s1
    std::vector<size_t> prev_row(n1 + 1);
    std::vector<size_t> curr_row(n1 + 1);

    // row 0: building the first j chars of s1 from an empty prefix takes j insertions
    for (size_t j = 0; j <= n1; j++) {
        prev_row[j] = j;
    }

    // start at row 1 - row 0 is already stored in prev_row, so s0[i - 1] never indexes out of bounds
    for (size_t i = 1; i <= n0; i++) {
        curr_row[0] = i; // dropping the first i chars of s0 takes i deletions

        for (size_t j = 1; j <= n1; j++) {
            const size_t cost = s0[i - 1] == s1[j - 1] ? 0 : 1;

            curr_row[j] = std::min(std::min(curr_row[j - 1] + 1, prev_row[j] + 1), prev_row[j - 1] + cost);
        }

        curr_row.swap(prev_row);
    }

    // after the final swap the last computed row lives in prev_row
    const float dist = static_cast<float>(prev_row[n1]);

    return 1.0f - (dist / n_max);
}
|
|
||||||
|
|
||||||
std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
std::vector<std::string> read_allowed_commands(const std::string & fname) {
|
||||||
std::vector<std::string> allowed_commands;
|
std::vector<std::string> allowed_commands;
|
||||||
|
|
||||||
|
@ -479,3 +479,27 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compute similarity between two strings using Levenshtein distance
//
// returns 1.0f - dist/max(s0.size(), s1.size()), where dist is the number of
// single-character insertions, deletions and substitutions needed to turn s0 into s1:
//   1.0f -> the strings are identical (two empty strings included)
//   0.0f -> every character of the longer string has to be edited
float similarity(const std::string & s0, const std::string & s1) {
    const size_t n0 = s0.size();
    const size_t n1 = s1.size();

    // two empty strings are identical - return early to avoid computing 0/0 (NaN) below
    const size_t n_max = std::max(n0, n1);
    if (n_max == 0) {
        return 1.0f;
    }

    // only two rows of the distance table are kept:
    //   prev_row[j] - distance between the first (i - 1) chars of s0 and the first j chars of s1
    //   curr_row[j] - distance between the first i chars of s0 and the first j chars of s1
    std::vector<size_t> prev_row(n1 + 1);
    std::vector<size_t> curr_row(n1 + 1);

    // row 0: building the first j chars of s1 from an empty prefix takes j insertions
    for (size_t j = 0; j <= n1; j++) {
        prev_row[j] = j;
    }

    // start at row 1 - row 0 is already stored in prev_row, so no "i > 0" guard is
    // needed and s0[i - 1] is always a valid index
    for (size_t i = 1; i <= n0; i++) {
        curr_row[0] = i; // dropping the first i chars of s0 takes i deletions

        for (size_t j = 1; j <= n1; j++) {
            const size_t cost = s0[i - 1] == s1[j - 1] ? 0 : 1;

            curr_row[j] = std::min(std::min(curr_row[j - 1] + 1, prev_row[j] + 1), prev_row[j - 1] + cost);
        }

        curr_row.swap(prev_row);
    }

    // after the final swap the last computed row lives in prev_row
    const float dist = static_cast<float>(prev_row[n1]);

    return 1.0f - (dist / n_max);
}
|
||||||
|
@ -118,3 +118,5 @@ bool vad_simple(
|
|||||||
float freq_thold,
|
float freq_thold,
|
||||||
bool verbose);
|
bool verbose);
|
||||||
|
|
||||||
|
// compute similarity between two strings using Levenshtein distance
// the result is 1.0f - distance/max(s0.size(), s1.size())
|
||||||
|
float similarity(const std::string & s0, const std::string & s1);
|
||||||
|
Loading…
Reference in New Issue
Block a user