examples : add --print-confidence option to cli (#3150)

* examples : add --print-confidence option to cli

This commit adds a new command-line option `--print-confidence` to the
whisper-cli. When enabled, this option prints the confidence level of each
token in the transcribed text using ANSI formatting codes.

The confidence levels are represented using different styles:
```console
main: confidence: highlighted (low confidence), underlined (medium), dim (high confidence)
```

Refs: https://github.com/ggml-org/whisper.cpp/issues/3135
This commit is contained in:
Daniel Bevenius 2025-05-14 19:21:48 +02:00 committed by GitHub
parent 96d791ae61
commit f389d7e3e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 45 additions and 0 deletions

View File

@ -70,6 +70,7 @@ struct whisper_params {
bool no_prints = false;
bool print_special = false;
bool print_colors = false;
bool print_confidence= false;
bool print_progress = false;
bool no_timestamps = false;
bool log_score = false;
@ -179,6 +180,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if (arg == "-np" || arg == "--no-prints") { params.no_prints = true; }
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
else if ( arg == "--print-confidence"){ params.print_confidence= true; }
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
else if (arg == "-l" || arg == "--language") { params.language = whisper_param_turn_lowercase(ARGV_NEXT); }
@ -257,6 +259,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " -np, --no-prints [%-7s] do not print anything other than the results\n", params.no_prints ? "true" : "false");
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
fprintf(stderr, " --print-confidence [%-7s] print confidence\n", params.print_confidence ? "true" : "false");
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
@ -386,6 +389,26 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
}
} else if (params.print_confidence) {
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
if (params.print_special == false) {
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
if (id >= whisper_token_eot(ctx)) {
continue;
}
}
const char * text = whisper_full_get_token_text(ctx, i, j);
const float p = whisper_full_get_token_p (ctx, i, j);
int style_idx = 2; // High confidence - dim
if (p < 0.33) {
style_idx = 0; // Low confidence - inverse (highlighted)
} else if (p < 0.66) {
style_idx = 1; // Medium confidence - underlined
}
printf("%s%s%s%s", speaker.c_str(), k_styles[style_idx].c_str(), text, "\033[0m");
}
} else {
const char * text = whisper_full_get_segment_text(ctx, i);
@ -1115,6 +1138,8 @@ int main(int argc, char ** argv) {
if (params.print_colors) {
fprintf(stderr, "%s: color scheme: red (low confidence), yellow (medium), green (high confidence)\n", __func__);
} else if (params.print_confidence) {
fprintf(stderr, "%s: confidence: highlighted (low confidence), underlined (medium), dim (high confidence)\n", __func__);
}
fprintf(stderr, "\n");
}

View File

@ -294,6 +294,26 @@ const std::vector<std::string> k_colors = {
set_xterm256_foreground( 78, 178, 101),
};
// ANSI formatting codes
static std::string set_inverse() {
return "\033[7m";
}
static std::string set_underline() {
return "\033[4m";
}
static std::string set_dim() {
return "\033[2m";
}
// Style scheme for different confidence levels
const std::vector<std::string> k_styles = {
set_inverse(), // Low confidence - inverse (highlighted)
set_underline(), // Medium confidence - underlined
set_dim(), // High confidence - dim
};
//
// Other utils
//