mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-12 05:18:54 +02:00
Expose more ctx->vocab interfaces.
I need these functions to implement a kind of weighting coefficient logits_filter_callback like: ``` void filter_callback( struct whisper_context * ctx, struct whisper_state * state, const whisper_token_data * tokens, int n_tokens, float * logits, void * user_data ) { const static std::vector<std::string> good_words = { "音声", "認識" }; std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv; auto prev = n_tokens > 0 ? std::string(whisper_token_to_str(ctx, tokens[n_tokens - 1].id)) : ""; for (const std::string & token : good_words) { auto s32 = conv.from_bytes(token); auto s0 = conv.to_bytes(s32[0]); auto s1 = conv.to_bytes(s32[1]); if (whisper_token_exists(ctx, token.c_str())) { logits[whisper_str_to_token(ctx, token.c_str())] *= 2; } else if ( prev.size() >= s0.size() && prev.compare(prev.size() - s0.size(), s0.size(), s0) == 0 && whisper_token_exists(ctx, s1.c_str()) ) { logits[whisper_str_to_token(ctx, s1.c_str())] *= 1.6; } else if (whisper_token_exists(ctx, s0.c_str())) { logits[whisper_str_to_token(ctx, s0.c_str())] *= 1.2; } } } ```
This commit is contained in:
parent
e4e05981d6
commit
d0f38def08
@ -408,6 +408,9 @@ extern "C" {
|
||||
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
||||
WHISPER_API const char * whisper_model_type_readable(struct whisper_context * ctx);
|
||||
|
||||
// String -> Token Id lookup, plus an existence check for the string. Both use the vocabulary in the provided context
|
||||
// Returns true when `str` is a key of the context vocabulary's string -> id table
// (ctx->vocab.token_to_id), false otherwise.
WHISPER_API bool whisper_token_exists(struct whisper_context * ctx, const char * str);
|
||||
// Returns the token id stored for `str` in the context vocabulary (ctx->vocab.token_to_id).
// The implementation looks the string up with .at(), so `str` is expected to be a known token;
// call whisper_token_exists() first when that is not guaranteed.
// NOTE(review): if token_to_id is a standard map, a miss throws std::out_of_range - confirm
WHISPER_API whisper_token whisper_str_to_token(struct whisper_context * ctx, const char * str);
|
||||
|
||||
// Special tokens
|
||||
// Returns the id held in the context vocabulary's token_eot field
WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
|
||||
|
@ -4068,6 +4068,14 @@ const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token to
|
||||
return ctx->vocab.id_to_token.at(token).c_str();
|
||||
}
|
||||
|
||||
// Look up the token id that the context's vocabulary associates with `str`.
//
// ctx : context whose vocab.token_to_id table is consulted
// str : NUL-terminated token text used as the lookup key
//
// The lookup goes through .at(), so an unknown string is never silently mapped
// to a default id. Use whisper_token_exists() beforehand when `str` may be absent.
// NOTE(review): with a standard map, .at() throws std::out_of_range on a miss - confirm
whisper_token whisper_str_to_token(struct whisper_context * ctx, const char * str) {
    const auto & str_to_id = ctx->vocab.token_to_id;
    return str_to_id.at(str);
}
|
||||
|
||||
bool whisper_token_exists(struct whisper_context * ctx, const char * str) {
|
||||
return ctx->vocab.token_to_id.find(str) != ctx->vocab.token_to_id.end();
|
||||
}
|
||||
|
||||
// Accessor for the token_eot id stored in the context's vocabulary.
whisper_token whisper_token_eot(struct whisper_context * ctx) {
    const auto & vocab = ctx->vocab;
    return vocab.token_eot;
}
|
||||
|
Loading…
Reference in New Issue
Block a user