mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-04-27 12:49:28 +02:00)

commit e9b76cb34a: Merge 4c60e6b0c1 into 6e7629b146
@@ -482,13 +482,14 @@ extern "C" {
         int duration_ms;           // audio duration to process in ms

         bool translate;
         bool no_context;           // do not use past transcription (if any) as initial prompt for the decoder
+        bool carry_initial_prompt; // carry the initial prompt to the next call
         bool no_timestamps;        // do not generate timestamps
         bool single_segment;       // force single segment output (useful for streaming)
         bool print_special;        // print special tokens (e.g. <SOT>, <EOT>, <BEG>, etc.)
         bool print_progress;       // print progress information
         bool print_realtime;       // print results from within whisper.cpp (avoid it, use callback instead)
         bool print_timestamps;     // print timestamps for each text segment when printing realtime

         // [EXPERIMENTAL] token-level timestamps
         bool token_timestamps;     // enable token-level timestamps
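For context, a minimal caller-side sketch of how the new flag would pair with the existing initial_prompt field. Everything except carry_initial_prompt is the stock whisper.cpp API; the model path, prompt text, and silent PCM buffer are placeholders:

// Hypothetical usage sketch; "model.bin" and the prompt text are placeholders.
#include "whisper.h"
#include <vector>

int main() {
    struct whisper_context * ctx = whisper_init_from_file_with_params(
            "model.bin", whisper_context_default_params());
    if (ctx == nullptr) {
        return 1;
    }

    std::vector<float> pcm(16000*30, 0.0f); // stand-in for 30 s of 16 kHz audio

    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.initial_prompt       = "Glossary: GGML, GGUF, quantization"; // domain vocabulary hint
    wparams.carry_initial_prompt = true; // re-inject the hint on every decoder call (this commit)

    if (whisper_full(ctx, wparams, pcm.data(), (int) pcm.size()) != 0) {
        whisper_free(ctx);
        return 1;
    }

    whisper_free(ctx);
    return 0;
}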
@@ -4793,14 +4793,15 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
         /*.offset_ms            =*/ 0,
         /*.duration_ms          =*/ 0,

         /*.translate            =*/ false,
         /*.no_context           =*/ true,
+        /*.carry_initial_prompt =*/ false,
         /*.no_timestamps        =*/ false,
         /*.single_segment       =*/ false,
         /*.print_special        =*/ false,
         /*.print_progress       =*/ true,
         /*.print_realtime       =*/ false,
         /*.print_timestamps     =*/ true,

         /*.token_timestamps     =*/ false,
         /*.thold_pt             =*/ 0.01f,
@@ -5601,6 +5602,7 @@ int whisper_full_with_state(
             prompt_tokens.resize(-n_needed);
             n_needed = whisper_tokenize(ctx, params.initial_prompt, prompt_tokens.data(), prompt_tokens.size());
         }
+        remaining_prompt_length = ctx->model.hparams.n_text_ctx / 2 - 1 - initial_prompt_tokens.size();
         prompt_tokens.resize(n_needed);
         params.prompt_tokens   = prompt_tokens.data();
         params.prompt_n_tokens = prompt_tokens.size();
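For scale: every released Whisper checkpoint uses a decoder text context of n_text_ctx = 448 tokens, so this budget works out to 448/2 - 1 - n_initial = 223 - n_initial tokens of past context that can still be carried alongside an initial prompt of n_initial tokens, matching the n_text_ctx/2 ceiling that the n_take computation below already enforces.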
@@ -5757,9 +5759,19 @@ int whisper_full_with_state(
         // init prompt and kv cache for the current iteration
         // TODO: do not recompute the prompt if it is the same as previous time
         {
-            prompt.clear();
+            if (params.carry_initial_prompt) {
+                // Prepend initial_prompt_tokens to the prompt and keep as much of the
+                // past context as the remaining_prompt_length budget allows
+                int nignored = std::min((int) initial_prompt_tokens.size(), (int) prompt_past.size());
+                std::vector<whisper_token> remaining_prompt(prompt_past.begin() + nignored, prompt_past.end());
+                remaining_prompt.resize(std::min(remaining_prompt.size(), remaining_prompt_length));
+                prompt.clear();
+                prompt.insert(prompt.end(), initial_prompt_tokens.begin(), initial_prompt_tokens.end());
+                prompt.insert(prompt.end(), remaining_prompt.begin(), remaining_prompt.end());
+            } else {
+                prompt.clear();
+            }

             // if we have already generated some text, use it as a prompt to condition the next generation
             if (!prompt_past.empty() && t_cur < 0.5f && params.n_max_text_ctx > 0) {
                 int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

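To make the behavior of the new block easy to verify in isolation, here is a self-contained sketch of the same prepend-and-truncate logic on plain int tokens. The helper name build_carried_prompt and the toy token values are illustrative only, not part of whisper.cpp:

// Standalone sketch of the carry logic above (illustrative names and values).
#include <algorithm>
#include <cstdio>
#include <vector>

static std::vector<int> build_carried_prompt(
        const std::vector<int> & initial,  // tokens of params.initial_prompt
        const std::vector<int> & past,     // prompt_past: carried initial tokens + generated text
        size_t                   budget) { // remaining_prompt_length
    // skip the carried initial tokens already sitting at the head of the past context
    const size_t nignored = std::min(initial.size(), past.size());
    std::vector<int> remaining(past.begin() + nignored, past.end());
    remaining.resize(std::min(remaining.size(), budget)); // truncate the past context to the budget

    std::vector<int> prompt;
    prompt.insert(prompt.end(), initial.begin(), initial.end());
    prompt.insert(prompt.end(), remaining.begin(), remaining.end());
    return prompt;
}

int main() {
    const std::vector<int> initial = {11, 12, 13};             // carried prompt
    const std::vector<int> past    = {11, 12, 13, 50, 51, 52}; // past = initial + generated
    for (int t : build_carried_prompt(initial, past, 2)) {
        printf("%d ", t); // prints: 11 12 13 50 51 -> initial kept, past truncated to budget
    }
    printf("\n");
    return 0;
}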