mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-15 23:27:53 +02:00
whisper : support speaker segmentation (local diarization) of mono audio via tinydiarize (#1058)
* add HuggingFace mirror to download ggml model
* support tdrz via simple hack overriding solm tokens
* fix incorrect translate/transcribe token_ids that are not static const
* add apollo 13 sample for tdrz demo
* render [SPEAKER TURN] consistently in all terminal output using vocab.id_to_token
* extend whisper_segment with speaker_turn_next field and save in json output
* fix failing go build
* slipped in some python syntax whoops
* whisper : finalize tinydiarize support (add flag + fixes)
* whisper : tdrz support for word-level timestamps (respect max_len)
* java : try to fix tests after adding tdrz_enable flag
* main : remove TODO leftover
* java : fix params order list after adding "tdrz_enable"
* whisper : fix solm and add nosp token
* main : print tinydiarize help

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@ -270,13 +270,13 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
|
||||
}
|
||||
|
||||
// Task tokens
|
||||
func Whisper_token_translate() Token {
|
||||
return Token(C.whisper_token_translate())
|
||||
func (ctx *Context) Whisper_token_translate() Token {
|
||||
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
|
||||
}
|
||||
|
||||
// Task tokens
|
||||
func Whisper_token_transcribe() Token {
|
||||
return Token(C.whisper_token_transcribe())
|
||||
func (ctx *Context) Whisper_token_transcribe() Token {
|
||||
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
|
||||
}
|
||||
|
||||
// Performance information
|
||||
|
Reference in New Issue
Block a user