mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-16 03:08:32 +02:00
whisper : support speaker segmentation (local diarization) of mono audio via tinydiarize (#1058)
* add HuggingFace mirror to download ggml model
* support tdrz via simple hack overriding solm tokens
* fix incorrect translate/transcribe token_ids that are not static const
* add apollo 13 sample for tdrz demo
* render [SPEAKER TURN] consistently in all terminal output using vocab.id_to_token
* extend whisper_segment with speaker_turn_next field and save in json output
* fix failing go build
* slipped in some python syntax whoops
* whisper : finalize tinydiarize support (add flag + fixes)
* whisper : tdrz support for word-level timestamps (respect max_len)
* java : try to fix tests after adding tdrz_enable flag
* main : remove TODO leftover
* java : fix params order list after adding "tdrz_enable"
* whisper : fix solm and add nosp token
* main : print tinydiarize help

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@ -22,7 +22,7 @@ function get_script_path() {
|
||||
models_path="$(get_script_path)"
|
||||
|
||||
# Whisper models
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small.en-tdrz" "small" "medium.en" "medium" "large-v1" "large" )
|
||||
|
||||
# list available models
|
||||
function list_models {
|
||||
@ -50,6 +50,12 @@ if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# tinydiarize ("tdrz") models: if the requested model name contains the
# substring `tdrz` anywhere (glob match, so e.g. "small.en-tdrz" qualifies),
# override `src` with the tinydiarize Hugging Face repository and `pfx` with
# the path prefix to use under it.
# NOTE(review): the code that consumes `src`/`pfx` lies outside this hunk;
# presumably they are combined into the download URL — confirm there.
|
||||
if [[ $model == *"tdrz"* ]]; then
|
||||
src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
|
||||
pfx="resolve/main/ggml"
|
||||
fi
|
||||
|
||||
# download ggml model
|
||||
|
||||
# Announce which model is being fetched and from which source.
# The values are passed as printf arguments rather than interpolated into the
# format string, so a `%` or backslash in either one is printed literally
# instead of being interpreted as a format directive.
printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
|
||||
|
Reference in New Issue
Block a user