#!/bin/sh

# This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.

# Earlier values, left commented out for reference:
#src="https://ggml.ggerganov.com"
#pfx="ggml-model-whisper"

# Base URL and path prefix of the model files; a model is fetched from
# "$src/$pfx-<model>.bin".
src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml"

# Terminal escape sequences, stored as literal backslash escapes; they only
# take effect when printf expands them as part of a format string.
BOLD='\033[1m'
RESET='\033[0m'
# get the path of this script
#
# Prints the directory that contains this script ($0) on stdout.
# When an executable `realpath` is found it is used to resolve $0; otherwise
# the directory is entered in a subshell and `pwd -P` reports it.
get_script_path() {
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
    else
        # If the cd fails the subshell exits early and an empty line is printed.
        _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
        # printf instead of echo: some sh implementations let echo interpret
        # backslash sequences, which would corrupt paths containing them.
        printf '%s\n' "$_ret"
    fi
}
# Destination directory: the optional second argument, or the directory
# reported by get_script_path when it is missing or empty.
models_path="${2:-$(get_script_path)}"
# Whisper models
#
# One model name per line. The list is iterated by word splitting and searched
# line by line, so entries must not contain whitespace.
models="tiny
tiny.en
tiny-q5_1
tiny.en-q5_1
base
base.en
base-q5_1
base.en-q5_1
small
small.en
small.en-tdrz
small-q5_1
small.en-q5_1
medium
medium.en
medium-q5_0
medium.en-q5_0
large-v1
large-v2
large-v2-q5_0
large-v3
large-v3-q5_0
large-v3-turbo
large-v3-turbo-q5_0"
# list available models
#
# Prints "Available models:" followed by every entry of $models. A new output
# line is started whenever the model class (the name up to its first '.' or
# '-') differs from that of the previous entry.
list_models() {
    printf "\n"
    printf "Available models:"
    _lm_class=""
    # $models is split on whitespace on purpose. The loop uses its own variable
    # so that the script-level $model is not overwritten by listing the models.
    for _lm_name in $models; do
        _lm_this_class="${_lm_name%%[.-]*}"
        if [ "$_lm_this_class" != "$_lm_class" ]; then
            printf "\n "
            _lm_class=$_lm_this_class
        fi
        printf " %s" "$_lm_name"
    done
    printf "\n\n"
}
# Exactly one or two arguments are accepted (<model> and an optional
# models_path). Anything else prints the usage line, the model list and a
# legend of the name suffixes, then exits with status 1.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    printf "Usage: %s <model> [models_path]\n" "$0"
    list_models
    printf "___________________________________________________________\n"
    # BOLD/RESET are placed in the format string so printf expands their \033 escapes.
    printf "${BOLD}.en${RESET} = english-only ${BOLD}-q5_[01]${RESET} = quantized ${BOLD}-tdrz${RESET} = tinydiarize\n"
    exit 1
fi
model=$1

# Accept only names that equal one complete line of $models.
#   -x  the whole line must match, so fragments such as "en" or "large" are rejected
#   -F  the name is a fixed string, so '.' is not treated as a regex wildcard
#   -e  the name is always taken as the pattern, even if it starts with '-'
if ! printf '%s\n' "$models" | grep -q -x -F -e "$model"; then
    printf "Invalid model: %s\n" "$model"
    list_models
    exit 1
fi
# check if model contains `tdrz` and update the src and pfx accordingly
case "$model" in
    *tdrz*)
        src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
        pfx="resolve/main/ggml"
        ;;
esac
# download ggml model

printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"

# Work inside the destination directory so the output file name can be
# relative. If the directory cannot be entered, exit with cd's status.
# `--` keeps a path that starts with '-' from being parsed as an option.
cd -- "$models_path" || exit

# Leave an existing file untouched and report success.
if [ -f "ggml-$model.bin" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi
# Fetch "$src/$pfx-$model.bin" into ggml-$model.bin in the current directory
# with the first downloader that is available: wget2, then wget, then curl.
_url="$src/$pfx-$model.bin"
_out="ggml-$model.bin"

if [ -x "$(command -v wget2)" ]; then
    wget2 --no-config --progress bar -O "$_out" "$_url"
elif [ -x "$(command -v wget)" ]; then
    wget --no-config --quiet --show-progress -O "$_out" "$_url"
elif [ -x "$(command -v curl)" ]; then
    # --fail turns an HTTP error (e.g. 404) into a non-zero exit status instead
    # of saving the server's error page as the model file.
    curl -L --fail --output "$_out" "$_url"
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi
# Status of the downloader that ran in the branch above.
download_status=$?

if [ "$download_status" -ne 0 ]; then
    # Drop the empty or partial output so that a later run does not take it
    # for a finished download.
    rm -f -- "$_out"
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi
# Report where the model was stored and print an example invocation.
printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n"
printf " $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
printf "\n"