Summary of this patch (text before the first "diff --git" line is ignored by patch tools):

  * Makefile: the "main" target runs "./main -h" after linking, the "run" target is
    removed, and the four model targets share a single rule that calls
    download-ggml-model.sh and uses $@ for the model name.
  * README.md: adds build/run/download instructions, a "Disk" column in the memory
    table, and a link to the download script.
  * download-ggml-model.sh: new script that validates the model name and downloads
    the matching ggml file into ./models, skipping the download if the file exists.
    A failed download removes the partial file so that a later run retries.
  * main.cpp: new default model and input paths; usage is printed when the model or
    the WAV file cannot be opened.

diff --git a/Makefile b/Makefile
index 1203e82..e725dc4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 main: ggml.o main.o
 	g++ -o main ggml.o main.o
+	./main -h
 
 ggml.o: ggml.c ggml.h
 	gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
@@ -11,11 +12,7 @@ main.o: main.cpp ggml.h
 clean:
 	rm -f *.o main
 
-# run the program
-run: main
-	./main
-
-# download the following audio samples into folder "./samples":
+# download a few audio samples into folder "./samples":
 .PHONY: samples
 samples:
 	@echo "Downloading samples..."
@@ -28,79 +25,20 @@ samples:
 	@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
 	@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
 
+
+# if not already downloaded, the following targets download the specified model and
+# run it on all samples in the folder "./samples":
+
 .PHONY: tiny.en
-tiny.en: main
-	@echo "Downloading tiny.en (75 MB just once)"
-	@mkdir -p models
-	@if [ ! -f models/ggml-tiny.en.bin ]; then \
-		wget --quiet --show-progress -O models/ggml-tiny.en.bin https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin ; \
-	fi
-	@echo ""
-	@echo "==============================================="
-	@echo "Running tiny.en on all samples in ./samples ..."
-	@echo "==============================================="
-	@echo ""
-	@for f in samples/*.wav; do \
-		echo "----------------------------------------------" ; \
-		echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
-		echo "----------------------------------------------" ; \
-		echo "" ; \
-		./main -m models/ggml-tiny.en.bin -f $$f ; \
-		echo "" ; \
-	done
-
 .PHONY: base.en
-base.en: main
-	@echo "Downloading base.en (142 MB just once)"
-	@mkdir -p models
-	@if [ ! -f models/ggml-base.en.bin ]; then \
-		wget --quiet --show-progress -O models/ggml-base.en.bin https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin ; \
-	fi
-	@echo ""
-	@echo "==============================================="
-	@echo "Running base.en on all samples in ./samples ..."
-	@echo "==============================================="
-	@echo ""
-	@for f in samples/*.wav; do \
-		echo "----------------------------------------------" ; \
-		echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
-		echo "----------------------------------------------" ; \
-		echo "" ; \
-		./main -m models/ggml-base.en.bin -f $$f ; \
-		echo "" ; \
-	done
-
-.PHONY: small.en
-small.en: main
-	@echo "Downloading small.en (466 MB just once)"
-	@mkdir -p models
-	@if [ ! -f models/ggml-small.en.bin ]; then \
-		wget --quiet --show-progress -O models/ggml-small.en.bin https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin ; \
-	fi
-	@echo ""
-	@echo "==============================================="
-	@echo "Running small.en on all samples in ./samples ..."
-	@echo "==============================================="
-	@echo ""
-	@for f in samples/*.wav; do \
-		echo "----------------------------------------------" ; \
-		echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
-		echo "----------------------------------------------" ; \
-		echo "" ; \
-		./main -m models/ggml-small.en.bin -f $$f ; \
-		echo "" ; \
-	done
-
 .PHONY: medium.en
-medium.en: main
-	@echo "Downloading medium.en (1.5 GB just once)"
-	@mkdir -p models
-	@if [ ! -f models/ggml-medium.en.bin ]; then \
-		wget --quiet --show-progress -O models/ggml-medium.en.bin https://ggml.ggerganov.com/ggml-model-whisper-medium.en.bin ; \
-	fi
+.PHONY: small.en
+
+tiny.en base.en medium.en small.en: main
+	bash ./download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
-	@echo "Running medium.en on all samples in ./samples ..."
+	@echo "Running $@ on all samples in ./samples ..."
 	@echo "==============================================="
 	@echo ""
 	@for f in samples/*.wav; do \
@@ -108,6 +46,6 @@ medium.en: main
-		echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
+		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
 		echo "----------------------------------------------" ; \
 		echo "" ; \
-		./main -m models/ggml-medium.en.bin -f $$f ; \
+		./main -m models/ggml-$@.bin -f $$f ; \
 		echo "" ; \
 	done
diff --git a/README.md b/README.md
index cb6e603..891a94a 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,20 @@ C/C++ port of [OpenAI's Whisper](https://github.com/openai/whisper) speech-to-te
 
 ## Usage
 
+To build the main program, run `make`. You can then transcribe a `.wav` file like this:
+
+```bash
+$ ./main -f input.wav
+```
+
+Before running the program, make sure to download one of the ggml Whisper models. For example:
+
+```bash
+bash ./download-ggml-model.sh base.en
+```
+
+---
+
 For a quick demo, simply run `make base.en`:
 
 ```bash
@@ -97,12 +111,12 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
 
 ## Memory usage
 
-| Model | Mem |
-| --- | --- |
-| tiny.en | ~600 MB |
-| base.en | ~800 MB |
-| small.en | ~1.6 GB |
-| medium.en | ~3.5 GB |
+| Model | Disk | Mem |
+| --- | --- | --- |
+| tiny.en | 75 MB | ~600 MB |
+| base.en | 142 MB | ~800 MB |
+| small.en | 466 MB | ~1.6 GB |
+| medium.en | 1.5 GB | ~3.5 GB |
 
 ## ggml format
 
@@ -113,4 +127,6 @@ The original models are converted to a custom binary format. This allows to pack
 - vocabulary
 - weights
 
+You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.
+
 For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)
diff --git a/download-ggml-model.sh b/download-ggml-model.sh
new file mode 100755
index 0000000..3d5fa50
--- /dev/null
+++ b/download-ggml-model.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# This script downloads Whisper model files that have already been converted to ggml format.
+# This way you don't have to convert them yourself.
+
+ggml_path=$(dirname "$(realpath "$0")")
+
+# Whisper models
+models=( "tiny.en" "base.en" "small.en" "medium.en" )
+
+# list available models
+function list_models {
+    printf "\n"
+    printf " Available models:"
+    for model in "${models[@]}"; do
+        printf " $model"
+    done
+    printf "\n\n"
+}
+
+if [ "$#" -ne 1 ]; then
+    printf "Usage: $0 <model>\n"
+    list_models
+
+    exit 1
+fi
+
+model=$1
+
+if [[ ! " ${models[@]} " =~ " ${model} " ]]; then
+    printf "Invalid model: $model\n"
+    list_models
+
+    exit 1
+fi
+
+# download ggml model
+
+printf "Downloading ggml model $model ...\n"
+
+mkdir -p models
+
+if [ -f "models/ggml-$model.bin" ]; then
+    printf "Model $model already exists. Skipping download.\n"
+    exit 0
+fi
+
+wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
+
+if [ $? -ne 0 ]; then
+    # wget -O creates the output file even when the download fails; remove it so the next run retries
+    rm -f "models/ggml-$model.bin"
+    printf "Failed to download ggml model $model \n"
+    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
+    exit 1
+fi
+
+printf "Done! Model '$model' saved in 'models/ggml-$model.bin'\n"
+printf "You can now use it like this:\n\n"
+printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n"
+printf "\n"
diff --git a/main.cpp b/main.cpp
index acbaa91..40835ba 100644
--- a/main.cpp
+++ b/main.cpp
@@ -117,9 +117,9 @@ struct whisper_params {
     bool verbose = false;
     bool print_special_tokens = false;
 
-    std::string model = "models/whisper-tiny.en/ggml-model.bin"; // model path
+    std::string model = "models/ggml-base.en.bin"; // model path
 
-    std::string fname_inp = "default.wav";
+    std::string fname_inp = "samples/jfk.wav";
 };
 
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@@ -156,6 +156,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
 }
 
 void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
+    fprintf(stderr, "\n");
     fprintf(stderr, "usage: %s [options]\n", argv[0]);
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
@@ -1898,7 +1899,6 @@ int main(int argc, char ** argv) {
     const int64_t t_main_start_us = ggml_time_us();
 
     whisper_params params;
-    params.model = "models/whisper-tiny.en/ggml-model.bin";
 
     if (whisper_params_parse(argc, argv, params) == false) {
         return 1;
@@ -1927,6 +1927,7 @@ int main(int argc, char ** argv) {
 
         if (!whisper_model_load(params.model, model, vocab)) {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
+            whisper_print_usage(argc, argv, {});
             return 1;
         }
 
@@ -1939,6 +1940,7 @@ int main(int argc, char ** argv) {
         drwav wav;
         if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
             fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
+            whisper_print_usage(argc, argv, {});
             return 2;
         }
 