Update README.md and simplify usage

This commit is contained in:
Georgi Gerganov 2022-09-26 09:36:51 +03:00
parent f2456f8d93
commit 476182e439
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
4 changed files with 98 additions and 83 deletions

View File

@ -1,5 +1,6 @@
main: ggml.o main.o main: ggml.o main.o
g++ -o main ggml.o main.o g++ -o main ggml.o main.o
./main -h
ggml.o: ggml.c ggml.h ggml.o: ggml.c ggml.h
gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c gcc -O3 -mavx -mavx2 -mfma -mf16c -c ggml.c
@ -11,11 +12,7 @@ main.o: main.cpp ggml.h
clean: clean:
rm -f *.o main rm -f *.o main
# run the program # download a few audio samples into folder "./samples":
run: main
./main
# download the following audio samples into folder "./samples":
.PHONY: samples .PHONY: samples
samples: samples:
@echo "Downloading samples..." @echo "Downloading samples..."
@ -28,79 +25,20 @@ samples:
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav @ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav @ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
# if not already downloaded, the following targets download the specified model and
# runs it on all samples in the folder "./samples":
.PHONY: tiny.en .PHONY: tiny.en
tiny.en: main
@echo "Downloading tiny.en (75 MB just once)"
@mkdir -p models
@if [ ! -f models/ggml-tiny.en.bin ]; then \
wget --quiet --show-progress -O models/ggml-tiny.en.bin https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin ; \
fi
@echo ""
@echo "==============================================="
@echo "Running tiny.en on all samples in ./samples ..."
@echo "==============================================="
@echo ""
@for f in samples/*.wav; do \
echo "----------------------------------------------" ; \
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \
echo "" ; \
./main -m models/ggml-tiny.en.bin -f $$f ; \
echo "" ; \
done
.PHONY: base.en .PHONY: base.en
base.en: main
@echo "Downloading base.en (142 MB just once)"
@mkdir -p models
@if [ ! -f models/ggml-base.en.bin ]; then \
wget --quiet --show-progress -O models/ggml-base.en.bin https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin ; \
fi
@echo ""
@echo "==============================================="
@echo "Running base.en on all samples in ./samples ..."
@echo "==============================================="
@echo ""
@for f in samples/*.wav; do \
echo "----------------------------------------------" ; \
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \
echo "" ; \
./main -m models/ggml-base.en.bin -f $$f ; \
echo "" ; \
done
.PHONY: small.en
small.en: main
@echo "Downloading small.en (466 MB just once)"
@mkdir -p models
@if [ ! -f models/ggml-small.en.bin ]; then \
wget --quiet --show-progress -O models/ggml-small.en.bin https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin ; \
fi
@echo ""
@echo "==============================================="
@echo "Running small.en on all samples in ./samples ..."
@echo "==============================================="
@echo ""
@for f in samples/*.wav; do \
echo "----------------------------------------------" ; \
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \
echo "" ; \
./main -m models/ggml-small.en.bin -f $$f ; \
echo "" ; \
done
.PHONY: medium.en .PHONY: medium.en
medium.en: main .PHONY: small.en
@echo "Downloading medium.en (1.5 GB just once)"
@mkdir -p models tiny.en base.en medium.en small.en: main
@if [ ! -f models/ggml-medium.en.bin ]; then \ bash ./download-ggml-model.sh $@
wget --quiet --show-progress -O models/ggml-medium.en.bin https://ggml.ggerganov.com/ggml-model-whisper-medium.en.bin ; \
fi
@echo "" @echo ""
@echo "===============================================" @echo "==============================================="
@echo "Running medium.en on all samples in ./samples ..." @echo "Running $@ on all samples in ./samples ..."
@echo "===============================================" @echo "==============================================="
@echo "" @echo ""
@for f in samples/*.wav; do \ @for f in samples/*.wav; do \
@ -108,6 +46,6 @@ medium.en: main
echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \ echo "[+] Running base.en on $$f ... (run 'ffplay $$f' to listen)" ; \
echo "----------------------------------------------" ; \ echo "----------------------------------------------" ; \
echo "" ; \ echo "" ; \
./main -m models/ggml-medium.en.bin -f $$f ; \ ./main -m models/ggml-$@.bin -f $$f ; \
echo "" ; \ echo "" ; \
done done

View File

@ -8,6 +8,20 @@ C/C++ port of [OpenAI's Whisper](https://github.com/openai/whisper) speech-to-te
## Usage ## Usage
To build the main program, run `make`. You can then transcribe a `.wav` file like this:
```bash
$ ./main -f input.wav
```
Before running the program, make sure to download one of the ggml Whisper models. For example:
```bash
bash ./download-ggml-model.sh base.en
```
---
For a quick demo, simply run `make base.en`: For a quick demo, simply run `make base.en`:
```bash ```bash
@ -97,12 +111,12 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
## Memory usage ## Memory usage
| Model | Mem | | Model | Disk | Mem |
| --- | --- | | --- | --- | --- |
| tiny.en | ~600 MB | | tiny.en | 75 MB | ~600 MB |
| base.en | ~800 MB | | base.en | 142 MB | ~800 MB |
| small.en | ~1.6 GB | | small.en | 466 MB | ~1.6 GB |
| medium.en | ~3.5 GB | | medium.en | 1.5 GB | ~3.5 GB |
## ggml format ## ggml format
@ -113,4 +127,6 @@ The original models are converted to a custom binary format. This allows to pack
- vocabulary - vocabulary
- weights - weights
You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.
For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py)

59
download-ggml-model.sh Executable file
View File

@ -0,0 +1,59 @@
#!/bin/bash
# This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself.
# Directory containing this script. Both expansions are quoted so that a
# checkout path containing spaces is not word-split into several arguments.
ggml_path=$(dirname "$(realpath "$0")")

# Whisper models
# Names of the pre-converted models this script accepts as its argument.
models=( "tiny.en" "base.en" "small.en" "medium.en" )
# list available models
# Print the names from the global "models" array on a single line,
# surrounded by blank lines. Takes no arguments.
function list_models {
    # Keep the loop variable local: the script also uses a global named
    # "model", which this loop would otherwise overwrite.
    local model

    printf "\n"
    printf " Available models:"
    for model in "${models[@]}"; do
        # Pass the name as an argument rather than as the format string, so
        # a '%' or '\' in a name is printed literally.
        printf " %s" "$model"
    done
    printf "\n\n"
}
# Require exactly one argument: the name of the model to download.
if [ "$#" -ne 1 ]; then
    printf "Usage: %s <model>\n" "$0"
    list_models

    exit 1
fi

model=$1

# Accept the argument only if it is exactly equal to one of the known model
# names. A substring test against the space-joined list would also accept an
# argument that itself contains spaces, such as "tiny.en base.en".
model_is_known=0
for known_model in "${models[@]}"; do
    if [ "$known_model" = "$model" ]; then
        model_is_known=1
        break
    fi
done

if [ "$model_is_known" -ne 1 ]; then
    printf "Invalid model: %s\n" "$model"
    list_models

    exit 1
fi
# download ggml model

printf "Downloading ggml model %s ...\n" "$model"

mkdir -p models

model_file="models/ggml-$model.bin"
model_part="$model_file.part"

# A file at the final path is treated as a completed download.
if [ -f "$model_file" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

# Download to a temporary name and rename only on success. Writing straight
# to the final path would leave a partial file behind after a failed or
# interrupted transfer, and the check above would then skip the download on
# every later run.
if ! wget --quiet --show-progress -O "$model_part" "https://ggml.ggerganov.com/ggml-model-whisper-$model.bin"; then
    rm -f "$model_part"
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

if ! mv "$model_part" "$model_file"; then
    rm -f "$model_part"
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

printf "Done! Model '%s' saved in '%s'\n" "$model" "$model_file"
printf "You can now use it like this:\n\n"
printf " $ ./main -m %s -f samples/jfk.wav\n" "$model_file"
printf "\n"

View File

@ -117,9 +117,9 @@ struct whisper_params {
bool verbose = false; bool verbose = false;
bool print_special_tokens = false; bool print_special_tokens = false;
std::string model = "models/whisper-tiny.en/ggml-model.bin"; // model path std::string model = "models/ggml-base.en.bin"; // model path
std::string fname_inp = "default.wav"; std::string fname_inp = "samples/jfk.wav";
}; };
void whisper_print_usage(int argc, char ** argv, const whisper_params & params); void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -156,6 +156,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
} }
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "options:\n"); fprintf(stderr, "options:\n");
@ -1898,7 +1899,6 @@ int main(int argc, char ** argv) {
const int64_t t_main_start_us = ggml_time_us(); const int64_t t_main_start_us = ggml_time_us();
whisper_params params; whisper_params params;
params.model = "models/whisper-tiny.en/ggml-model.bin";
if (whisper_params_parse(argc, argv, params) == false) { if (whisper_params_parse(argc, argv, params) == false) {
return 1; return 1;
@ -1927,6 +1927,7 @@ int main(int argc, char ** argv) {
if (!whisper_model_load(params.model, model, vocab)) { if (!whisper_model_load(params.model, model, vocab)) {
fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
whisper_print_usage(argc, argv, {});
return 1; return 1;
} }
@ -1939,6 +1940,7 @@ int main(int argc, char ** argv) {
drwav wav; drwav wav;
if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) { if (!drwav_init_file(&wav, params.fname_inp.c_str(), NULL)) {
fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str()); fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], params.fname_inp.c_str());
whisper_print_usage(argc, argv, {});
return 2; return 2;
} }