From 2ef717b293fe93872cc3a03ca77942936a281959 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 1 Oct 2024 15:57:06 +0300 Subject: [PATCH] whisper : add large-v3-turbo (#2440) --- .gitignore | 1 + Makefile | 3 +- README.md | 1 + .../go/examples/go-model-download/main.go | 2 +- examples/livestream.sh | 2 +- examples/twitch.sh | 2 +- models/README.md | 34 ++++++++++--------- models/convert-h5-to-coreml.py | 4 +-- models/convert-whisper-to-coreml.py | 4 +-- models/convert-whisper-to-openvino.py | 4 +-- models/download-coreml-model.sh | 2 +- models/download-ggml-model.cmd | 2 +- models/download-ggml-model.sh | 4 ++- scripts/bench.py | 1 + scripts/convert-all.sh | 2 +- tests/run-tests.sh | 2 +- 16 files changed, 39 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 44648fa7..8301c12b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .cache/ .coreml/ .test/ +.venv/ .vs/ .vscode/ .DS_Store diff --git a/Makefile b/Makefile index 61de7dfe..3c69aa85 100644 --- a/Makefile +++ b/Makefile @@ -1145,8 +1145,9 @@ samples: .PHONY: large-v1 .PHONY: large-v2 .PHONY: large-v3 +.PHONY: large-v3-turbo -tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main +tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo: main bash ./models/download-ggml-model.sh $@ @echo "" @echo "===============================================" diff --git a/README.md b/README.md index 808717cb..7c5b5d17 100644 --- a/README.md +++ b/README.md @@ -236,6 +236,7 @@ make medium make large-v1 make large-v2 make large-v3 +make large-v3-turbo ``` ## Memory usage diff --git a/bindings/go/examples/go-model-download/main.go b/bindings/go/examples/go-model-download/main.go index 3522d881..d0c1cc78 100644 --- a/bindings/go/examples/go-model-download/main.go +++ b/bindings/go/examples/go-model-download/main.go @@ -24,7 +24,7 @@ const ( var ( // The models which will be downloaded, if no model is specified as 
an argument - modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"} + modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "ggml-large-v3-turbo"} ) var ( diff --git a/examples/livestream.sh b/examples/livestream.sh index fbeb6dbc..7c044199 100755 --- a/examples/livestream.sh +++ b/examples/livestream.sh @@ -48,7 +48,7 @@ if [ -n "$3" ]; then fi # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" ) # list available models function list_models { diff --git a/examples/twitch.sh b/examples/twitch.sh index 0403fea9..1cd81428 100755 --- a/examples/twitch.sh +++ b/examples/twitch.sh @@ -21,7 +21,7 @@ help() echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]" echo "options:" echo "-s Step in seconds (default is $step)." - echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')." + echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' 'large-v3-turbo' (default is '$model')." echo "-t Number of threads to use." echo "-h Print this help page." 
echo diff --git a/models/README.md b/models/README.md index 3ef84a4e..3f5f4b51 100644 --- a/models/README.md +++ b/models/README.md @@ -42,22 +42,24 @@ rmdir models/whisper-medium ## Available models -| Model | Disk | SHA | -| ------------- | ------- | ------------------------------------------ | -| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` | -| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` | -| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | -| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` | -| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | -| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` | -| small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` | -| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | -| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | -| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | -| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | -| large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` | -| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` | -| large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` | +| Model | Disk | SHA | +| ------------------- | ------- | ------------------------------------------ | +| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` | +| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` | +| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | +| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` | +| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | +| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` | +| small.en-tdrz | 465 MiB | `b6c6e7e89af1a35c08e6de56b66ca6a02a2fdfa1` | +| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | +| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | +| large-v1 | 2.9 GiB 
| `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | +| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | +| large-v2-q5_0 | 1.1 GiB | `00e39f2196344e901b3a2bd5814807a769bd1630` | +| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` | +| large-v3-q5_0 | 1.1 GiB | `e6e2ed78495d403bef4b7cff42ef4aaadcfea8de` | +| large-v3-turbo | 1.5 GiB | `4af2b29d7ec73d781377bfd1758ca957a807e941` | +| large-v3-turbo-q5_0 | 547 MiB | `e050f7970618a659205450ad97eb95a18d69c9ee` | Models are multilingual unless the model name includes `.en`. Models ending in `-q5_0` are [quantized](../README.md#quantization). Models ending in `-tdrz` support local diarization (marking of speaker turns) using [tinydiarize](https://github.com/akashmjn/tinydiarize). More information about models is available [upstream (openai/whisper)](https://github.com/openai/whisper#available-models-and-languages). The list above is a subset of the models supported by the [download-ggml-model.sh](download-ggml-model.sh) script, but many more are available at https://huggingface.co/ggerganov/whisper.cpp/tree/main and elsewhere. diff --git a/models/convert-h5-to-coreml.py b/models/convert-h5-to-coreml.py index 57341ab0..541fa2e1 100644 --- a/models/convert-h5-to-coreml.py +++ b/models/convert-h5-to-coreml.py @@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str): # Ported from models/convert-whisper-to-coreml.py if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) + parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True) parser.add_argument("--model-path", type=str, help="path to the model (e.g. 
if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) args = parser.parse_args() - if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: + if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]: raise ValueError("Invalid model name") pt_target_path = f"models/hf-{args.model_name}.pt" diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py index 046aabd2..f66683e9 100644 --- a/models/convert-whisper-to-coreml.py +++ b/models/convert-whisper-to-coreml.py @@ -283,13 +283,13 @@ def convert_decoder(hparams, model, quantize=False): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) + parser.add_argument("--model", type=str, help="model to convert (e.g. 
tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) args = parser.parse_args() - if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: + if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]: raise ValueError("Invalid model name") whisper = load_model(args.model).cpu() diff --git a/models/convert-whisper-to-openvino.py b/models/convert-whisper-to-openvino.py index 5df0be78..3124dd3d 100644 --- a/models/convert-whisper-to-openvino.py +++ b/models/convert-whisper-to-openvino.py @@ -45,10 +45,10 @@ def convert_encoder(hparams, encoder, mname): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) + parser.add_argument("--model", type=str, help="model to convert (e.g. 
tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo)", required=True) args = parser.parse_args() - if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: + if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]: raise ValueError("Invalid model name") whisper = load_model(args.model).cpu() diff --git a/models/download-coreml-model.sh b/models/download-coreml-model.sh index 405b355e..dca26a20 100755 --- a/models/download-coreml-model.sh +++ b/models/download-coreml-model.sh @@ -22,7 +22,7 @@ get_script_path() { models_path="$(get_script_path)" # Whisper models -models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3" +models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo" # list available models list_models() { diff --git a/models/download-ggml-model.cmd b/models/download-ggml-model.cmd index 4d21531d..5d5b3b86 100644 --- a/models/download-ggml-model.cmd +++ b/models/download-ggml-model.cmd @@ -8,7 +8,7 @@ popd set argc=0 for %%x in (%*) do set /A argc+=1 -set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 +set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo if %argc% neq 1 ( echo. 
diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh index ca2cefe7..e555aee8 100755 --- a/models/download-ggml-model.sh +++ b/models/download-ggml-model.sh @@ -46,7 +46,9 @@ large-v1 large-v2 large-v2-q5_0 large-v3 -large-v3-q5_0" +large-v3-q5_0 +large-v3-turbo +large-v3-turbo-q5_0" # list available models list_models() { diff --git a/scripts/bench.py b/scripts/bench.py index 25a09db8..143f4fba 100644 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -64,6 +64,7 @@ models = [ "ggml-large-v1.bin", "ggml-large-v2.bin", "ggml-large-v3.bin", + "ggml-large-v3-turbo.bin", ] diff --git a/scripts/convert-all.sh b/scripts/convert-all.sh index ff765c92..82cc8013 100755 --- a/scripts/convert-all.sh +++ b/scripts/convert-all.sh @@ -1,6 +1,6 @@ #!/bin/bash -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" ) for model in "${models[@]}"; do python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ diff --git a/tests/run-tests.sh b/tests/run-tests.sh index a9606a1f..71f9f2d3 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -19,7 +19,7 @@ cd `dirname $0` # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" "large-v3-turbo" ) # list available models function list_models {