models : add the new "large" model release by OpenAI

The old "large" model is now renamed "large-v1".
If you have been using it, make sure to rename it and download the new
"large" model for best results.
This commit is contained in:
Georgi Gerganov 2022-12-06 18:48:57 +02:00
parent 13e8eb2346
commit 9fe7306f4b
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
8 changed files with 12 additions and 9 deletions

View File

@ -189,9 +189,10 @@ samples:
.PHONY: small
.PHONY: medium.en
.PHONY: medium
.PHONY: large-v1
.PHONY: large
tiny.en tiny base.en base small.en small medium.en medium large: main
tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
bash ./models/download-ggml-model.sh $@
@echo ""
@echo "==============================================="

View File

@ -206,6 +206,7 @@ make small.en
make small
make medium.en
make medium
make large-v1
make large
```
@ -217,7 +218,7 @@ make large
| base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
| small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
## Another example

View File

@ -34,7 +34,7 @@ if [ -n "$3" ]; then
fi
# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models
function list_models {

View File

@ -1,6 +1,6 @@
#!/bin/bash
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
for model in "${models[@]}"; do
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/

View File

@ -37,7 +37,8 @@ https://huggingface.co/datasets/ggerganov/whisper.cpp/tree/main
| small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
| large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
| large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
## Model files for testing purposes

View File

@ -7,7 +7,7 @@ popd
set argc=0
for %%x in (%*) do set /A argc+=1
set models=tiny.en tiny base.en base small.en small medium.en medium large
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
if %argc% neq 1 (
echo.

View File

@ -22,7 +22,7 @@ function get_script_path() {
models_path=$(get_script_path)
# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models
function list_models {

View File

@ -19,7 +19,7 @@
cd `dirname $0`
# Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
# list available models
function list_models {
@ -107,7 +107,7 @@ function run_lang() {
$main -m ../models/ggml-$model.bin -f $fname_dst -l $lang -otxt 2> /dev/null
git diff --no-index --word-diff=color --word-diff-regex=. $fname_dst.txt $lang-$i-ref.txt
git diff --no-index --word-diff=color --word-diff-regex=. $lang-$i-ref.txt $fname_dst.txt
i=$(($i+1))
done