forked from extern/whisper.cpp
Update README.md and finalize the whisper.wasm example
This commit is contained in:
parent
491ecd7056
commit
6b45e37b2b
@ -124,6 +124,7 @@ else()
|
|||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2 /D_CRT_SECURE_NO_WARNINGS=1")
|
||||||
else()
|
else()
|
||||||
if (EMSCRIPTEN)
|
if (EMSCRIPTEN)
|
||||||
|
# we require support for WASM SIMD 128-bit
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -msimd128")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||||
else()
|
else()
|
||||||
|
2
Makefile
2
Makefile
@ -90,7 +90,7 @@ libwhisper.a: ggml.o whisper.o
|
|||||||
ar rcs libwhisper.a ggml.o whisper.o
|
ar rcs libwhisper.a ggml.o whisper.o
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o main libwhisper.a
|
rm -f *.o main stream libwhisper.a
|
||||||
|
|
||||||
#
|
#
|
||||||
# Examples
|
# Examples
|
||||||
|
@ -289,7 +289,7 @@ You can download the converted models using the [download-ggml-model.sh](downloa
|
|||||||
|
|
||||||
https://ggml.ggerganov.com
|
https://ggml.ggerganov.com
|
||||||
|
|
||||||
For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models).
|
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README in [models](models).
|
||||||
|
|
||||||
## Bindings
|
## Bindings
|
||||||
|
|
||||||
|
@ -1,3 +1,27 @@
|
|||||||
# whisper.wasm
|
# whisper.wasm
|
||||||
|
|
||||||
Live demo: https://whisper.ggerganov.com
|
Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser
|
||||||
|
|
||||||
|
This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
|
||||||
|
implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer -
|
||||||
|
it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3
|
||||||
|
real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe 60 seconds of audio in about
|
||||||
|
20-30 seconds).
|
||||||
|
|
||||||
|
This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make
|
||||||
|
sure that [your browser supports them](https://webassembly.org/roadmap/).
|
||||||
|
|
||||||
|
The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and
|
||||||
|
performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both
|
||||||
|
transcription and translation are supported.
|
||||||
|
|
||||||
|
Since the model data is quite big (74MB for the `tiny` model) you need to manually load the model into the web page.
|
||||||
|
|
||||||
|
The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the
|
||||||
|
audio is limited to 120 seconds.
|
||||||
|
|
||||||
|
## Live demo
|
||||||
|
|
||||||
|
Link: https://whisper.ggerganov.com
|
||||||
|
|
||||||
|
![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)
|
||||||
|
@ -162,7 +162,7 @@
|
|||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
<br><br>
|
<br>
|
||||||
|
|
||||||
<!-- textarea with height filling the rest of the page -->
|
<!-- textarea with height filling the rest of the page -->
|
||||||
<textarea id="output" rows="20"></textarea>
|
<textarea id="output" rows="20"></textarea>
|
||||||
@ -254,6 +254,10 @@
|
|||||||
return new type(buffer);
|
return new type(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// load model
|
||||||
|
//
|
||||||
|
|
||||||
function loadFile(event, fname) {
|
function loadFile(event, fname) {
|
||||||
var file = event.target.files[0] || null;
|
var file = event.target.files[0] || null;
|
||||||
if (file == null) {
|
if (file == null) {
|
||||||
@ -281,6 +285,10 @@
|
|||||||
reader.readAsArrayBuffer(file);
|
reader.readAsArrayBuffer(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// audio file
|
||||||
|
//
|
||||||
|
|
||||||
function loadAudio(event) {
|
function loadAudio(event) {
|
||||||
if (!context) {
|
if (!context) {
|
||||||
context = new AudioContext({sampleRate: 16000});
|
context = new AudioContext({sampleRate: 16000});
|
||||||
@ -327,7 +335,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Microphone
|
// microphone
|
||||||
//
|
//
|
||||||
|
|
||||||
var mediaRecorder = null;
|
var mediaRecorder = null;
|
||||||
|
@ -3,6 +3,6 @@
|
|||||||
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
|
||||||
|
|
||||||
for model in "${models[@]}"; do
|
for model in "${models[@]}"; do
|
||||||
python3 convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
|
||||||
mv -v models/ggml-model.bin models/ggml-$model.bin
|
mv -v models/ggml-model.bin models/ggml-$model.bin
|
||||||
done
|
done
|
||||||
|
Loading…
Reference in New Issue
Block a user