wip : some unsuccessful experiments using DP

main : add option for word-level timestamps (very experimental)
stream : add "--capture" option to select capture device (ref #10 )
2025-08-12 13:38:10 +02:00 · 2022-11-01 21:28:30 +02:00 · 2022-10-30 17:06:57 +02:00 · 2022-10-30 08:27:04 +02:00 · 2022-10-30 08:23:52 +02:00 · 2022-10-29 21:28:21 +03:00
56 changed files with 4886 additions and 697 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,7 +1,22 @@
-sync.sh
+*.o
+.cache/
+.vs/
+.vscode/
+.DS_Store
+
+build/
+build-em/
+build-debug/
+build-release/
+build-sanitize-addr/
+build-sanitize-thread/
+
 main
 stream
-*.o
-.cache
-build/
+bench
+sync.sh
 compile_commands.json
+
+examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
+examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
+examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -7,38 +7,65 @@ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(WHISPER_STANDALONE ON)
+    include(cmake/GitVars.cmake)
+    include(cmake/BuildTypes.cmake)
 else()
    set(WHISPER_STANDALONE OFF)
 endif()

+if (EMSCRIPTEN)
+    set(BUILD_SHARED_LIBS_DEFAULT OFF)
+
+    option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
+else()
+    if (MINGW)
+        set(BUILD_SHARED_LIBS_DEFAULT OFF)
+    else()
+        set(BUILD_SHARED_LIBS_DEFAULT ON)
+    endif()
+endif()
+
 # options

-option(WHISPER_ALL_WARNINGS            "whisper: enable all compiler warnings" ON)
+option(BUILD_SHARED_LIBS               "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
+
+option(WHISPER_ALL_WARNINGS            "whisper: enable all compiler warnings"                   ON)
 option(WHISPER_ALL_WARNINGS_3RD_PARTY  "whisper: enable all compiler warnings in 3rd party libs" OFF)

-option(WHISPER_SANITIZE_THREAD         "whisper: enable thread sanitizer" OFF)
-option(WHISPER_SANITIZE_ADDRESS        "whisper: enable address sanitizer" OFF)
+option(WHISPER_SANITIZE_THREAD         "whisper: enable thread sanitizer"    OFF)
+option(WHISPER_SANITIZE_ADDRESS        "whisper: enable address sanitizer"   OFF)
 option(WHISPER_SANITIZE_UNDEFINED      "whisper: enable undefined sanitizer" OFF)

-option(WHISPER_BUILD_TESTS             "whisper: build tests" ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_TESTS             "whisper: build tests"    ${WHISPER_STANDALONE})
+option(WHISPER_BUILD_EXAMPLES          "whisper: build examples" ${WHISPER_STANDALONE})

 option(WHISPER_SUPPORT_SDL2            "whisper: support for libSDL2" OFF)

+if (APPLE)
+    option(WHISPER_NO_ACCELERATE       "whisper: disable Accelerate framework" OFF)
+else()
+    option(WHISPER_SUPPORT_OPENBLAS    "whisper: support for OpenBLAS" OFF)
+endif()
+
+option(WHISPER_PERF                    "whisper: enable perf timings"          OFF)
+
 # sanitizers

-if (WHISPER_SANITIZE_THREAD)
-    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} -fsanitize=thread")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
-endif()
+if (NOT MSVC)
+    if (WHISPER_SANITIZE_THREAD)
+        set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
+    endif()

-if (WHISPER_SANITIZE_ADDRESS)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=address -fno-omit-frame-pointer")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
-endif()
+    if (WHISPER_SANITIZE_ADDRESS)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=address -fno-omit-frame-pointer")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    endif()

-if (WHISPER_SANITIZE_UNDEFINED)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=undefined")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+    if (WHISPER_SANITIZE_UNDEFINED)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}     -fsanitize=undefined")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
+    endif()
 endif()

 #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
@ -47,18 +74,33 @@ endif()
 # dependencies

 set(CMAKE_C_STANDARD   11)
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 20)

 find_package(Threads REQUIRED)

-if (WHISPER_SUPPORT_SDL2)
-    # SDL2
-    find_package(SDL2 REQUIRED)
+# on APPLE - include Accelerate framework
+if (APPLE AND NOT WHISPER_NO_ACCELERATE)
+    find_library(ACCELERATE_FRAMEWORK Accelerate)
+    if (ACCELERATE_FRAMEWORK)
+        message(STATUS "Accelerate framework found")

-    string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
+        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${ACCELERATE_FRAMEWORK})
+        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
+    else()
+        message(WARNING "Accelerate framework not found")
+    endif()
+endif()

-    message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
-    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
+if (WHISPER_SUPPORT_OPENBLAS)
+    find_library(OPENBLAS_LIB openblas)
+    if (OPENBLAS_LIB)
+        message(STATUS "OpenBLAS found")
+
+        set(WHISPER_EXTRA_LIBS  ${WHISPER_EXTRA_LIBS}  ${OPENBLAS_LIB})
+        set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
+    else()
+        message(WARNING "OpenBLAS not found")
+    endif()
 endif()

 # compiler flags
@ -69,7 +111,7 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
 endif ()

 if (WHISPER_ALL_WARNINGS)
-    if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
+    if (NOT MSVC)
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
            -Wall                           \
            -Wextra                         \
@ -80,12 +122,14 @@ if (WHISPER_ALL_WARNINGS)
            -Wpointer-arith                 \
        ")
    else()
-        # todo : windows
+        # todo : msvc
    endif()
 endif()

-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
-#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
+if (NOT MSVC)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
+    #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
+endif()

 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

@ -93,10 +137,23 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
    message(STATUS "ARM detected")
 else()
    message(STATUS "x86 detected")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
+    if (MSVC)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
+        set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /arch:AVX2")
+    else()
+        if (EMSCRIPTEN)
+            # we require support for WASM SIMD 128-bit
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -msimd128")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+        else()
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mavx2 -mfma -mf16c")
+        endif()
+    endif()
 endif()

+#
 # whisper - this is the main library of the project
+#

 set(TARGET whisper)

@ -109,7 +166,13 @@ target_include_directories(${TARGET} PUBLIC
    .
    )

-target_link_libraries(${TARGET} PRIVATE ${CMAKE_THREAD_LIBS_INIT})
+if (MSVC)
+    target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
+
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
+else()
+    target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
+endif()

 if (BUILD_SHARED_LIBS)
    target_link_libraries(${TARGET} PUBLIC
@ -130,24 +193,23 @@ install(TARGETS ${TARGET}
    ARCHIVE DESTINATION lib/static
    )

+#
+# bindings
+#
+
+add_subdirectory(bindings)
+
+#
 # programs, examples and tests
+#

 if (WHISPER_STANDALONE)
-    # main
-    set(TARGET main)
-    add_executable(${TARGET} main.cpp)
-    target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
-
-    if (WHISPER_SUPPORT_SDL2)
-        # stream
-        set(TARGET stream)
-        add_executable(${TARGET} stream.cpp)
-        target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
-        target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-    endif ()
-
    if (WHISPER_BUILD_TESTS)
        enable_testing()
        add_subdirectory(tests)
    endif ()
+
+    if (WHISPER_BUILD_EXAMPLES)
+        add_subdirectory(examples)
+    endif()
 endif ()
--- a/Makefile
+++ b/Makefile
@ -2,15 +2,26 @@ UNAME_S := $(shell uname -s)
 UNAME_P := $(shell uname -p)
 UNAME_M := $(shell uname -m)

+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+	ifneq ($(UNAME_P),arm)
+		SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
+		ifeq ($(SYSCTL_M),1)
+			UNAME_P := arm
+			UNAME_M := arm64
+			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+		endif
+	endif
+endif
+
 #
 # Compile flags
 #

-CFLAGS   = -O3 -std=c11
-CXXFLAGS = -O3 -std=c++11
-
-CFLAGS   += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
-CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
+CFLAGS   = -I.              -O3 -std=c11  
+CXXFLAGS = -I. -I./examples -O3 -std=c++11
+LDFLAGS  =

 # OS specific
 # TODO: support Windows
@ -22,17 +33,30 @@ ifeq ($(UNAME_S),Darwin)
 	CFLAGS   += -pthread
 	CXXFLAGS += -pthread
 endif
+ifeq ($(UNAME_S),FreeBSD)
+	CFLAGS   += -pthread
+	CXXFLAGS += -pthread
+endif

 # Architecture specific
-ifeq ($(UNAME_P),x86_64)
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+ifeq ($(UNAME_M),x86_64)
 	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
-ifneq ($(filter arm%,$(UNAME_P)),)
-	# Mac M1
+ifeq ($(UNAME_M),amd64)
+	CFLAGS += -mavx -mavx2 -mfma -mf16c
 endif
-ifneq ($(filter aarch64%,$(UNAME_P)),)
+ifneq ($(filter arm%,$(UNAME_M)),)
+	# Mac M1 - include Accelerate framework
+	ifeq ($(UNAME_S),Darwin)
+		CFLAGS  += -DGGML_USE_ACCELERATE
+		LDFLAGS += -framework Accelerate
 	endif
-	ifneq ($(filter armv6%,$(UNAME_M)),)
+endif
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+endif
+ifneq ($(filter armv6%,$(UNAME_M)),)
 	# Raspberry Pi 1, 2, 3
 	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 endif
@ -49,8 +73,8 @@ endif
 # Build library + main
 #

-main: main.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main
+main: examples/main/main.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/main/main.cpp whisper.o ggml.o -o main $(LDFLAGS)
 	./main -h

 ggml.o: ggml.c ggml.h
@ -59,8 +83,11 @@ ggml.o: ggml.c ggml.h
 whisper.o: whisper.cpp whisper.h
 	$(CXX) $(CXXFLAGS) -c whisper.cpp

+libwhisper.a: ggml.o whisper.o
+	ar rcs libwhisper.a ggml.o whisper.o
+
 clean:
-	rm -f *.o main
+	rm -f *.o main stream bench libwhisper.a

 #
 # Examples
@ -68,8 +95,11 @@ clean:

 CC_SDL=`sdl2-config --cflags --libs`

-stream: stream.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL)
+stream: examples/stream/stream.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
+
+bench: examples/bench/bench.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

 #
 # Audio samples
@ -109,7 +139,7 @@ samples:
 .PHONY: large

 tiny.en tiny base.en base small.en small medium.en medium large: main
-	bash ./download-ggml-model.sh $@
+	bash ./models/download-ggml-model.sh $@
 	@echo ""
 	@echo "==============================================="
 	@echo "Running $@ on all samples in ./samples ..."
--- a/README.md
+++ b/README.md
@ -6,26 +6,50 @@
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:

 - Plain C/C++ implementation without dependencies
- ARM_NEON and AVX intrinsics support
+- Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
+- AVX intrinsics support for x86 architectures
 - Mixed F16 / F32 precision
 - Low memory usage (Flash Attention + Flash Forward)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
- Supported platforms: Linux, Mac OS (Intel and Arm), Raspberry Pi, Android

-## Usage
+Supported platforms:

-To build the main program, run `make`. You can then transcribe a `.wav` file like this:
+- [x] Mac OS (Intel and Arm)
+- [x] [iOS](examples/whisper.objc)
+- [x] Linux
+- [x] [WebAssembly](examples/whisper.wasm)
+- [x] [Windows (MSVC and MinGW)](https://github.com/ggerganov/whisper.cpp/issues/5)
+- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
+- [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)
+
+The entire implementation of the model is contained in 2 source files:
+
+- [ggml.h](ggml.h) / [ggml.c](ggml.c)
+- [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
+
+Having such a lightweight implementation of the model makes it easy to integrate into different platforms and applications.
+As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:
+
+https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
+
+## Quick start
+
+First, download one of the Whisper models converted in [ggml format](models). For example:

 ```bash
-$ ./main -f input.wav
+bash ./models/download-ggml-model.sh base.en
 ```

-Before running the program, make sure to download one of the ggml Whisper models. For example:
+Now build the [main](examples/main) example and transcribe an audio file like this:

 ```bash
-bash ./download-ggml-model.sh base.en
+# build the main example
+make
+
+# transcribe an audio file
+./main -f input.wav
 ```

 ---
@ -34,9 +58,10 @@ For a quick demo, simply run `make base.en`:

 ```java
 $ make base.en
-cc  -O3 -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread   -c ggml.c
-c++ -O3 -std=c++11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread -c whisper.cpp
-c++ -O3 -std=c++11 -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -pthread main.cpp whisper.o ggml.o -o main
+
+cc  -I.              -O3 -std=c11   -pthread -DGGML_USE_ACCELERATE   -c ggml.c
+c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp
+c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main  -framework Accelerate
 ./main -h

 usage: ./main [options] file0.wav file1.wav ...
@ -45,17 +70,23 @@ options:
  -h,       --help           show this help message and exit
  -s SEED,  --seed SEED      RNG seed (default: -1)
  -t N,     --threads N      number of threads to use during computation (default: 4)
+  -ot N,    --offset-t N     time offset in milliseconds (default: 0)
+  -on N,    --offset-n N     segment index offset (default: 0)
  -v,       --verbose        verbose output
            --translate      translate from source language to english
+  -otxt,    --output-txt     output result in a text file
+  -ovtt,    --output-vtt     output result in a vtt file
+  -osrt,    --output-srt     output result in a srt file
  -ps,      --print_special  print special tokens
+  -pc,      --print_colors   print colors
  -nt,      --no_timestamps  do not print timestamps
  -l LANG,  --language LANG  spoken language (default: en)
  -m FNAME, --model FNAME    model path (default: models/ggml-base.en.bin)
  -f FNAME, --file FNAME     input WAV file path

-bash ./download-ggml-model.sh base.en
+bash ./models/download-ggml-model.sh base.en
 Downloading ggml model base.en ...
-models/ggml-base.en.bin            100%[===================================>] 141.11M  6.49MB/s    in 23s
+ggml-base.en.bin               100%[========================>] 141.11M  6.34MB/s    in 24s     
 Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
 You can now use it like this:

@ -83,7 +114,7 @@ whisper_model_load: n_text_layer  = 6
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 2
-whisper_model_load: mem_required  = 377.00 MB
+whisper_model_load: mem_required  = 505.00 MB
 whisper_model_load: adding 1607 extra tokens
 whisper_model_load: ggml ctx size = 163.43 MB
 whisper_model_load: memory size =    22.83 MB
@ -94,19 +125,19 @@ main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, lang =
 [00:00.000 --> 00:11.000]   And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.


-whisper_print_timings:     load time =    77.48 ms
-whisper_print_timings:      mel time =    26.10 ms
-whisper_print_timings:   sample time =     2.19 ms
-whisper_print_timings:   encode time =   632.95 ms / 105.49 ms per layer
-whisper_print_timings:   decode time =    85.11 ms / 14.18 ms per layer
-whisper_print_timings:    total time =   824.14 ms
+whisper_print_timings:     load time =    87.21 ms
+whisper_print_timings:      mel time =    24.26 ms
+whisper_print_timings:   sample time =     3.87 ms
+whisper_print_timings:   encode time =   323.67 ms / 53.94 ms per layer
+whisper_print_timings:   decode time =    83.25 ms / 13.87 ms per layer
+whisper_print_timings:    total time =   522.66 ms
 ```

 The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.

 For detailed usage instructions, run: `./main -h`

-Note that `whisper.cpp` currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
+Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
 For example, you can use `ffmpeg` like this:

 ```java
@ -137,11 +168,24 @@ make medium
 make large
 ```

+## Memory usage
+
+| Model  | Disk   | Mem     | SHA                                        |
+| ---    | ---    | ---     | ---                                        |
+| tiny   |  75 MB | ~280 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| base   | 142 MB | ~430 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| small  | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| large  | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
+
 ## Another example

 Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
-in less than a minute on a MacBook M1 Pro, using `medium.en` model:
+in about half a minute on a MacBook M1 Pro, using `medium.en` model:

+<details>
+  <summary>Expand to see the result</summary>
+  
 ```java
 $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8

@ -158,86 +202,111 @@ whisper_model_load: n_text_layer  = 24
 whisper_model_load: n_mels        = 80
 whisper_model_load: f16           = 1
 whisper_model_load: type          = 4
-whisper_model_load: mem_required  = 2502.00 MB
+whisper_model_load: mem_required  = 2610.00 MB
 whisper_model_load: adding 1607 extra tokens
 whisper_model_load: ggml ctx size = 1644.97 MB
 whisper_model_load: memory size =   182.62 MB
 whisper_model_load: model size  =  1462.12 MB
-log_mel_spectrogram: n_sample = 3179750, n_len = 19873
-log_mel_spectrogram: recording length: 198.734375 s

-main: processing 3179750 samples (198.7 sec), 8 threads, lang = english, task = transcribe, timestamps = 1 ...
+main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, lang = en, task = transcribe, timestamps = 1 ...

 [00:00.000 --> 00:08.000]   My fellow Americans, this day has brought terrible news and great sadness to our country.
-[00:08.000 --> 00:17.000]   At 9 o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
-[00:17.000 --> 00:24.000]   A short time later, debris was seen falling from the skies above Texas.
-[00:24.000 --> 00:29.000]   The Columbia's lost. There are no survivors.
+[00:08.000 --> 00:17.000]   At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
+[00:17.000 --> 00:23.000]   A short time later, debris was seen falling from the skies above Texas.
+[00:23.000 --> 00:29.000]   The Columbia's lost. There are no survivors.
 [00:29.000 --> 00:32.000]   On board was a crew of seven.
-[00:32.000 --> 00:43.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain David Brown, Commander William McCool,
-[00:43.000 --> 00:52.000]   Dr. Kultner Aschavla, and Elon Ramon, a Colonel in the Israeli Air Force.
+[00:32.000 --> 00:39.000]   Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
+[00:39.000 --> 00:48.000]   Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
+[00:48.000 --> 00:52.000]   a colonel in the Israeli Air Force.
 [00:52.000 --> 00:58.000]   These men and women assumed great risk in the service to all humanity.
-[00:58.000 --> 01:06.000]   In an age when space flight has come to seem almost routine, it is easy to overlook the dangers of travel by rocket
-[01:06.000 --> 01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
-[01:12.000 --> 01:22.000]   These astronauts knew the dangers, and they faced them willingly, knowing they had a high and noble purpose in life.
-[01:22.000 --> 01:30.000]   Because of their courage, endearing, and idealism, we will miss them all the more.
-[01:30.000 --> 01:40.000]   All Americans today are thinking as well of the families of these men and women who have been given this sudden shock and grief.
-[01:40.000 --> 01:45.000]   You're not alone. Our entire nation agrees with you.
-[01:45.000 --> 01:52.000]   And those you love will always have the respect and gratitude of this country.
+[00:58.000 --> 01:03.000]   In an age when space flight has come to seem almost routine,
+[01:03.000 --> 01:07.000]   it is easy to overlook the dangers of travel by rocket
+[01:07.000 --> 01:12.000]   and the difficulties of navigating the fierce outer atmosphere of the Earth.
+[01:12.000 --> 01:18.000]   These astronauts knew the dangers, and they faced them willingly,
+[01:18.000 --> 01:23.000]   knowing they had a high and noble purpose in life.
+[01:23.000 --> 01:31.000]   Because of their courage and daring and idealism, we will miss them all the more.
+[01:31.000 --> 01:36.000]   All Americans today are thinking as well of the families of these men and women
+[01:36.000 --> 01:40.000]   who have been given this sudden shock and grief.
+[01:40.000 --> 01:45.000]   You're not alone. Our entire nation grieves with you,
+[01:45.000 --> 01:52.000]   and those you love will always have the respect and gratitude of this country.
 [01:52.000 --> 01:56.000]   The cause in which they died will continue.
-[01:56.000 --> 02:07.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery and the longing to understand.
-[02:07.000 --> 02:11.000]   Our journey into space will go on.
+[01:56.000 --> 02:04.000]   Mankind is led into the darkness beyond our world by the inspiration of discovery
+[02:04.000 --> 02:11.000]   and the longing to understand. Our journey into space will go on.
 [02:11.000 --> 02:16.000]   In the skies today, we saw destruction and tragedy.
 [02:16.000 --> 02:22.000]   Yet farther than we can see, there is comfort and hope.
-[02:22.000 --> 02:31.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens who created all these.
-[02:31.000 --> 02:39.000]   He who brings out the starry hosts one by one and calls them each by name."
-[02:39.000 --> 02:46.000]   Because of his great power and mighty strength, not one of them is missing.
-[02:46.000 --> 02:55.000]   The same creator who names the stars also knows the names of the seven souls we mourn today.
-[02:55.000 --> 03:05.000]   The crew of the shuttle Columbia did not return safely to Earth, yet we can pray that all are safely home.
-[03:05.000 --> 03:14.000]   May God bless the grieving families and may God continue to bless America.
-[03:14.000 --> 03:24.000]   [Music]
+[02:22.000 --> 02:29.000]   In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
+[02:29.000 --> 02:35.000]   who created all these. He who brings out the starry hosts one by one
+[02:35.000 --> 02:39.000]   and calls them each by name."
+[02:39.000 --> 02:46.000]   Because of His great power and mighty strength, not one of them is missing.
+[02:46.000 --> 02:55.000]   The same Creator who names the stars also knows the names of the seven souls we mourn today.
+[02:55.000 --> 03:01.000]   The crew of the shuttle Columbia did not return safely to earth,
+[03:01.000 --> 03:05.000]   yet we can pray that all are safely home.
+[03:05.000 --> 03:13.000]   May God bless the grieving families, and may God continue to bless America.
+[03:13.000 --> 03:41.000]   Audio


-main:     load time =   522.18 ms
-main:      mel time =   423.43 ms
-main:   sample time =    31.42 ms
-main:   encode time = 41518.51 ms / 1729.94 ms per layer
-main:   decode time = 14907.22 ms
-main:    total time = 57416.63 ms
+whisper_print_timings:     load time =   575.92 ms
+whisper_print_timings:      mel time =   230.60 ms
+whisper_print_timings:   sample time =    73.19 ms
+whisper_print_timings:   encode time = 19552.61 ms / 814.69 ms per layer
+whisper_print_timings:   decode time = 13249.96 ms / 552.08 ms per layer
+whisper_print_timings:    total time = 33686.27 ms
 ```
+</details>

 ## Real-time audio input example

 This is a naive example of performing real-time inference on audio from your microphone.
-The `stream` tool samples the audio every 3 seconds and runs the transcription continously. More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
+The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
+More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).

 ```java
-$ ./stream -m models/ggml-small.en.bin -t 8
+./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
 ```

-https://user-images.githubusercontent.com/1991296/193465125-c163d304-64f6-4f5d-83e5-72239c9a203e.mp4
+https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
+
+## Confidence color-coding
+
+Adding the `--print_colors` argument will print the transcribed text using an experimental color coding strategy
+to highlight words with high or low confidence:
+
+<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">

 ## Implementation details

 - The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
 - The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
- Simple usage is demonstrated in [main.cpp](main.cpp)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)
+- Sample usage is demonstrated in [main.cpp](examples/main)
+- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
+- Various other examples are available in the [examples](examples) folder
+
+The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
+intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
+the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.

 ## Limitations

- Very basic greedy sampling scheme - always pick up the top token. You can implement your own strategy
 - Inference only
 - No GPU support
+- Very basic greedy sampling scheme - always pick up the token with highest probability.
+  This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
+  from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
+  to run the python code with the following parameters:

-## Memory usage
+  ```
+  whisper --best_of None --beam_size None ...
+  ```

-| Model  | Disk   | Mem     |
-| ---    | ---    | ---     |
-| tiny   |  75 MB | ~240 MB |
-| base   | 142 MB | ~380 MB |
-| small  | 466 MB | ~970 MB |
-| medium | 1.5 GB | ~2.5 GB |
-| large  | 2.9 GB | ~4.6 GB |
+  In the future, `whisper.cpp` will support more sampling strategies.
+
+## Benchmarks
+
+In order to have an objective comparison of the performance of the inference across different system configurations,
+use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
+took to execute it. The results are summarized in the following GitHub issue:
+
+[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)

 ## ggml format

@ -248,6 +317,18 @@ The original models are converted to a custom binary format. This allows to pack
 - vocabulary
 - weights

-You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script.
+You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script or from here:

-For more details, see the conversion script [convert-pt-to-ggml.py](convert-pt-to-ggml.py) or the README in [models](models).
+https://ggml.ggerganov.com
+
+For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README in [models](models).
+
+## Bindings
+
+- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs)
+- [ ] Python:
+- [ ] Java:
+
+## Examples
+
+There are various examples of using the library for different projects in the [examples](examples) folder. Check them out!
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@ -0,0 +1,3 @@
+if (EMSCRIPTEN)
+    add_subdirectory(javascript)
+endif()
--- a/bindings/javascript/.gitignore
+++ b/bindings/javascript/.gitignore
@ -0,0 +1 @@
+publish.log
--- a/bindings/javascript/CMakeLists.txt
+++ b/bindings/javascript/CMakeLists.txt
@ -0,0 +1,33 @@
+set(TARGET libwhisper)
+# JavaScript bindings: an executable built from emscripten.cpp and linked against the whisper library.
+add_executable(${TARGET}
+    emscripten.cpp
+    )
+
+target_link_libraries(${TARGET} PRIVATE
+    whisper
+    )
+# EXTRA_FLAGS collects the optional linker settings below; clear any stale value first.
+unset(EXTRA_FLAGS)
+if (WHISPER_WASM_SINGLE_FILE)
+    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
+    message(STATUS "Embedding WASM inside whisper.js")
+    # After each build, copy the generated module into this source directory as whisper.js.
+    add_custom_command(
+        TARGET ${TARGET} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:${TARGET}>"
+        "${CMAKE_CURRENT_SOURCE_DIR}/whisper.js"
+        VERBATIM # identical argument escaping on every platform
+        )
+endif()
+# Linker settings: embind, pthreads with a pool of 8 workers, 1.5 GiB of memory, plus EXTRA_FLAGS.
+set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
+    --bind \
+    -s USE_PTHREADS=1 \
+    -s PTHREAD_POOL_SIZE=8 \
+    -s INITIAL_MEMORY=1610612736 \
+    -s TOTAL_MEMORY=1610612736 \
+    -s FORCE_FILESYSTEM=1 \
+    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
+    ${EXTRA_FLAGS} \
+    ")
--- a/bindings/javascript/emscripten.cpp
+++ b/bindings/javascript/emscripten.cpp
@ -0,0 +1,89 @@
+#include "whisper.h"
+
+#include <emscripten.h>
+#include <emscripten/bind.h>
+
+#include <vector>
+#include <thread>
+
+std::vector<struct whisper_context *> g_contexts(4, nullptr); // 4 context slots; nullptr marks a free slot
+
+EMSCRIPTEN_BINDINGS(whisper) {
+    // init(path_model): load a model into the first free slot; returns the 1-based slot handle, or 0 on failure
+    emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
+        const size_t n_slots = g_contexts.size();
+        for (size_t slot = 0; slot < n_slots; ++slot) {
+            if (g_contexts[slot] != nullptr) {
+                continue; // slot already in use
+            }
+            // only the first free slot is tried: a failed load reports 0 right away
+            g_contexts[slot] = whisper_init(path_model.c_str());
+            return g_contexts[slot] == nullptr ? (size_t) 0 : slot + 1;
+        }
+
+        return (size_t) 0; // no free slot
+    }));
+
+    emscripten::function("free", emscripten::optional_override([](size_t index) {
+        // handles are 1-based; handle 0 wraps around here and is rejected by the range check
+        const size_t slot = index - 1;
+        if (slot >= g_contexts.size()) return;
+
+        whisper_free(g_contexts[slot]);
+        g_contexts[slot] = nullptr;
+    }));
+
+    // full_default(index, audio, lang, translate): transcribe `audio` with the context behind the 1-based handle `index`
+    // returns -1 for an out-of-range handle, -2 for an unused slot, otherwise the whisper_full() result
+    emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
+        --index;
+
+        if (index >= g_contexts.size()) {
+            return -1;
+        }
+
+        if (g_contexts[index] == nullptr) {
+            return -2;
+        }
+
+        struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
+
+        // hardware_concurrency() is unsigned and may report 0: convert once and keep at least one worker thread
+        const int n_hw = (int) std::thread::hardware_concurrency();
+        params.print_realtime       = true;
+        params.print_progress       = false;
+        params.print_timestamps     = true;
+        params.print_special_tokens = false;
+        params.translate            = translate;
+        params.language             = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
+        params.n_threads            = std::max(1, std::min(8, n_hw));
+        params.offset_ms            = 0;
+
+        std::vector<float> pcmf32;
+        const int n = audio["length"].as<int>();
+        pcmf32.resize(n);
+
+        // view the C++ buffer through the module heap using the same constructor as `audio`, then copy the samples in
+        emscripten::val heap = emscripten::val::module_property("HEAPU8");
+        emscripten::val memory = heap["buffer"];
+        emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
+        memoryView.call<void>("set", audio);
+
+        // print system information
+        {
+            printf("system_info: n_threads = %d / %d | %s\n",
+                    params.n_threads, n_hw, whisper_print_system_info());
+            printf("%s: processing %d samples, %.1f sec, %d threads, %d processors, lang = %s, task = %s ...\n",
+                    __func__, int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE,
+                    params.n_threads, 1,
+                    params.language,
+                    params.translate ? "translate" : "transcribe");
+
+            printf("\n");
+        }
+
+        int ret = whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size());
+        whisper_print_timings(g_contexts[index]);
+        return ret;
+    }));
+}
--- a/bindings/javascript/whisper.js
+++ b/bindings/javascript/whisper.js
--- a/cmake/BuildTypes.cmake
+++ b/cmake/BuildTypes.cmake
@ -0,0 +1,54 @@
+# Add new build types (the flag variables are plain cache defaults, so values supplied by the user are kept)
+
+# ReleaseGG - Release with enabled asserts (optimized, no -DNDEBUG)
+
+set(CMAKE_CXX_FLAGS_RELEASEGG
+    "-O3"
+    CACHE STRING "Flags used by the c++ compiler during release builds with enabled asserts."
+    )
+set(CMAKE_C_FLAGS_RELEASEGG
+    "-O3"
+    CACHE STRING "Flags used by the compiler during release builds with enabled asserts."
+    )
+set(CMAKE_EXE_LINKER_FLAGS_RELEASEGG
+    ""
+    CACHE STRING "Flags used for linking binaries during release builds with enabled asserts."
+    )
+set(CMAKE_SHARED_LINKER_FLAGS_RELEASEGG
+    ""
+    CACHE STRING "Flags used by the shared libraries linker during release builds with enabled asserts."
+    )
+mark_as_advanced(
+    CMAKE_CXX_FLAGS_RELEASEGG
+    CMAKE_C_FLAGS_RELEASEGG
+    CMAKE_EXE_LINKER_FLAGS_RELEASEGG
+    CMAKE_SHARED_LINKER_FLAGS_RELEASEGG )
+
+# RelWithDebInfoGG - RelWithDebInfo with enabled asserts (optimized with debug symbols, no -DNDEBUG)
+
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
+    "-O2 -g"
+    CACHE STRING "Flags used by the c++ compiler during release builds with debug symbols and enabled asserts."
+    )
+set(CMAKE_C_FLAGS_RELWITHDEBINFOGG
+    "-O2 -g"
+    CACHE STRING "Flags used by the compiler during release builds with debug symbols and enabled asserts."
+    )
+set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
+    ""
+    CACHE STRING "Flags used for linking binaries during release builds with debug symbols and enabled asserts."
+    )
+set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG
+    ""
+    CACHE STRING "Flags used by the shared libraries linker during release builds with debug symbols and enabled asserts."
+    )
+mark_as_advanced(
+    CMAKE_CXX_FLAGS_RELWITHDEBINFOGG
+    CMAKE_C_FLAGS_RELWITHDEBINFOGG
+    CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFOGG
+    CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFOGG )
+
+if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) # default to Release when no build type was requested
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
+endif()
--- a/cmake/GitVars.cmake
+++ b/cmake/GitVars.cmake
@ -0,0 +1,22 @@
+find_package(Git)
+
+# GIT_SHA1: abbreviated (8 characters) hash of the current commit
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_SHA1 OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+
+# GIT_DATE: author date of the current commit, printed in the local time zone
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+
+# GIT_COMMIT_SUBJECT: subject line of the current commit
+execute_process(
+    COMMAND "${GIT_EXECUTABLE}" log -1 --format=%s
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -0,0 +1,27 @@
+# required by every example
+
+find_package(Threads REQUIRED)
+
+# optional third-party libraries
+
+if (WHISPER_SUPPORT_SDL2)
+    # SDL2 (its variables are consumed by the stream example)
+    find_package(SDL2 REQUIRED)
+
+    string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)
+
+    message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
+    message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
+endif()
+
+# example targets (this directory is put on the include path of all of them)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+if (NOT EMSCRIPTEN)
+    foreach (example main stream bench)
+        add_subdirectory(${example})
+    endforeach()
+else()
+    add_subdirectory(whisper.wasm)
+endif()
--- a/examples/bench/CMakeLists.txt
+++ b/examples/bench/CMakeLists.txt
@ -0,0 +1,3 @@
+set(TARGET bench) # benchmark executable, built from bench.cpp and linked against whisper
+add_executable(${TARGET} bench.cpp)
+target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
--- a/examples/bench/README.md
+++ b/examples/bench/README.md
@ -0,0 +1,52 @@
+# bench
+
+A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of the transformer on some random audio data and records the execution time. This way we can have an objective comparison of the performance of the model for various setups.
+
+Benchmark results are tracked in the following GitHub issue: https://github.com/ggerganov/whisper.cpp/issues/89
+
+```bash
+# build the bench tool
+$ make bench
+
+# run it on the small.en model using 4 threads
+$ ./bench -m ./models/ggml-small.en.bin -t 4
+
+whisper_model_load: loading model from './models/ggml-small.en.bin'
+whisper_model_load: n_vocab       = 51864
+whisper_model_load: n_audio_ctx   = 1500
+whisper_model_load: n_audio_state = 768
+whisper_model_load: n_audio_head  = 12
+whisper_model_load: n_audio_layer = 12
+whisper_model_load: n_text_ctx    = 448
+whisper_model_load: n_text_state  = 768
+whisper_model_load: n_text_head   = 12
+whisper_model_load: n_text_layer  = 12
+whisper_model_load: n_mels        = 80
+whisper_model_load: f16           = 1
+whisper_model_load: type          = 3
+whisper_model_load: mem_required  = 1048.00 MB
+whisper_model_load: adding 1607 extra tokens
+whisper_model_load: ggml ctx size = 533.05 MB
+whisper_model_load: memory size =    68.48 MB 
+whisper_model_load: model size  =   464.44 MB
+
+whisper_print_timings:     load time =   240.82 ms
+whisper_print_timings:      mel time =     0.00 ms
+whisper_print_timings:   sample time =     0.00 ms
+whisper_print_timings:   encode time =  1062.21 ms / 88.52 ms per layer
+whisper_print_timings:   decode time =     0.00 ms / 0.00 ms per layer
+whisper_print_timings:    total time =  1303.04 ms
+
+system_info: n_threads = 4 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | 
+
+If you wish, you can submit these results here:
+
+  https://github.com/ggerganov/whisper.cpp/issues/89
+
+Please include the following information:
+
+  - CPU model
+  - Operating system
+  - Compiler
+
+```
--- a/examples/bench/bench.cpp
+++ b/examples/bench/bench.cpp
@ -0,0 +1,95 @@
+#include "whisper.h"
+
+#include <cstdio>
+#include <string>
+#include <thread>
+
+// command-line parameters
+struct whisper_params {
+    int32_t n_threads   = std::min(4, (int32_t) std::thread::hardware_concurrency()); // default: at most 4 threads
+
+    std::string model     = "models/ggml-base.en.bin"; // default model path
+};
+
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
+
+bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
+    // returns false (usage already printed) on an unknown option or an option that is missing its value
+    for (int i = 1; i < argc; i++) {
+        const std::string arg = argv[i];
+        const bool has_value = i + 1 < argc; // argv[++i] below must not run past the last argument
+        if (arg == "-h" || arg == "--help") {
+            whisper_print_usage(argc, argv, params);
+            exit(0);
+        } else if ((arg == "-t" || arg == "--threads") && has_value) {
+            params.n_threads = std::stoi(argv[++i]);
+        } else if ((arg == "-m" || arg == "--model") && has_value) {
+            params.model = argv[++i];
+        } else {
+            fprintf(stderr, "error: unknown argument or missing value: %s\n", arg.c_str());
+            whisper_print_usage(argc, argv, params);
+            return false;
+        }
+    }
+    return true;
+}
+
+void whisper_print_usage(int argc, char ** argv, const whisper_params & params) { // prints the option summary to stderr; params supplies the defaults shown (argc is unused)
+    fprintf(stderr, "\n");
+    fprintf(stderr, "usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "options:\n");
+    fprintf(stderr, "  -h,       --help           show this help message and exit\n");
+    fprintf(stderr, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
+    fprintf(stderr, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "\n");
+}
+
+// Loads the model, runs a single whisper_encode() pass and prints the timings; exit codes 1-4 identify the failing step.
+int main(int argc, char ** argv) {
+    whisper_params params;
+    if (whisper_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    // whisper init
+    struct whisper_context * ctx = whisper_init(params.model.c_str());
+
+    {
+        fprintf(stderr, "\n");
+        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", params.n_threads, (int) std::thread::hardware_concurrency(), whisper_print_system_info());
+    }
+
+    if (ctx == nullptr) {
+        fprintf(stderr, "error: failed to initialize whisper context\n");
+        return 2;
+    }
+
+    if (int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
+        fprintf(stderr, "error: failed to set mel: %d\n", ret);
+        return 3;
+    }
+
+    // no "!= 0" in the condition: it binds tighter than "=" and would store 0/1 in ret instead of the error code
+    if (int ret = whisper_encode(ctx, 0, params.n_threads)) {
+        fprintf(stderr, "error: failed to encode model: %d\n", ret);
+        return 4;
+    }
+
+    whisper_print_timings(ctx);
+    whisper_free(ctx);
+
+    fprintf(stderr, "\n");
+    fprintf(stderr, "If you wish, you can submit these results here:\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "  https://github.com/ggerganov/whisper.cpp/issues/89\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "Please include the following information:\n");
+    fprintf(stderr, "\n");
+    fprintf(stderr, "  - CPU model\n");
+    fprintf(stderr, "  - Operating system\n");
+    fprintf(stderr, "  - Compiler\n");
+    fprintf(stderr, "\n");
+
+    return 0;
+}
--- a/examples/dr_wav.h
+++ b/examples/dr_wav.h
--- a/examples/main/CMakeLists.txt
+++ b/examples/main/CMakeLists.txt
@ -0,0 +1,3 @@
+set(TARGET main) # main example executable, built from main.cpp and linked against whisper
+add_executable(${TARGET} main.cpp)
+target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
--- a/examples/main/README.md
+++ b/examples/main/README.md
@ -0,0 +1,26 @@
+# main
+
+This is the main example demonstrating most of the functionality of the Whisper model.
+It can be used as a reference for using the `whisper.cpp` library in other projects.
+
+```
+./main -h
+
+usage: ./main [options] file0.wav file1.wav ...
+
+options:
+  -h,       --help           show this help message and exit
+  -s SEED,  --seed SEED      RNG seed (default: -1)
+  -t N,     --threads N      number of threads to use during computation (default: 4)
+  -o N,     --offset N       offset in milliseconds (default: 0)
+  -v,       --verbose        verbose output
+            --translate      translate from source language to english
+  -otxt,    --output-txt     output result in a text file
+  -ovtt,    --output-vtt     output result in a vtt file
+  -osrt,    --output-srt     output result in a srt file
+  -ps,      --print_special  print special tokens
+  -nt,      --no_timestamps  do not print timestamps
+  -l LANG,  --language LANG  spoken language (default: en)
+  -m FNAME, --model FNAME    model path (default: models/ggml-base.en.bin)
+  -f FNAME, --file FNAME     input WAV file path
+```
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
--- a/examples/stream/CMakeLists.txt
+++ b/examples/stream/CMakeLists.txt
@ -0,0 +1,7 @@
+if (WHISPER_SUPPORT_SDL2)
+    # stream: only built when SDL2 support is enabled; uses the SDL2 include dirs and libraries
+    set(TARGET stream)
+    add_executable(${TARGET} stream.cpp)
+    target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+    target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+endif ()
--- a/examples/stream/README.md
+++ b/examples/stream/README.md
@ -0,0 +1,23 @@
+# stream
+
+This is a naive example of performing real-time inference on audio from your microphone.
+The `stream` tool samples the audio every half a second and runs the transcription continuously.
+More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
+
+```bash
+./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+```
+
+https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
+
+The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
+
+```bash
+# Install SDL2 on Linux
+sudo apt-get install libsdl2-dev
+
+# Install SDL2 on Mac OS
+brew install sdl2
+
+make stream
+```
--- a/examples/stream/stream.cpp
+++ b/examples/stream/stream.cpp
@ -17,6 +17,7 @@
 #include <string>
 #include <thread>
 #include <vector>
+#include <fstream>

 //  500 -> 00:05.000
 // 6000 -> 01:00.000
@ -34,9 +35,11 @@ std::string to_timestamp(int64_t t) {

 // command-line parameters
 struct whisper_params {
-    int32_t seed      = -1; // RNG seed, not used currently
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t step_ms   = 3000;
+    int32_t seed       = -1; // RNG seed, not used currently
+    int32_t n_threads  = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t step_ms    = 3000;
+    int32_t length_ms  = 10000;
+    int32_t capture_id = -1;

    bool verbose              = false;
    bool translate            = false;
@ -46,7 +49,7 @@ struct whisper_params {

    std::string language  = "en";
    std::string model     = "models/ggml-base.en.bin";
-    std::string fname_inp = "samples/jfk.wav";
+    std::string fname_out = "";
 };

 void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
@ -61,6 +64,10 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
            params.n_threads = std::stoi(argv[++i]);
        } else if (arg == "--step") {
            params.step_ms = std::stoi(argv[++i]);
+        } else if (arg == "--length") {
+            params.length_ms = std::stoi(argv[++i]);
+        } else if (arg == "-c" || arg == "--capture") {
+            params.capture_id = std::stoi(argv[++i]);
        } else if (arg == "-v" || arg == "--verbose") {
            params.verbose = true;
        } else if (arg == "--translate") {
@ -81,7 +88,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        } else if (arg == "-m" || arg == "--model") {
            params.model = argv[++i];
        } else if (arg == "-f" || arg == "--file") {
-            params.fname_inp = argv[++i];
+            params.fname_out = argv[++i];
        } else if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params);
            exit(0);
@ -104,14 +111,16 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
    fprintf(stderr, "  -s SEED,  --seed SEED      RNG seed (default: -1)\n");
    fprintf(stderr, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
    fprintf(stderr, "            --step N         audio step size in milliseconds (default: %d)\n", params.step_ms);
+    fprintf(stderr, "            --length N       audio length in milliseconds (default: %d)\n", params.length_ms);
+    fprintf(stderr, "  -c ID,    --capture ID     capture device ID (default: -1)\n");
    fprintf(stderr, "  -v,       --verbose        verbose output\n");
    fprintf(stderr, "            --translate      translate from source language to english\n");
-    fprintf(stderr, "  -nc,      --no-context     disable context from earlier audio (default: false)\n");
+    fprintf(stderr, "  -kc,      --keep-context   keep text context from earlier audio (default: false)\n");
    fprintf(stderr, "  -ps,      --print_special  print special tokens\n");
    fprintf(stderr, "  -nt,      --no_timestamps  do not print timestamps\n");
    fprintf(stderr, "  -l LANG,  --language LANG  spoken language (default: %s)\n", params.language.c_str());
    fprintf(stderr, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME     input WAV file path (default: %s)\n", params.fname_inp.c_str());
+    fprintf(stderr, "  -f FNAME, --file FNAME     text output file name (default: no output to file)\n");
    fprintf(stderr, "\n");
 }

@ -139,9 +148,9 @@ bool audio_sdl_init(const int capture_id) {

        {
            int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
-            printf("%s: found %d capture devices:\n", __func__, nDevices);
+            fprintf(stderr, "%s: found %d capture devices:\n", __func__, nDevices);
            for (int i = 0; i < nDevices; i++) {
-                printf("%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
+                fprintf(stderr, "%s:    - Capture device #%d: '%s'\n", __func__, i, SDL_GetAudioDeviceName(i, SDL_TRUE));
            }
        }
    }
@ -159,21 +168,21 @@ bool audio_sdl_init(const int capture_id) {
        capture_spec_requested.samples  = 1024;

        if (capture_id >= 0) {
-            printf("%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, SDL_GetAudioDeviceName(capture_id, SDL_TRUE));
+            fprintf(stderr, "%s: attempt to open capture device %d : '%s' ...\n", __func__, capture_id, SDL_GetAudioDeviceName(capture_id, SDL_TRUE));
            g_dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(capture_id, SDL_TRUE), SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
        } else {
-            printf("%s: attempt to open default capture device ...\n", __func__);
+            fprintf(stderr, "%s: attempt to open default capture device ...\n", __func__);
            g_dev_id_in = SDL_OpenAudioDevice(nullptr, SDL_TRUE, &capture_spec_requested, &capture_spec_obtained, 0);
        }
        if (!g_dev_id_in) {
-            printf("%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
+            fprintf(stderr, "%s: couldn't open an audio device for capture: %s!\n", __func__, SDL_GetError());
            g_dev_id_in = 0;
        } else {
-            printf("%s: obtained spec for input device (SDL Id = %d):\n", __func__, g_dev_id_in);
-            printf("%s:     - sample rate:       %d\n", __func__, capture_spec_obtained.freq);
-            printf("%s:     - format:            %d (required: %d)\n", __func__, capture_spec_obtained.format, capture_spec_requested.format);
-            printf("%s:     - channels:          %d (required: %d)\n", __func__, capture_spec_obtained.channels, capture_spec_requested.channels);
-            printf("%s:     - samples per frame: %d\n", __func__, capture_spec_obtained.samples);
+            fprintf(stderr, "%s: obtained spec for input device (SDL Id = %d):\n", __func__, g_dev_id_in);
+            fprintf(stderr, "%s:     - sample rate:       %d\n", __func__, capture_spec_obtained.freq);
+            fprintf(stderr, "%s:     - format:            %d (required: %d)\n", __func__, capture_spec_obtained.format, capture_spec_requested.format);
+            fprintf(stderr, "%s:     - channels:          %d (required: %d)\n", __func__, capture_spec_obtained.channels, capture_spec_requested.channels);
+            fprintf(stderr, "%s:     - samples per frame: %d\n", __func__, capture_spec_obtained.samples);
        }
    }

@ -196,7 +205,7 @@ int main(int argc, char ** argv) {

    // init audio

-    if (!audio_sdl_init(-1)) {
+    if (!audio_sdl_init(params.capture_id)) {
        fprintf(stderr, "%s: audio_sdl_init() failed!\n", __func__);
        return 1;
    }
@ -206,32 +215,55 @@ int main(int argc, char ** argv) {
    struct whisper_context * ctx = whisper_init(params.model.c_str());

    const int n_samples = (params.step_ms/1000.0)*WHISPER_SAMPLE_RATE;
+    const int n_samples_len = (params.length_ms/1000.0)*WHISPER_SAMPLE_RATE;
    const int n_samples_30s = 30*WHISPER_SAMPLE_RATE;
+
    std::vector<float> pcmf32(n_samples_30s, 0.0f);
    std::vector<float> pcmf32_old;

+    const int n_new_line = std::max(1, params.length_ms / params.step_ms - 1); // never 0: used as the divisor in "n_iter % n_new_line"
+
    // print some info about the processing
    {
-        printf("\n");
+        fprintf(stderr, "\n");
        if (!whisper_is_multilingual(ctx)) {
            if (params.language != "en" || params.translate) {
                params.language = "en";
                params.translate = false;
-                printf("%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
+                fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
            }
        }
-        printf("%s: processing %d samples (%.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
-                __func__, n_samples, float(n_samples)/WHISPER_SAMPLE_RATE, params.n_threads,
+        fprintf(stderr, "%s: processing %d samples (step = %.1f sec / len = %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
+                __func__,
+                n_samples,
+                float(n_samples)/WHISPER_SAMPLE_RATE,
+                float(n_samples_len)/WHISPER_SAMPLE_RATE,
+                params.n_threads,
                params.language.c_str(),
                params.translate ? "translate" : "transcribe",
                params.no_timestamps ? 0 : 1);
-        printf("\n");
+
+        fprintf(stderr, "%s: n_new_line = %d\n", __func__, n_new_line);
+        fprintf(stderr, "\n");
    }

    SDL_PauseAudioDevice(g_dev_id_in, 0);

+    int n_iter = 0;
    bool is_running = true;

+    std::ofstream fout;
+    if (params.fname_out.length() > 0) {
+        fout.open(params.fname_out);
+        if (!fout.is_open()) {
+            fprintf(stderr, "%s: failed to open output file '%s'!\n", __func__, params.fname_out.c_str());
+            return 1;
+        }
+    }
+
+    printf("[Start speaking]");
+    fflush(stdout);
+
    // main audio loop
    while (is_running) {
        // process SDL events:
@ -239,22 +271,35 @@ int main(int argc, char ** argv) {
        while (SDL_PollEvent(&event)) {
            switch (event.type) {
                case SDL_QUIT:
-                    is_running = false;
-                    break;
+                    {
+                        is_running = false;
+                    } break;
                default:
                    break;
            }
        }

-        // process 3 seconds of new audio
+        if (!is_running) {
+            break;
+        }
+
+        // process new audio
+        if (n_iter > 0 && SDL_GetQueuedAudioSize(g_dev_id_in) > 2*n_samples*sizeof(float)) {
+            fprintf(stderr, "\n\n%s: WARNING: cannot process audio fast enough, dropping audio ...\n\n", __func__);
+            SDL_ClearQueuedAudio(g_dev_id_in);
+        }
+
        while (SDL_GetQueuedAudioSize(g_dev_id_in) < n_samples*sizeof(float)) {
            SDL_Delay(1);
        }
+
        const int n_samples_new = SDL_GetQueuedAudioSize(g_dev_id_in)/sizeof(float);

        // take one second from previous iteration
-        // TODO: better strategy
-        const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_30s/30 - n_samples_new));
+        //const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_30s/30 - n_samples_new));
+
+        // take up to params.length_ms audio from previous iteration
+        const int n_samples_take = std::min((int) pcmf32_old.size(), std::max(0, n_samples_len - n_samples_new));

        //printf("processing: take = %d, new = %d, old = %d\n", n_samples_take, n_samples_new, (int) pcmf32_old.size());

@ -270,7 +315,7 @@ int main(int argc, char ** argv) {

        // run the inference
        {
-            whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
+            whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

            wparams.print_progress       = false;
            wparams.print_special_tokens = params.print_special_tokens;
@ -288,22 +333,47 @@ int main(int argc, char ** argv) {

            // print result;
            {
-                printf("\n");
+                printf("\33[2K\r");
+
+                // print long empty line to clear the previous line
+                printf("%s", std::string(100, ' ').c_str());
+
+                printf("\33[2K\r");

                const int n_segments = whisper_full_n_segments(ctx);
                for (int i = 0; i < n_segments; ++i) {
                    const char * text = whisper_full_get_segment_text(ctx, i);

                    if (params.no_timestamps) {
-                        printf ("%s", text);
+                        printf("%s", text);
                        fflush(stdout);
+
+                        if (params.fname_out.length() > 0) {
+                            fout << text;
+                        }
                    } else {
                        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
                        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);

                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
+
+                        if (params.fname_out.length() > 0) {
+                            fout << "[" << to_timestamp(t0) << " --> " << to_timestamp(t1) << "]  " << text << std::endl;
+                        }
                    }
                }
+
+                if (params.fname_out.length() > 0) {
+                    fout << std::endl;
+                }
+            }
+
+            ++n_iter;
+
+            if ((n_iter % n_new_line) == 0) {
+                printf("\n");
+
+                pcmf32_old.clear();
            }
        }
    }
--- a/examples/whisper.nvim/README.md
+++ b/examples/whisper.nvim/README.md
@ -0,0 +1,84 @@
+# whisper.nvim
+
+Speech-to-text in Neovim
+
+The transcription is performed on the CPU and no data leaves your computer. Works best on Apple Silicon devices.
+
+https://user-images.githubusercontent.com/1991296/198382564-784e9663-2037-4d04-99b8-f39136929b7e.mp4
+
+## Usage
+
+- Simply press `Ctrl-G` in `INSERT`, `VISUAL` or `NORMAL` mode and say something
+- When you are done - press `Ctrl-C` to end the transcription and insert the transcribed text under the cursor
+
+## Installation
+
+*Note: this is a bit tedious and hacky atm, but I hope it will be improved with time*
+
+- Clone this repo and build the `stream` tool:
+
+  ```
+  git clone https://github.com/ggerganov/whisper.cpp
+  cd whisper.cpp
+  make stream
+  ```
+
+- Download the `base.en` Whisper model (140 MB):
+
+  ```
+  ./models/download-ggml-model.sh base.en
+  ```
+
+- Place the [whisper.nvim](whisper.nvim) script somewhere in your PATH and give it execute permissions:
+
+  ```
+  cp examples/whisper.nvim/whisper.nvim ~/bin/
+  chmod u+x ~/bin/whisper.nvim
+  ```
+
+- Fine-tune the script to your preference and machine parameters:
+
+  ```
+  ./stream -t 8 -m models/ggml-base.en.bin --step 350 --length 10000 -f /tmp/whisper.nvim 2> /dev/null
+  ```
+
+  On slower machines, try to increase the `step` parameter.
+
+- Add the following shortcuts to your `~/.config/nvim/init.vim`:
+
+  ```
+  inoremap <C-G>  <C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
+  nnoremap <C-G>       :!whisper.nvim<CR>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR>"ap
+  vnoremap <C-G> c<C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
+  ```
+  
+  Explanation: pressing `Ctrl-G` runs the [whisper.nvim](whisper.nvim) script which in turn calls the `stream` binary to transcribe your speech through the microphone. The results from the transcription are continuously dumped into `/tmp/whisper.nvim`. After you kill the program with `Ctrl-C`, the vim command grabs the last line from the `/tmp/whisper.nvim` file and puts it under the cursor.
+  
+  Probably there is a much more intelligent way to achieve all this, but this is what I could hack in an hour. Any suggestions how to improve this are welcome.
+  
+You are now ready to use speech-to-text in Neovim!
+
+## TODO
+
+There are a lot of ways to improve this idea and I don't have much experience with Vim plugin programming, so contributions are welcome! 
+
+- [ ] **Wrap this into a plugin**
+  
+  It would be great to make a standalone plugin out of this that can be installed with `vim-plug` or similar
+  
+- [ ] **Simplify the `init.vim` mappings (maybe factor out the common call into a separate function)**
+- [ ] **Add Copilot/GPT-3 integration**
+
+  This is probably a very long shot, but I think it will be very cool to have the functionality to select some code and then hit Ctrl-G and say something like:
+  
+  *"refactor this using stl containers"*
+  
+  or
+  
+  *"optimize by sorting the data first"*
+  
+  The plugin would then make an appropriate query using the selected text and code context to Copilot or GPT-3 and return the result.
+
+## Discussion
+
+If you find this idea interesting, you can join the discussion here: https://github.com/ggerganov/whisper.cpp/discussions/108
--- a/examples/whisper.nvim/whisper.nvim
+++ b/examples/whisper.nvim/whisper.nvim
@ -0,0 +1,50 @@
+#!/bin/bash
+
+# INSTRUCTIONS
+#
+# This simple script is called by Neovim to capture audio from the microphone and transcribe it with Whisper.
+# In order for this to work, you need to clone the whisper.cpp repo and build the 'stream' tool
+#
+#   git clone https://github.com/ggerganov/whisper.cpp
+#   cd whisper.cpp
+#   make stream
+#
+# Also, make sure the current script is in your PATH env variable. You should be able to run the following command:
+#
+#   whisper.nvim
+#
+# Next, export the path to the whisper.cpp repository via the WHISPER_CPP_HOME env variable:
+#
+#   export WHISPER_CPP_HOME=/path/to/whisper.cpp
+#
+# Finally, add the following lines to your ~/.config/nvim/init.vim:
+#
+#   inoremap <C-G>  <C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
+#   nnoremap <C-G>       :!whisper.nvim<CR>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR>"ap
+#   vnoremap <C-G> c<C-O>:!whisper.nvim<CR><C-O>:let @a = system("cat /tmp/whisper.nvim \| tail -n 1 \| xargs -0 \| tr -d '\\n' \| sed -e 's/^[[:space:]]*//'")<CR><C-R>a
+#
+# This allows you to press Ctrl-G in order to capture audio from the microphone and transcribe it.
+# When you are done speaking - press Ctrl-C
+#
+
+# the Whisper model to use
+model="base.en"
+
+# export the path to the whisper.cpp repo in the WHISPER_CPP_HOME env variable
+# https://github.com/ggerganov/whisper.cpp
+cd ${WHISPER_CPP_HOME}
+
+if [ ! -f ./stream ] ; then
+    echo "whisper.nvim: the 'stream' executable was not found! WHISPER_CPP_HOME=${WHISPER_CPP_HOME}" > /tmp/whisper.nvim
+    exit 1
+fi
+
+if [ ! -f ./models/ggml-${model}.bin ] ; then
+    echo "whisper.nvim: the '$model' model was not found! WHISPER_CPP_HOME=${WHISPER_CPP_HOME}" > /tmp/whisper.nvim
+    exit 2
+fi
+
+# fine-tune the parameters according to your machine specs
+./stream -t 8 -m models/ggml-base.en.bin --step 350 --length 10000 -f /tmp/whisper.nvim 2> /dev/null
+
+exit 0
--- a/examples/whisper.objc/README.md
+++ b/examples/whisper.objc/README.md
@ -0,0 +1,17 @@
+# whisper.objc
+
+Minimal Obj-C application for automatic offline speech recognition.
+The inference runs locally, on-device.
+
+https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
+
+## Usage
+
+```bash
+git clone https://github.com/ggerganov/whisper.cpp
+open whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/
+```
+
+Make sure to build the project in `Release`:
+
+<img width="947" alt="image" src="https://user-images.githubusercontent.com/1991296/197382607-9e1e6d1b-79fa-496f-9d16-b71dc1535701.png">
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
@ -0,0 +1,382 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 56;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
+		18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
+		18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
+		18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8229052BDF00BD2A04 /* Main.storyboard */; };
+		18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8529052BE000BD2A04 /* Assets.xcassets */; };
+		18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */; };
+		18627C8C29052BE000BD2A04 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8B29052BE000BD2A04 /* main.m */; };
+		18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; };
+		18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE"; }; };
+		18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
+		18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
+		18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
+		18627C7C29052BDF00BD2A04 /* SceneDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SceneDelegate.h; sourceTree = "<group>"; };
+		18627C7D29052BDF00BD2A04 /* SceneDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SceneDelegate.m; sourceTree = "<group>"; };
+		18627C7F29052BDF00BD2A04 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = "<group>"; };
+		18627C8029052BDF00BD2A04 /* ViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ViewController.m; sourceTree = "<group>"; };
+		18627C8329052BDF00BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		18627C8529052BE000BD2A04 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		18627C8829052BE000BD2A04 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		18627C8A29052BE000BD2A04 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		18627C8B29052BE000BD2A04 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
+		18627C9229052C2B00BD2A04 /* whisper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = whisper.h; path = ../../../whisper.h; sourceTree = "<group>"; };
+		18627C9329052C4900BD2A04 /* whisper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = whisper.cpp; path = ../../../whisper.cpp; sourceTree = "<group>"; };
+		18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml.c; sourceTree = "<group>"; };
+		18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml.h; sourceTree = "<group>"; };
+		18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		18627C7329052BDF00BD2A04 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		18627C6D29052BDF00BD2A04 = {
+			isa = PBXGroup;
+			children = (
+				18627C7829052BDF00BD2A04 /* whisper.objc */,
+				18627C7729052BDF00BD2A04 /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		18627C7729052BDF00BD2A04 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				18627C7629052BDF00BD2A04 /* whisper.objc.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		18627C7829052BDF00BD2A04 /* whisper.objc */ = {
+			isa = PBXGroup;
+			children = (
+				18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */,
+				18627C9729052C6600BD2A04 /* ggml.h */,
+				18627C9529052C5800BD2A04 /* ggml.c */,
+				18627C9329052C4900BD2A04 /* whisper.cpp */,
+				18627C9229052C2B00BD2A04 /* whisper.h */,
+				18627C7929052BDF00BD2A04 /* AppDelegate.h */,
+				18627C7A29052BDF00BD2A04 /* AppDelegate.m */,
+				18627C7C29052BDF00BD2A04 /* SceneDelegate.h */,
+				18627C7D29052BDF00BD2A04 /* SceneDelegate.m */,
+				18627C7F29052BDF00BD2A04 /* ViewController.h */,
+				18627C8029052BDF00BD2A04 /* ViewController.m */,
+				18627C8229052BDF00BD2A04 /* Main.storyboard */,
+				18627C8529052BE000BD2A04 /* Assets.xcassets */,
+				18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */,
+				18627C8A29052BE000BD2A04 /* Info.plist */,
+				18627C8B29052BE000BD2A04 /* main.m */,
+			);
+			path = whisper.objc;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		18627C7529052BDF00BD2A04 /* whisper.objc */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */;
+			buildPhases = (
+				18627C7229052BDF00BD2A04 /* Sources */,
+				18627C7329052BDF00BD2A04 /* Frameworks */,
+				18627C7429052BDF00BD2A04 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = whisper.objc;
+			productName = whisper.objc;
+			productReference = 18627C7629052BDF00BD2A04 /* whisper.objc.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		18627C6E29052BDF00BD2A04 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				BuildIndependentTargetsInParallel = 1;
+				LastUpgradeCheck = 1400;
+				TargetAttributes = {
+					18627C7529052BDF00BD2A04 = {
+						CreatedOnToolsVersion = 14.0.1;
+					};
+				};
+			};
+			buildConfigurationList = 18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */;
+			compatibilityVersion = "Xcode 14.0";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = 18627C6D29052BDF00BD2A04;
+			productRefGroup = 18627C7729052BDF00BD2A04 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				18627C7529052BDF00BD2A04 /* whisper.objc */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		18627C7429052BDF00BD2A04 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
+				18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
+				18627C8429052BDF00BD2A04 /* Main.storyboard in Resources */,
+				18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		18627C7229052BDF00BD2A04 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				18627C8129052BDF00BD2A04 /* ViewController.m in Sources */,
+				18627C9429052C4900BD2A04 /* whisper.cpp in Sources */,
+				18627C9629052C5800BD2A04 /* ggml.c in Sources */,
+				18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */,
+				18627C8C29052BE000BD2A04 /* main.m in Sources */,
+				18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		18627C8229052BDF00BD2A04 /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				18627C8329052BDF00BD2A04 /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		18627C8729052BE000BD2A04 /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				18627C8829052BE000BD2A04 /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		18627C8D29052BE000BD2A04 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
+				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+				MTL_FAST_MATH = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+			};
+			name = Debug;
+		};
+		18627C8E29052BE000BD2A04 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				MTL_FAST_MATH = YES;
+				SDKROOT = iphoneos;
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		18627C9029052BE000BD2A04 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = P8JZH34X63;
+				GENERATE_INFOPLIST_FILE = YES;
+				INFOPLIST_FILE = whisper.objc/Info.plist;
+				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
+				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
+				INFOPLIST_KEY_UIMainStoryboardFile = Main;
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_EMIT_LOC_STRINGS = YES;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		18627C9129052BE000BD2A04 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
+				CODE_SIGN_STYLE = Automatic;
+				CURRENT_PROJECT_VERSION = 1;
+				DEVELOPMENT_TEAM = P8JZH34X63;
+				GENERATE_INFOPLIST_FILE = YES;
+				INFOPLIST_FILE = whisper.objc/Info.plist;
+				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
+				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
+				INFOPLIST_KEY_UIMainStoryboardFile = Main;
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				MARKETING_VERSION = 1.0;
+				PRODUCT_BUNDLE_IDENTIFIER = "com.ggerganov.whisper-objc";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_EMIT_LOC_STRINGS = YES;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		18627C7129052BDF00BD2A04 /* Build configuration list for PBXProject "whisper.objc" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				18627C8D29052BE000BD2A04 /* Debug */,
+				18627C8E29052BE000BD2A04 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		18627C8F29052BE000BD2A04 /* Build configuration list for PBXNativeTarget "whisper.objc" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				18627C9029052BE000BD2A04 /* Debug */,
+				18627C9129052BE000BD2A04 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 18627C6E29052BDF00BD2A04 /* Project object */;
+}
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/contents.xcworkspacedata
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:">
+   </FileRef>
+</Workspace>
--- a/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
+++ b/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
--- a/examples/whisper.objc/whisper.objc/AppDelegate.h
+++ b/examples/whisper.objc/whisper.objc/AppDelegate.h
@ -0,0 +1,14 @@
+//
+//  AppDelegate.h
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import <UIKit/UIKit.h>
+
+// Application delegate: a UIResponder subclass that adopts UIApplicationDelegate.
+// It declares no public properties or methods of its own.
+@interface AppDelegate : UIResponder <UIApplicationDelegate>
+
+
+@end
+
--- a/examples/whisper.objc/whisper.objc/AppDelegate.m
+++ b/examples/whisper.objc/whisper.objc/AppDelegate.m
@ -0,0 +1,40 @@
+//
+//  AppDelegate.m
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import "AppDelegate.h"
+
+// Private class extension (intentionally empty).
+@interface AppDelegate ()
+
+@end
+
+@implementation AppDelegate
+
+
+// Launch hook. No custom setup is performed here; always reports success.
+- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+    return YES;
+}
+
+
+#pragma mark - UISceneSession lifecycle
+
+
+// Invoked when a new scene session is being created.
+// Builds the configuration named "Default Configuration" for the role of the connecting session.
+- (UISceneConfiguration *)application:(UIApplication *)application configurationForConnectingSceneSession:(UISceneSession *)connectingSceneSession options:(UISceneConnectionOptions *)options {
+    UISceneSessionRole sessionRole = connectingSceneSession.role;
+    UISceneConfiguration *configuration = [[UISceneConfiguration alloc] initWithName:@"Default Configuration"
+                                                                           sessionRole:sessionRole];
+    return configuration;
+}
+
+
+// Invoked when the user discards scene sessions; this implementation does nothing.
+// Any resources tied to the discarded scenes would be released here.
+- (void)application:(UIApplication *)application didDiscardSceneSessions:(NSSet<UISceneSession *> *)sceneSessions {
+}
+
+
+@end
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/AccentColor.colorset/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/AccentColor.colorset/Contents.json
@ -0,0 +1,11 @@
+{
+  "colors" : [
+    {
+      "idiom" : "universal"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/AppIcon.appiconset/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/AppIcon.appiconset/Contents.json
@ -0,0 +1,13 @@
+{
+  "images" : [
+    {
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json
+++ b/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json
@ -0,0 +1,6 @@
+{
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
--- a/examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard
+++ b/examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard
@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="53" y="375"/>
+        </scene>
+    </scenes>
+</document>
--- a/examples/whisper.objc/whisper.objc/Base.lproj/Main.storyboard
+++ b/examples/whisper.objc/whisper.objc/Base.lproj/Main.storyboard
@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="21225" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <device id="retina6_0" orientation="portrait" appearance="light"/>
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="21207"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="System colors in document resources" minToolsVersion="11.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController id="BYZ-38-t0r" customClass="ViewController" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="390" height="844"/>
+                        <autoresizingMask key="autoresizingMask" flexibleMinX="YES" widthSizable="YES" flexibleMinY="YES" heightSizable="YES"/>
+                        <subviews>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="VOi-PT-Rbu">
+                                <rect key="frame" x="35" y="121" width="156" height="49"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
+                                <color key="tintColor" systemColor="opaqueSeparatorColor"/>
+                                <state key="normal" title="Start Capturing">
+                                    <color key="titleColor" systemColor="labelColor"/>
+                                </state>
+                                <connections>
+                                    <action selector="toggleCapture:" destination="BYZ-38-t0r" eventType="touchUpInside" id="BuO-Wf-RgV"/>
+                                </connections>
+                            </button>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" fixedFrame="YES" text="Status: Idle" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="Tgu-2q-eHQ">
+                                <rect key="frame" x="35" y="78" width="232" height="21"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <fontDescription key="fontDescription" type="system" pointSize="17"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" fixedFrame="YES" text="Record some speech and press &quot;Transcribe&quot;. The result will be displayed here." textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="mv2-KD-7jn">
+                                <rect key="frame" x="35" y="248" width="320" height="300"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <color key="backgroundColor" systemColor="systemBackgroundColor"/>
+                                <color key="textColor" systemColor="labelColor"/>
+                                <fontDescription key="fontDescription" type="system" pointSize="20"/>
+                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                            </textView>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" lineBreakMode="middleTruncation" id="Brs-xi-o8i">
+                                <rect key="frame" x="35" y="191" width="156" height="49"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <color key="backgroundColor" systemColor="opaqueSeparatorColor"/>
+                                <color key="tintColor" systemColor="opaqueSeparatorColor"/>
+                                <state key="normal" title="Transcribe">
+                                    <color key="titleColor" systemColor="labelColor"/>
+                                </state>
+                                <connections>
+                                    <action selector="onTranscribe:" destination="BYZ-38-t0r" eventType="touchUpInside" id="ond-bx-48O"/>
+                                    <action selector="onTranscribePrepare:" destination="BYZ-38-t0r" eventType="touchDown" id="16T-dN-dfB"/>
+                                </connections>
+                            </button>
+                        </subviews>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                        <color key="backgroundColor" systemColor="systemBackgroundColor"/>
+                        <constraints>
+                            <constraint firstItem="Brs-xi-o8i" firstAttribute="trailing" secondItem="VOi-PT-Rbu" secondAttribute="trailing" id="8mF-AW-cbc"/>
+                        </constraints>
+                    </view>
+                    <connections>
+                        <outlet property="buttonToggleCapture" destination="VOi-PT-Rbu" id="nis-VC-DQO"/>
+                        <outlet property="buttonTranscribe" destination="Brs-xi-o8i" id="N8h-9W-ywb"/>
+                        <outlet property="labelStatusInp" destination="Tgu-2q-eHQ" id="1hH-Ql-K6j"/>
+                        <outlet property="textviewResult" destination="mv2-KD-7jn" id="RBw-0L-iGj"/>
+                    </connections>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="30.769230769230766" y="-28.436018957345969"/>
+        </scene>
+    </scenes>
+    <resources>
+        <systemColor name="labelColor">
+            <color red="0.0" green="0.0" blue="0.0" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+        </systemColor>
+        <systemColor name="opaqueSeparatorColor">
+            <color red="0.77647058823529413" green="0.77647058823529413" blue="0.78431372549019607" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+        </systemColor>
+        <systemColor name="systemBackgroundColor">
+            <color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+        </systemColor>
+    </resources>
+</document>
--- a/examples/whisper.objc/whisper.objc/Info.plist
+++ b/examples/whisper.objc/whisper.objc/Info.plist
@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>NSMicrophoneUsageDescription</key>
+	<string>This app requires microphone access in order to transcribe speech</string>
+	<key>UIApplicationSceneManifest</key>
+	<dict>
+		<key>UIApplicationSupportsMultipleScenes</key>
+		<false/>
+		<key>UISceneConfigurations</key>
+		<dict>
+			<key>UIWindowSceneSessionRoleApplication</key>
+			<array>
+				<dict>
+					<key>UISceneConfigurationName</key>
+					<string>Default Configuration</string>
+					<key>UISceneDelegateClassName</key>
+					<string>SceneDelegate</string>
+					<key>UISceneStoryboardFile</key>
+					<string>Main</string>
+				</dict>
+			</array>
+		</dict>
+	</dict>
+</dict>
+</plist>
--- a/examples/whisper.objc/whisper.objc/SceneDelegate.h
+++ b/examples/whisper.objc/whisper.objc/SceneDelegate.h
@ -0,0 +1,15 @@
+//
+//  SceneDelegate.h
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import <UIKit/UIKit.h>
+
+// Scene delegate of the app. Info.plist names this class under UISceneDelegateClassName.
+@interface SceneDelegate : UIResponder <UIWindowSceneDelegate>
+
+// window shown by the scene
+@property (strong, nonatomic) UIWindow * window;
+
+@end
+
--- a/examples/whisper.objc/whisper.objc/SceneDelegate.m
+++ b/examples/whisper.objc/whisper.objc/SceneDelegate.m
@ -0,0 +1,57 @@
+//
+//  SceneDelegate.m
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import "SceneDelegate.h"
+
+@interface SceneDelegate ()
+
+@end
+
+// All scene life-cycle callbacks below are empty stubs: none of them runs any code.
+// Info.plist declares the "Main" storyboard for the scene configuration.
+@implementation SceneDelegate
+
+
+- (void)scene:(UIScene *)scene willConnectToSession:(UISceneSession *)session options:(UISceneConnectionOptions *)connectionOptions {
+    // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
+    // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
+    // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
+}
+
+
+- (void)sceneDidDisconnect:(UIScene *)scene {
+    // Called as the scene is being released by the system.
+    // This occurs shortly after the scene enters the background, or when its session is discarded.
+    // Release any resources associated with this scene that can be re-created the next time the scene connects.
+    // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
+}
+
+
+- (void)sceneDidBecomeActive:(UIScene *)scene {
+    // Called when the scene has moved from an inactive state to an active state.
+    // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
+}
+
+
+- (void)sceneWillResignActive:(UIScene *)scene {
+    // Called when the scene will move from an active state to an inactive state.
+    // This may occur due to temporary interruptions (ex. an incoming phone call).
+}
+
+
+- (void)sceneWillEnterForeground:(UIScene *)scene {
+    // Called as the scene transitions from the background to the foreground.
+    // Use this method to undo the changes made on entering the background.
+}
+
+
+- (void)sceneDidEnterBackground:(UIScene *)scene {
+    // Called as the scene transitions from the foreground to the background.
+    // Use this method to save data, release shared resources, and store enough scene-specific state information
+    // to restore the scene back to its current state.
+}
+
+
+@end
--- a/examples/whisper.objc/whisper.objc/ViewController.h
+++ b/examples/whisper.objc/whisper.objc/ViewController.h
@ -0,0 +1,41 @@
+//
+//  ViewController.h
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import <UIKit/UIKit.h>
+
+#import <AVFoundation/AVFoundation.h>
+#import <AudioToolbox/AudioQueue.h>
+
+// number of audio queue buffers kept in flight while capturing
+#define NUM_BUFFERS 3
+// maximum amount of captured audio, in seconds (together with SAMPLE_RATE this sizes the sample buffers)
+#define MAX_AUDIO_SEC 30
+// number of samples per second of captured audio
+#define SAMPLE_RATE 16000
+
+struct whisper_context;
+
+// State shared between the view controller and the audio capture callback.
+typedef struct
+{
+    // NOTE(review): ggwaveId and labelReceived are not referenced by ViewController.m - possibly leftovers, confirm before removing
+    int ggwaveId;
+    bool isCapturing;        // true while audio capture is running; the callback drops audio otherwise
+    UILabel * labelReceived;
+
+    AudioQueueRef queue;                       // input queue created in toggleCapture
+    AudioStreamBasicDescription dataFormat;    // capture format, filled by setupAudioFormat
+    AudioQueueBufferRef buffers[NUM_BUFFERS];  // buffers allocated for `queue`
+
+    int n_samples;             // number of samples stored in audioBufferI16 so far
+    int16_t * audioBufferI16;  // captured samples; room for MAX_AUDIO_SEC*SAMPLE_RATE entries
+    float   * audioBufferF32;  // same capacity; filled from audioBufferI16 before running the model
+
+    struct whisper_context * ctx;  // context returned by whisper_init (NULL when loading failed)
+} StateInp;
+
+// View controller that owns the audio capture and transcription state.
+@interface ViewController : UIViewController
+{
+    // capture buffers, audio queue and whisper context; also handed to the audio callback as user data
+    StateInp stateInp;
+}
+
+@end
--- a/examples/whisper.objc/whisper.objc/ViewController.m
+++ b/examples/whisper.objc/whisper.objc/ViewController.m
@ -0,0 +1,240 @@
+//
+//  ViewController.m
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import "ViewController.h"
+
+#import "whisper.h"
+
+// size in bytes of each audio queue capture buffer
+// (parenthesized so the macro expands safely inside larger expressions)
+#define NUM_BYTES_PER_BUFFER (16*1024)
+
+// callback used to process captured audio
+void AudioInputCallback(void * inUserData,
+                        AudioQueueRef inAQ,
+                        AudioQueueBufferRef inBuffer,
+                        const AudioTimeStamp * inStartTime,
+                        UInt32 inNumberPacketDescriptions,
+                        const AudioStreamPacketDescription * inPacketDescs);
+
+// Private class extension: outlets connected to the storyboard views.
+@interface ViewController ()
+
+// status line ("Status: Idle" / "Status: Capturing")
+@property (weak, nonatomic) IBOutlet UILabel *labelStatusInp;
+// button that starts / stops the capture; its title and color reflect the capture state
+@property (weak, nonatomic) IBOutlet UIButton *buttonToggleCapture;
+// button connected to the "buttonTranscribe" outlet in the storyboard
+@property (weak, nonatomic) IBOutlet UIButton *buttonTranscribe;
+// text view that shows progress messages and the transcription result
+@property (weak, nonatomic) IBOutlet UITextView *textviewResult;
+
+@end
+
+@implementation ViewController
+
+// Fill `format` with the capture format used by the audio queue:
+// mono, 16-bit signed integer linear PCM, one frame per packet, SAMPLE_RATE samples per second.
+- (void)setupAudioFormat:(AudioStreamBasicDescription*)format
+{
+    const UInt32 bytesPerSample = (UInt32) sizeof(int16_t);
+
+    // use the shared SAMPLE_RATE constant so the format always agrees with the
+    // sizing of the capture buffers (MAX_AUDIO_SEC*SAMPLE_RATE samples)
+    format->mSampleRate       = SAMPLE_RATE;
+    format->mFormatID         = kAudioFormatLinearPCM;
+    format->mFramesPerPacket  = 1;
+    format->mChannelsPerFrame = 1;
+    format->mBytesPerFrame    = bytesPerSample;
+    format->mBytesPerPacket   = bytesPerSample;
+    format->mBitsPerChannel   = 8*bytesPerSample;
+    format->mReserved         = 0;
+    format->mFormatFlags      = kLinearPCMFormatFlagIsSignedInteger;
+}
+
+// Prepare the capture buffers and load the whisper model bundled with the app.
+- (void)viewDidLoad {
+    [super viewDidLoad];
+
+    // initialize audio format and buffers
+    // (done before loading the model: the early returns below must not leave the
+    //  capture path with unallocated buffers)
+    {
+        [self setupAudioFormat:&stateInp.dataFormat];
+
+        stateInp.n_samples = 0;
+        stateInp.audioBufferI16 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(int16_t));
+        stateInp.audioBufferF32 = malloc(MAX_AUDIO_SEC*SAMPLE_RATE*sizeof(float));
+    }
+
+    // whisper.cpp initialization
+    {
+        // load the model
+        NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"ggml-base.en" ofType:@"bin"];
+
+        // check if the model exists (pathForResource returns nil when the resource is missing)
+        if (modelPath == nil || ![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
+            NSLog(@"Model file not found");
+            return;
+        }
+
+        NSLog(@"Loading model from %@", modelPath);
+
+        // create ggml context
+        stateInp.ctx = whisper_init([modelPath UTF8String]);
+
+        // check if the model was loaded successfully
+        if (stateInp.ctx == NULL) {
+            NSLog(@"Failed to load model");
+            return;
+        }
+    }
+}
+
+// Reset the UI to the idle state and tear down the audio queue, if there is one.
+// Calling it without a live queue (e.g. twice in a row) only resets the UI.
+-(IBAction) stopCapturing {
+    NSLog(@"Stop capturing");
+
+    _labelStatusInp.text = @"Status: Idle";
+
+    [_buttonToggleCapture setTitle:@"Start capturing" forState:UIControlStateNormal];
+    [_buttonToggleCapture setBackgroundColor:[UIColor grayColor]];
+
+    stateInp.isCapturing = false;
+
+    // nothing to tear down (stateInp is an instance variable and therefore starts zeroed)
+    if (stateInp.queue == NULL) {
+        return;
+    }
+
+    AudioQueueStop(stateInp.queue, true);
+    for (int i = 0; i < NUM_BUFFERS; i++) {
+        if (stateInp.buffers[i] != NULL) {
+            AudioQueueFreeBuffer(stateInp.queue, stateInp.buffers[i]);
+            stateInp.buffers[i] = NULL;
+        }
+    }
+
+    AudioQueueDispose(stateInp.queue, true);
+
+    // forget the disposed queue so that a later call cannot touch a dangling reference
+    stateInp.queue = NULL;
+}
+
+// Start capturing audio, or stop the capture when one is already running.
+// Any failure while setting up or starting the audio queue ends in stopCapturing.
+- (IBAction)toggleCapture:(id)sender {
+    if (stateInp.isCapturing) {
+        // stop capturing
+        [self stopCapturing];
+
+        return;
+    }
+
+    // initiate audio capturing
+    NSLog(@"Start capturing");
+
+    stateInp.n_samples = 0;
+
+    // drop buffer references left over from a previous queue
+    for (int i = 0; i < NUM_BUFFERS; i++) {
+        stateInp.buffers[i] = NULL;
+    }
+
+    OSStatus status = AudioQueueNewInput(&stateInp.dataFormat,
+                                         AudioInputCallback,
+                                         &stateInp,
+                                         CFRunLoopGetCurrent(),
+                                         kCFRunLoopCommonModes,
+                                         0,
+                                         &stateInp.queue);
+
+    // allocate and enqueue the capture buffers, giving up at the first failure
+    for (int i = 0; i < NUM_BUFFERS && status == noErr; i++) {
+        status = AudioQueueAllocateBuffer(stateInp.queue, NUM_BYTES_PER_BUFFER, &stateInp.buffers[i]);
+        if (status == noErr) {
+            status = AudioQueueEnqueueBuffer(stateInp.queue, stateInp.buffers[i], 0, NULL);
+        }
+    }
+
+    if (status == noErr) {
+        // set before starting the queue: the callback ignores audio while this is false
+        stateInp.isCapturing = true;
+        status = AudioQueueStart(stateInp.queue, NULL);
+    }
+
+    if (status != noErr) {
+        NSLog(@"Failed to start capturing (status %d)", (int) status);
+        [self stopCapturing];
+
+        return;
+    }
+
+    _labelStatusInp.text = @"Status: Capturing";
+    [sender setTitle:@"Stop Capturing" forState:UIControlStateNormal];
+    [_buttonToggleCapture setBackgroundColor:[UIColor redColor]];
+}
+
+// Show a progress message and end any capture that is still running.
+- (IBAction)onTranscribePrepare:(id)sender {
+    _textviewResult.text = @"Processing - please wait ...";
+
+    if (!stateInp.isCapturing) {
+        return;
+    }
+
+    // stop capturing
+    [self stopCapturing];
+}
+
+// Run the whisper model on the captured audio and show the transcription,
+// followed by the processing time, in the result text view.
+- (IBAction)onTranscribe:(id)sender {
+    // the model may have failed to load in viewDidLoad; whisper_full needs a valid context
+    if (stateInp.ctx == NULL) {
+        NSLog(@"Model not loaded");
+        _textviewResult.text = @"Model not loaded";
+
+        return;
+    }
+
+    NSLog(@"Processing %d samples", stateInp.n_samples);
+
+    // process captured audio
+    // convert I16 to F32
+    for (int i = 0; i < stateInp.n_samples; i++) {
+        stateInp.audioBufferF32[i] = (float)stateInp.audioBufferI16[i] / 32768.0f;
+    }
+
+    // run the model
+    struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
+
+    params.print_realtime       = true;
+    params.print_progress       = false;
+    params.print_timestamps     = true;
+    params.print_special_tokens = false;
+    params.translate            = false;
+    params.language             = "en";
+    params.n_threads            = 4;
+    params.offset_ms            = 0;
+
+    CFTimeInterval startTime = CACurrentMediaTime();
+
+    if (whisper_full(stateInp.ctx, params, stateInp.audioBufferF32, stateInp.n_samples) != 0) {
+        NSLog(@"Failed to run the model");
+        _textviewResult.text = @"Failed to run the model";
+
+        return;
+    }
+
+    CFTimeInterval endTime = CACurrentMediaTime();
+
+    // collect the text of all segments, then update the text view once
+    NSMutableString *result = [NSMutableString string];
+
+    const int n_segments = whisper_full_n_segments(stateInp.ctx);
+    for (int i = 0; i < n_segments; i++) {
+        const char * text_cur = whisper_full_get_segment_text(stateInp.ctx, i);
+
+        // stringWithUTF8String yields nil for bytes that are not valid UTF-8;
+        // appending nil would raise an exception, so such segments are skipped
+        NSString *segment = (text_cur != NULL) ? [NSString stringWithUTF8String:text_cur] : nil;
+        if (segment == nil) {
+            NSLog(@"Skipping segment %d: text is not valid UTF-8", i);
+            continue;
+        }
+
+        [result appendString:segment];
+    }
+
+    // internal model timing
+    whisper_print_timings(stateInp.ctx);
+
+    NSLog(@"\nProcessing time: %5.3f", endTime - startTime);
+
+    [result appendFormat:@"\n\n[processing time: %5.3f s]", endTime - startTime];
+
+    _textviewResult.text = result;
+}
+
+//
+// Callback implementation
+//
+
+// Audio queue input callback: appends the samples of `inBuffer` to the I16 capture
+// buffer of the StateInp passed as user data and hands the buffer back to the queue.
+// Audio is dropped (and the buffer is not re-enqueued) when no capture is running or
+// when the new samples would exceed MAX_AUDIO_SEC*SAMPLE_RATE samples in total.
+void AudioInputCallback(void * inUserData,
+                        AudioQueueRef inAQ,
+                        AudioQueueBufferRef inBuffer,
+                        const AudioTimeStamp * inStartTime,
+                        UInt32 inNumberPacketDescriptions,
+                        const AudioStreamPacketDescription * inPacketDescs)
+{
+    StateInp * state = (StateInp*)inUserData;
+
+    if (state->isCapturing == false) {
+        NSLog(@"Not capturing, ignoring audio");
+        return;
+    }
+
+    // the byte count is halved to obtain the number of 2-byte samples
+    const int n = inBuffer->mAudioDataByteSize / 2;
+
+    NSLog(@"Captured %d new samples", n);
+
+    const int n_total = state->n_samples + n;
+    if (n_total > MAX_AUDIO_SEC*SAMPLE_RATE) {
+        NSLog(@"Too much audio data, ignoring");
+        return;
+    }
+
+    // append the new samples right after the ones captured so far
+    const short * src = (const short*)inBuffer->mAudioData;
+    int16_t     * dst = state->audioBufferI16 + state->n_samples;
+    for (int i = 0; i < n; i++) {
+        dst[i] = src[i];
+    }
+
+    state->n_samples = n_total;
+
+    // put the buffer back in the queue
+    AudioQueueEnqueueBuffer(state->queue, inBuffer, 0, NULL);
+}
+
+@end
--- a/examples/whisper.objc/whisper.objc/main.m
+++ b/examples/whisper.objc/whisper.objc/main.m
@ -0,0 +1,18 @@
+//
+//  main.m
+//  whisper.objc
+//
+//  Created by Georgi Gerganov on 23.10.22.
+//
+
+#import <UIKit/UIKit.h>
+#import "AppDelegate.h"
+
+// Program entry point: resolves the name of the AppDelegate class and passes it,
+// together with the command-line arguments, to UIApplicationMain.
+int main(int argc, char * argv[]) {
+    NSString * appDelegateClassName;
+    @autoreleasepool {
+        // Setup code that might create autoreleased objects goes here.
+        appDelegateClassName = NSStringFromClass([AppDelegate class]);
+    }
+    return UIApplicationMain(argc, argv, nil, appDelegateClassName);
+}
--- a/examples/whisper.wasm/CMakeLists.txt
+++ b/examples/whisper.wasm/CMakeLists.txt
@ -0,0 +1,4 @@
+# Stage the files of the browser example under the runtime output directory:
+#   - index.html is generated from the template; only @VAR@ references are substituted (@ONLY)
+#   - whisper.js is copied unchanged from the JavaScript bindings (COPYONLY)
+set(TARGET whisper.wasm)
+
+# PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps the path valid when this
+# project is included from a parent build; the paths are quoted to survive spaces.
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html"         "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html" @ONLY)
+configure_file("${PROJECT_SOURCE_DIR}/bindings/javascript/whisper.js" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/whisper.js" COPYONLY)
--- a/examples/whisper.wasm/README.md
+++ b/examples/whisper.wasm/README.md
@ -0,0 +1,43 @@
+# whisper.wasm
+
+Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser
+
+This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
+implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer -
+it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3
+real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe 60 seconds of audio in about
+20-30 seconds).
+
+This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make
+sure that [your browser supports them](https://webassembly.org/roadmap/).
+
+The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and
+performance are unsatisfactory. The implementation currently supports only the `Greedy` sampling strategy. Both
+transcription and translation are supported.
+
+Since the model data is quite big (74MB for the `tiny` model) you need to manually load the model into the web-page.
+
+The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the
+audio is limited to 120 seconds.
+
+## Live demo
+
+Link: https://whisper.ggerganov.com
+
+![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png)
+
+
+## Build instructions
+
+```bash
+# build using Emscripten
+git clone https://github.com/ggerganov/whisper.cpp
+cd whisper.cpp
+mkdir build-em && cd build-em
+emcmake cmake ..
+make -j
+
+# copy the produced page to your HTTP path
+cp bin/whisper.wasm/index.html /path/to/html/
+cp bin/whisper.wasm/whisper.js /path/to/html/
+cp bin/libwhisper.worker.js    /path/to/html/
+```
--- a/examples/whisper.wasm/index-tmpl.html
+++ b/examples/whisper.wasm/index-tmpl.html
@ -0,0 +1,485 @@
+<!doctype html>
+<html lang="en-us">
+    <head>
+        <title>whisper.cpp : WASM example</title>
+
+        <style>
+            #output {
+                width: 100%;
+                height: 100%;
+                margin: 0 auto;
+                margin-top: 10px;
+                border-left: 0px;
+                border-right: 0px;
+                padding-left: 0px;
+                padding-right: 0px;
+                display: block;
+                background-color: black;
+                color: white;
+                font-size: 10px;
+                font-family: 'Lucida Console', Monaco, monospace;
+                outline: none;
+                white-space: pre;
+                overflow-wrap: normal;
+                overflow-x: scroll;
+            }
+        </style>
+    </head>
+    <body>
+        <div id="main-container">
+            <b>Minimal <a href="https://github.com/ggerganov/whisper.cpp">whisper.cpp</a> example running fully in the browser</b>
+
+            <br><br>
+
+            Usage instructions:<br>
+            <ul>
+                <li>Load a ggml model file (you can obtain one from <a href="https://ggml.ggerganov.com/">here</a>, recommended: <b>tiny</b> or <b>base</b>)</li>
+                <li>Select audio file to transcribe or record audio from the microphone (sample: <a href="https://whisper.ggerganov.com/jfk.wav">jfk.wav</a>)</li>
+                <li>Click on the "Transcribe" button to start the transcription</li>
+            </ul>
+
+            Note that the computation is quite heavy and may take a few seconds to complete.<br>
+            The transcription results will be displayed in the text area below.<br><br>
+            <b>Important: your browser must support WASM SIMD instructions for this to work.</b>
+
+            <br><br><hr>
+
+            <div id="model">
+                Model:
+                <input type="file" id="file" name="file" onchange="loadFile(event, 'ggml.bin')" />
+            </div>
+
+            <br>
+
+            <!-- radio button to select between file upload or microphone -->
+            <div id="input">
+                Input:
+                <input type="radio" id="file" name="input" value="file" checked="checked" onchange="changeInput('file')" /> File
+                <input type="radio" id="mic" name="input" value="mic" onchange="changeInput('mic')" /> Microphone
+            </div>
+
+            <br>
+
+            <div id="input_file">
+                Audio file:
+                <input type="file" id="file" name="file" onchange="loadAudio(event)" />
+            </div>
+
+            <div id="input_mic" style="display: none;">
+                Microphone:
+                <button id="start" onclick="startRecording()">Start</button>
+                <button id="stop" onclick="stopRecording()" disabled>Stop</button>
+
+                <!-- progress bar to show recording progress -->
+                <br><br>
+                <div id="progress" style="display: none;">
+                    <div id="progress-bar" style="width: 0%; height: 10px; background-color: #4CAF50;"></div>
+                    <div id="progress-text">0%</div>
+                </div>
+            </div>
+
+            <audio controls="controls" id="audio" loop hidden>
+                Your browser does not support the &lt;audio&gt; tag.
+                <source id="source" src="" type="audio/wav" />
+            </audio>
+
+            <hr><br>
+
+            <table>
+                <tr>
+                    <td>
+                        Language:
+                        <select id="language" name="language">
+                            <option value="en">English</option>
+                            <option value="ar">Arabic</option>
+                            <option value="hy">Armenian</option>
+                            <option value="az">Azerbaijani</option>
+                            <option value="eu">Basque</option>
+                            <option value="be">Belarusian</option>
+                            <option value="bn">Bengali</option>
+                            <option value="bg">Bulgarian</option>
+                            <option value="ca">Catalan</option>
+                            <option value="zh">Chinese</option>
+                            <option value="hr">Croatian</option>
+                            <option value="cs">Czech</option>
+                            <option value="da">Danish</option>
+                            <option value="nl">Dutch</option>
+                            <option value="en">English</option>
+                            <option value="et">Estonian</option>
+                            <option value="tl">Filipino</option>
+                            <option value="fi">Finnish</option>
+                            <option value="fr">French</option>
+                            <option value="gl">Galician</option>
+                            <option value="ka">Georgian</option>
+                            <option value="de">German</option>
+                            <option value="el">Greek</option>
+                            <option value="gu">Gujarati</option>
+                            <option value="iw">Hebrew</option>
+                            <option value="hi">Hindi</option>
+                            <option value="hu">Hungarian</option>
+                            <option value="is">Icelandic</option>
+                            <option value="id">Indonesian</option>
+                            <option value="ga">Irish</option>
+                            <option value="it">Italian</option>
+                            <option value="ja">Japanese</option>
+                            <option value="kn">Kannada</option>
+                            <option value="ko">Korean</option>
+                            <option value="la">Latin</option>
+                            <option value="lv">Latvian</option>
+                            <option value="lt">Lithuanian</option>
+                            <option value="mk">Macedonian</option>
+                            <option value="ms">Malay</option>
+                            <option value="mt">Maltese</option>
+                            <option value="no">Norwegian</option>
+                            <option value="fa">Persian</option>
+                            <option value="pl">Polish</option>
+                            <option value="pt">Portuguese</option>
+                            <option value="ro">Romanian</option>
+                            <option value="ru">Russian</option>
+                            <option value="sr">Serbian</option>
+                            <option value="sk">Slovak</option>
+                            <option value="sl">Slovenian</option>
+                            <option value="es">Spanish</option>
+                            <option value="sw">Swahili</option>
+                            <option value="sv">Swedish</option>
+                            <option value="ta">Tamil</option>
+                            <option value="te">Telugu</option>
+                            <option value="th">Thai</option>
+                            <option value="tr">Turkish</option>
+                            <option value="uk">Ukrainian</option>
+                            <option value="ur">Urdu</option>
+                            <option value="vi">Vietnamese</option>
+                            <option value="cy">Welsh</option>
+                            <option value="yi">Yiddish</option>
+                        </select>
+                    </td>
+                    <td>
+                        <button onclick="onProcess(false);">Transcribe</button>
+                    </td>
+                    <td>
+                        <button onclick="onProcess(true);">Translate</button>
+                    </td>
+                </tr>
+            </table>
+
+            <br>
+
+            <!-- textarea with height filling the rest of the page -->
+            <textarea id="output" rows="20"></textarea>
+
+            <br><br>
+
+            <div class="cell-version">
+                <span>
+                    |
+                    Build time: <span class="nav-link">@GIT_DATE@</span> |
+                    Commit hash: <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/commit/@GIT_SHA1@">@GIT_SHA1@</a> |
+                    Commit subject: <span class="nav-link">@GIT_COMMIT_SUBJECT@</span> |
+                    <a class="nav-link" href="https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.wasm">Source Code</a> |
+                </span>
+            </div>
+        </div>
+
+        <script type='text/javascript'>
+            // Placeholder: currently does nothing with `audio`.
+            // The disabled code below sketches the intended behavior: expose the audio
+            // through the <audio> element, or hide the element when there is no audio.
+            // TODO: convert audio buffer to WAV
+            function setAudio(audio) {
+                //if (audio) {
+                //    // convert to 16-bit PCM
+                //    var blob = new Blob([audio], { type: 'audio/wav' });
+                //    var url = URL.createObjectURL(blob);
+                //    document.getElementById('source').src = url;
+                //    document.getElementById('audio').hidden = false;
+                //    document.getElementById('audio').loop = false;
+                //    document.getElementById('audio').load();
+                //} else {
+                //    document.getElementById('audio').hidden = true;
+                //}
+            }
+
+            function changeInput(input) {
+                if (input == 'file') {
+                    document.getElementById('input_file').style.display = 'block';
+                    document.getElementById('input_mic').style.display = 'none';
+                    document.getElementById('progress').style.display = 'none';
+                } else {
+                    document.getElementById('input_file').style.display = 'none';
+                    document.getElementById('input_mic').style.display = 'block';
+                    document.getElementById('progress').style.display = 'block';
+                }
+            }
+
+            var printTextarea = (function() {
+                    var element = document.getElementById('output');
+                    if (element) element.alue = ''; // clear browser cache
+                    return function(text) {
+                        if (arguments.length > 1) text = Array.prototype.slice.call(arguments).join(' ');
+                        console.log(text);
+                        if (element) {
+                            element.value += text + "\n";
+                            element.scrollTop = element.scrollHeight; // focus on bottom
+                        }
+                    };
+                })();
+
+            // Hooks object named `Module`; presumably picked up by the generated whisper.js
+            // runtime - confirm against the bindings.
+            var Module = {
+                // regular and error output both go to the page log
+                print: printTextarea,
+                printErr: printTextarea,
+                // status messages are logged with a 'js: ' prefix
+                setStatus: function(text) {
+                    printTextarea('js: ' + text);
+                },
+                // intentionally a no-op
+                monitorRunDependencies: function(left) {
+                }
+            };
+
+            const kMaxAudio_s = 120;
+            const kSampleRate = 16000;
+
+            window.AudioContext = window.AudioContext || window.webkitAudioContext;
+            window.OfflineAudioContext = window.OfflineAudioContext || window.webkitOfflineAudioContext;
+
+            // web audio context
+            var context = null;
+
+            // audio data
+            var audio = null;
+
+            // the whisper instance
+            var instance = null;
+            var model_fname = '';
+
+            // helper function
+            function convertTypedArray(src, type) {
+                var buffer = new ArrayBuffer(src.byteLength);
+                var baseView = new src.constructor(buffer).set(src);
+                return new type(buffer);
+            }
+
+            //
+            // load model
+            //
+
+            function loadFile(event, fname) {
+                var file = event.target.files[0] || null;
+                if (file == null) {
+                    return;
+                }
+
+                printTextarea("js: loading model: " + file.name + ", size: " + file.size + " bytes");
+                printTextarea('js: please wait ...');
+
+                var reader = new FileReader();
+                reader.onload = function(event) {
+                    var buf = new Uint8Array(reader.result);
+
+                    // write to WASM file using whisper.FS_createDataFile
+                    // if the file exists, delete it
+                    try {
+                        Module.FS_unlink(fname);
+                    } catch (e) {
+                    }
+                    Module.FS_createDataFile("/", fname, buf, true, true);
+
+                    model_fname = file.name;
+                    printTextarea('js: loaded model: ' + model_fname + ' size: ' + buf.length);
+                }
+                reader.readAsArrayBuffer(file);
+            }
+
+            //
+            // audio file
+            //
+
+            function loadAudio(event) {
+                if (!context) {
+                    context = new AudioContext({sampleRate: 16000});
+                }
+
+                var file = event.target.files[0] || null;
+                if (file == null) {
+                    return;
+                }
+
+                printTextarea('js: loading audio: ' + file.name + ', size: ' + file.size + ' bytes');
+                printTextarea('js: please wait ...');
+
+                var reader = new FileReader();
+                reader.onload = function(event) {
+                    var buf = new Uint8Array(reader.result);
+
+                    context.decodeAudioData(buf.buffer, function(audioBuffer) {
+                        var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
+                        var source = offlineContext.createBufferSource();
+                        source.buffer = audioBuffer;
+                        source.connect(offlineContext.destination);
+                        source.start(0);
+
+                        offlineContext.startRendering().then(function(renderedBuffer) {
+                            audio = renderedBuffer.getChannelData(0);
+                            printTextarea('js: audio loaded, size: ' + audio.length);
+
+                            // truncate to first 30 seconds
+                            if (audio.length > kMaxAudio_s*kSampleRate) {
+                                audio = audio.slice(0, kMaxAudio_s*kSampleRate);
+                                printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
+                            }
+
+                            setAudio(audio);
+                        });
+                    }, function(e) {
+                        printTextarea('js: error decoding audio: ' + e);
+                        audio = null;
+                        setAudio(audio);
+                    });
+                }
+                reader.readAsArrayBuffer(file);
+            }
+
+            //
+            // microphone
+            //
+
+            var mediaRecorder = null;
+            var doRecording = false;
+            var startTime = 0;
+
+            // Request the end of the microphone recording by clearing the doRecording flag.
+            function stopRecording() {
+                doRecording = false;
+            }
+
+            // record up to kMaxAudio_s seconds of audio from the microphone
+            // check if doRecording is false every 1000 ms and stop recording if so
+            // update progress information
+            function startRecording() {
+                if (!context) {
+                    context = new AudioContext({sampleRate: 16000});
+                }
+
+                document.getElementById('start').disabled = true;
+                document.getElementById('stop').disabled = false;
+
+                document.getElementById('progress-bar').style.width = '0%';
+                document.getElementById('progress-text').innerHTML = '0%';
+
+                doRecording = true;
+                startTime = Date.now();
+
+                var chunks = [];
+                var stream = null;
+
+                navigator.mediaDevices.getUserMedia({audio: true, video: false})
+                    .then(function(s) {
+                        stream = s;
+                        mediaRecorder = new MediaRecorder(stream);
+                        mediaRecorder.ondataavailable = function(e) {
+                            chunks.push(e.data);
+                        };
+                        mediaRecorder.onstop = function(e) {
+                            var blob = new Blob(chunks, { 'type' : 'audio/ogg; codecs=opus' });
+                            chunks = [];
+
+                            document.getElementById('start').disabled = false;
+                            document.getElementById('stop').disabled = true;
+
+                            var reader = new FileReader();
+                            reader.onload = function(event) {
+                                var buf = new Uint8Array(reader.result);
+
+                                context.decodeAudioData(buf.buffer, function(audioBuffer) {
+                                    var offlineContext = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);
+                                    var source = offlineContext.createBufferSource();
+                                    source.buffer = audioBuffer;
+                                    source.connect(offlineContext.destination);
+                                    source.start(0);
+
+                                    offlineContext.startRendering().then(function(renderedBuffer) {
+                                        audio = renderedBuffer.getChannelData(0);
+                                        printTextarea('js: audio recorded, size: ' + audio.length);
+
+                                        // truncate to first 30 seconds
+                                        if (audio.length > kMaxAudio_s*kSampleRate) {
+                                            audio = audio.slice(0, kMaxAudio_s*kSampleRate);
+                                            printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
+                                        }
+                                        setAudio(audio);
+                                    });
+                                }, function(e) {
+                                    printTextarea('js: error decoding audio: ' + e);
+                                    audio = null;
+                                    setAudio(audio);
+                                });
+                            }
+
+                            reader.readAsArrayBuffer(blob);
+                        };
+                        mediaRecorder.start();
+                    })
+                    .catch(function(err) {
+                        printTextarea('js: error getting audio stream: ' + err);
+                    });
+
+                var interval = setInterval(function() {
+                    if (!doRecording) {
+                        clearInterval(interval);
+                        mediaRecorder.stop();
+                        stream.getTracks().forEach(function(track) {
+                            track.stop();
+                        });
+                    }
+
+                    document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
+                    document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
+                }, 1000);
+
+                printTextarea('js: recording ...');
+
+                setTimeout(function() {
+                    if (doRecording) {
+                        printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
+                        stopRecording();
+                    }
+                }, kMaxAudio_s*1000);
+            }
+
+            //
+            // transcribe
+            //
+
+            function onProcess(translate) {
+                if (!instance) {
+                    instance = Module.init('ggml.bin');
+
+                    if (instance) {
+                        printTextarea("js: whisper initialized, instance: " + instance);
+                        document.getElementById('model').innerHTML = 'Model loaded: ' + model_fname;
+                    }
+                }
+
+                if (!instance) {
+                    printTextarea("js: failed to initialize whisper");
+                    return;
+                }
+
+                if (!audio) {
+                    printTextarea("js: no audio data");
+                    return;
+                }
+
+                if (instance) {
+                    printTextarea('');
+                    printTextarea('js: processing - this might take a while ...');
+                    printTextarea('js: the page will be unresponsive until the processing is completed');
+                    printTextarea('');
+
+                    setTimeout(function() {
+                        var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
+                        console.log('js: full_default returned: ' + ret);
+                        if (ret) {
+                            printTextarea("js: whisper returned: " + ret);
+                        }
+                    }, 100);
+                }
+            }
+        </script>
+        <script type="text/javascript" src="whisper.js"></script>
+    </body>
+</html>
--- a/extra/bench-all.sh
+++ b/extra/bench-all.sh
@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Helper script to run the bench tool on all models and print the results in share-able format
+
+printf "Usage: ./bench.sh [n_threads]\n"
+
+if [ -z "$1" ]; then
+    n_threads=4
+else
+    n_threads=$1
+fi
+
+models=( "tiny" "base" "small" "medium" "large" )
+
+printf "\n"
+printf "Running benchmark for all models\n"
+printf "This can take a while!\n"
+printf "\n"
+
+printf "| CPU | OS | Config | Model | Threads | Load [ms] | Encode [ms] |\n"
+printf "| --- | -- | ------ | ----- | ------- | --------- | ----------- |\n"
+
+for model in "${models[@]}"; do
+    # run once to heat-up the cache
+    ./bench -m ./models/ggml-$model.bin -t $n_threads 2>/dev/null 1>/dev/null
+
+    # actual run
+    # store stderr output in a variable in order to parse it later
+    output=$(./bench -m ./models/ggml-$model.bin -t $n_threads 2>&1)
+
+    # parse the output:
+    load_time=$(echo "$output" | grep "load time" | awk '{print $5}')
+    encode_time=$(echo "$output" | grep "encode time" | awk '{print $5}')
+    system_info=$(echo "$output" | grep "system_info")
+    n_threads=$(echo "$output" | grep "system_info" | awk '{print $4}')
+
+    config=""
+
+    if [[ $system_info == *"AVX2 = 1"* ]]; then
+        config="$config AVX2"
+    fi
+
+    if [[ $system_info == *"NEON = 1"* ]]; then
+        config="$config NEON"
+    fi
+
+    if [[ $system_info == *"BLAS = 1"* ]]; then
+        config="$config BLAS"
+    fi
+
+    printf "| <todo> | <todo> | $config | $model | $n_threads | $load_time | $encode_time |\n"
+done
+
--- a/extra/convert-all.sh
+++ b/extra/convert-all.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Convert every Whisper PyTorch checkpoint found in ~/.cache/whisper to the ggml format.
+# For each model the converter writes models/ggml-model.bin, which is then renamed to
+# models/ggml-<model>.bin.
+
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
+
+for model in "${models[@]}"; do
+    # rename only after a successful conversion - otherwise a ggml-model.bin left over from
+    # an earlier iteration or run would end up under the wrong model name
+    if python3 models/convert-pt-to-ggml.py ~/.cache/whisper/"$model.pt" ../whisper models/; then
+        mv -v models/ggml-model.bin "models/ggml-$model.bin"
+    else
+        printf "failed to convert model '%s' - skipping\n" "$model" >&2
+    fi
+done
--- a/ggml.c
+++ b/ggml.c
@ -1,6 +1,11 @@
 #include "ggml.h"

+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <malloc.h> // using malloc.h with MSC/MINGW
+#elif !defined(__FreeBSD__)
 #include <alloca.h>
+#endif
+
 #include <assert.h>
 #include <time.h>
 #include <math.h>
@ -8,11 +13,46 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdio.h>
+
+#if defined _MSC_VER
+#include <Windows.h>
+
+// minimal stand-ins for the <stdatomic.h> types and functions used in this file,
+// implemented with the Win32 Interlocked* API
+typedef volatile LONG atomic_int;
+typedef atomic_int atomic_bool;
+
+// atomically replace *ptr with val
+static void atomic_store(atomic_int* ptr, LONG val) {
+    InterlockedExchange(ptr, val);
+}
+// atomically read *ptr
+// (a compare-exchange of 0 with 0 never changes the stored value, but returns it)
+static LONG atomic_load(atomic_int* ptr) {
+    return InterlockedCompareExchange(ptr, 0, 0);
+}
+// atomically add inc to *ptr and return the value that was stored before the addition
+static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
+    return InterlockedExchangeAdd(ptr, inc);
+}
+// atomically subtract dec from *ptr (adds the negated value) and return the previous value
+static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
+    return atomic_fetch_add(ptr, -(dec));
+}
+
+typedef HANDLE pthread_t;
+
+typedef DWORD thread_ret_t;
+// Minimal pthread_create() replacement built on CreateThread().
+//   out    - receives the handle of the newly created thread
+//   unused - thread attributes, ignored
+//   func   - thread entry point
+//   arg    - argument handed to func
+// Returns 0 on success and a non-zero value on failure, like the POSIX function.
+static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) {
+    HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL);
+
+    (void) unused;
+
+    if (handle == NULL) {
+        return 1;
+    }
+
+    // the handle has to be stored through the pointer - assigning to `out` itself would only
+    // modify the local copy of the pointer and the caller would never see the handle
+    *out = handle;
+
+    return 0;
+}
+
+// Minimal pthread_join() replacement: block until the thread has finished.
+//   thread - handle obtained from pthread_create()
+//   unused - location for the thread's return value, ignored
+// Returns the result of WaitForSingleObject() (WAIT_OBJECT_0, i.e. 0, on success).
+static int pthread_join(pthread_t thread, void* unused) {
+    int ret = (int) WaitForSingleObject(thread, INFINITE);
+
+    (void) unused;
+
+    // a joined thread is not used again - close its handle so it does not leak
+    CloseHandle(thread);
+
+    return ret;
+}
+#else
+#include <pthread.h>
 #include <stdatomic.h>

-#include <pthread.h>
+typedef void* thread_ret_t;
+#endif

 #define GGML_DEBUG 0
+#define GGML_GELU_FP16

 #if UINTPTR_MAX == 0xFFFFFFFF
    #define GGML_MEM_ALIGN 4
@ -36,6 +76,8 @@

 #ifdef GGML_USE_ACCELERATE
 #include <Accelerate/Accelerate.h>
+#elif GGML_USE_OPENBLAS
+#include <cblas.h>
 #endif

 // floating point type used to accumulate sums
@ -62,7 +104,14 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {

 #else

+#ifdef __wasm_simd128__
+#include <wasm_simd128.h>
+#else
 #include <immintrin.h>
+#endif
+
+// FP16 <-> FP32
+// ref: https://github.com/Maratyszcza/FP16

 static inline float fp32_from_bits(uint32_t w) {
    union {
@ -144,6 +193,25 @@ static ggml_fp16_t table_exp_f16[1 << 16];
 // timing
 //

+#if defined(_MSC_VER)
+// ticks per second of the performance counter, set by ggml_time_init()
+static int64_t timer_freq;
+
+// Query the performance counter frequency.
+// Has to be called before ggml_time_ms() / ggml_time_us(), which divide by timer_freq.
+void ggml_time_init(void) {
+    LARGE_INTEGER frequency;
+    QueryPerformanceFrequency(&frequency);
+    timer_freq = frequency.QuadPart;
+}
+
+// Convert the current performance counter value to units of 1/scale seconds.
+// Whole seconds and the sub-second remainder are converted separately: multiplying the raw
+// counter by scale first can overflow int64_t once the counter has been running long enough
+// (with a 10 MHz counter and scale = 1000000 after roughly 10 days).
+static int64_t ggml_time_scaled(int64_t scale) {
+    LARGE_INTEGER t;
+    QueryPerformanceCounter(&t);
+    return (t.QuadPart / timer_freq) * scale + ((t.QuadPart % timer_freq) * scale) / timer_freq;
+}
+
+// current performance counter time in milliseconds
+int64_t ggml_time_ms(void) {
+    return ggml_time_scaled(1000);
+}
+
+// current performance counter time in microseconds
+int64_t ggml_time_us(void) {
+    return ggml_time_scaled(1000000);
+}
+#else
+void ggml_time_init(void) {}
 int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
@ -155,6 +223,7 @@ int64_t ggml_time_us(void) {
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
 }
+#endif

 int64_t ggml_cycles(void) {
    return clock();
@ -254,7 +323,7 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float
        sumf += x[i]*y[i];
    }
 #elif defined(__AVX2__)
-    // AVX 256-bit (unroll 4)
+    // AVX 256-bit
    const int n32 = (n & ~31);

    __m256 sum0 = _mm256_setzero_ps();
@ -296,6 +365,45 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float
    for (int i = n32; i < n; ++i) {
        sumf += x[i]*y[i];
    }
+#elif defined(__wasm_simd128__)
+    // WASM 128-bit
+    const int n16 = (n & ~15);
+
+    v128_t sum0 = wasm_f32x4_splat(0);
+    v128_t sum1 = wasm_f32x4_splat(0);
+    v128_t sum2 = wasm_f32x4_splat(0);
+    v128_t sum3 = wasm_f32x4_splat(0);
+
+    v128_t x0, x1, x2, x3;
+    v128_t y0, y1, y2, y3;
+
+    for (int i = 0; i < n16; i += 16) {
+        x0 = wasm_v128_load(x + i + 0);
+        x1 = wasm_v128_load(x + i + 4);
+        x2 = wasm_v128_load(x + i + 8);
+        x3 = wasm_v128_load(x + i + 12);
+
+        y0 = wasm_v128_load(y + i + 0);
+        y1 = wasm_v128_load(y + i + 4);
+        y2 = wasm_v128_load(y + i + 8);
+        y3 = wasm_v128_load(y + i + 12);
+
+        sum0 = wasm_f32x4_add(sum0, wasm_f32x4_mul(x0, y0));
+        sum1 = wasm_f32x4_add(sum1, wasm_f32x4_mul(x1, y1));
+        sum2 = wasm_f32x4_add(sum2, wasm_f32x4_mul(x2, y2));
+        sum3 = wasm_f32x4_add(sum3, wasm_f32x4_mul(x3, y3));
+    }
+
+    sum0 = wasm_f32x4_add(sum0, sum1);
+    sum2 = wasm_f32x4_add(sum2, sum3);
+    sum0 = wasm_f32x4_add(sum0, sum2);
+
+    sumf = wasm_f32x4_extract_lane(sum0, 0) + wasm_f32x4_extract_lane(sum0, 1) + wasm_f32x4_extract_lane(sum0, 2) + wasm_f32x4_extract_lane(sum0, 3);
+
+    // leftovers
+    for (int i = n16; i < n; ++i) {
+        sumf += x[i]*y[i];
+    }
 #else
    // scalar
    for (int i = 0; i < n; ++i) {
@ -412,7 +520,7 @@ inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t
        sumf += ggml_fp16_to_fp32(x[i])*ggml_fp16_to_fp32(y[i]);
    }
 #elif defined(__AVX2__)
-    // AVX 256-bit (unroll 4)
+    // AVX 256-bit
    const int n32 = (n & ~31);

    __m256 sum0 = _mm256_setzero_ps();
@ -455,6 +563,54 @@ inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t
        //GGML_ASSERT(false);
        sumf += ggml_fp16_to_fp32(x[i])*ggml_fp16_to_fp32(y[i]);
    }
+#elif defined(__wasm_simd128__)
+    // WASM 128-bit
+    const int n16 = (n & ~15);
+
+    v128_t sum0 = wasm_f32x4_splat(0.0f);
+    v128_t sum1 = wasm_f32x4_splat(0.0f);
+    v128_t sum2 = wasm_f32x4_splat(0.0f);
+    v128_t sum3 = wasm_f32x4_splat(0.0f);
+
+    v128_t x0, x1, x2, x3;
+    v128_t y0, y1, y2, y3;
+
+    float tx[16];
+    float ty[16];
+
+    for (int i = 0; i < n16; i += 16) {
+        for (int k = 0; k < 16; ++k) {
+            tx[k] = ggml_fp16_to_fp32(x[i + k]);
+            ty[k] = ggml_fp16_to_fp32(y[i + k]);
+        }
+
+        x0 = wasm_v128_load(tx + 0);
+        x1 = wasm_v128_load(tx + 4);
+        x2 = wasm_v128_load(tx + 8);
+        x3 = wasm_v128_load(tx + 12);
+
+        y0 = wasm_v128_load(ty + 0);
+        y1 = wasm_v128_load(ty + 4);
+        y2 = wasm_v128_load(ty + 8);
+        y3 = wasm_v128_load(ty + 12);
+
+        sum0 = wasm_f32x4_add(sum0, wasm_f32x4_mul(x0, y0));
+        sum1 = wasm_f32x4_add(sum1, wasm_f32x4_mul(x1, y1));
+        sum2 = wasm_f32x4_add(sum2, wasm_f32x4_mul(x2, y2));
+        sum3 = wasm_f32x4_add(sum3, wasm_f32x4_mul(x3, y3));
+    }
+
+    sum0 = wasm_f32x4_add(sum0, sum1);
+    sum2 = wasm_f32x4_add(sum2, sum3);
+    sum0 = wasm_f32x4_add(sum0, sum2);
+
+    sumf = wasm_f32x4_extract_lane(sum0, 0) + wasm_f32x4_extract_lane(sum0, 1) + wasm_f32x4_extract_lane(sum0, 2) + wasm_f32x4_extract_lane(sum0, 3);
+
+    // leftovers
+    for (int i = n16; i < n; ++i) {
+        //GGML_ASSERT(false);
+        sumf += ggml_fp16_to_fp32(x[i])*ggml_fp16_to_fp32(y[i]);
+    }
 #else
    for (int i = 0; i < n; ++i) {
        sumf += ggml_fp16_to_fp32(x[i])*ggml_fp16_to_fp32(y[i]);
@ -501,7 +657,7 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
        y[i] += x[i]*v;
    }
 #elif defined(__AVX2__)
-    // AVX 256-bit (unroll 4)
+    // AVX 256-bit
    const int n32 = (n & ~31);

    const __m256 v4 = _mm256_set1_ps(v);
@ -535,6 +691,41 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
    for (int i = n32; i < n; ++i) {
        y[i] += x[i]*v;
    }
+#elif defined(__wasm_simd128__)
+    // WASM SIMD 128-bit
+    const int n16 = (n & ~15);
+
+    const v128_t v4 = wasm_f32x4_splat(v);
+
+    v128_t x0, x1, x2, x3;
+    v128_t y0, y1, y2, y3;
+
+    for (int i = 0; i < n16; i += 16) {
+        x0 = wasm_v128_load(x + i + 0);
+        x1 = wasm_v128_load(x + i + 4);
+        x2 = wasm_v128_load(x + i + 8);
+        x3 = wasm_v128_load(x + i + 12);
+
+        y0 = wasm_v128_load(y + i + 0);
+        y1 = wasm_v128_load(y + i + 4);
+        y2 = wasm_v128_load(y + i + 8);
+        y3 = wasm_v128_load(y + i + 12);
+
+        y0 = wasm_f32x4_add(y0, wasm_f32x4_mul(x0, v4));
+        y1 = wasm_f32x4_add(y1, wasm_f32x4_mul(x1, v4));
+        y2 = wasm_f32x4_add(y2, wasm_f32x4_mul(x2, v4));
+        y3 = wasm_f32x4_add(y3, wasm_f32x4_mul(x3, v4));
+
+        wasm_v128_store(y + i + 0, y0);
+        wasm_v128_store(y + i + 4, y1);
+        wasm_v128_store(y + i + 8, y2);
+        wasm_v128_store(y + i + 12, y3);
+    }
+
+    // leftovers
+    for (int i = n16; i < n; ++i) {
+        y[i] += x[i]*v;
+    }
 #else
    // scalar
    for (int i = 0; i < n; ++i) {
@ -662,6 +853,54 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, ggml_
        GGML_ASSERT(false);
        y[i] = ggml_fp32_to_fp16(ggml_fp16_to_fp32(y[i]) + ggml_fp16_to_fp32(x[i])*v);
    }
+#elif defined(__wasm_simd128__)
+    // WASM SIMD 128-bit
+    const int n16 = (n & ~15);
+
+    const v128_t v4 = wasm_f32x4_splat(v);
+
+    v128_t x0, x1, x2, x3;
+    v128_t y0, y1, y2, y3;
+
+    float tx[16];
+    float ty[16];
+
+    for (int i = 0; i < n16; i += 16) {
+        for (int k = 0; k < 16; ++k) {
+            tx[k] = ggml_fp16_to_fp32(x[i + k]);
+            ty[k] = ggml_fp16_to_fp32(y[i + k]);
+        }
+
+        x0 = wasm_v128_load(tx + 0);
+        x1 = wasm_v128_load(tx + 4);
+        x2 = wasm_v128_load(tx + 8);
+        x3 = wasm_v128_load(tx + 12);
+
+        y0 = wasm_v128_load(ty + 0);
+        y1 = wasm_v128_load(ty + 4);
+        y2 = wasm_v128_load(ty + 8);
+        y3 = wasm_v128_load(ty + 12);
+
+        y0 = wasm_f32x4_add(y0, wasm_f32x4_mul(x0, v4));
+        y1 = wasm_f32x4_add(y1, wasm_f32x4_mul(x1, v4));
+        y2 = wasm_f32x4_add(y2, wasm_f32x4_mul(x2, v4));
+        y3 = wasm_f32x4_add(y3, wasm_f32x4_mul(x3, v4));
+
+        wasm_v128_store(ty + 0, y0);
+        wasm_v128_store(ty + 4, y1);
+        wasm_v128_store(ty + 8, y2);
+        wasm_v128_store(ty + 12, y3);
+
+        for (int k = 0; k < 16; ++k) {
+            y[i + k] = ggml_fp32_to_fp16(ty[k]);
+        }
+    }
+
+    // leftovers
+    for (int i = n16; i < n; ++i) {
+        GGML_ASSERT(false);
+        y[i] = ggml_fp32_to_fp16(ggml_fp16_to_fp32(y[i]) + ggml_fp16_to_fp32(x[i])*v);
+    }
 #else
    for (int i = 0; i < n; ++i) {
        y[i] = ggml_fp32_to_fp16(ggml_fp16_to_fp32(y[i]) + ggml_fp16_to_fp32(x[i])*v);
@ -685,12 +924,6 @@ inline static float ggml_gelu_f32(float x) {
    return 0.5*x*(1.0 + tanh(SQRT_2_OVER_PI*x*(1.0 + GELU_COEF_A*x*x)));
 }

-inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
-    for (int i = 0; i < n; ++i) {
-        y[i] = ggml_gelu_f32(x[i]);
-    }
-}
-
 inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
    const uint16_t * i16 = (const uint16_t *) x;
    for (int i = 0; i < n; ++i) {
@ -698,6 +931,23 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
    }
 }

+#ifdef GGML_GELU_FP16
+// y[i] = GELU(x[i]) for i in [0, n), evaluated through the FP16 lookup table:
+// each input is converted to FP16 and its bit pattern is used as index into table_gelu_f16
+inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
+    for (int k = 0; k < n; ++k) {
+        const ggml_fp16_t h = ggml_fp32_to_fp16(x[k]);
+
+        // reinterpret the FP16 value as its raw 16-bit pattern
+        uint16_t idx;
+        memcpy(&idx, &h, sizeof(uint16_t));
+
+        y[k] = ggml_fp16_to_fp32(table_gelu_f16[idx]);
+    }
+}
+#else
+// y[i] = GELU(x[i]) for i in [0, n), evaluated directly with ggml_gelu_f32()
+inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
+    const float * const end = x + n;
+    while (x < end) {
+        *y++ = ggml_gelu_f32(*x++);
+    }
+}
+#endif
+
 inline static void ggml_vec_sum_f32     (const int n, float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) sum += x[i]; *s += sum; }
 inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { ggml_vec_norm_f32(n, s, x); *s = 1./(*s); }

@ -886,6 +1136,7 @@ struct ggml_state {

 // global state
 struct ggml_state g_state;
+atomic_int g_state_barrier = 0;

 ////////////////////////////////////////////////////////////////////////////////

@ -1015,6 +1266,17 @@ int ggml_up64(int n) {
 ////////////////////////////////////////////////////////////////////////////////

 struct ggml_context * ggml_init(struct ggml_init_params params) {
+    // make this function thread safe
+    {
+        int processing = atomic_fetch_add(&g_state_barrier, 1);
+        while (processing > 0) {
+            // wait for other threads to finish
+            atomic_fetch_sub(&g_state_barrier, 1);
+            sched_yield();
+            processing = atomic_fetch_add(&g_state_barrier, 1);
+        }
+    }
+
    static bool is_first_call = true;
    if (is_first_call) {
        const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
@ -1058,6 +1320,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

    if (ctx == NULL) {
        GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
+
+        atomic_fetch_sub(&g_state_barrier, 1);
+
        return NULL;
    }

@ -1072,10 +1337,25 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

    ggml_assert_aligned(ctx->mem_buffer);

+    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
+
+    atomic_fetch_sub(&g_state_barrier, 1);
+
    return ctx;
 }

 void ggml_free(struct ggml_context * ctx) {
+    // make this function thread safe
+    {
+        int processing = atomic_fetch_add(&g_state_barrier, 1);
+        while (processing > 0) {
+            // wait for other threads to finish
+            atomic_fetch_sub(&g_state_barrier, 1);
+            sched_yield();
+            processing = atomic_fetch_add(&g_state_barrier, 1);
+        }
+    }
+
    for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
        if (&g_state.contexts[i].context == ctx) {
            g_state.contexts[i].used = false;
@ -1087,11 +1367,15 @@ void ggml_free(struct ggml_context * ctx) {
                free(ctx->mem_buffer);
            }

+            atomic_fetch_sub(&g_state_barrier, 1);
+
            return;
        }
    }

    GGML_PRINT_DEBUG("%s: context not found\n", __func__);
+
+    atomic_fetch_sub(&g_state_barrier, 1);
 }

 size_t ggml_used_mem(const struct ggml_context * ctx) {
@ -2836,13 +3120,15 @@ void ggml_compute_forward_add_f32(
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
-    GGML_ASSERT(params->ith == 0);
    GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

+    const int ith = params->ith;
+    const int nth = params->nth;
+
    const int n  = ggml_nrows(src0);
    const int nc = src0->ne[0];

@ -2859,7 +3145,7 @@ void ggml_compute_forward_add_f32(
    GGML_ASSERT(nb00 == sizeof(float));

    if (nb10 == sizeof(float)) {
-        for (int j = 0; j < n; j++) {
+        for (int j = ith; j < n; j += nth) {
            ggml_vec_add_f32(nc,
                    (float *) ((char *) dst->data  + j*nb1),
                    (float *) ((char *) src0->data + j*nb01),
@ -2867,7 +3153,7 @@ void ggml_compute_forward_add_f32(
        }
    } else {
        // src1 is not contiguous
-        for (int j = 0; j < n; j++) {
+        for (int j = ith; j < n; j += nth) {
            float * dst_ptr  = (float *) ((char *) dst->data  + j*nb1);
            float * src0_ptr = (float *) ((char *) src0->data + j*nb01);
            for (int i = 0; i < nc; i++) {
@ -3638,14 +3924,16 @@ void ggml_compute_forward_norm_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
-    assert(params->ith == 0);
-    assert(ggml_are_same_shape(src0, dst));
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

-    assert(src0->nb[0] == sizeof(float));
+    GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;

    const int ne00 = src0->ne[0];
    const int ne01 = src0->ne[1];
@ -3665,7 +3953,7 @@ void ggml_compute_forward_norm_f32(
    // TODO: optimize
    for (int i03 = 0; i03 < ne03; i03++) {
        for (int i02 = 0; i02 < ne02; i02++) {
-            for (int i01 = 0; i01 < ne01; i01++) {
+            for (int i01 = ith; i01 < ne01; i01 += nth) {
                const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);

                ggml_float mean = 0.0;
@ -3714,6 +4002,28 @@ void ggml_compute_forward_norm(

 // ggml_compute_forward_mul_mat

+// Decide whether a matrix multiplication should be delegated to BLAS.
+// BLAS is chosen when src1 is contiguous and the sizes dst->ne[0], dst->ne[1] and
+// src1->ne[0] are all at least 32 - for large matrices BLAS is faster.
+// src0 is currently not taken into account.
+bool ggml_compute_forward_mul_mat_use_blas(
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+              struct ggml_tensor * dst) {
+    UNUSED(src0);
+
+    const int n_inner = src1->ne[0];
+
+    const int n_dst0 = dst->ne[0];
+    const int n_dst1 = dst->ne[1];
+
+    // TODO: find the optimal values for these
+    return ggml_is_contiguous(src1) && n_dst0 >= 32 && n_dst1 >= 32 && n_inner >= 32;
+}
+
 void ggml_compute_forward_mul_mat_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
@ -3781,6 +4091,45 @@ void ggml_compute_forward_mul_mat_f32(
    // nb00 <  nb01 - src0 is transposed
    //   compute by src0 columns

+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+    // BLAS fast path: the complete product is computed by thread 0 with cblas_sgemm during the
+    // compute pass - the other threads and the INIT/FINALIZE passes have nothing to do
+    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
+        GGML_ASSERT(ggml_is_contiguous(src0));
+        GGML_ASSERT(nb10 == sizeof(float));
+
+        if (params->ith != 0) return;
+
+        if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+            return;
+        }
+
+        for (int i03 = 0; i03 < ne03; i03++) {
+            for (int i02 = 0; i02 < ne02; i02++) {
+                // select the 2D slice (i02, i03) of every operand
+                // src0 has to be offset as well - otherwise every slice of src1 would be
+                // multiplied with the first matrix of src0
+                const float * x = (float *) ((char *) src0->data + i02*src0->nb[2] + i03*src0->nb[3]);
+                const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
+
+                float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
+
+                // zT = y * xT
+                {
+                    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+                            ne11, ne01, ne10,
+                            1.0f,    y, ne10,
+                                     x, ne10,
+                            0.0f,    d, ne01);
+                }
+            }
+        }
+
+        //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+
+        return;
+    }
+#endif
+
    if (params->type == GGML_TASK_INIT) {
        if (nb01 >= nb00) {
            return;
@ -3817,78 +4166,6 @@ void ggml_compute_forward_mul_mat_f32(
        return;
    }

-//#ifdef GGML_USE_ACCELERATE
-//    // try to use BLAS
-//
-//    if (nb01 >= nb00 && ne0 > 1024 && ne1 > 1024) {
-//        if (params->ith != 0) return;
-//        printf("XXXXXXXX\n");
-//
-//        GGML_ASSERT(ggml_is_contiguous(src0));
-//        GGML_ASSERT(ggml_is_contiguous(src1));
-//
-//        printf("ne00 = %d, ne01 = %d, ne02 = %d, ne03 = %d\n", ne00, ne01, ne02, ne03);
-//        printf("ne10 = %d, ne11 = %d, ne12 = %d, ne13 = %d\n", ne10, ne11, ne12, ne13);
-//        printf("ne0  = %d, ne1  = %d, ne2  = %d, ne3  = %d\n", ne0, ne1, ne2, ne3);
-//
-//        printf("nb00 = %d, nb01 = %d, nb02 = %d, nb03 = %d\n", nb00, nb01, nb02, nb03);
-//        printf("nb10 = %d, nb11 = %d, nb12 = %d, nb13 = %d\n", nb10, nb11, nb12, nb13);
-//        printf("nb0  = %d, nb1  = %d, nb2  = %d, nb3  = %d\n", nb0, nb1, nb2, nb3);
-//
-//        float * const wdata = params->wdata;
-//
-//        int64_t tsum = 0.0;
-//        for (int i03 = 0; i03 < ne03; i03++) {
-//            for (int i02 = 0; i02 < ne02; i02++) {
-//                const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
-//                const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
-//                      float * z = (float *) ((char *)  dst->data + i02*nb2  + i03*nb3);
-//
-//                // transpose src1
-//                for (int j = 0; j < ne11; ++j) {
-//                    for (int i = 0; i < ne10; ++i) {
-//                        wdata[i*ne11 + j] = y[j*ne10 + i];
-//                    }
-//                }
-//
-//                {
-//                    const int64_t tt0 = ggml_time_us();
-//                    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
-//                            1500, 1500, 64,
-//                            1.0, x, 64,
-//                            wdata, 1500,
-//                            0.0, z, 1500);
-//                    const int64_t tt1 = ggml_time_us();
-//                    tsum += tt1 - tt0;
-//                }
-//
-//                // transpose z
-//                for (int j = 0; j < ne1; ++j) {
-//                    for (int i = 0; i < ne0; ++i) {
-//                        wdata[i*ne1 + j] = z[j*ne0 + i];
-//                    }
-//                }
-//
-//                memcpy(z, wdata, ne0*ne1*sizeof(float));
-//
-//                //cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
-//                //        ne0, ne1, 64,
-//                //        1.0f,
-//                //        x, ne00,
-//                //        y, ne11,
-//                //        0.0f,
-//                //        z, 1500);
-//            }
-//        }
-//        printf("time = %f ms\n", tsum/1000.0);
-//        return;
-//    } else {
-//        //cblas_sgemv(CblasRowMajor, CblasTrans,   ne00, ne01, 1.0, src0->data, ne01, src1->data, 1, 0.0, dst->data, 1);
-//    }
-//
-//#endif
-
-
    if (nb01 >= nb00) {
        // TODO: do not support transposed src1
        assert(nb10 == sizeof(float));
@ -4033,24 +4310,24 @@ void ggml_compute_forward_mul_mat_f16_f32(
    const int ith = params->ith;
    const int nth = params->nth;

-    assert(ne02 == ne12);
-    assert(ne03 == ne13);
-    assert(ne2  == ne12);
-    assert(ne3  == ne13);
+    GGML_ASSERT(ne02 == ne12);
+    GGML_ASSERT(ne03 == ne13);
+    GGML_ASSERT(ne2  == ne12);
+    GGML_ASSERT(ne3  == ne13);

    // TODO: we don't support permuted src0
-    assert(nb00 == sizeof(ggml_fp16_t) || nb01 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t) || nb01 == sizeof(ggml_fp16_t));

    // dst cannot be transposed or permuted
-    assert(nb0 == sizeof(float));
-    assert(nb0 <= nb1);
-    assert(nb1 <= nb2);
-    assert(nb2 <= nb3);
+    GGML_ASSERT(nb0 == sizeof(float));
+    GGML_ASSERT(nb0 <= nb1);
+    GGML_ASSERT(nb1 <= nb2);
+    GGML_ASSERT(nb2 <= nb3);

-    assert(ne0 == ne01);
-    assert(ne1 == ne11);
-    assert(ne2 == ne02);
-    assert(ne3 == ne03);
+    GGML_ASSERT(ne0 == ne01);
+    GGML_ASSERT(ne1 == ne11);
+    GGML_ASSERT(ne2 == ne02);
+    GGML_ASSERT(ne3 == ne03);

    // nb01 >= nb00 - src0 is not transposed
    //   compute by src0 rows
@ -4058,6 +4335,73 @@ void ggml_compute_forward_mul_mat_f16_f32(
    // nb00 <  nb01 - src0 is transposed
    //   compute by src0 columns

+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+    // BLAS fast path: thread 0 converts each FP16 slice of src0 to FP32 and multiplies it
+    // with the matching slice of src1 using cblas_sgemm
+    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
+        GGML_ASSERT(nb10 == sizeof(float));
+
+        // only the first thread does the work in this path
+        if (params->ith != 0) return;
+
+        // nothing to prepare or to finalize in this path
+        if (params->type == GGML_TASK_INIT) {
+            return;
+        }
+
+        if (params->type == GGML_TASK_FINALIZE) {
+            return;
+        }
+
+        // scratch buffer that receives the FP32 copy of the current src0 slice
+        // NOTE(review): assumes params->wdata holds at least ne00*ne01 floats - confirm where the work size is computed
+        float * const wdata = params->wdata;
+
+        for (int i03 = 0; i03 < ne03; i03++) {
+            for (int i02 = 0; i02 < ne02; i02++) {
+                // convert slice (i02, i03) of src0 from FP16 to FP32, densely packed row by row
+                {
+                    int id = 0;
+                    for (int i01 = 0; i01 < ne01; ++i01) {
+                        for (int i00 = 0; i00 < ne00; ++i00) {
+                            wdata[id++] = ggml_fp16_to_fp32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
+                        }
+                    }
+                }
+
+                const float * x = wdata;
+                const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
+
+                //      float * z =                          wdata + ne00*ne01;
+
+                // z = x * yT
+                //{
+                //    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+                //            ne01, ne11, ne00,
+                //            1.0f, x, ne00,
+                //                  y, ne00,
+                //            0.0f, z, ne11);
+                //}
+
+                float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
+
+                // transpose z
+                //for (int j = 0; j < ne11; ++j) {
+                //    for (int i = 0; i < ne01; ++i) {
+                //        d[j*ne01 + i] = z[i*ne11 + j];
+                //    }
+                //}
+
+                // zT = y * xT
+                // (writes the result straight into dst, so the separate transpose above is not needed)
+                {
+                    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
+                            ne11, ne01, ne10,
+                            1.0f,    y, ne10,
+                                     x, ne10,
+                            0.0f,    d, ne01);
+                }
+            }
+        }
+
+        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+
+        return;
+    }
+#endif
+
    if (params->type == GGML_TASK_INIT) {
        if (nb01 >= nb00) {
            ggml_fp16_t * const wdata = params->wdata;
@ -6407,7 +6751,7 @@ void * ggml_graph_compute_one(void * data) {
    return NULL;
 }

-void * ggml_graph_compute_thread(void * data) {
+thread_ret_t ggml_graph_compute_thread(void * data) {
    struct ggml_compute_state * state = (struct ggml_compute_state *) data;

    const int n_threads = state->shared->n_threads;
@ -6418,7 +6762,7 @@ void * ggml_graph_compute_thread(void * data) {
        } else {
            while (atomic_load(&state->shared->has_work)) {
                if (atomic_load(&state->shared->stop)) {
-                    return NULL;
+                    return 0;
                }
                ggml_lock_lock  (&state->shared->spin);
                ggml_lock_unlock(&state->shared->spin);
@ -6430,7 +6774,7 @@ void * ggml_graph_compute_thread(void * data) {
        // wait for work
        while (!atomic_load(&state->shared->has_work)) {
            if (atomic_load(&state->shared->stop)) {
-                return NULL;
+                return 0;
            }
            ggml_lock_lock  (&state->shared->spin);
            ggml_lock_unlock(&state->shared->spin);
@ -6449,7 +6793,7 @@ void * ggml_graph_compute_thread(void * data) {
        }
    }

-    return NULL;
+    return 0;
 }

 void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
@ -6503,7 +6847,13 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

            switch (node->op) {
                case GGML_OP_DUP:
+                    {
+                        node->n_tasks = 1;
+                    } break;
                case GGML_OP_ADD:
+                    {
+                        node->n_tasks = 1;
+                    } break;
                case GGML_OP_SUB:
                case GGML_OP_MUL:
                case GGML_OP_DIV:
@ -6522,11 +6872,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    } break;
                case GGML_OP_GELU:
                    {
-                        node->n_tasks = MIN(n_threads, ggml_nrows(node->src0));
+                        node->n_tasks = n_threads;
                    } break;
                case GGML_OP_NORM:
                    {
-                        node->n_tasks = 1;
+                        node->n_tasks = n_threads;
                    } break;
                case GGML_OP_MUL_MAT:
                    {
@ -6541,7 +6891,15 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                        } else {
                            if (node->src0->type == GGML_TYPE_F16 &&
                                node->src1->type == GGML_TYPE_F32) {
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+                                if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) {
+                                    cur = sizeof(float)*(node->src0->ne[0]*node->src0->ne[1]);
+                                } else {
+                                    cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
+                                }
+#else
                                cur = sizeof(ggml_fp16_t)*ggml_nelements(node->src1);
+#endif
                            } else if (node->src0->type == GGML_TYPE_F32 &&
                                       node->src1->type == GGML_TYPE_F32) {
                                cur = 0;
@ -6554,7 +6912,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    } break;
                case GGML_OP_SCALE:
                    {
-                        node->n_tasks = MIN(n_threads, ggml_nrows(node->src0));
+                        node->n_tasks = n_threads;
                    } break;
                case GGML_OP_CPY:
                case GGML_OP_RESHAPE:
@ -6568,7 +6926,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    } break;
                case GGML_OP_SOFT_MAX:
                    {
-                        node->n_tasks = MIN(n_threads, ggml_nrows(node->src0));
+                        node->n_tasks = n_threads;
                    } break;
                case GGML_OP_ROPE:
                    {
@ -6683,7 +7041,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
        struct ggml_compute_params params = {
            /*.type  =*/ GGML_TASK_INIT,
            /*.ith   =*/ 0,
-            /*.nth   =*/ n_threads,
+            /*.nth   =*/ node->n_tasks,
            /*.wsize =*/ cgraph->work ? ggml_nbytes(cgraph->work) : 0,
            /*.wdata =*/ cgraph->work ? cgraph->work->data : NULL,
        };
@ -6867,9 +7225,9 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {

        perf_total_per_op_us[node->op] += node->perf_time_us;

-        GGML_PRINT(" - %3d: [ %6d, %6d] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
+        GGML_PRINT(" - %3d: [ %6d, %6d, %6d] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                i,
-                node->ne[0], node->ne[1],
+                node->ne[0], node->ne[1], node->ne[2],
                GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
                (double) node->perf_cycles  / (double) ggml_cycles_per_ms(),
                (double) node->perf_cycles  / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
@ -7237,7 +7595,7 @@ enum ggml_opt_result ggml_opt_adam(

        {
            const int64_t t_end_cpu = ggml_cycles();
-            GGML_PRINT_DEBUG("time iter:      %5.3f s\n", (t_end_cpu - t_start_cpu)/CLOCKS_PER_SEC);
+            GGML_PRINT_DEBUG("time iter:      %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC);
            UNUSED(t_end_cpu);

            const int64_t t_end_wall = ggml_time_us();
@ -7708,3 +8066,53 @@ enum ggml_opt_result ggml_opt(
 }

 ////////////////////////////////////////////////////////////////////////////////
+
+// Returns 1 if this file was compiled with AVX2 enabled (__AVX2__ defined), 0 otherwise.
+// The answer is fixed at compile time by the preprocessor; no runtime CPU probing is done.
+int ggml_cpu_has_avx2(void) {
+#ifndef __AVX2__
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+// Returns 1 if this file was compiled with AVX-512 Foundation enabled (__AVX512F__ defined),
+// 0 otherwise. The answer is fixed at compile time by the preprocessor.
+int ggml_cpu_has_avx512(void) {
+#ifndef __AVX512F__
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+// Returns 1 if this file was compiled with ARM NEON enabled, 0 otherwise.
+// The answer is fixed at compile time by the preprocessor; no runtime CPU probing is done.
+//
+// Both spellings of the feature macro are accepted: __ARM_NEON is the one ggml.h tests,
+// while __ARM_NEON__ is the legacy form. Checking only the legacy form would report 0 on
+// toolchains that define just __ARM_NEON, even though the NEON code paths are compiled in.
+int ggml_cpu_has_neon(void) {
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+// Returns 1 if this file was compiled with ARM FP16 vector arithmetic enabled
+// (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC defined), 0 otherwise.
+// The answer is fixed at compile time by the preprocessor.
+int ggml_cpu_has_fp16_va(void) {
+#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+// Returns 1 if this file was compiled with WebAssembly 128-bit SIMD enabled
+// (__wasm_simd128__ defined), 0 otherwise.
+// The answer is fixed at compile time by the preprocessor.
+int ggml_cpu_has_wasm_simd(void) {
+#ifndef __wasm_simd128__
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+// Returns 1 if this file was compiled with a BLAS backend selected
+// (GGML_USE_ACCELERATE or GGML_USE_OPENBLAS defined), 0 otherwise.
+// The answer is fixed at compile time by the preprocessor.
+int ggml_cpu_has_blas(void) {
+#if !defined(GGML_USE_ACCELERATE) && !defined(GGML_USE_OPENBLAS)
+    return 0;
+#else
+    return 1;
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
--- a/ggml.h
+++ b/ggml.h
@ -11,7 +11,7 @@ extern "C" {
 #define GGML_MAX_DIMS     4
 #define GGML_MAX_NODES    4096
 #define GGML_MAX_PARAMS   16
-#define GGML_MAX_CONTEXTS 16
+#define GGML_MAX_CONTEXTS 64
 #define GGML_MAX_OPT      4

 #ifdef __ARM_NEON
@ -136,6 +136,7 @@ struct ggml_init_params {
    void * mem_buffer; // if NULL, memory will be allocated internally
 };

+void ggml_time_init(void);
 int64_t ggml_time_ms(void);
 int64_t ggml_time_us(void);
 int64_t ggml_cycles(void);
@ -547,6 +548,17 @@ enum ggml_opt_result ggml_opt(
        struct ggml_opt_params params,
        struct ggml_tensor * f);

+//
+// system info
+//
+
+int ggml_cpu_has_avx2(void);
+int ggml_cpu_has_avx512(void);
+int ggml_cpu_has_neon(void);
+int ggml_cpu_has_fp16_va(void);
+int ggml_cpu_has_wasm_simd(void);
+int ggml_cpu_has_blas(void);
+
 #ifdef  __cplusplus
 }
 #endif
--- a/main.cpp
+++ b/main.cpp
@ -1,243 +0,0 @@
-#include "whisper.h"
-
-// third-party utilities
-// use your favorite implementations
-#define DR_WAV_IMPLEMENTATION
-#include "dr_wav.h"
-
-#include <cstdio>
-#include <string>
-#include <thread>
-#include <vector>
-
-//  500 -> 00:05.000
-// 6000 -> 01:00.000
-std::string to_timestamp(int64_t t) {
-    int64_t sec = t/100;
-    int64_t msec = t - sec*100;
-    int64_t min = sec/60;
-    sec = sec - min*60;
-
-    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
-
-    return std::string(buf);
-}
-
-// command-line parameters
-struct whisper_params {
-    int32_t seed      = -1; // RNG seed, not used currently
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t offset_ms = 0;
-
-    bool verbose              = false;
-    bool translate            = false;
-    bool print_special_tokens = false;
-    bool no_timestamps        = false;
-
-    std::string language  = "en";
-    std::string model     = "models/ggml-base.en.bin";
-
-    std::vector<std::string> fname_inp = {};
-};
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
-
-bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
-    for (int i = 1; i < argc; i++) {
-        std::string arg = argv[i];
-
-        if (arg[0] != '-') {
-            params.fname_inp.push_back(arg);
-            continue;
-        }
-
-        if (arg == "-s" || arg == "--seed") {
-            params.seed = std::stoi(argv[++i]);
-        } else if (arg == "-t" || arg == "--threads") {
-            params.n_threads = std::stoi(argv[++i]);
-        } else if (arg == "-o" || arg == "--offset") {
-            params.offset_ms = std::stoi(argv[++i]);
-        } else if (arg == "-v" || arg == "--verbose") {
-            params.verbose = true;
-        } else if (arg == "--translate") {
-            params.translate = true;
-        } else if (arg == "-l" || arg == "--language") {
-            params.language = argv[++i];
-            if (whisper_lang_id(params.language.c_str()) == -1) {
-                fprintf(stderr, "error: unknown language '%s'\n", params.language.c_str());
-                whisper_print_usage(argc, argv, params);
-                exit(0);
-            }
-        } else if (arg == "-ps" || arg == "--print_special") {
-            params.print_special_tokens = true;
-        } else if (arg == "-nt" || arg == "--no_timestamps") {
-            params.no_timestamps = true;
-        } else if (arg == "-m" || arg == "--model") {
-            params.model = argv[++i];
-        } else if (arg == "-f" || arg == "--file") {
-            params.fname_inp.push_back(argv[++i]);
-        } else if (arg == "-h" || arg == "--help") {
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        } else {
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            whisper_print_usage(argc, argv, params);
-            exit(0);
-        }
-    }
-
-    return true;
-}
-
-void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
-    fprintf(stderr, "\n");
-    fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
-    fprintf(stderr, "\n");
-    fprintf(stderr, "options:\n");
-    fprintf(stderr, "  -h,       --help           show this help message and exit\n");
-    fprintf(stderr, "  -s SEED,  --seed SEED      RNG seed (default: -1)\n");
-    fprintf(stderr, "  -t N,     --threads N      number of threads to use during computation (default: %d)\n", params.n_threads);
-    fprintf(stderr, "  -o N,     --offset N       offset in milliseconds (default: %d)\n", params.offset_ms);
-    fprintf(stderr, "  -v,       --verbose        verbose output\n");
-    fprintf(stderr, "            --translate      translate from source language to english\n");
-    fprintf(stderr, "  -ps,      --print_special  print special tokens\n");
-    fprintf(stderr, "  -nt,      --no_timestamps  do not print timestamps\n");
-    fprintf(stderr, "  -l LANG,  --language LANG  spoken language (default: %s)\n", params.language.c_str());
-    fprintf(stderr, "  -m FNAME, --model FNAME    model path (default: %s)\n", params.model.c_str());
-    fprintf(stderr, "  -f FNAME, --file FNAME     input WAV file path\n");
-    fprintf(stderr, "\n");
-}
-
-int main(int argc, char ** argv) {
-    whisper_params params;
-
-    if (whisper_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.seed < 0) {
-        params.seed = time(NULL);
-    }
-
-    if (params.fname_inp.empty()) {
-        fprintf(stderr, "error: no input files specified\n");
-        whisper_print_usage(argc, argv, params);
-        return 1;
-    }
-
-    // whisper init
-
-    struct whisper_context * ctx = whisper_init(params.model.c_str());
-
-    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
-        const auto fname_inp = params.fname_inp[f];
-
-        // WAV input
-        std::vector<float> pcmf32;
-        {
-            drwav wav;
-            if (!drwav_init_file(&wav, fname_inp.c_str(), NULL)) {
-                fprintf(stderr, "%s: failed to open WAV file '%s' - check your input\n", argv[0], fname_inp.c_str());
-                whisper_print_usage(argc, argv, {});
-                return 2;
-            }
-
-            if (wav.channels != 1 && wav.channels != 2) {
-                fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", argv[0], fname_inp.c_str());
-                return 3;
-            }
-
-            if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
-                fprintf(stderr, "%s: WAV file '%s' must be 16 kHz\n", argv[0], fname_inp.c_str());
-                return 4;
-            }
-
-            if (wav.bitsPerSample != 16) {
-                fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", argv[0], fname_inp.c_str());
-                return 5;
-            }
-
-            int n = wav.totalPCMFrameCount;
-
-            std::vector<int16_t> pcm16;
-            pcm16.resize(n*wav.channels);
-            drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
-            drwav_uninit(&wav);
-
-            // convert to mono, float
-            pcmf32.resize(n);
-            if (wav.channels == 1) {
-                for (int i = 0; i < n; i++) {
-                    pcmf32[i] = float(pcm16[i])/32768.0f;
-                }
-            } else {
-                for (int i = 0; i < n; i++) {
-                    pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
-                }
-            }
-        }
-
-        // print some info about the processing
-        {
-            printf("\n");
-            if (!whisper_is_multilingual(ctx)) {
-                if (params.language != "en" || params.translate) {
-                    params.language = "en";
-                    params.translate = false;
-                    printf("%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__);
-                }
-            }
-            printf("%s: processing '%s' (%d samples, %.1f sec), %d threads, lang = %s, task = %s, timestamps = %d ...\n",
-                    __func__, fname_inp.c_str(), int(pcmf32.size()), float(pcmf32.size())/WHISPER_SAMPLE_RATE, params.n_threads,
-                    params.language.c_str(),
-                    params.translate ? "translate" : "transcribe",
-                    params.no_timestamps ? 0 : 1);
-            printf("\n");
-        }
-
-        // run the inference
-        {
-            whisper_full_params wparams = whisper_full_default_params(WHISPER_DECODE_GREEDY);
-
-            wparams.print_realtime       = true;
-            wparams.print_progress       = false;
-            wparams.print_timestamps     = !params.no_timestamps;
-            wparams.print_special_tokens = params.print_special_tokens;
-            wparams.translate            = params.translate;
-            wparams.language             = params.language.c_str();
-            wparams.n_threads            = params.n_threads;
-            wparams.offset_ms            = params.offset_ms;
-
-            if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
-                fprintf(stderr, "%s: failed to process audio\n", argv[0]);
-                return 6;
-            }
-
-            // print result;
-            if (!wparams.print_realtime) {
-                printf("\n");
-
-                const int n_segments = whisper_full_n_segments(ctx);
-                for (int i = 0; i < n_segments; ++i) {
-                    const char * text = whisper_full_get_segment_text(ctx, i);
-
-                    if (params.no_timestamps) {
-                        printf ("%s", text);
-                        fflush(stdout);
-                    } else {
-                        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
-                        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
-
-                        printf ("[%s --> %s]  %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text);
-                    }
-                }
-            }
-        }
-    }
-
-    whisper_print_timings(ctx);
-    whisper_free(ctx);
-
-    return 0;
-}
--- a/models/README.md
+++ b/models/README.md
@ -4,14 +4,14 @@ The [original Whisper PyTorch models provided by OpenAI](https://github.com/open
 have been converted to custom `ggml` format in order to be able to load them in C/C++. The conversion has been performed using the
 [convert-pt-to-ggml.py](convert-pt-to-ggml.py) script. You can either obtain the original models and generate the `ggml` files
 yourself using the conversion script, or you can use the [download-ggml-model.sh](download-ggml-model.sh) script to download the
-already converted models.
+already converted models from https://ggml.ggerganov.com

 Sample usage:

 ```java
 $ ./download-ggml-model.sh base.en
 Downloading ggml model base.en ...
-models/ggml-base.en.bin          100%[=============================================>] 141.11M  5.41MB/s    in 22s     
+models/ggml-base.en.bin          100%[=============================================>] 141.11M  5.41MB/s    in 22s
 Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
 You can now use it like this:

--- a/models/convert-pt-to-ggml.py
+++ b/models/convert-pt-to-ggml.py
@ -234,7 +234,7 @@ dir_tokenizer = tokenizer.name_or_path
 # output in the same directory as the model
 fname_out = dir_out + "/ggml-model.bin"

-with open(dir_tokenizer + "/vocab.json", "r") as f:
+with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
    tokens = json.load(f)

 # use 16-bit or 32-bit floats
@ -271,7 +271,7 @@ byte_decoder = {v:k for k, v in byte_encoder.items()}
 fout.write(struct.pack("i", len(tokens)))

 for key in tokens:
-    text = bytearray([byte_decoder[c] for c in key]).decode('utf-8', errors='replace').encode('utf-8')
+    text = bytearray([byte_decoder[c] for c in key])
    fout.write(struct.pack("i", len(text)))
    fout.write(text)

--- a/models/download-ggml-model.sh
+++ b/models/download-ggml-model.sh
@ -3,7 +3,17 @@
 # This script downloads Whisper model files that have already been converted to ggml format.
 # This way you don't have to convert them yourself.

-ggml_path=$(dirname $(realpath $0))
+# Print the directory that contains this script.
+# Uses `realpath` when it is available; otherwise falls back to `cd` + `pwd -P`.
+# Every expansion of $0 is quoted so that paths containing spaces or glob
+# characters are not word-split.
+function get_script_path() {
+    if [ -x "$(command -v realpath)" ]; then
+        echo "$(dirname "$(realpath "$0")")"
+    else
+        local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
+        echo "$ret"
+    fi
+}
+
+models_path=$(get_script_path)

 # Whisper models
 models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
@ -38,14 +48,22 @@ fi

 printf "Downloading ggml model $model ...\n"

-mkdir -p models
+cd $models_path

-if [ -f "models/ggml-$model.bin" ]; then
+if [ -f "ggml-$model.bin" ]; then
    printf "Model $model already exists. Skipping download.\n"
    exit 0
 fi

-wget --quiet --show-progress -O models/ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
+if [ -x "$(command -v wget)" ]; then
+    wget --quiet --show-progress -O ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
+elif [ -x "$(command -v curl)" ]; then
+    curl --output ggml-$model.bin https://ggml.ggerganov.com/ggml-model-whisper-$model.bin
+else
+    printf "Either wget or curl is required to download models.\n"
+    exit 1
+fi
+

 if [ $? -ne 0 ]; then
    printf "Failed to download ggml model $model \n"
--- a/samples/README.md
+++ b/samples/README.md
@ -0,0 +1,6 @@
+# Audio samples
+
+This folder contains various audio files used for testing.
+If you want to quickly get some more samples, simply run `make samples`. This will download several public audio files and convert them to the appropriate 16-bit WAV format using `ffmpeg`.
+
+https://github.com/ggerganov/whisper.cpp/blob/a09ce6e8899198015729ffc49ae10f67370906b1/Makefile#L104-L123
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,3 +1,7 @@
+if (EMSCRIPTEN)
+    return()
+endif()
+
 set(TEST_TARGET test-main-tiny)
 add_test(NAME ${TEST_TARGET}
    COMMAND $<TARGET_FILE:main>
--- a/whisper.cpp
+++ b/whisper.cpp
@ -1,9 +1,11 @@
+#define WHISPER_BUILD
 #include "whisper.h"

 #include "ggml.h"

 #include <algorithm>
 #include <cassert>
+#define _USE_MATH_DEFINES
 #include <cmath>
 #include <cstdio>
 #include <cstring>
@ -14,7 +16,7 @@
 #include <vector>

 #define USE_FLASH_ATTN
-#define USE_FLASH_FF
+//#define USE_FLASH_FF

 // available whisper models
 enum e_model {
@ -147,19 +149,19 @@ static const std::map<e_model, size_t> MEM_REQ_ENCODE = {
 };

 static const std::map<e_model, size_t> MEM_REQ_ENCODE_LAYER = {
-    { MODEL_TINY,     64ull*MB },
-    { MODEL_BASE,     84ull*MB },
-    { MODEL_SMALL,   128ull*MB },
-    { MODEL_MEDIUM,  172ull*MB },
-    { MODEL_LARGE,   216ull*MB },
+    { MODEL_TINY,    104ull*MB },
+    { MODEL_BASE,    138ull*MB },
+    { MODEL_SMALL,   208ull*MB },
+    { MODEL_MEDIUM,  280ull*MB },
+    { MODEL_LARGE,   354ull*MB },
 };

 static const std::map<e_model, size_t> MEM_REQ_DECODE = {
-    { MODEL_TINY,     94ull*MB },
-    { MODEL_BASE,     96ull*MB },
-    { MODEL_SMALL,    98ull*MB },
-    { MODEL_MEDIUM,  100ull*MB },
-    { MODEL_LARGE,   102ull*MB },
+    { MODEL_TINY,    200ull*MB },
+    { MODEL_BASE,    202ull*MB },
+    { MODEL_SMALL,   204ull*MB },
+    { MODEL_MEDIUM,  206ull*MB },
+    { MODEL_LARGE,   208ull*MB },
 };

 static const std::map<e_model, size_t> MEM_REQ_DECODE_LAYER = {
@ -209,16 +211,13 @@ struct whisper_vocab {
    }
 };

-struct whisper_result {
-    int64_t t;
-    whisper_token id;
-};
-
 struct whisper_segment {
    int64_t t0;
    int64_t t1;

    std::string text;
+
+    std::vector<whisper_token_data> tokens;
 };

 // medium
@ -378,8 +377,12 @@ struct whisper_model {
    struct ggml_tensor * memory_cross_k;
    struct ggml_tensor * memory_cross_v;

-    //
+    // context
    struct ggml_context * ctx;
+    struct ggml_context * ctx_mem;
+
+    // tensors
+    int n_loaded;
    std::map<std::string, struct ggml_tensor *> tensors;
 };

@ -391,9 +394,10 @@ struct whisper_context {
    int64_t t_decode_us = 0;
    int64_t t_start_us  = 0;

-    std::vector<uint8_t> buf_model;
-    std::vector<uint8_t> buf_compute;
-    std::vector<uint8_t> buf_compute_layer;
+    std::vector<uint8_t> * buf_model; // the model buffer is read-only and can be shared between processors
+    std::vector<uint8_t>   buf_memory;
+    std::vector<uint8_t>   buf_compute;
+    std::vector<uint8_t>   buf_compute_layer;

    whisper_model model;
    whisper_vocab vocab;
@ -403,7 +407,6 @@ struct whisper_context {
    std::vector<float> probs;
    std::vector<float> logits;

-    std::vector<whisper_result>  result_cur;
    std::vector<whisper_segment> result_all;

    std::vector<whisper_token> prompt_past;
@ -421,7 +424,7 @@ struct whisper_context {
 // see the convert-pt-to-ggml.py script for details
 //
 bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
-    printf("%s: loading model from '%s'\n", __func__, fname.c_str());
+    fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());

    auto & model = wctx.model;
    auto & vocab = wctx.vocab;
@ -480,30 +483,33 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
            model.type = e_model::MODEL_LARGE;
        }

-        printf("%s: n_vocab       = %d\n", __func__, hparams.n_vocab);
-        printf("%s: n_audio_ctx   = %d\n", __func__, hparams.n_audio_ctx);
-        printf("%s: n_audio_state = %d\n", __func__, hparams.n_audio_state);
-        printf("%s: n_audio_head  = %d\n", __func__, hparams.n_audio_head);
-        printf("%s: n_audio_layer = %d\n", __func__, hparams.n_audio_layer);
-        printf("%s: n_text_ctx    = %d\n", __func__, hparams.n_text_ctx);
-        printf("%s: n_text_state  = %d\n", __func__, hparams.n_text_state);
-        printf("%s: n_text_head   = %d\n", __func__, hparams.n_text_head);
-        printf("%s: n_text_layer  = %d\n", __func__, hparams.n_text_layer);
-        printf("%s: n_mels        = %d\n", __func__, hparams.n_mels);
-        printf("%s: f16           = %d\n", __func__, hparams.f16);
-        printf("%s: type          = %d\n", __func__, model.type);
+        fprintf(stderr, "%s: n_vocab       = %d\n", __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_audio_ctx   = %d\n", __func__, hparams.n_audio_ctx);
+        fprintf(stderr, "%s: n_audio_state = %d\n", __func__, hparams.n_audio_state);
+        fprintf(stderr, "%s: n_audio_head  = %d\n", __func__, hparams.n_audio_head);
+        fprintf(stderr, "%s: n_audio_layer = %d\n", __func__, hparams.n_audio_layer);
+        fprintf(stderr, "%s: n_text_ctx    = %d\n", __func__, hparams.n_text_ctx);
+        fprintf(stderr, "%s: n_text_state  = %d\n", __func__, hparams.n_text_state);
+        fprintf(stderr, "%s: n_text_head   = %d\n", __func__, hparams.n_text_head);
+        fprintf(stderr, "%s: n_text_layer  = %d\n", __func__, hparams.n_text_layer);
+        fprintf(stderr, "%s: n_mels        = %d\n", __func__, hparams.n_mels);
+        fprintf(stderr, "%s: f16           = %d\n", __func__, hparams.f16);
+        fprintf(stderr, "%s: type          = %d\n", __func__, model.type);

-        wctx.buf_model.resize(MEM_REQ_MODEL.at(model.type));
+        wctx.buf_model = new std::vector<uint8_t>();
+        wctx.buf_model->resize(MEM_REQ_MODEL.at(model.type));
+        wctx.buf_memory.resize(std::max(MEM_REQ_MODEL.at(model.type), MEM_REQ_MODEL.at(model.type))); // TODO: TMP !!!
        wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
        wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));

        // this is the total memory required to run the inference
        const size_t mem_required =
-                   wctx.buf_model.size() +
+                   wctx.buf_model->size() +
+                   wctx.buf_memory.size() +
                   wctx.buf_compute.size() +
                   wctx.buf_compute_layer.size();

-        printf("%s: mem_required  = %.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
+        fprintf(stderr, "%s: mem_required  = %.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
    }

    // load mel filters
@ -553,7 +559,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
        }

        if (n_vocab < model.hparams.n_vocab) {
-            printf("%s: adding %d extra tokens\n", __func__, model.hparams.n_vocab - n_vocab);
+            fprintf(stderr, "%s: adding %d extra tokens\n", __func__, model.hparams.n_vocab - n_vocab);
            for (int i = n_vocab; i < model.hparams.n_vocab; i++) {
                if (i > vocab.token_beg) {
                    word = "[_TT_" + std::to_string(i - vocab.token_beg) + "]";
@ -582,6 +588,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {


    size_t ctx_size = 0;
+    size_t ctx_mem_size = 0;

    {
        const auto & hparams = model.hparams;
@ -690,22 +697,22 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
            ctx_size += n_text_layer*(             n_text_state*ggml_type_size(GGML_TYPE_F32)); // cross_attn_ln_1_b
        }

-        ctx_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
-        ctx_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v
+        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_k
+        ctx_mem_size += n_text_layer*n_text_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_v

-        ctx_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
-        ctx_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v
+        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_k
+        ctx_mem_size += n_text_layer*n_audio_ctx*n_text_state*ggml_type_size(GGML_TYPE_F16); // memory_cross_v

        ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead

-        printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
+        fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
    }

    // create the ggml context
    {
        struct ggml_init_params params = {
-            .mem_size   = wctx.buf_model.size(),
-            .mem_buffer = wctx.buf_model.data(),
+            .mem_size   = wctx.buf_model->size(),
+            .mem_buffer = wctx.buf_model->data(),
        };

        model.ctx = ggml_init(params);
@ -715,6 +722,20 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
        }
    }

+    // create the ggml memory context
+    {
+        struct ggml_init_params params = {
+            .mem_size   = wctx.buf_memory.size(),
+            .mem_buffer = wctx.buf_memory.data(),
+        };
+
+        model.ctx_mem = ggml_init(params);
+        if (!model.ctx_mem) {
+            fprintf(stderr, "%s: ggml_init() failed\n", __func__);
+            return false;
+        }
+    }
+
    // prepare memory for the weights
    {
        auto & ctx = model.ctx;
@ -913,7 +934,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {

    // key + value memory
    {
-        auto & ctx = model.ctx;
+        auto & ctx = model.ctx_mem;

        const auto & hparams = model.hparams;

@ -945,14 +966,15 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
            ggml_nbytes(model.memory_k)       + ggml_nbytes(model.memory_v) +
            ggml_nbytes(model.memory_cross_k) + ggml_nbytes(model.memory_cross_v);

-        printf("%s: memory size = %8.2f MB \n", __func__, memory_size/1024.0/1024.0);
+        fprintf(stderr, "%s: memory size = %8.2f MB\n", __func__, memory_size/1024.0/1024.0);
    }

    // load weights
    {
-        int n_loaded = 0;
        size_t total_size = 0;

+        model.n_loaded = 0;
+
        while (true) {
            int32_t n_dims;
            int32_t length;
@ -1005,15 +1027,15 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {

            //printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
            total_size += ggml_nbytes(tensor);
-            n_loaded++;
+            model.n_loaded++;
        }

-        printf("%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
+        fprintf(stderr, "%s: model size  = %8.2f MB\n", __func__, total_size/1024.0/1024.0);

-        if (n_loaded == 0) {
-            printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
-        } else if (n_loaded != (int) model.tensors.size()) {
-            fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
+        if (model.n_loaded == 0) {
+            fprintf(stderr, "%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
+        } else if (model.n_loaded != (int) model.tensors.size()) {
+            fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), model.n_loaded);
            return false;
        }
    }
@ -1781,9 +1803,11 @@ bool whisper_decode(
 }

 // the most basic sampling scheme - select the top token
-whisper_vocab::id whisper_sample_best(
+whisper_token_data whisper_sample_best(
        const whisper_vocab & vocab,
-        const float * probs, bool need_timestamp) {
+        const float * probs) {
+    whisper_token_data result;
+
    int n_logits = vocab.id_to_token.size();

    std::vector<std::pair<double, whisper_vocab::id>> probs_id;
@ -1793,9 +1817,39 @@ whisper_vocab::id whisper_sample_best(
        probs_id.push_back(std::make_pair(probs[i], i));
    }

-    const int top_k = 4;
+    {
+        double sum_ts =  0.0;
+        double max_ts = -1.0;
+        double max_tx = -1.0;
+
+        for (int i = 0; i < vocab.token_beg; i++) {
+            max_tx = std::max(max_tx, probs_id[i].first);
+        }
+
+        for (int i = vocab.token_beg; i < n_logits; i++) {
+            sum_ts += probs_id[i].first;
+            if  (probs_id[i].first > max_ts) {
+                max_ts = probs_id[i].first;
+                result.tid = probs_id[i].second;
+            }
+        }
+
+        // if the probability sum of all timestamp tokens is higher than the max probability of the text tokens - sample a
+        // timestamp token
+        if (sum_ts > max_tx) {
+            // ref: https://github.com/openai/whisper/blob/0b1ba3d46ebf7fe6f953acfd8cad62a4f851b49f/whisper/decoding.py#L430-L438
+            for (int i = 0; i < vocab.token_beg; i++) {
+                probs_id[i].first = -INFINITY;
+            }
+        }
+
+        result.pt = max_ts/(sum_ts + 1e-10);
+        result.ptsum = sum_ts;
+    }

    // find the top K tokens
+    const int top_k = 4;
+
    std::partial_sort(
            probs_id.begin(),
            probs_id.begin() + top_k, probs_id.end(),
@ -1810,15 +1864,6 @@ whisper_vocab::id whisper_sample_best(
    //    printf("%d: '%s' %f, %d\n", i, vocab.id_to_token.at(probs_id[i].second).c_str(), probs_id[i].first, probs_id[i].second);
    //}

-    if (need_timestamp) {
-        // at the end of the 30-second audio segment, we start giving preference to time tokens
-        for (int i = 0; i < top_k; i++) {
-            if (probs_id[i].second > vocab.token_beg + 1300 && probs_id[i].first > 0.01*probs_id[0].first) {
-                return probs_id[i].second;
-            }
-        }
-    }
-
    int res = 0;
    while ((probs_id[res].second == vocab.token_sot ||
            probs_id[res].second == vocab.token_solm ||
@ -1827,7 +1872,10 @@ whisper_vocab::id whisper_sample_best(
        res++;
    }

-    return probs_id[res].second;
+    result.id = probs_id[res].second;
+    result.p  = probs_id[res].first;
+
+    return result;
 }

 // samples only from the timestamps tokens
@ -1863,14 +1911,19 @@ whisper_vocab::id whisper_sample_timestamp(
    return probs_id[0].second;
 }

-static std::string to_timestamp(int64_t t) {
-    int64_t sec = t/100;
-    int64_t msec = t - sec*100;
-    int64_t min = sec/60;
-    sec = sec - min*60;
+//  500 -> 00:00:05.000
+// 6000 -> 00:01:00.000
+static std::string to_timestamp(int64_t t, bool comma = false) {
+    int64_t msec = t * 10;
+    int64_t hr = msec / (1000 * 60 * 60);
+    msec = msec - hr * (1000 * 60 * 60);
+    int64_t min = msec / (1000 * 60);
+    msec = msec - min * (1000 * 60);
+    int64_t sec = msec / 1000;
+    msec = msec - sec * 1000;

    char buf[32];
-    snprintf(buf, sizeof(buf), "%02d:%02d.%03d", (int) min, (int) sec, (int) msec);
+    snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);

    return std::string(buf);
 }
@ -2072,6 +2125,8 @@ bool log_mel_spectrogram(
 //

 struct whisper_context * whisper_init(const char * path_model) {
+    ggml_time_init();
+
    whisper_context * ctx = new whisper_context;

    const int64_t t_start_us = ggml_time_us();
@ -2090,6 +2145,9 @@ struct whisper_context * whisper_init(const char * path_model) {

 void whisper_free(struct whisper_context * ctx) {
    if (ctx) {
+        if (ctx->buf_model) {
+            delete ctx->buf_model;
+        }
        delete ctx;
    }
 }
@ -2152,11 +2210,11 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i
    return 0;
 }

-whisper_token whisper_sample_best(struct whisper_context * ctx, bool need_timestamp) {
+struct whisper_token_data whisper_sample_best(struct whisper_context * ctx) {
    const int64_t t_start_sample_us = ggml_time_us();

    // TODO: simplify
-    auto res = whisper_sample_best(ctx->vocab, ctx->probs.data() + (ctx->probs.size() - ctx->vocab.n_vocab), need_timestamp);
+    auto res = whisper_sample_best(ctx->vocab, ctx->probs.data() + (ctx->probs.size() - ctx->vocab.n_vocab));

    ctx->t_sample_us += ggml_time_us() - t_start_sample_us;

@ -2242,63 +2300,83 @@ whisper_token whisper_token_transcribe() {
 void whisper_print_timings(struct whisper_context * ctx) {
    const int64_t t_end_us = ggml_time_us();

-    printf("\n\n");
-    printf("%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
-    printf("%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
-    printf("%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
-    printf("%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
-    printf("%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
-    printf("%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
+    fprintf(stderr, "\n");
+    fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
+    fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
+    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
+    fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
 }

 ////////////////////////////////////////////////////////////////////////////

-struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy) {
+struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
    struct whisper_full_params result;

    switch (strategy) {
-        case WHISPER_DECODE_GREEDY:
+        case WHISPER_SAMPLING_GREEDY:
            {
-                result = (struct whisper_full_params) {
-                    .strategy  = WHISPER_DECODE_GREEDY,
-                    .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
-                    .offset_ms = 0,
+                result = {
+                    /*.strategy             =*/ WHISPER_SAMPLING_GREEDY,

-                    .translate            = false,
-                    .no_context           = false,
-                    .print_special_tokens = false,
-                    .print_progress       = true,
-                    .print_realtime       = false,
-                    .print_timestamps     = true,
+                    /*.n_threads            =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+                    /*.n_max_text_ctx       =*/ 16384,
+                    /*.offset_ms            =*/ 0,

-                    .language = "en",
+                    /*.translate            =*/ false,
+                    /*.no_context           =*/ false,
+                    /*.print_special_tokens =*/ false,
+                    /*.print_progress       =*/ true,
+                    /*.print_realtime       =*/ false,
+                    /*.print_timestamps     =*/ true,

-                    .greedy = {
-                        .n_past = 0,
+                    /*.language             =*/ "en",
+
+                    /*.greedy               =*/ {
+                        /*.n_past =*/ 0,
                    },
+
+                    /*.beam_search          =*/ {
+                        /*.n_past     =*/ -1,
+                        /*.beam_width =*/ -1,
+                        /*.n_best     =*/ -1,
+                    },
+
+                    /*.new_segment_callback =*/ nullptr,
+                    /*.new_segment_callback_user_data =*/ nullptr,
                };
            } break;
-        case WHISPER_DECODE_BEAM_SEARCH:
+        case WHISPER_SAMPLING_BEAM_SEARCH:
            {
-                result = (struct whisper_full_params) {
-                    .strategy  = WHISPER_DECODE_GREEDY,
-                    .n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
-                    .offset_ms = 0,
+                result = {
+                    /*.strategy             =*/ WHISPER_SAMPLING_BEAM_SEARCH,

-                    .translate            = false,
-                    .no_context           = false,
-                    .print_special_tokens = false,
-                    .print_progress       = true,
-                    .print_realtime       = false,
-                    .print_timestamps     = true,
+                    /*.n_threads            =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
+                    /*.n_max_text_ctx       =*/ 16384,
+                    /*.offset_ms            =*/ 0,

-                    .language = "en",
+                    /*.translate            =*/ false,
+                    /*.no_context           =*/ false,
+                    /*.print_special_tokens =*/ false,
+                    /*.print_progress       =*/ true,
+                    /*.print_realtime       =*/ false,
+                    /*.print_timestamps     =*/ true,

-                    .beam_search = {
-                        .n_past = 0,
-                        .beam_width = 10,
-                        .n_best = 5,
+                    /*.language             =*/ "en",
+
+                    /*.greedy               =*/ {
+                        /*.n_past =*/ -1,
                    },
+
+                    /*.beam_search          =*/ {
+                        /*.n_past     =*/ 0,
+                        /*.beam_width =*/ 10,
+                        /*.n_best     =*/ 5,
+                    },
+
+                    /*.new_segment_callback =*/ nullptr,
+                    /*.new_segment_callback_user_data =*/ nullptr,
                };
            } break;
    }
@ -2311,12 +2389,26 @@ int whisper_full(
        struct whisper_full_params params,
        const float * samples,
        int n_samples) {
+    // clear old results
+    auto & result_all = ctx->result_all;
+
+    result_all.clear();
+
    // compute log mel spectrogram
    if (whisper_pcm_to_mel(ctx, samples, n_samples, params.n_threads) != 0) {
        fprintf(stderr, "%s: failed to compute log mel spectrogram\n", __func__);
        return -1;
    }

+    const int seek_start = params.offset_ms/10;
+
+    // if length of spectrogram is less than 1s (100 samples), then return
+    // basically don't process anything that is less than 1s
+    // see issue #39: https://github.com/ggerganov/whisper.cpp/issues/39
+    if (whisper_n_len(ctx) < 100 + seek_start) {
+        return 0;
+    }
+
    // the accumulated text context so far
    auto & prompt_past = ctx->prompt_past;
    if (params.no_context) {
@ -2334,22 +2426,23 @@ int whisper_full(
        }
    }

-    auto & result_all = ctx->result_all;
-    auto & result_cur = ctx->result_cur;
-
-    result_all.clear();
-
    int progress_prev = 0;
    int progress_step = 5;

+    std::vector<whisper_token_data> tokens_cur;
+    tokens_cur.reserve(whisper_n_text_ctx(ctx));
+
+    std::vector<whisper_token> prompt;
+    prompt.reserve(whisper_n_text_ctx(ctx));
+
    // main loop
-    int seek = params.offset_ms/10;
+    int seek = seek_start;
    while (true) {
        int progress_cur = (100*seek)/whisper_n_len(ctx);
        while (progress_cur >= progress_prev + progress_step) {
            progress_prev += progress_step;
            if (params.print_progress) {
-                printf("%s: progress = %3d%%\n", __func__, progress_prev);
+                fprintf(stderr, "%s: progress = %3d%%\n", __func__, progress_prev);
            }
        }

@ -2363,13 +2456,12 @@ int whisper_full(
            return 7;
        }

-        std::vector<whisper_token> prompt;
-
        int n_past = 0;
+        prompt.clear();

        // if we have already generated some text, use it as a prompt to condition the next generation
        if (prompt_past.size() > 0) {
-            int n_take = std::min(whisper_n_text_ctx(ctx)/2, int(prompt_past.size()));
+            int n_take = std::min(std::min(params.n_max_text_ctx, whisper_n_text_ctx(ctx)/2), int(prompt_past.size()));

            prompt = { whisper_token_prev(ctx) };
            prompt.insert(prompt.begin() + 1, prompt_past.end() - n_take, prompt_past.end());
@ -2386,13 +2478,13 @@ int whisper_full(
        // print the prompt
        //printf("\n\n");
        //for (int i = 0; i < prompt.size(); i++) {
-        //    printf("%s: prompt[%d] = %s\n", __func__, i, vocab.id_to_token[prompt[i]].c_str());
+        //    printf("%s: prompt[%d] = %s\n", __func__, i, ctx->vocab.id_to_token[prompt[i]].c_str());
        //}
        //printf("\n\n");

        // the accumulated transcription in the current interation
        int result_len = 0;
-        result_cur.clear();
+        tokens_cur.clear();

        for (int i = 0; i < whisper_n_text_ctx(ctx)/2 - 4; ++i) {
            if (whisper_decode(ctx, prompt.data(), prompt.size(), n_past, params.n_threads) != 0) {
@ -2411,33 +2503,45 @@ int whisper_full(
            // feel free to experiment!
            //
            {
-                whisper_token id  = 0;
-                whisper_token tid = whisper_token_beg(ctx);
+                auto token = whisper_sample_best(ctx);

-                id = whisper_sample_best(ctx, result_len == 0);
-                if (i > 0) {
-                    tid = whisper_sample_timestamp(ctx);
+                if (i == 0) {
+                    token.tid = whisper_token_beg(ctx);
                }

-                // update sliding window
-                if (id > whisper_token_beg(ctx)) {
-                    seek_delta = 2*(id - whisper_token_beg(ctx));
+                // timestamp token - update sliding window
+                if (token.id > whisper_token_beg(ctx)) {
+                    seek_delta = 2*(token.id - whisper_token_beg(ctx));
                    result_len = i + 1;
                }

                // add it to the context
-                prompt.push_back(id);
-                result_cur.push_back({ seek + 2*(tid - whisper_token_beg(ctx)), id });
+                prompt.push_back(token.id);
+                tokens_cur.push_back(token);

-                //printf("%s: %s\n", __func__, ctx->vocab.id_to_token[id].c_str());
+                //{
+                //    const auto tt = token.pt > 0.10 ? ctx->vocab.id_to_token[token.tid] : "[?]";
+                //    printf("%s: %10s %6.3f '%s'\n", __func__, tt.c_str(), token.pt, ctx->vocab.id_to_token[token.id].c_str());
+                //}

                // end of text token
-                if (id == whisper_token_eot(ctx)) {
+                if (token.id == whisper_token_eot(ctx)) {
                    if (result_len == 0) {
-                        result_len = i + 1;
+                        if (seek + seek_delta + 100 >= whisper_n_len(ctx)) {
+                            result_len = i + 1;
+                        } else {
+                            // TODO: figure out how to resolve this
+                            fprintf(stderr, "\n%s: failed to generate timestamp token - this should not happen\n\n", __func__);
+                        }
                    }
                    break;
                }
+
+                // TESTS: if no tensors are loaded, it means we are running tests
+                if (ctx->model.n_loaded == 0) {
+                    seek_delta = 100*WHISPER_CHUNK_SIZE;
+                    break;
+                }
            }

            if (done) {
@ -2445,25 +2549,30 @@ int whisper_full(
            }
        }

-        result_cur.resize(result_len);
+        tokens_cur.resize(result_len);

-        for (const auto & r : result_cur) {
+        for (const auto & r : tokens_cur) {
            prompt_past.push_back(r.id);
        }

        // store the text from this iteration
-        if (result_cur.size() > 0) {
-            auto t0 = result_cur.front().t;
+        if (tokens_cur.size() > 0) {
+            int  i0 = 0;
+            auto t0 = seek + 2*(tokens_cur.front().tid - whisper_token_beg(ctx));

            std::string text = "";

-            for (int i = 0; i < (int) result_cur.size(); i++) {
-                if (params.print_special_tokens == false && result_cur[i].id >= whisper_token_eot(ctx)) {
+            for (int i = 0; i < (int) tokens_cur.size(); i++) {
+                //printf("%s: %18s %6.3f %18s %6.3f\n", __func__,
+                //        ctx->vocab.id_to_token[tokens_cur[i].id].c_str(), tokens_cur[i].p,
+                //        ctx->vocab.id_to_token[tokens_cur[i].tid].c_str(), tokens_cur[i].pt);
+
+                if (params.print_special_tokens == false && tokens_cur[i].id >= whisper_token_eot(ctx)) {
                } else {
-                    text += whisper_token_to_str(ctx, result_cur[i].id);
+                    text += whisper_token_to_str(ctx, tokens_cur[i].id);
                }
-                if (result_cur[i].id > whisper_token_beg(ctx)) {
-                    const auto t1 = result_cur[i].t;
+                if (tokens_cur[i].id > whisper_token_beg(ctx)) {
+                    const auto t1 = seek + 2*(tokens_cur[i].tid - whisper_token_beg(ctx));
                    if (!text.empty()) {
                        if (params.print_realtime) {
                            if (params.print_timestamps) {
@ -2474,14 +2583,21 @@ int whisper_full(
                            }
                        }

-                        result_all.push_back({ t0, t1, text });
+                        result_all.push_back({ t0, t1, text, {} });
+                        for (int j = i0; j <= i; j++) {
+                            result_all.back().tokens.push_back(tokens_cur[j]);
+                        }
+                        if (params.new_segment_callback) {
+                            params.new_segment_callback(ctx, params.new_segment_callback_user_data);
+                        }
                    }
                    text = "";
-                    while (i < (int) result_cur.size() && result_cur[i].id > whisper_token_beg(ctx)) {
+                    while (i < (int) tokens_cur.size() && tokens_cur[i].id > whisper_token_beg(ctx)) {
                        i++;
                    }
                    i--;
-                    t0 = result_cur[i].t;
+                    t0 = t1;
+                    i0 = i + 1;
                }
            }

@ -2497,7 +2613,13 @@ int whisper_full(
                    }
                }

-                result_all.push_back({ t0, t1, text });
+                result_all.push_back({ t0, t1, text, {} });
+                for (int j = i0; j < (int) tokens_cur.size(); j++) {
+                    result_all.back().tokens.push_back(tokens_cur[j]);
+                }
+                if (params.new_segment_callback) {
+                    params.new_segment_callback(ctx, params.new_segment_callback_user_data);
+                }
            }
        }

@ -2507,6 +2629,156 @@ int whisper_full(
    return 0;
 }

+int whisper_full_parallel(
+        struct whisper_context * ctx,
+        struct whisper_full_params params,
+        const float * samples,
+        int n_samples,
+        const int n_processors) {
+    // Splits the audio after params.offset_ms into n_processors chunks, runs whisper_full() on them concurrently
+    // (first chunk on the calling thread, the rest on worker threads) and appends the workers' segments to
+    // ctx->result_all. Returns the whisper_full() result of the first chunk, or -1 if ggml_init() fails for a worker.
+    if (n_processors <= 1) { // nothing to split - also keeps the "n_processors - 1" sizes below non-negative
+        return whisper_full(ctx, params, samples, n_samples);
+    }
+
+    int ret = 0;
+
+    // prepare separate contexts for each thread
+    std::vector<struct whisper_context> ctxs(n_processors - 1);
+
+    for (int i = 0; i < n_processors - 1; ++i) {
+        ctxs[i] = *ctx;
+
+        auto & model = ctxs[i].model;
+
+        // create the ggml memory context
+        {
+            struct ggml_init_params mem_params = {};
+            mem_params.mem_size   = ctxs[i].buf_memory.size();
+            mem_params.mem_buffer = ctxs[i].buf_memory.data();
+
+            model.ctx_mem = ggml_init(mem_params);
+            if (!model.ctx_mem) {
+                fprintf(stderr, "%s: ggml_init() failed\n", __func__);
+                return -1;
+            }
+        }
+
+        // separate key + value memory for each processor
+        {
+            auto & ctx_mem = model.ctx_mem;
+
+            const auto & hparams = model.hparams;
+
+            const int n_text_state = hparams.n_text_state;
+            const int n_text_layer = hparams.n_text_layer;
+            const int n_text_ctx   = hparams.n_text_ctx;
+
+            // key/value memory for the self-attention layer
+            {
+                const int n_mem      = n_text_layer*n_text_ctx;
+                const int n_elements = n_text_state*n_mem;
+
+                model.memory_k = ggml_new_tensor_1d(ctx_mem, GGML_TYPE_F16, n_elements);
+                model.memory_v = ggml_new_tensor_1d(ctx_mem, GGML_TYPE_F16, n_elements);
+            }
+
+            // key/value memory for the cross-attention layer
+            {
+                const int n_audio_ctx   = hparams.n_audio_ctx;
+
+                const int n_mem      = n_text_layer*n_audio_ctx;
+                const int n_elements = n_text_state*n_mem;
+
+                model.memory_cross_k = ggml_new_tensor_1d(ctx_mem, GGML_TYPE_F16, n_elements);
+                model.memory_cross_v = ggml_new_tensor_1d(ctx_mem, GGML_TYPE_F16, n_elements);
+            }
+        }
+    }
+
+    // computed in 64 bits: the int product of sample rate and offset_ms can overflow for large offsets
+    const int offset_samples = (int) (((int64_t) WHISPER_SAMPLE_RATE*params.offset_ms)/1000);
+    const int n_samples_per_processor = (n_samples - offset_samples)/n_processors;
+
+    // the calling thread will process the first chunk
+    // while the other threads will process the remaining chunks
+    std::vector<std::thread> workers(n_processors - 1);
+    for (int i = 0; i < n_processors - 1; ++i) {
+        const int start_samples = offset_samples + (i + 1)*n_samples_per_processor;
+        const int n_samples_cur = (i == n_processors - 2) ? n_samples - start_samples : n_samples_per_processor;
+
+        auto params_cur = params;
+
+        params_cur.offset_ms = 0;
+        params_cur.print_progress = false;
+        params_cur.print_realtime = false;
+
+        params_cur.new_segment_callback = nullptr;
+        params_cur.new_segment_callback_user_data = nullptr;
+
+        workers[i] = std::thread(whisper_full, &ctxs[i], std::move(params_cur), samples + start_samples, n_samples_cur);
+    }
+
+    {
+        auto params_cur = params;
+
+        ret = whisper_full(ctx, std::move(params_cur), samples, offset_samples + n_samples_per_processor);
+    }
+
+    for (int i = 0; i < n_processors - 1; ++i) {
+        workers[i].join();
+    }
+
+    const int64_t offset_t = (int64_t) params.offset_ms/10.0;
+
+    // combine results into ctx->result_all
+    for (int i = 0; i < n_processors - 1; ++i) {
+        auto & results_i = ctxs[i].result_all;
+
+        // timestamp correction for this chunk: its start offset in the audio (64-bit math, 100*samples can overflow int)
+        const int64_t t_shift = (100*(int64_t) (i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t;
+
+        for (int j = 0; j < (int) results_i.size(); ++j) {
+            results_i[j].t0 += t_shift;
+            results_i[j].t1 += t_shift;
+
+            // make sure that segments are not overlapping
+            if (ctx->result_all.size() > 0) {
+                results_i[j].t0 = std::max(results_i[j].t0, ctx->result_all.back().t1);
+            }
+
+            ctx->result_all.push_back(std::move(results_i[j]));
+
+            // call the new_segment_callback for each segment
+            if (params.new_segment_callback) {
+                params.new_segment_callback(ctx, params.new_segment_callback_user_data);
+            }
+        }
+
+        ctx->t_mel_us    += ctxs[i].t_mel_us;
+        ctx->t_sample_us += ctxs[i].t_sample_us;
+        ctx->t_encode_us += ctxs[i].t_encode_us;
+        ctx->t_decode_us += ctxs[i].t_decode_us;
+    }
+
+    // average the timings
+    ctx->t_mel_us    /= n_processors;
+    ctx->t_sample_us /= n_processors;
+    ctx->t_encode_us /= n_processors;
+    ctx->t_decode_us /= n_processors;
+
+    // print information about the audio boundaries
+    fprintf(stderr, "\n");
+    fprintf(stderr, "%s: the audio has been split into %d chunks at the following times:\n", __func__, n_processors);
+    for (int i = 0; i < n_processors - 1; ++i) {
+        fprintf(stderr, "%s: split %d - %s\n", __func__, (i + 1), to_timestamp((100*(int64_t) (i + 1)*n_samples_per_processor)/WHISPER_SAMPLE_RATE + offset_t).c_str());
+    }
+    fprintf(stderr, "%s: the transcription quality may be degraded near these boundaries\n", __func__);
+
+    return ret;
+}
+
 int whisper_full_n_segments(struct whisper_context * ctx) {
    return ctx->result_all.size();
 }
@ -2522,3 +2794,37 @@ int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment)
 const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment) {
    return ctx->result_all[i_segment].text.c_str();
 }
+
+int whisper_full_n_tokens(struct whisper_context * ctx, int i_segment) {
+    return ctx->result_all[i_segment].tokens.size(); // number of tokens stored for segment i_segment; the index is not range-checked here
+}
+
+const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token) {
+    return ctx->vocab.id_to_token[ctx->result_all[i_segment].tokens[i_token].id].c_str(); // vocabulary text of the token's id; the pointer refers to a string held in ctx->vocab
+}
+
+whisper_token whisper_full_get_token_id(struct whisper_context * ctx, int i_segment, int i_token) {
+    return ctx->result_all[i_segment].tokens[i_token].id; // id of token i_token within segment i_segment; indices are not range-checked here
+}
+
+struct whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token) {
+    return ctx->result_all[i_segment].tokens[i_token]; // copy of the whole stored record (id, tid, p, pt, ptsum), returned by value
+}
+
+float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token) {
+    return ctx->result_all[i_segment].tokens[i_token].p; // the stored p field of the token (its probability as recorded in whisper_token_data)
+}
+
+const char * whisper_print_system_info() { // returns a "NAME = value | ..." summary built from the ggml_cpu_has_*() probes
+    static std::string s; // static so the returned pointer outlives the call; NOTE(review): one buffer shared by all callers
+
+    s  = ""; // rebuilt from scratch on every call
+    s += "AVX2 = "      + std::to_string(ggml_cpu_has_avx2())      + " | ";
+    s += "AVX512 = "    + std::to_string(ggml_cpu_has_avx512())    + " | ";
+    s += "NEON = "      + std::to_string(ggml_cpu_has_neon())      + " | ";
+    s += "FP16_VA = "   + std::to_string(ggml_cpu_has_fp16_va())   + " | ";
+    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
+    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | "; // the last entry also ends with the " | " separator
+
+    return s.c_str(); // points into s - the contents are overwritten by the next call
+}
--- a/whisper.h
+++ b/whisper.h
@ -2,6 +2,7 @@
 #define WHISPER_H

 #include <stdint.h>
+#include <stdbool.h>

 #ifdef WHISPER_SHARED
 #    ifdef _WIN32
@ -30,34 +31,92 @@ extern "C" {
    //
    // C interface
    //
-
-    // TODO: documentation will come soon
+    // The following interface is thread-safe as long as the same whisper_context is not used by multiple threads
+    // concurrently.
+    //
+    // Basic usage:
+    //
+    //     #include "whisper.h"
+    //
+    //     ...
+    //
+    //     struct whisper_context * ctx = whisper_init("/path/to/ggml-base.en.bin");
+    //
+    //     if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
+    //         fprintf(stderr, "failed to process audio\n");
+    //         return 7;
+    //     }
+    //
+    //     const int n_segments = whisper_full_n_segments(ctx);
+    //     for (int i = 0; i < n_segments; ++i) {
+    //         const char * text = whisper_full_get_segment_text(ctx, i);
+    //         printf("%s", text);
+    //     }
+    //
+    //     whisper_free(ctx);
+    //
+    //     ...
+    //
+    // This is a demonstration of the most straightforward usage of the library.
+    // "pcmf32" contains the RAW audio data in 32-bit floating point format.
+    //
+    // The interface also allows for more fine-grained control over the computation, but it requires a deeper
+    // understanding of how the model works.
+    //

    struct whisper_context;

    typedef int whisper_token;

+    struct whisper_token_data { // per-token sampling result returned by whisper_sample_best()
+        whisper_token id;  // id of the sampled token
+        whisper_token tid; // most probable timestamp token id (whisper_full() overrides it for the first token of a window)
+
+        float p;     // probability of the sampled token (id)
+        float pt;    // probability of tid relative to ptsum (largest timestamp probability / ptsum)
+        float ptsum; // sum of probabilities of all timestamp tokens
+    };
+
+    // Allocates all memory needed for the model and loads the model from the given file.
+    // Returns NULL on failure.
    WHISPER_API struct whisper_context * whisper_init(const char * path_model);
+
+    // Frees all memory allocated by the model.
    WHISPER_API void whisper_free(struct whisper_context * ctx);

+    // Convert RAW PCM audio to log mel spectrogram.
+    // The resulting spectrogram is stored inside the provided whisper context.
+    // Returns 0 on success
    WHISPER_API int whisper_pcm_to_mel(
            struct whisper_context * ctx,
            const float * samples,
            int n_samples,
            int n_threads);

+    // This can be used to set a custom log mel spectrogram inside the provided whisper context.
+    // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
    // n_mel must be 80
+    // Returns 0 on success
    WHISPER_API int whisper_set_mel(
            struct whisper_context * ctx,
            const float * data,
            int n_len,
            int n_mel);

+    // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
+    // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
+    // offset can be used to specify the offset of the first frame in the spectrogram.
+    // Returns 0 on success
    WHISPER_API int whisper_encode(
            struct whisper_context * ctx,
            int offset,
            int n_threads);

+    // Run the Whisper decoder to obtain the logits and probabilities for the next token.
+    // Make sure to call whisper_encode() first.
+    // tokens + n_tokens is the provided context for the decoder.
+    // n_past is the number of tokens to use from previous decoder calls.
+    // Returns 0 on success
    WHISPER_API int whisper_decode(
            struct whisper_context * ctx,
            const whisper_token * tokens,
@ -65,10 +124,15 @@ extern "C" {
            int n_past,
            int n_threads);

-    WHISPER_API whisper_token whisper_sample_best(struct whisper_context * ctx, bool need_timestamp);
+    // Token sampling methods.
+    // These are provided for convenience and can be used after each call to whisper_decode().
+    // You can also implement your own sampling method using the whisper_get_probs() function.
+    // whisper_sample_best() returns the token with the highest probability
+    // whisper_sample_timestamp() returns the most probable timestamp token
+    WHISPER_API struct whisper_token_data whisper_sample_best(struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_sample_timestamp(struct whisper_context * ctx);

-    // return the id of the specified language, returns -1 if not found
+    // Return the id of the specified language, returns -1 if not found
    WHISPER_API int whisper_lang_id(const char * lang);

    WHISPER_API int whisper_n_len          (struct whisper_context * ctx); // mel length
@ -76,10 +140,13 @@ extern "C" {
    WHISPER_API int whisper_n_text_ctx     (struct whisper_context * ctx);
    WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);

+    // The probabilities for the next token
    WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);

+    // Token Id -> String. Uses the vocabulary in the provided context
    WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);

+    // Special tokens
    WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_sot (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_prev(struct whisper_context * ctx);
@ -87,22 +154,31 @@ extern "C" {
    WHISPER_API whisper_token whisper_token_not (struct whisper_context * ctx);
    WHISPER_API whisper_token whisper_token_beg (struct whisper_context * ctx);

+    // Task tokens
    WHISPER_API whisper_token whisper_token_translate ();
    WHISPER_API whisper_token whisper_token_transcribe();

+    // Performance information
    WHISPER_API void whisper_print_timings(struct whisper_context * ctx);

    ////////////////////////////////////////////////////////////////////////////

-    enum whisper_decode_strategy {
-        WHISPER_DECODE_GREEDY,
-        WHISPER_DECODE_BEAM_SEARCH,
+    // Available sampling strategies
+    enum whisper_sampling_strategy {
+        WHISPER_SAMPLING_GREEDY,      // Always select the most probable token
+        WHISPER_SAMPLING_BEAM_SEARCH, // TODO: not implemented yet!
    };

+    // Text segment callback
+    // Called on every newly generated text segment
+    // Use the whisper_full_...() functions to obtain the text segments
+    typedef void (*whisper_new_segment_callback)(struct whisper_context * ctx, void * user_data);
+
    struct whisper_full_params {
-        enum whisper_decode_strategy strategy;
+        enum whisper_sampling_strategy strategy;

        int n_threads;
+        int n_max_text_ctx;
        int offset_ms;

        bool translate;
@ -114,35 +190,68 @@ extern "C" {

        const char * language;

-        union {
-            struct {
-                int n_past;
-            } greedy;
+        struct {
+            int n_past;
+        } greedy;

-            struct {
-                int n_past;
-                int beam_width;
-                int n_best;
-            } beam_search;
-        };
+        struct {
+            int n_past;
+            int beam_width;
+            int n_best;
+        } beam_search;
+
+        whisper_new_segment_callback new_segment_callback;
+        void * new_segment_callback_user_data;
    };

-    WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_decode_strategy strategy);
+    WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);

-    // full whisper run - encode + decode
+    // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
+    // Uses the specified decoding strategy to obtain the text.
    WHISPER_API int whisper_full(
            struct whisper_context * ctx,
            struct whisper_full_params params,
            const float * samples,
            int n_samples);

+    // Split the input audio in chunks and process each chunk separately using whisper_full()
+    // It seems this approach can offer some speedup in some cases.
+    // However, the transcription accuracy can be worse at the beginning and end of each chunk.
+    WHISPER_API int whisper_full_parallel(
+            struct whisper_context * ctx,
+            struct whisper_full_params params,
+            const float * samples,
+            int n_samples,
+            const int n_processors);
+
+    // Number of generated text segments.
+    // A segment can be a few words, a sentence, or even a paragraph.
    WHISPER_API int whisper_full_n_segments(struct whisper_context * ctx);

+    // Get the start and end time of the specified segment.
    WHISPER_API int64_t whisper_full_get_segment_t0(struct whisper_context * ctx, int i_segment);
    WHISPER_API int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment);

+    // Get the text of the specified segment.
    WHISPER_API const char * whisper_full_get_segment_text(struct whisper_context * ctx, int i_segment);

+    // Get number of tokens in the specified segment.
+    WHISPER_API int whisper_full_n_tokens(struct whisper_context * ctx, int i_segment);
+
+    // Get the token text of the specified token in the specified segment.
+    WHISPER_API const char * whisper_full_get_token_text(struct whisper_context * ctx, int i_segment, int i_token);
+    WHISPER_API whisper_token whisper_full_get_token_id (struct whisper_context * ctx, int i_segment, int i_token);
+
+    // Get token data for the specified token in the specified segment.
+    // This contains probabilities, timestamps, etc.
+    WHISPER_API struct whisper_token_data whisper_full_get_token_data(struct whisper_context * ctx, int i_segment, int i_token);
+
+    // Get the probability of the specified token in the specified segment.
+    WHISPER_API float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int i_token);
+
+    // Print system information
+    WHISPER_API const char * whisper_print_system_info();
+
 #ifdef __cplusplus
 }
 #endif
Author	SHA1	Message	Date
Georgi Gerganov	210a6fb83c	wip : some unsuccessful experiments using DP	2022-11-01 21:28:30 +02:00
Georgi Gerganov	57fb46f307	main : add option for word-level timestamps (very experimental)	2022-10-30 17:06:57 +02:00
Georgi Gerganov	5a9e4260a6	stream : add "--capture" option to select capture device (ref #10 )	2022-10-30 08:27:04 +02:00
Georgi Gerganov	eba62e0fa1	close #113 : fix struct whisper_token_data	2022-10-30 08:23:52 +02:00
Georgi Gerganov	69bdb6624a	minor : update whisper.js	2022-10-29 21:28:21 +03:00
Georgi Gerganov	12fb303d9d	whisper.wasm : update system info print	2022-10-29 20:32:41 +03:00
Georgi Gerganov	234f414652	ref #5 : update CMake for Windows build - __AVX2__ should already be defined due to /arch:AVX2 - _CRT_SECURE_NO_WARNINGS should be defined both for shared and static lib	2022-10-29 19:41:50 +03:00
Georgi Gerganov	014a119052	minor : fix multiple definitions of to_timestamp()	2022-10-29 19:37:19 +03:00
Georgi Gerganov	dec40be58f	parallel : print time of audio boundaries + fix timings	2022-10-29 19:37:19 +03:00
Georgi Gerganov	e5044f87d9	ggml : fix barrier	2022-10-29 19:37:19 +03:00
Georgi Gerganov	2827cbbbe8	main : merge parallel example in main	2022-10-29 19:37:19 +03:00
Georgi Gerganov	0b2dc3c82c	parallel : working	2022-10-29 19:37:19 +03:00
Georgi Gerganov	a272f10b2e	ggml : fix thread-safety of ggml_init and ggml_free	2022-10-29 19:37:19 +03:00
Georgi Gerganov	85d6e1e1e7	main : fix sampling time + add max_context parameter	2022-10-29 19:37:19 +03:00
Georgi Gerganov	72e9cdd6bf	parallel : adding tool for parallel transformer inference	2022-10-29 19:37:19 +03:00
Borislav Stanimirov	c565c569e7	Define WHISPER_BUILD so as to export symbols on Windows	2022-10-29 13:23:09 +03:00
Georgi Gerganov	2c281d190b	Update README.md	2022-10-28 22:09:40 +03:00
Georgi Gerganov	b89f8960ca	Update README.md	2022-10-28 21:40:52 +03:00
Georgi Gerganov	6f82320b05	Create README.md	2022-10-28 20:25:37 +03:00
Georgi Gerganov	2298310dd8	whisper.nvim : add helper script for the Neovim integration	2022-10-28 20:25:37 +03:00
Georgi Gerganov	8347a7bb6a	stream : few updates to make it compatible for Vim usage (#99 )	2022-10-27 22:10:50 +03:00
Georgi Gerganov	fbd513b813	Add OpenBLAS support Supported via CMake - just add: cmake .. -DWHISPER_SUPPORT_OPENBLAS=ON On Ubuntu, you have to install the library like this: apt install libopenblas-dev Unfortunately, I don't observe any benefit compared to the original AVX2 + FP16 implementation. Maybe I'm missing something	2022-10-27 18:31:49 +03:00
Georgi Gerganov	ebb01b9e33	Print system info at start of program	2022-10-27 17:22:19 +03:00
Mikhail Grigorev	9820234f13	Fixed compile definitions and link libraries for MSVC	2022-10-27 17:20:49 +03:00
Georgi Gerganov	a22e5741d8	Add helper script to benchmark all models Simply run: $ ./extra/bench-all.sh	2022-10-26 23:25:38 +03:00
Georgi Gerganov	2400660f3f	Print system info in main	2022-10-26 22:54:09 +03:00
Georgi Gerganov	058a27b2e5	Create README.md	2022-10-26 18:14:10 +03:00
andypayne	a09ce6e889	Changes to work by default on macOS - use curl when wget is not available, and use an alternative method to get the script path when realpath is not available.	2022-10-26 12:18:18 +03:00
Georgi Gerganov	a6c786d5dc	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	9ccafa8792	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	89d8ee3ee5	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	91dcf5f35b	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	113a4f06d8	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	47e78b7288	Update README.md	2022-10-25 20:53:48 +03:00
Georgi Gerganov	34bb3ab0cf	ggml : add system info functions	2022-10-25 20:53:48 +03:00
Georgi Gerganov	c6710efde2	refactoring : move main + stream in examples + other stuff	2022-10-25 20:53:48 +03:00
Georgi Gerganov	4c68f4cac0	main : fix SRT timestamp to use comma "," instead of dot "."	2022-10-24 18:28:23 +03:00
Georgi Gerganov	728676927f	Update README.md	2022-10-24 18:26:21 +03:00
Georgi Gerganov	d4f94ce427	Update README.md	2022-10-24 18:23:07 +03:00
Georgi Gerganov	a52ee08c1e	objc : polishing the sample application	2022-10-24 18:23:07 +03:00
Georgi Gerganov	b41f4a90eb	Create README.md	2022-10-24 18:23:07 +03:00
Georgi Gerganov	bb1ee266d2	ios : whisper.objc example	2022-10-24 18:23:07 +03:00
Georgi Gerganov	5f7e9fa2dc	ref #68 , #79 : fix segment time output	2022-10-23 13:30:30 +03:00
Georgi Gerganov	181b762de8	Update README.md	2022-10-23 12:47:51 +03:00
Georgi Gerganov	3d37ad5133	Merge pull request #78 from jokkebk/Specify-utf8-for-vocab.json Add encoding parameter to vocab.json opening to fix errors	2022-10-23 12:23:04 +03:00
Joonas Pihlajamaa	4e887dc350	Add encoding parameter to vocab.json opening to fix errors	2022-10-23 11:55:01 +03:00
Georgi Gerganov	4196856c7b	Update README.md	2022-10-23 10:24:36 +03:00
Georgi Gerganov	705198f063	Update README.md	2022-10-23 10:12:10 +03:00
Georgi Gerganov	3e69a6071d	Update README.md	2022-10-23 08:04:33 +03:00
Georgi Gerganov	f3dae90c31	Update README.md	2022-10-22 21:17:21 +03:00
Georgi Gerganov	6a81ed3e78	main : print colors + no timestamps	2022-10-22 21:17:21 +03:00
Georgi Gerganov	7affd309d3	whisper : add new-segment callback Can be used to process new segments as they are being generated. Sample usage in main, for printing the resulting segments during the inference.	2022-10-22 21:17:21 +03:00
Georgi Gerganov	8f95c25aed	main : refactor subtitle output	2022-10-22 21:17:21 +03:00
Georgi Gerganov	31ff0c6a1f	wip : experimental color coding of tokens based on probabilities	2022-10-22 21:17:21 +03:00
Georgi Gerganov	f4aa01c2f8	Update README.md	2022-10-22 19:30:35 +03:00
Georgi Gerganov	8c1d970088	Update README.md	2022-10-22 19:00:25 +03:00
Georgi Gerganov	6b45e37b2b	Update README.md and finalize the whisper.wasm example	2022-10-22 18:54:01 +03:00
Georgi Gerganov	491ecd7056	wip : polishing WASM example	2022-10-22 18:54:01 +03:00
Georgi Gerganov	db460b78ff	wip : WASM 128-bit SIMD support	2022-10-22 18:54:01 +03:00
Georgi Gerganov	e905c6f827	wip : initial WASM port Works but it is very slow because no SIMD is used. For example, jfk.wav is processed in ~23 seconds using "tiny.en" model	2022-10-22 18:54:01 +03:00
Georgi Gerganov	7d0dee7a8a	ref #68 : add option "-on" to specify segment index offset for SRT Also, change option "-o" to "-ot"	2022-10-21 18:14:53 +03:00
Georgi Gerganov	8d15a1c635	ci : fix and re-enable tests (2nd try)	2022-10-21 15:57:20 +03:00
Georgi Gerganov	692aa0784f	Revert "ci : fix and re-enable tests" This reverts commit `80aefc9514`.	2022-10-21 15:36:19 +03:00
Georgi Gerganov	80aefc9514	ci : fix and re-enable tests	2022-10-21 15:27:30 +03:00
Georgi Gerganov	5698b51718	Update README.md	2022-10-20 17:52:59 +03:00
Georgi Gerganov	3fe3898ebb	Update README.md	2022-10-20 17:43:56 +03:00
Georgi Gerganov	81c185576c	Update README.md	2022-10-20 17:39:31 +03:00
Georgi Gerganov	744bd47685	Merge pull request #67 from undefdev/defensive-apple-arm-make added handling for falsely as x86_64 announced ARM Macs	2022-10-19 09:29:43 +03:00
Georgi Gerganov	66b3169d39	ci : disable tests temporarily	2022-10-19 08:37:18 +03:00
undef	19a780afe5	added handling for falsely as x86_64 announced ARM Macs	2022-10-19 01:01:53 +02:00
Georgi Gerganov	1969ee4bc7	Update README.md	2022-10-18 22:20:35 +03:00
Georgi Gerganov	0e4fd43400	stream : print warning when processing is not fast enough	2022-10-18 20:15:06 +03:00
Georgi Gerganov	19817711b4	Add reference to FP16 repo	2022-10-18 19:48:34 +03:00
Georgi Gerganov	7eeef0358a	ref #52 : improve greedy sampling strategy Force timestamp token to be sampled if the probability sum over all timestamp tokens is above the probability of any other token	2022-10-18 19:48:15 +03:00
Georgi Gerganov	632660abb9	CMake support for Accelerate framework	2022-10-18 18:51:59 +03:00
Georgi Gerganov	e36aabe00d	Correct implementation of FP16 GELU Can toggle it via the GGML_GELU_FP16 macro	2022-10-18 18:42:08 +03:00
Georgi Gerganov	2d171ced32	close #32 : add comment about thread-safety of the C-style API	2022-10-18 18:27:57 +03:00
Georgi Gerganov	e30cf83158	ref #57 , #62 , #63 : remove unions in C-api + remove designated initializers We are not ready for designated initializers - many compilers do not support this C++ feature yet, so removing its non-trivial usages.	2022-10-18 18:17:24 +03:00
Georgi Gerganov	d6b84b2a23	ref #62 : fix build for some compilers For some reason, new version of GCC panic when the struct type is not specified explicitly	2022-10-18 10:57:03 +03:00
Georgi Gerganov	b4a3875b2c	Revert recent sampling change It does not actually help and seems to produce worse results on some of the samples	2022-10-18 08:26:16 +03:00
Georgi Gerganov	cf67bfffa0	Fix EOT token handling If it is the end of the audio, pick all sampled tokens. Otherwise, print error message.	2022-10-18 00:53:06 +03:00
Georgi Gerganov	91632eb6ea	Revert GELU change Seems it does not work on x86 for some reason	2022-10-18 00:45:08 +03:00
Georgi Gerganov	b81a81d543	Link Accelerate framework to "stream" example	2022-10-18 00:12:51 +03:00
Georgi Gerganov	d14823582d	Try to improve the sampling strategy a bit It still fails sometimes when it does not sample a timestamp token for the entire segment. We now print a message in such cases	2022-10-18 00:12:51 +03:00
Georgi Gerganov	20d8e7a309	Fix memory sizes	2022-10-18 00:12:51 +03:00
Georgi Gerganov	72d967bce4	Use Accelerate framework on Apple silicon Huge performance improvement in the Encode (almost x2 on MacBook M1 Pro) Also various extra optimizations: - Multi-threaded NORM operator - Faster GELU via F16 cast	2022-10-18 00:12:51 +03:00
Georgi Gerganov	130b5c02d6	Adding helper script for converting the PT models	2022-10-18 00:12:51 +03:00
Georgi Gerganov	0e858f080d	close #56 : build on FreeBSD Thanks to @abelbabel for the contribution	2022-10-17 18:10:16 +03:00
Georgi Gerganov	f24d940ca9	Merge pull request #58 from r0y6a3n0/master fix decode missing token issue	2022-10-17 18:06:02 +03:00
RyanChang	949f97a8b4	fix missing token issue	2022-10-17 21:19:45 +08:00
Georgi Gerganov	0ad085f5e8	ref #48 : clear results at the start of whisper_full This way, even if the input audio is empty, the previous results will be removed.	2022-10-15 09:55:28 +03:00
Georgi Gerganov	36945162fa	Update README.md (ref #50 )	2022-10-15 09:40:08 +03:00
Georgi Gerganov	b2f1600aa3	Update README.md	2022-10-12 21:25:42 +03:00
0/0	b799226973	check if spectrogram length is <100 before doing anything else fixes #39	2022-10-12 07:32:42 +03:00
Topping1	1348796a93	Update README.md (#43 ) * Update README.md Updated README.md to list new features, such as subtitle file support (VTT and SRT) * Update README.md Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>	2022-10-12 07:32:14 +03:00
Georgi Gerganov	40609cb49b	Merge pull request #42 from iboB/msvc-build ref #5 : MSVC build	2022-10-12 07:31:41 +03:00
Borislav Stanimirov	0b45d25151	Building with MSVC	2022-10-11 21:40:46 +03:00
Borislav Stanimirov	28252352d7	Visual Studio ignored dirs	2022-10-11 20:57:33 +03:00
Georgi Gerganov	8d94358251	Update README.md	2022-10-11 00:36:32 +03:00
Georgi Gerganov	ad6693fb64	Update README.md	2022-10-10 22:16:25 +03:00
Georgi Gerganov	01c9e96f64	stream : improve real-time transcription	2022-10-10 22:06:27 +03:00
Georgi Gerganov	63b6786767	Minor	2022-10-10 22:06:27 +03:00
Georgi Gerganov	f7ab81fe51	Update README.md	2022-10-10 22:05:37 +03:00
Georgi Gerganov	eac4f12777	Merge pull request #36 from Topping1/master Fix SRT timestamp format from mm:ss.sss to hh:mm:ss.sss	2022-10-10 09:13:31 +03:00
Georgi Gerganov	9d5723435f	ref #35 : add <stdbool.h> to whisper.h "bool" type is not implicitly defined for some compilers.	2022-10-10 08:11:18 +03:00
Georgi Gerganov	6e29d8453c	Merge pull request #34 from tazz4843/master Add static library make target	2022-10-10 08:05:57 +03:00
Topping1	50b5fe964c	Update main.cpp	2022-10-09 23:35:10 -05:00
0/0	64752acd27	add static library make target	2022-10-09 19:16:42 -06:00
Georgi Gerganov	7edaa7da4b	Merge pull request #31 from lkwq007/master Add MinGW support	2022-10-09 17:52:46 +03:00
lnyan	4bbb8a587b	Add MinGW support	2022-10-09 22:26:37 +08:00
Georgi Gerganov	4a6bf11db3	Minor	2022-10-08 18:13:26 +03:00
Georgi Gerganov	9bbca3110f	ref #9 : add API documentation in whisper.h	2022-10-08 18:09:56 +03:00
Georgi Gerganov	5e563ef635	Fix Makefile for MacBook Intel	2022-10-08 17:35:55 +03:00
Georgi Gerganov	2ca8cc77b2	ref #17 : print whisper logs to stderr Only the transcribed/translated text is printed to stdout. This way, one can redirect the result to a file.	2022-10-08 17:28:06 +03:00
Georgi Gerganov	8c7c018893	ref #17 : add options to output result to file Support for: - plain text - VTT - SRT	2022-10-08 17:22:22 +03:00